mclustAddons is a contributed R package that extends
the functionality available in the mclust package
(Scrucca et al. 2016, Scrucca et al. 2023).
In particular, the following methods are included:
density estimation for data with bounded support (Scrucca, 2019);
modal clustering using the modal EM algorithm for Gaussian mixtures (Scrucca, 2021);
entropy estimation via Gaussian mixture modeling (Robin & Scrucca, 2023).
This document gives a quick tour of mclustAddons (version 0.8). It was written in R Markdown, using the knitr package for production.
References on the methodologies implemented are provided at the end of this document.
# Simulate 200 draws from a chi-square distribution with 3 degrees of freedom.
x <- rchisq(200, 3)
# Evaluation grid of 1000 points; it starts at -2, i.e. below the lower bound
# (0) used in the fit. `length.out` is spelled out in full rather than relying
# on partial matching of `length`.
xgrid <- seq(-2, max(x), length.out = 1000)
f <- dchisq(xgrid, 3) # true density
# Estimate the density with the lower bound of the support fixed at 0.
dens <- densityMclustBounded(x, lbound = 0)
summary(dens, parameters = TRUE)
## ── Density estimation for bounded data via GMMs ───────────
##
## Boundaries: x
## lower 0
## upper Inf
##
## Model E (univariate, equal variance) model with 1 component
## on the transformation scale:
##
## log-likelihood n df BIC ICL
## -390.0517 200 3 -795.9983 -795.9983
##
## x
## Range-power transformation: 0.3715163
##
## Mixing probabilities:
## 1
## 1
##
## Means:
## 1
## 0.9191207
##
## Variances:
## 1
## 1.309037
# Plot the estimated density, then overlay the true chi-square density
# (computed on xgrid) as a dashed line (lty = 2) for comparison.
plot(dens, what = "density")
lines(xgrid, f, lty = 2)
# Simulate 200 draws from a Beta(5, 1.5) distribution.
x <- rbeta(200, 5, 1.5)
# Evaluation grid of 1000 points on [-0.1, 1.1], slightly wider than the
# [0, 1] bounds used in the fit. `length.out` is spelled out in full rather
# than relying on partial matching of `length`.
xgrid <- seq(-0.1, 1.1, length.out = 1000)
f <- dbeta(xgrid, 5, 1.5) # true density
# Estimate the density with both a lower (0) and an upper (1) bound.
dens <- densityMclustBounded(x, lbound = 0, ubound = 1)
summary(dens, parameters = TRUE)
## ── Density estimation for bounded data via GMMs ───────────
##
## Boundaries: x
## lower 0
## upper 1
##
## Model E (univariate, equal variance) model with 1 component
## on the transformation scale:
##
## log-likelihood n df BIC ICL
## 113.3782 200 3 210.8615 210.8615
##
## x
## Range-power transformation: -0.2033281
##
## Mixing probabilities:
## 1
## 1
##
## Means:
## 1
## 1.143687
##
## Variances:
## 1
## 0.5855875
# Plot the estimated density and overlay the true Beta(5, 1.5) density as a
# dashed line, mirroring the chi-square example; `xgrid` and `f` are computed
# just before the fit and were otherwise never drawn.
plot(dens, what = "density")
lines(xgrid, f, lty = 2)
# Bivariate example: x1 is chi-square(3); x2 combines x1 with an independent
# chi-square(5) draw, so the two columns are dependent and both non-negative.
x1 <- rchisq(200, df = 3)
x2 <- 0.5 * x1 + sqrt(1 - 0.5^2) * rchisq(200, df = 5)
x <- cbind(x1, x2)
# One lower bound of 0 per column; upper bounds are left at their defaults.
dens <- densityMclustBounded(x, lbound = rep(0, ncol(x)))
summary(dens, parameters = TRUE)
## ── Density estimation for bounded data via GMMs ───────────
##
## Boundaries: x1 x2
## lower 0 0
## upper Inf Inf
##
## Model EEE (ellipsoidal, equal volume, shape and orientation) model with 1 component
## on the transformation scale:
##
## log-likelihood n df BIC ICL
## -828.0809 200 7 -1693.25 -1693.25
##
## x1 x2
## Range-power transformation: 0.3341748 0.3161391
##
## Mixing probabilities:
## 1
## 1
##
## Means:
## [,1]
## [1,] 0.9293059
## [2,] 2.0913930
##
## Variances:
## [,,1]
## [,1] [,2]
## [1,] 1.2215718 0.4676762
## [2,] 0.4676762 0.7072992
# Request the "BIC" plot for the bivariate fit stored in dens.
plot(dens, what = "BIC")
The data consist of the lengths of 86 spells of psychiatric treatment undergone by control patients in a suicide study (Silverman, 1986).
# Load the suicide dataset and estimate its density with the lower bound of
# the support fixed at 0 (no upper bound is supplied).
data("suicide")
dens <- densityMclustBounded(suicide, lbound = 0)
# Print the fit together with the estimated parameters.
summary(dens, parameters = TRUE)
## ── Density estimation for bounded data via GMMs ───────────
##
## Boundaries: suicide
## lower 0
## upper Inf
##
## Model E (univariate, equal variance) model with 1 component
## on the transformation scale:
##
## log-likelihood n df BIC ICL
## -497.8204 86 3 -1009.004 -1009.004
##
## suicide
## Range-power transformation: 0.1929267
##
## Mixing probabilities:
## 1
## 1
##
## Means:
## 1
## 6.700073
##
## Variances:
## 1
## 7.788326
# Plot the estimated density, passing the raw data and 15 breaks so the
# observations are displayed with the curve, then add a rug of the data.
plot(
  dens,
  what = "density",
  lwd = 2,
  col = "dodgerblue2",
  data = suicide,
  breaks = 15,
  xlab = "Length of psychiatric treatment"
)
rug(suicide)
This dataset provides the proportion of white student enrollment in 56 school districts in Nassau County (Long Island, New York), for the 1992-1993 school year (Simonoff 1996, Sec. 3.2).
# Load the racial dataset and extract the PropWhite column as the variable
# to model.
data("racial")
x <- racial$PropWhite
# Estimate the density with the support bounded to [0, 1].
dens <- densityMclustBounded(x, lbound = 0, ubound = 1)
# Print the fit together with the estimated parameters.
summary(dens, parameters = TRUE)
## ── Density estimation for bounded data via GMMs ───────────
##
## Boundaries: x
## lower 0
## upper 1
##
## Model E (univariate, equal variance) model with 1 component
## on the transformation scale:
##
## log-likelihood n df BIC ICL
## 42.4598 56 3 72.84355 72.84355
##
## x
## Range-power transformation: 0.3869476
##
## Mixing probabilities:
## 1
## 1
##
## Means:
## 1
## 2.795429
##
## Variances:
## 1
## 5.253254
# Plot the estimated density, passing the raw data and 15 breaks so the
# observations are displayed with the curve, then add a rug of the data.
plot(
  dens,
  what = "density",
  lwd = 2,
  col = "dodgerblue2",
  data = x,
  breaks = 15,
  xlab = "Proportion of white student enrolled in schools"
)
rug(x)
# Run MclustMEM on the fitted mixture object GMM and summarise the result.
# NOTE(review): GMM is not defined in the visible part of this document; the
# printed summary reports "Mclust model = EEV,6", so it is presumably an
# Mclust fit created in an earlier chunk -- confirm.
MEM <- MclustMEM(GMM)
summary(MEM)
## ── Modal EM for GMMs ───────────────────
##
## Data dimensions = 600 x 2
## Mclust model = EEV,6
## MEM iterations = 17
## Number of modes = 4
##
## Modes:
## X1 X2
## mode1 8.06741504 -0.01772230
## mode2 8.07370160 4.98485099
## mode3 1.10622966 4.97230749
## mode4 -0.01639289 0.06464381
##
## Modal clustering:
## 1 2 3 4
## 118 122 228 132
# Plot the MclustMEM result using the default arguments of its plot method.
plot(MEM)