mc <- 1000
#set.seed(21218)
#set.seed(317)
set.seed(301)
Y <- foreach(i = 1:mc) %do% {
N <- 100
mu0 <- c(0,1,10,11)
sig20 <- c(0.1,0.1,0.1,0.1)
pi0 <- c(1/4,1/4,1/4,1/4)
#Z <- apply(rmultinom(N, 1, c(1/4,1/4,1/4,1/4)), 1, sum)
Z <- rep(25,4)
lab <- Reduce(c, mapply(function(x,y) rep(x, each = y),1:4, Z))
tmp <- list()
for(i in 1:length(Z)){
tmp[[i]] <-
rnorm(Z[i], mean = mu0[i], sd = sqrt(sig20[i]))
}
X <- Reduce(c, tmp)
XZ <- data.frame(cbind(X,lab))
list(XZ = XZ, N=N, mu0=mu0, sig20=sig20, pi0=pi0, lab=lab)
}
For each of 1000 Monte Carlo iterates we sample 100 points from a mixture of four Gaussians with means 0, 1, 10, 11 and equal variances of 0.1.
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 0.07 0.26 0.22 0.29 0.50
## [1] 0.06855749 0.37136251
mckhat <- sapply(mcU, '[[', 6)
#hist(mckhat, main = "Khat from MClust")
kh <- sapply(h, function(x) max(unique(x$dat$labels$col)))
kh[which(is.na(kh))] <- 1
khat <- kh
KHAT <- data.frame(khat = c(khat, mckhat), cla = rep(c("HGMM", "MCkhat"),each=1e3))
str(KHAT)
## 'data.frame': 2000 obs. of 2 variables:
## $ khat: num 3 2 3 3 3 3 3 4 4 3 ...
## $ cla : Factor w/ 2 levels "HGMM","MCkhat": 1 1 1 1 1 1 1 1 1 1 ...
gg1 <- ggplot(dat = KHAT, aes(x = khat, fill = as.factor(cla))) +
geom_histogram(position = "dodge", bins = 10) +
ggtitle("Frequency plot of Khats")
print(gg1)
hist(khat - mckhat, main = "HGMM_khat - Mclust_khat")