Homepage
The formatted source code for this file is here.
And a raw version here.
Previous work by Youngser Park can be found here.
From email correspondence with Kristina Micheva we have the following definitions of the given markers and their corresponding class colors used throughout this document. The abbreviations are presented as they were given.
On Feb 8, 2016, at 2:00 PM, Kristina Micheva kmicheva@stanford.edu wrote:
Notes from correspondence : - Synap
is listed twice, this is not a mistake as it was used twice with different antibodies – I was not told which. - VGlut1
is also listed twice. It was measured at two different times using the same antibody. - NOS
is listed in two different categories and the analysis so far does not allow that. It will be considered as Excitatory postsynaptic from here on.
We will be using the data scaled to \([0,1000]\) for this exploration.
fs
: The feature vector scaled between \([0,1000]\).Level 0 refers to the un-clustered data, that is all of the data lives in one cluster.
dat <- fs
dat2 <- fs2
## Formatting data for heatmap by taking the column means.
## heatmap.2 requires at least two rows and columns.
aggp <- apply(dat, 2, mean)
aggp <- t(cbind(aggp, aggp))[, ford]
The following is a heatmap showing the averages of the marker values over the entire dataset. The reader should notice that the markers believed to be excitatory have higher means than do markersthe difference between groups of markers as compared with their believed classes.
heatmap.2(as.matrix(aggp),dendrogram='none',Colv=NA,trace="none",
col=mycol,colCol=ccol[ford],cexRow=0.8, keysize=1.25,symkey=FALSE,
symbreaks=FALSE,scale="none", srtCol=90,main="Heatmap of `fs` data.",
labRow = "")
set.seed(1024)
s2 <- sample(dim(dat)[1], 1e4)
ggJdat <- data.table(cbind(stack(dat[s2]),L[s2]))
ggJdat$ind <- factor(ggJdat$ind, ordered=TRUE, levels=names(dat)[ford])
ggJ0 <-
ggplot(data = ggJdat, aes(x = ind, y = values)) +
geom_point(alpha=0.25) +
geom_jitter(width = 1) +
geom_boxplot(alpha =0.35, outlier.color = 'NA') +
theme(axis.title.x = element_blank()) +
theme(axis.text.x = element_text(color = ccol[ford],
angle=45,
vjust = 0.5))
print(ggJ0)
The above scatter plot is a random sample of the data points.
cmatfs <- cor(fs)[ford, ford]
cmatfs2 <- cor(fs2)[ford, ford]
corrplot(cmatfs,method="color",tl.col=ccol[ford], tl.cex=1)
pcaL0 <- prcomp(cmatfs, center=TRUE, scale=TRUE)
pcaL02 <- prcomp(cmatfs2, center=TRUE, scale=TRUE)
pca0 <- data.frame(pcaL0$x)
pca02 <- data.frame(pcaL02$x)
rgl::plot3d(pca0[,1],pca0[,2],pca0[,3],type='s',col=ccol3[ford], size=1,
xlab = "PC1", ylab = "PC2", zlab = "PC3")
rgl::rgl.texts(pca0[,1],pca0[,2],pca0[,3],abbreviate(rownames(pca0)), col=ccol3[ford], adj=c(0,2))
title3d(main = "Lv 0")
subid <- currentSubscene3d()
rglwidget(elementId="rgl-pca0",width=720,height=720)
#rgl::plot3d(pca02[,1],pca02[,2],pca02[,3],type='s',col=ccol3[ford], size=1,
# xlab = "PC1", ylab = "PC2", zlab = "PC3")
#abclines3d(0, 0, 0, a = diag(3), col = "gray")
#rgl::rgl.texts(pca02[,1],pca02[,2],pca02[,3],abbreviate(rownames(pca02)), col=ccol3[ford], adj=c(0,2))
Using LDA with re-substitution to create a voronoi diagram for Lv 0.
tr <- factor(exType, ordered = FALSE)
lda.fit0 <- lda(tr ~ ., data = pca0[, 1:10])
lda.pred0 <- predict(lda.fit0)
titlesvor <- paste("LDA decision boundaries for", paste0("F", 0:5))
voronoidf <- data.frame(lda.fit0$means)
#voronoidf <- data.frame(x=lda.fit$means[,1],y=lda.fit$means[,2])
#This creates the voronoi line segments
plot(pca0[,1:2], col=ccol3[ford], pch=20, cex=1.5, xlim =
c(min(pca0[,1:2]) -3, 5))
text(pca0[,1:2], labels=rownames(pca0),
pos=ifelse(exType %in% c('ex', 'in'), 2, 4),
col=ccol3[ford], cex=1.2)
deldir(x = voronoidf[,1],y = voronoidf[,2],
rw = c(-15,15,-15,15),
plotit=TRUE, add=TRUE, wl='te')
text(voronoidf[,1:2], labels=rownames(voronoidf), cex=1.25, pos=2)
Staring from the correlation matrix cmatfs
we compute the class means; \(v_1 = column\_mean(\)Excitatory\()\), \(v_2 = column\_mean(\)Inhibitory\()\), \(v_3 = column\_mean(\)Other\()\).
We then compute \(r_1 = v_1 - v_2\) and \(r_2 = v_1 - v_3\).
We then multiply \([cmatfs] \cdot [r_1 |\, r_2]\). The transformed points are then plotted below.
tmp <- cmatfs; diag(tmp) <- 0
exMat <- tmp[exType == 'ex',] -> g1
inMat <- tmp[exType == 'in',] -> g2
otMat <- tmp[exType == 'other',] -> g3
mEx <- apply(exMat, 2, mean) -> v1
mIn <- apply(inMat, 2, mean) -> v2
mOt <- apply(otMat, 2, mean) -> v3
mXI <- mEx - mIn
mXO <- mEx - mOt
rotM <- data.frame(XI = mXI, XO = mXO) -> r1r2
colnames(r1r2) <- c("r1", "r2")
mdm2 <- data.frame(cmatfs %*% as.matrix(rotM))
mdm1 <- data.frame(XI = mdm2[,1], row.names=rownames(mdm2))
newFord <- order(mdm2$XI)
heatmap.2(as.matrix(g1[, newFord]),dendrogram='none',
Colv=NA,trace="none",
col=mycol,colCol=ccol3[ford][newFord],
cexRow=0.8, keysize=1.25,
symkey=FALSE,symbreaks=FALSE,
scale="none", srtCol=90,
main="Excitatory")
heatmap.2(as.matrix(g2[, newFord]),dendrogram='none',
Colv=NA,trace="none",
col=mycol,colCol=ccol3[ford][newFord],
cexRow=0.8, keysize=1.25,
symkey=FALSE,symbreaks=FALSE,
scale="none", srtCol=90,
main="Inhibatory")
heatmap.2(as.matrix(g3[, newFord]),dendrogram='none',
Colv=NA,trace="none",
col=mycol,colCol=ccol3[ford][newFord],
cexRow=0.8, keysize=1.25,
symkey=FALSE,symbreaks=FALSE,
scale="none", srtCol=90,
main="Other")
rm(tmp)
tmp <- t(cbind(v1, v1))[, newFord]
heatmap.2(as.matrix(tmp),dendrogram='none',
Colv=NA,trace="none", key= FALSE,
col=mycol,colCol=ccol3[ford][newFord],
cexRow=0.8, keysize=1.25,
symkey=FALSE,symbreaks=FALSE,
scale="none", srtCol=90, labRow='',
main="v1 = Mean Excitatory")
tmp <- t(cbind(v2, v2))[, newFord]
heatmap.2(as.matrix(tmp),dendrogram='none',
Colv=NA,trace="none", key = FALSE,
col=mycol,colCol=ccol3[ford][newFord],
cexRow=0.8, keysize=1.25,
symkey=FALSE,symbreaks=FALSE,
scale="none", srtCol=90, labRow='',
main="v2 = Mean Inhibitory")
tmp <- t(cbind(v3, v3))[, newFord]
heatmap.2(as.matrix(tmp),dendrogram='none',
Colv=NA,trace="none", key = FALSE,
col=mycol,colCol=ccol3[ford][newFord],
cexRow=0.8, keysize=1.25,
symkey=FALSE,symbreaks=FALSE,
scale="none", srtCol=90, labRow='',
main="v3 = Mean Other")
rm(tmp)
heatmap.2(as.matrix(t(r1r2)[, newFord]),dendrogram='none',
Colv=NA,trace="none",
col=mycol,colCol=ccol3[ford][newFord],
cexRow=0.8, keysize=1.25,
symkey=FALSE,symbreaks=FALSE,
scale="none", srtCol=90,
main="Mean difference vectors")
plot(mdm2, col = ccol3[ford], pch = 20, cex=1)
abline(h = 0, v = 0)
text(mdm2, labels=rownames(mdm2), col=ccol3[ford], cex = 0.75, pos=1)
lm.fit <- lm(mdm2[,2] ~ mdm2[,1])
rL <- list(a = lm.fit$coefficients[1],
b = lm.fit$coefficients[2])
L1 <- function(x){ rL$a + rL$b * x }
summary(lm.fit)
abline(rL$a,rL$b, col = 'darkorange', lty=2)
text(-0.1,L1(0.1) , labels=expression(L[1]), col='darkorange', cex = 1)
legend(list(x= -1.5,y=1.6), legend = c('regression line'), col = 'darkorange', lty=2)
title("Mean-difference projections of markers")
#
# Call:
# lm(formula = mdm2[, 2] ~ mdm2[, 1])
#
# Residuals:
# Min 1Q Median 3Q Max
# -0.23453 -0.09830 -0.00353 0.06728 0.23253
#
# Coefficients:
# Estimate Std. Error t value Pr(>|t|)
# (Intercept) 0.10363 0.02754 3.763 0.00107 **
# mdm2[, 1] 0.76298 0.03786 20.153 1.14e-15 ***
# ---
# Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#
# Residual standard error: 0.127 on 22 degrees of freedom
# Multiple R-squared: 0.9486, Adjusted R-squared: 0.9463
# F-statistic: 406.2 on 1 and 22 DF, p-value: 1.136e-15
The following is the projection onto \(r_1\).
set.seed(2^8)
print(ggplot(mdm1, aes(x = XI, y = 0, label=rownames(mdm1))) +
geom_point(color = ccol3[ford]) +
geom_text(color = ccol3[ford], check_overlap=FALSE,
position='jitter'))
We will now project the individual synapse points onto the
line \(L_1\) by first projecting into the mean-difference space then projecting onto \(L_1\) followed by a rotation into \(\mathbb{R}\).
mfs <- as.matrix(fs)
r <- as.matrix(mdm1)
projR1 <- mfs %*% r
plot(density(projR1))
if(FALSE){
require(ggvis)
data.frame(projR1) %>% ggvis(x = ~XI) %>%
layer_densities(
adjust = input_slider(.1, 2, value = 1, step = .1, label = "Bandwidth adjustment"),
kernel = input_select(
c("Gaussian" = "gaussian",
"Epanechnikov" = "epanechnikov",
"Rectangular" = "rectangular",
"Triangular" = "triangular",
"Biweight" = "biweight",
"Cosine" = "cosine",
"Optcosine" = "optcosine"),
label = "Kernel")
)
}
#bic on mdm1
n <- nrow(mdm1)
d <- ncol(mdm1)
G <- 3
emDat <- cbind(mdm1, type=as.numeric(exType))
emEst <- me(modelName = 'V', data=as.matrix(emDat[, -2]), unmap(emDat[,2]))
#names(emEst)
#args(bic)
#do.call('bic', emEst)
B <- foreach(i = 1:25, .combine='c') %do% {
bic(modelName="V", loglik=emEst$loglik, n=n, d=d, G=i)
}
plot(B, type = 'b')
svdfs <- svd(as.matrix(fs)[,newFord])
rightSV <- data.frame(t(svdfs$v[,1:2]))
colnames(rightSV) <- rownames(mdm2[order(mdm2$XI),])
heatmap.2(as.matrix(rightSV),dendrogram='row',Colv=NA,trace="none", col=mycol,colCol=ccol3[ford][newFord],cexRow=0.8, keysize=1.25,symkey=FALSE,symbreaks=FALSE,scale="none", srtCol=90,main="r. singular vectors of 'fs' data")
We run a Hierachical K-means++ for \(K=2\) on the fs
data with 4 levels.
set.seed(2^13)
L <- bhkmpp(dat,blevels=4)
## Formatting data for heatmap
aggp <- aggregate(dat,by=list(lab=L[[1]]),FUN=mean)
aggp <- as.matrix(aggp[,-1])[, ford]
rownames(aggp) <- clusterFraction(L[[1]])
The following are heatmaps generated from clustering via K-means++ (at level 1)
heatmap.2(as.matrix(aggp),dendrogram='row',Colv=NA,trace="none", col=mycol,colCol=ccol[ford],cexRow=0.8, keysize=1.25,symkey=FALSE,symbreaks=FALSE,scale="none", srtCol=90,main="Heatmap of `fs` data.")
Percentage of data within cluster is presented on the right side of the heatmap.
ggCol <- brewer.pal(4,"Set1")[order(table(L[[1]]))]
cf1 <- data.frame(cf = clusterFraction(L[[1]]))
ggJ1 <-
ggplot(data = ggJdat, aes(x = ind, y = values,
color = as.factor(lv1))) +
scale_color_manual(values=ggCol, name="Cluster") +
geom_point(alpha=0.25, position=position_jitterdodge()) +
geom_boxplot(alpha =0.35, outlier.color = 'NA') +
annotate("text", x = levels(ggJdat$ind)[c(2,20)], y = 1.15*max(ggJdat$values),
label= cf1[1:2,]) +
theme(axis.title.x = element_blank()) +
theme(axis.text.x = element_text(color = ccol[ford],
angle=45,
vjust = 0.5))
print(ggJ1)
corkp1 <- cor(dat[L[[1]] == 1,])[ford, ford]
corkp2 <- cor(dat[L[[1]] == 2,])[ford, ford]
difCor12 <- (corkp1 - corkp2)
layout(matrix(c(1,2,3,3), 2, 2, byrow=TRUE))
corrplot(corkp1,method="color",tl.col=ccol[ford], tl.cex=0.8, mar=c(0,0,3,0))
title("Cluster 1")
corrplot(corkp2,method="color",tl.col=ccol[ford], tl.cex=0.8, mar=c(0,0,3,0))
title("Cluster 2")
corrplot(difCor12,is.corr=FALSE,method="color",
tl.col=ccol[ford], tl.cex=0.8,
mar=c(0,0,3,0),
col=colorRampPalette(c("#998ec3","white","darkorange"))(50))
title("Difference(1,2)")
Notice that the non-synaptic markers change very little between clusters. Also note that the correlations between (gad, VGAT, PV, Gephyr
) and VGlut1
at both times change significantly between clusters.
Next we compute PCA for the within cluster correlation matrices and embed in 3d.
pcaL <- lapply(list(corkp1, corkp2), prcomp, center=TRUE, scale=TRUE)
elB <- lapply(pcaL, function(x) {getElbows(x$sdev, plot=FALSE)})
pca <- pcaL[[1]]$x
rgl::plot3d(pca[,1],pca[,2],pca[,3],type='s',col=ccol3[ford], size=1,
xlab = "PC1", ylab = "PC2", zlab = "PC3")
rgl::rgl.texts(pca[,1],pca[,2],pca[,3],abbreviate(rownames(pca)), col=ccol3[ford], adj=c(0,2))
title3d(main = "Cluster 1: Lv 1")
subid <- currentSubscene3d()
rglwidget(elementId="rgl-pca1",width=720,height=720)
pca <- pcaL[[2]]$x
rgl::plot3d(pca[,1],pca[,2],pca[,3],type='s',col=ccol3[ford], size=1,
xlab = "PC1", ylab = "PC2", zlab = "PC3")
rgl::rgl.texts(pca[,1],pca[,2],pca[,3],abbreviate(rownames(pca)), col=ccol3[ford], adj=c(0,2))
title3d(main = "Cluster 2: Lv 1")
subid <- currentSubscene3d()
rglwidget(elementId="rgl-pca2",width=720,height=720)
lda.fit <-
lapply(list(pcaL[[1]]$x[,1:elB[[1]][2]],
pcaL[[2]]$x[,1:elB[[2]][2]]),
function(y) {
lda(tr ~ ., data = as.data.frame(y))
})
titlesvor <- paste("LDA decision boundaries for", paste0("C", 1:2))
voronoidf <- lapply(lapply(lda.fit, '[[', 3), data.frame)
#This creates the voronoi line segments
par(mfrow = c(1,2))
for(i in 1:length(pcaL)){
plot(pcaL[[i]]$x[,1:2], col=ccol3[ford], pch=20, cex=1.5)
title(titlesvor[i])
text(pcaL[[i]]$x[,1:2], labels=rownames(pcaL[[i]]$x),
pos=ifelse(pcaL[[i]]$x[,1]<max(pcaL[[i]]$x[,1] -0.5),4,2),
col=ccol3[ford], cex=1.2)
deldir(x = voronoidf[[i]][,1],y = voronoidf[[i]][,2], rw = c(-15,15,-15,15),
plotit=TRUE, add=TRUE, wl='te')
text(voronoidf[[i]], labels=rownames(voronoidf[[i]]), cex=1.5, pos=1)
}
We show a 3d scatter plot of the data according to spatial location colored according to cluster labels as determined by k-means++.
The reader should notice from this figure that there does not seem to be any correlation between the spatial information and cluster labels.
set.seed(2^12)
s1 <- sample(dim(loc)[1],5e4)
locs1 <- loc[s1,]
locs1$cluster <- L[[1]][s1]
plot3d(locs1$V1,locs1$V2,locs1$V3,
col=brewer.pal(4,"Set1")[order(table(L[[1]]))][locs1$cluster],
alpha=0.75,
xlab='x',
ylab='y',
zlab='z')
subid <- currentSubscene3d()
rglwidget(elementId="plot3dLocations", height=720, width=720)
## Formatting data for heatmap
aggp2 <- aggregate(dat,by=list(lab=L[[2]]),FUN=function(x){mean(x)})
aggp2 <- as.matrix(aggp2[,-1])[, ford]
rownames(aggp2) <- clusterFraction(L[[2]])
The following are heatmaps generated from clustering via K-means++
heatmap.2(as.matrix(aggp2),dendrogram='row',Colv=NA,trace="none", col=mycol,colCol=ccol[ford],cexRow=0.8, keysize=1.25,symkey=FALSE,symbreaks=FALSE,scale="none", srtCol=90,main="Heatmap of `fs` data.")
Percentage of data within cluster is presented on the right side of the heatmap.
ggCol <- brewer.pal(8,"Set1")[order(table(L[[2]]))]
cf2 <- data.frame(cf = clusterFraction(L[[2]]))
ggJ2 <-
ggplot(data = ggJdat, aes(x = ind, y = values,
color = as.factor(lv2))) +
scale_color_manual(values=ggCol, name="Cluster") +
geom_point(alpha=0.25, position=position_jitterdodge()) +
geom_boxplot(alpha =0.35, outlier.color = 'NA') +
annotate("text", x = levels(ggJdat$ind)[c(2,8,14,20)], y = 1.15*max(ggJdat$values),
label= cf2[1:4,]) +
theme(axis.title.x = element_blank()) +
theme(axis.text.x = element_text(color = ccol[ford],
angle=45,
vjust = 0.5))
print(ggJ2)
The fraction of data points within each cluster are given at the top of the plot window.
corLV2 <- lapply(c(1:4),function(x){cor(dat[L[[2]] == x,])[ford, ford]})
difCor1112 <- ((corLV2[[1]] - corLV2[[2]]))
difCor2122 <- ((corLV2[[3]] - corLV2[[4]]))
layout(matrix(c(1,2,3,3,4,5,6,6), 4, 2, byrow=TRUE))
corrplot(corLV2[[1]],method="color",tl.col=ccol[ford], tl.cex=0.8,
mar=c(0,0,3,0))
title("Cluster 1")
corrplot(corLV2[[2]],method="color",tl.col=ccol[ford], tl.cex=0.8,
mar=c(0,0,3,0))
title("Cluster 2")
corrplot(difCor1112, method="color", tl.col=ccol[ford],
tl.cex=0.8,
mar = c(0,0,3,0),
cl.lim = c(min(difCor1112,difCor2122),max(difCor1112,difCor2122)),
col=colorRampPalette(c("#998ec3",
"white",
"darkorange"))(100))
title("Difference(1,2)")
corrplot(corLV2[[3]],method="color",tl.col=ccol[ford], tl.cex=0.8,
mar=c(0,0,3,0))
title("Cluster 3")
corrplot(corLV2[[4]],method="color",tl.col=ccol[ford], tl.cex=0.8,
mar=c(0,0,3,0))
title("Cluster 4")
corrplot(difCor2122, method="color", tl.col=ccol[ford],
tl.cex=0.8,
mar=c(0,0,3,0),
cl.lim = c(min(difCor1112,difCor2122),max(difCor1112,difCor2122)),
col=colorRampPalette(c("#998ec3",
"white",
"darkorange"))(100))
title("Difference(3,4)")
pcaL <- lapply(corLV2, prcomp, center=TRUE, scale=TRUE)
elB <- lapply(pcaL, function(x) {getElbows(x$sdev, plot=FALSE)})
pcaLel2 <- mapply(function(x,y){ x$x[,1:y[2]] }, pcaL, elB)
lda.fit <-
lapply(pcaLel2,
function(y) {
lda(tr ~ ., data = as.data.frame(y))
})
titlesvor <- paste("LDA decision boundaries for", paste0("C", 1:4))
voronoidf <- lapply(lapply(lda.fit, '[[', 3), data.frame)
#This creates the voronoi line segments
par(mfrow = c(2,2))
for(i in 1:length(pcaL)){
plot(pcaL[[i]]$x[,1:2], col=ccol3[ford], pch=20, cex=1.5)
title(titlesvor[i])
text(pcaL[[i]]$x[,1:2], labels=rownames(pcaL[[i]]$x),
pos=ifelse(pcaL[[i]]$x[,1]<max(pcaL[[i]]$x[,1] -0.5),4,2),
col=ccol3[ford], cex=1.2)
deldir(x = voronoidf[[i]][,1],y = voronoidf[[i]][,2], rw = c(-15,15,-15,15),
plotit=TRUE, add=TRUE, wl='te')
text(voronoidf[[i]], labels=rownames(voronoidf[[i]]), cex=1.5, pos=1)
}
Using the location data and the results of K-means++ we show a 3d scatter plot colored according to cluster.
set.seed(2^12)
s1 <- sample(dim(loc)[1],5e4)
locs2 <- loc[s1,]
locs2$cluster <- L[[2]][s1]
YlOrBr <- c("#FFFFD4", "#FED98E", "#FE9929", "#D95F0E", "#993404")
col.pal <- colorRampPalette(YlOrBr)
plot3d(locs2$V1,locs2$V2,locs2$V3,
#col=colorpanel(8,"brown","blue")[order(table(L[[2]]))][locs2$cluster],
col=col.pal(8)[-seq(1,8,2)][order(table(L[[2]]))][locs2$cluster],
alpha=0.75,
xlab='x',
ylab='y',
zlab='z'
)
subid <- currentSubscene3d()
rglwidget(elementId="plot3dLocationsLV2", height=720, width=720)
## Formatting data for heatmap
aggp3 <- aggregate(dat,by=list(lab=L[[3]]),FUN=function(x){mean(x)})
aggp3 <- as.matrix(aggp3[,-1])[, ford]
rownames(aggp3) <- clusterFraction(L[[3]])
The following are heatmaps generated from clustering via K-means++
heatmap.2(as.matrix(aggp3),dendrogram='row',Colv=NA,trace="none", col=mycol,colCol=ccol[ford],cexRow=0.8, keysize=1.25,symkey=FALSE,symbreaks=FALSE,scale="none", srtCol=90,main="Heatmap of `fs` data.")
Percentage of data within cluster is presented on the right side of the heatmap.
ggCol <- brewer.pal(8,"Set1")[order(table(L[[3]]))]
cf3 <- data.frame(cf = clusterFraction(L[[3]]))
ggJ3 <-
ggplot(data = ggJdat, aes(x = ind, y = values,
color = as.factor(lv3))) +
scale_color_manual(values=ggCol, name="Cluster") +
geom_point(alpha=0.25, position=position_jitterdodge()) +
geom_boxplot(alpha =0.35, outlier.color = 'NA') +
annotate("text", x = levels(ggJdat$ind)[seq(2,22,length=8)], y = 1.05*max(ggJdat$values),
label= cf3[1:8,]) +
#geom_jitter(width=2) +
theme(axis.title.x = element_blank()) +
theme(axis.text.x = element_text(color = ccol[ford],
angle=45,
vjust = 0.5))
print(ggJ3)
corLV3 <- lapply(c(1:8),function(x){cor(dat[L[[3]] == x,])[ford, ford]})
difCor1 <- (corLV3[[1]] - corLV3[[2]])
difCor2 <- (corLV3[[3]] - corLV3[[4]])
difCor3 <- (corLV3[[5]] - corLV3[[6]])
difCor4 <- (corLV3[[7]] - corLV3[[8]])
M <- max(difCor1, difCor2, difCor3, difCor4)
m <- min(difCor1, difCor2, difCor3, difCor4)
layout(matrix(c(1, 2, 3, 3,
4, 5, 6, 6,
7, 8, 9, 9,
10, 11, 12, 12), 8,2, byrow=TRUE))
corrplot(corLV3[[1]],method="color",tl.col=ccol[ford], tl.cex=0.8,
mar=c(0,0,3,0))
title('Cluster 1')
corrplot(corLV3[[2]],method="color",tl.col=ccol[ford], tl.cex=0.8,
mar=c(0,0,3,0))
title('Cluster 2')
corrplot(difCor1,method="color",tl.col=ccol[ford], tl.cex=0.8,
cl.lim=c(m,M),
mar=c(0,0,3,0),
col=colorRampPalette(c("#998ec3",
"white",
"darkorange"))(50))
title('Difference(1,2)')
corrplot(corLV3[[3]],method="color",tl.col=ccol[ford], tl.cex=0.8,
mar=c(0,0,3,0))
title('Cluster 3')
corrplot(corLV3[[4]],method="color",tl.col=ccol[ford], tl.cex=0.8,
mar=c(0,0,3,0))
title('Cluster 4')
corrplot(difCor2,method="color",tl.col=ccol[ford], tl.cex=0.8,
cl.lim= c(m,M),
mar=c(0,0,3,0),
col=colorRampPalette(c("#998ec3",
"white",
"darkorange"))(50))
title('Difference(3,4)')
corrplot(corLV3[[5]],method="color",tl.col=ccol[ford], tl.cex=0.8,
mar=c(0,0,3,0))
title('Cluster 5')
corrplot(corLV3[[6]],method="color",tl.col=ccol[ford], tl.cex=0.8,
mar=c(0,0,3,0))
title('Cluster 6')
corrplot(difCor3,method="color",tl.col=ccol[ford], tl.cex=0.8,
cl.lim= c(m,M),
mar=c(0,0,3,0),
col=colorRampPalette(c("#998ec3",
"white",
"darkorange"))(50))
title('Difference(5,6)')
corrplot(corLV3[[7]],method="color",tl.col=ccol[ford], tl.cex=0.8,
mar=c(0,0,3,0))
title('Cluster 7')
corrplot(corLV3[[8]],method="color",tl.col=ccol[ford], tl.cex=0.8,
mar=c(0,0,3,0))
title('Cluster 8')
corrplot(difCor4,method="color",tl.col=ccol[ford], tl.cex=0.8,
cl.lim= c(m,M),
mar=c(0,0,3,0),
col=colorRampPalette(c("#998ec3",
"white",
"darkorange"))(50))
title('Difference(7,8)')
pcaL <- lapply(corLV3, prcomp, center=TRUE, scale=TRUE)
elB <- lapply(pcaL, function(x) {getElbows(x$sdev, plot=FALSE)})
pcaLel3 <- mapply(function(x,y){ x$x[,1:y[2]] }, pcaL, elB)
lda.fit <-
lapply(pcaLel3,
function(y) {
lda(tr ~ ., data = as.data.frame(y))
})
titlesvor <- paste("LDA decision boundaries for", paste0("C", 1:8))
voronoidf <- lapply(lapply(lda.fit, '[[', 3), data.frame)
#This creates the voronoi line segments
par(mfrow = c(4,2))
for(i in 1:length(pcaL)){
plot(pcaL[[i]]$x[,1:2], col=ccol3[ford], pch=20, cex=1.5)
title(titlesvor[i])
text(pcaL[[i]]$x[,1:2], labels=rownames(pcaL[[i]]$x),
pos=ifelse(pcaL[[i]]$x[,1]<max(pcaL[[i]]$x[,1] -0.5),4,2),
col=ccol3[ford], cex=1.2)
deldir(x = voronoidf[[i]][,1],y = voronoidf[[i]][,2], rw = c(-15,15,-15,15),
plotit=TRUE, add=TRUE, wl='te')
text(voronoidf[[i]], labels=rownames(voronoidf[[i]]), cex=1.5, pos=1)
}
Using the location data and the results of K-means++ we show a 3d scatter plot colored according to cluster.
set.seed(2^12)
s1 <- sample(dim(loc)[1],5e4)
locs3 <- loc[s1,]
locs3$cluster <- L[[3]][s1]
plot3d(locs3$V1,locs3$V2,locs3$V3,
col=col.pal(16)[-seq(1,8,2)][order(table(L[[3]]))][locs3$cluster],
alpha=0.65,
xlab='x',
ylab='y',
zlab='z'
)
subid <- currentSubscene3d()
rglwidget(elementId="plot3dLocationsLV3", height=720, width=720)