Homepage
The formatted source code for this file is here.
And a raw version here.
Previous work by Youngser Park can be found here.
We now have the following data sets:
featF1: The feature vector looking only at the integrated brightness features.fs: The feature vector scaled between \([0,1000]\).dat <- fs## Formatting data for heatmap
aggp <- apply(dat, 2, mean)
aggp <- t(cbind(aggp, aggp))[, ford]The following are heatmaps generated from clustering via K-means++ (at level 1)
heatmap.2(as.matrix(aggp),dendrogram='none',Colv=NA,trace="none",
col=mycol,colCol=ccol[ford],cexRow=0.8, keysize=1.25,symkey=FALSE,
symbreaks=FALSE,scale="none", srtCol=90,main="Heatmap of `fs F1` data.",
labRow = "") Percentage of data within cluster is presented on the right side of the heatmap.
set.seed(1024)
s2 <- sample(dim(dat)[1], 1e4)
ggJdat <- data.table(cbind(stack(dat[s2]),L[s2]))
ggJdat$ind <- factor(ggJdat$ind, ordered=TRUE, levels=names(dat)[ford])
ggJ0 <-
ggplot(data = ggJdat, aes(x = ind, y = values)) +
geom_point(alpha=0.75) +
geom_jitter(width = 1) +
geom_boxplot(alpha =0.35, outlier.color = 'NA') +
theme(axis.title.x = element_blank()) +
theme(axis.text.x = element_text(color = ccol[ford],
angle=45,
vjust = 0.5))print(ggJ0)The above scatter plot is a random sample of the data points.
cmat <- cor(fs)[ford, ford]
corrplot(cmat,method="color",tl.col=ccol[ford], tl.cex=1)We run a Hierachical K-means++ for \(K=2\) on the fs F1 data with 4 levels.
set.seed(2^13)
L <- bhkmpp(dat,blevels=4)## Formatting data for heatmap
aggp <- aggregate(dat,by=list(lab=L[[1]]),FUN=mean)
aggp <- as.matrix(aggp[,-1])[, ford]
rownames(aggp) <- clusterFraction(L[[1]])The following are heatmaps generated from clustering via K-means++ (at level 1)
heatmap.2(as.matrix(aggp),dendrogram='row',Colv=NA,trace="none", col=mycol,colCol=ccol[ford],cexRow=0.8, keysize=1.25,symkey=FALSE,symbreaks=FALSE,scale="none", srtCol=90,main="Heatmap of `fs F1` data.") Percentage of data within cluster is presented on the right side of the heatmap.
ggCol <- brewer.pal(4,"Set1")[order(table(L[[1]]))]
cf1 <- data.frame(cf = clusterFraction(L[[1]]))
ggJ1 <-
ggplot(data = ggJdat, aes(x = ind, y = values,
color = as.factor(lv1))) +
scale_color_manual(values=ggCol, name="Cluster") +
geom_point(alpha=0.25, position=position_jitterdodge()) +
geom_boxplot(alpha =0.35, outlier.color = 'NA') +
annotate("text", x = levels(ggJdat$ind)[c(2,20)], y = 1.15*max(ggJdat$values),
label= cf1[1:2,]) +
theme(axis.title.x = element_blank()) +
theme(axis.text.x = element_text(color = ccol[ford],
angle=45,
vjust = 0.5))print(ggJ1)corkp1 <- cor(dat[L[[1]] == 1,])[ford, ford]
corkp2 <- cor(dat[L[[1]] == 2,])[ford, ford]
difCor12 <- (corkp1 - corkp2)
layout(matrix(c(1,2,3,3), 2, 2, byrow=TRUE))
corrplot(corkp1,method="color",tl.col=ccol[ford], tl.cex=0.8, mar=c(0,0,3,0))
title("Cluster 1")
corrplot(corkp2,method="color",tl.col=ccol[ford], tl.cex=0.8, mar=c(0,0,3,0))
title("Cluster 2")
corrplot(difCor12,is.corr=FALSE,method="color",
tl.col=ccol[ford], tl.cex=0.8,
mar=c(0,0,3,0),
col=colorRampPalette(c("#998ec3","white","darkorange"))(50))
title("Difference(1,2)")Notice that the non-synaptic markers change very little between clusters. Also note that the correlations between (gad, VGAT, PV, Gephyr) and VGlut1 at both times change significantly between clusters.
Using the location data and the results of K-means++ we show a 3d scatter plot colored accoding to cluster.
set.seed(2^12)
s1 <- sample(dim(loc)[1],5e4)
locs1 <- loc[s1,]
locs1$cluster <- L[[1]][s1]
plot3d(locs1$V1,locs1$V2,locs1$V3,
col=brewer.pal(4,"Set1")[order(table(L[[1]]))][locs1$cluster],
alpha=0.75,
xlab='x',
ylab='y',
zlab='z')
subid <- currentSubscene3d()
rglwidget(elementId="plot3dLocations", height=720, width=720)## Formatting data for heatmap
aggp2 <- aggregate(dat,by=list(lab=L[[2]]),FUN=function(x){mean(x)})
aggp2 <- as.matrix(aggp2[,-1])[, ford]
rownames(aggp2) <- clusterFraction(L[[2]])The following are heatmaps generated from clustering via K-means++
heatmap.2(as.matrix(aggp2),dendrogram='row',Colv=NA,trace="none", col=mycol,colCol=ccol[ford],cexRow=0.8, keysize=1.25,symkey=FALSE,symbreaks=FALSE,scale="none", srtCol=90,main="Heatmap of `fs F1` data.") Percentage of data within cluster is presented on the right side of the heatmap.
ggCol <- brewer.pal(8,"Set1")[order(table(L[[2]]))]
cf2 <- data.frame(cf = clusterFraction(L[[2]]))
ggJ2 <-
ggplot(data = ggJdat, aes(x = ind, y = values,
color = as.factor(lv2))) +
scale_color_manual(values=ggCol, name="Cluster") +
geom_point(alpha=0.25, position=position_jitterdodge()) +
geom_boxplot(alpha =0.35, outlier.color = 'NA') +
annotate("text", x = levels(ggJdat$ind)[c(2,8,14,20)], y = 1.15*max(ggJdat$values),
label= cf2[1:4,]) +
theme(axis.title.x = element_blank()) +
theme(axis.text.x = element_text(color = ccol[ford],
angle=45,
vjust = 0.5))print(ggJ2)The fraction of data points within each cluster are given at the top of the plot window.
corLV2 <- lapply(c(1:4),function(x){cor(dat[L[[2]] == x,])[ford, ford]})
difCor1112 <- ((corLV2[[1]] - corLV2[[2]]))
difCor2122 <- ((corLV2[[3]] - corLV2[[4]]))
layout(matrix(c(1,2,3,3,4,5,6,6), 4, 2, byrow=TRUE))
corrplot(corLV2[[1]],method="color",tl.col=ccol[ford], tl.cex=0.8,
mar=c(0,0,3,0))
title("Cluster 1")
corrplot(corLV2[[2]],method="color",tl.col=ccol[ford], tl.cex=0.8,
mar=c(0,0,3,0))
title("Cluster 2")
corrplot(difCor1112, method="color", tl.col=ccol[ford],
tl.cex=0.8,
mar = c(0,0,3,0),
cl.lim = c(min(difCor1112,difCor2122),max(difCor1112,difCor2122)),
col=colorRampPalette(c("#998ec3",
"white",
"darkorange"))(100))
title("Difference(1,2)")
corrplot(corLV2[[3]],method="color",tl.col=ccol[ford], tl.cex=0.8,
mar=c(0,0,3,0))
title("Cluster 3")
corrplot(corLV2[[4]],method="color",tl.col=ccol[ford], tl.cex=0.8,
mar=c(0,0,3,0))
title("Cluster 4")
corrplot(difCor2122, method="color", tl.col=ccol[ford],
tl.cex=0.8,
mar=c(0,0,3,0),
cl.lim = c(min(difCor1112,difCor2122),max(difCor1112,difCor2122)),
col=colorRampPalette(c("#998ec3",
"white",
"darkorange"))(100))
title("Difference(3,4)")Using the location data and the results of K-means++ we show a 3d scatter plot colored according to cluster.
set.seed(2^12)
s1 <- sample(dim(loc)[1],5e4)
locs2 <- loc[s1,]
locs2$cluster <- L[[2]][s1]
YlOrBr <- c("#FFFFD4", "#FED98E", "#FE9929", "#D95F0E", "#993404")
col.pal <- colorRampPalette(YlOrBr)
plot3d(locs2$V1,locs2$V2,locs2$V3,
#col=colorpanel(8,"brown","blue")[order(table(L[[2]]))][locs2$cluster],
col=col.pal(8)[-seq(1,8,2)][order(table(L[[2]]))][locs2$cluster],
alpha=0.75,
xlab='x',
ylab='y',
zlab='z'
)
subid <- currentSubscene3d()
rglwidget(elementId="plot3dLocationsLV2", height=720, width=720)## Formatting data for heatmap
aggp3 <- aggregate(dat,by=list(lab=L[[3]]),FUN=function(x){mean(x)})
aggp3 <- as.matrix(aggp3[,-1])[, ford]
rownames(aggp3) <- clusterFraction(L[[3]])The following are heatmaps generated from clustering via K-means++
heatmap.2(as.matrix(aggp3),dendrogram='row',Colv=NA,trace="none", col=mycol,colCol=ccol[ford],cexRow=0.8, keysize=1.25,symkey=FALSE,symbreaks=FALSE,scale="none", srtCol=90,main="Heatmap of `fs F1` data.") Percentage of data within cluster is presented on the right side of the heatmap.
ggCol <- brewer.pal(8,"Set1")[order(table(L[[3]]))]
cf3 <- data.frame(cf = clusterFraction(L[[3]]))
ggJ3 <-
ggplot(data = ggJdat, aes(x = ind, y = values,
color = as.factor(lv3))) +
scale_color_manual(values=ggCol, name="Cluster") +
geom_point(alpha=0.25, position=position_jitterdodge()) +
geom_boxplot(alpha =0.35, outlier.color = 'NA') +
annotate("text", x = levels(ggJdat$ind)[seq(2,22,length=8)], y = 1.05*max(ggJdat$values),
label= cf3[1:8,]) +
#geom_jitter(width=2) +
theme(axis.title.x = element_blank()) +
theme(axis.text.x = element_text(color = ccol[ford],
angle=45,
vjust = 0.5))print(ggJ3)corLV3 <- lapply(c(1:8),function(x){cor(dat[L[[3]] == x,])[ford, ford]})
difCor1 <- (corLV3[[1]] - corLV3[[2]])
difCor2 <- (corLV3[[3]] - corLV3[[4]])
difCor3 <- (corLV3[[5]] - corLV3[[6]])
difCor4 <- (corLV3[[7]] - corLV3[[8]])
M <- max(difCor1, difCor2, difCor3, difCor4)
m <- min(difCor1, difCor2, difCor3, difCor4)
layout(matrix(c(1, 2, 3, 3,
4, 5, 6, 6,
7, 8, 9, 9,
10, 11, 12, 12), 8,2, byrow=TRUE))
corrplot(corLV3[[1]],method="color",tl.col=ccol[ford], tl.cex=0.8,
mar=c(0,0,3,0))
title('Cluster 1')
corrplot(corLV3[[2]],method="color",tl.col=ccol[ford], tl.cex=0.8,
mar=c(0,0,3,0))
title('Cluster 2')
corrplot(difCor1,method="color",tl.col=ccol[ford], tl.cex=0.8,
cl.lim=c(m,M),
mar=c(0,0,3,0),
col=colorRampPalette(c("#998ec3",
"white",
"darkorange"))(50))
title('Difference(1,2)')
corrplot(corLV3[[3]],method="color",tl.col=ccol[ford], tl.cex=0.8,
mar=c(0,0,3,0))
title('Cluster 3')
corrplot(corLV3[[4]],method="color",tl.col=ccol[ford], tl.cex=0.8,
mar=c(0,0,3,0))
title('Cluster 4')
corrplot(difCor2,method="color",tl.col=ccol[ford], tl.cex=0.8,
cl.lim= c(m,M),
mar=c(0,0,3,0),
col=colorRampPalette(c("#998ec3",
"white",
"darkorange"))(50))
title('Difference(3,4)')
corrplot(corLV3[[5]],method="color",tl.col=ccol[ford], tl.cex=0.8,
mar=c(0,0,3,0))
title('Cluster 5')
corrplot(corLV3[[6]],method="color",tl.col=ccol[ford], tl.cex=0.8,
mar=c(0,0,3,0))
title('Cluster 6')
corrplot(difCor3,method="color",tl.col=ccol[ford], tl.cex=0.8,
cl.lim= c(m,M),
mar=c(0,0,3,0),
col=colorRampPalette(c("#998ec3",
"white",
"darkorange"))(50))
title('Difference(5,6)')
corrplot(corLV3[[7]],method="color",tl.col=ccol[ford], tl.cex=0.8,
mar=c(0,0,3,0))
title('Cluster 7')
corrplot(corLV3[[8]],method="color",tl.col=ccol[ford], tl.cex=0.8,
mar=c(0,0,3,0))
title('Cluster 8')
corrplot(difCor4,method="color",tl.col=ccol[ford], tl.cex=0.8,
cl.lim= c(m,M),
mar=c(0,0,3,0),
col=colorRampPalette(c("#998ec3",
"white",
"darkorange"))(50))
title('Difference(7,8)')Using the location data and the results of K-means++ we show a 3d scatter plot colored according to cluster.
set.seed(2^12)
s1 <- sample(dim(loc)[1],5e4)
locs3 <- loc[s1,]
locs3$cluster <- L[[3]][s1]
plot3d(locs3$V1,locs3$V2,locs3$V3,
col=col.pal(16)[-seq(1,8,2)][order(table(L[[3]]))][locs3$cluster],
alpha=0.65,
xlab='x',
ylab='y',
zlab='z'
)
subid <- currentSubscene3d()
rglwidget(elementId="plot3dLocationsLV3", height=720, width=720)