Homepage
The formatted source code for this file is here.
And a raw version here.
Previous work by Youngser Park can be found here.
We now have the following data sets:
featF1
: The feature vector looking only at the integrated brightness features.fs
: The feature vector scaled between \([0,1000]\).dat <- fs
## Formatting data for heatmap
aggp <- apply(dat, 2, mean)
aggp <- t(cbind(aggp, aggp))[, ford]
The following are heatmaps generated from clustering via K-means++ (at level 1)
heatmap.2(as.matrix(aggp),dendrogram='none',Colv=NA,trace="none",
col=mycol,colCol=ccol[ford],cexRow=0.8, keysize=1.25,symkey=FALSE,
symbreaks=FALSE,scale="none", srtCol=90,main="Heatmap of `fs F1` data.",
labRow = "")
Percentage of data within cluster is presented on the right side of the heatmap.
set.seed(1024)
s2 <- sample(dim(dat)[1], 1e4)
ggJdat <- data.table(cbind(stack(dat[s2]),L[s2]))
ggJdat$ind <- factor(ggJdat$ind, ordered=TRUE, levels=names(dat)[ford])
ggJ0 <-
ggplot(data = ggJdat, aes(x = ind, y = values)) +
geom_point(alpha=0.75) +
geom_jitter(width = 1) +
geom_boxplot(alpha =0.35, outlier.color = 'NA') +
theme(axis.title.x = element_blank()) +
theme(axis.text.x = element_text(color = ccol[ford],
angle=45,
vjust = 0.5))
print(ggJ0)
The above scatter plot is a random sample of the data points.
cmat <- cor(fs)[ford, ford]
corrplot(cmat,method="color",tl.col=ccol[ford], tl.cex=1)
We run a Hierachical K-means++ for \(K=2\) on the fs F1
data with 4 levels.
set.seed(2^13)
L <- bhkmpp(dat,blevels=4)
## Formatting data for heatmap
aggp <- aggregate(dat,by=list(lab=L[[1]]),FUN=mean)
aggp <- as.matrix(aggp[,-1])[, ford]
rownames(aggp) <- clusterFraction(L[[1]])
The following are heatmaps generated from clustering via K-means++ (at level 1)
heatmap.2(as.matrix(aggp),dendrogram='row',Colv=NA,trace="none", col=mycol,colCol=ccol[ford],cexRow=0.8, keysize=1.25,symkey=FALSE,symbreaks=FALSE,scale="none", srtCol=90,main="Heatmap of `fs F1` data.")
Percentage of data within cluster is presented on the right side of the heatmap.
ggCol <- brewer.pal(4,"Set1")[order(table(L[[1]]))]
cf1 <- data.frame(cf = clusterFraction(L[[1]]))
ggJ1 <-
ggplot(data = ggJdat, aes(x = ind, y = values,
color = as.factor(lv1))) +
scale_color_manual(values=ggCol, name="Cluster") +
geom_point(alpha=0.25, position=position_jitterdodge()) +
geom_boxplot(alpha =0.35, outlier.color = 'NA') +
annotate("text", x = levels(ggJdat$ind)[c(2,20)], y = 1.15*max(ggJdat$values),
label= cf1[1:2,]) +
theme(axis.title.x = element_blank()) +
theme(axis.text.x = element_text(color = ccol[ford],
angle=45,
vjust = 0.5))
print(ggJ1)
corkp1 <- cor(dat[L[[1]] == 1,])[ford, ford]
corkp2 <- cor(dat[L[[1]] == 2,])[ford, ford]
difCor12 <- (corkp1 - corkp2)
layout(matrix(c(1,2,3,3), 2, 2, byrow=TRUE))
corrplot(corkp1,method="color",tl.col=ccol[ford], tl.cex=0.8, mar=c(0,0,3,0))
title("Cluster 1")
corrplot(corkp2,method="color",tl.col=ccol[ford], tl.cex=0.8, mar=c(0,0,3,0))
title("Cluster 2")
corrplot(difCor12,is.corr=FALSE,method="color",
tl.col=ccol[ford], tl.cex=0.8,
mar=c(0,0,3,0),
col=colorRampPalette(c("#998ec3","white","darkorange"))(50))
title("Difference(1,2)")
Notice that the non-synaptic markers change very little between clusters. Also note that the correlations between (gad, VGAT, PV, Gephyr
) and VGlut1
at both times change significantly between clusters.
Using the location data and the results of K-means++ we show a 3d scatter plot colored accoding to cluster.
set.seed(2^12)
s1 <- sample(dim(loc)[1],5e4)
locs1 <- loc[s1,]
locs1$cluster <- L[[1]][s1]
plot3d(locs1$V1,locs1$V2,locs1$V3,
col=brewer.pal(4,"Set1")[order(table(L[[1]]))][locs1$cluster],
alpha=0.75,
xlab='x',
ylab='y',
zlab='z')
subid <- currentSubscene3d()
rglwidget(elementId="plot3dLocations", height=720, width=720)
## Formatting data for heatmap
aggp2 <- aggregate(dat,by=list(lab=L[[2]]),FUN=function(x){mean(x)})
aggp2 <- as.matrix(aggp2[,-1])[, ford]
rownames(aggp2) <- clusterFraction(L[[2]])
The following are heatmaps generated from clustering via K-means++
heatmap.2(as.matrix(aggp2),dendrogram='row',Colv=NA,trace="none", col=mycol,colCol=ccol[ford],cexRow=0.8, keysize=1.25,symkey=FALSE,symbreaks=FALSE,scale="none", srtCol=90,main="Heatmap of `fs F1` data.")
Percentage of data within cluster is presented on the right side of the heatmap.
ggCol <- brewer.pal(8,"Set1")[order(table(L[[2]]))]
cf2 <- data.frame(cf = clusterFraction(L[[2]]))
ggJ2 <-
ggplot(data = ggJdat, aes(x = ind, y = values,
color = as.factor(lv2))) +
scale_color_manual(values=ggCol, name="Cluster") +
geom_point(alpha=0.25, position=position_jitterdodge()) +
geom_boxplot(alpha =0.35, outlier.color = 'NA') +
annotate("text", x = levels(ggJdat$ind)[c(2,8,14,20)], y = 1.15*max(ggJdat$values),
label= cf2[1:4,]) +
theme(axis.title.x = element_blank()) +
theme(axis.text.x = element_text(color = ccol[ford],
angle=45,
vjust = 0.5))
print(ggJ2)
The fraction of data points within each cluster are given at the top of the plot window.
corLV2 <- lapply(c(1:4),function(x){cor(dat[L[[2]] == x,])[ford, ford]})
difCor1112 <- ((corLV2[[1]] - corLV2[[2]]))
difCor2122 <- ((corLV2[[3]] - corLV2[[4]]))
layout(matrix(c(1,2,3,3,4,5,6,6), 4, 2, byrow=TRUE))
corrplot(corLV2[[1]],method="color",tl.col=ccol[ford], tl.cex=0.8,
mar=c(0,0,3,0))
title("Cluster 1")
corrplot(corLV2[[2]],method="color",tl.col=ccol[ford], tl.cex=0.8,
mar=c(0,0,3,0))
title("Cluster 2")
corrplot(difCor1112, method="color", tl.col=ccol[ford],
tl.cex=0.8,
mar = c(0,0,3,0),
cl.lim = c(min(difCor1112,difCor2122),max(difCor1112,difCor2122)),
col=colorRampPalette(c("#998ec3",
"white",
"darkorange"))(100))
title("Difference(1,2)")
corrplot(corLV2[[3]],method="color",tl.col=ccol[ford], tl.cex=0.8,
mar=c(0,0,3,0))
title("Cluster 3")
corrplot(corLV2[[4]],method="color",tl.col=ccol[ford], tl.cex=0.8,
mar=c(0,0,3,0))
title("Cluster 4")
corrplot(difCor2122, method="color", tl.col=ccol[ford],
tl.cex=0.8,
mar=c(0,0,3,0),
cl.lim = c(min(difCor1112,difCor2122),max(difCor1112,difCor2122)),
col=colorRampPalette(c("#998ec3",
"white",
"darkorange"))(100))
title("Difference(3,4)")
Using the location data and the results of K-means++ we show a 3d scatter plot colored according to cluster.
set.seed(2^12)
s1 <- sample(dim(loc)[1],5e4)
locs2 <- loc[s1,]
locs2$cluster <- L[[2]][s1]
YlOrBr <- c("#FFFFD4", "#FED98E", "#FE9929", "#D95F0E", "#993404")
col.pal <- colorRampPalette(YlOrBr)
plot3d(locs2$V1,locs2$V2,locs2$V3,
#col=colorpanel(8,"brown","blue")[order(table(L[[2]]))][locs2$cluster],
col=col.pal(8)[-seq(1,8,2)][order(table(L[[2]]))][locs2$cluster],
alpha=0.75,
xlab='x',
ylab='y',
zlab='z'
)
subid <- currentSubscene3d()
rglwidget(elementId="plot3dLocationsLV2", height=720, width=720)
## Formatting data for heatmap
aggp3 <- aggregate(dat,by=list(lab=L[[3]]),FUN=function(x){mean(x)})
aggp3 <- as.matrix(aggp3[,-1])[, ford]
rownames(aggp3) <- clusterFraction(L[[3]])
The following are heatmaps generated from clustering via K-means++
heatmap.2(as.matrix(aggp3),dendrogram='row',Colv=NA,trace="none", col=mycol,colCol=ccol[ford],cexRow=0.8, keysize=1.25,symkey=FALSE,symbreaks=FALSE,scale="none", srtCol=90,main="Heatmap of `fs F1` data.")
Percentage of data within cluster is presented on the right side of the heatmap.
ggCol <- brewer.pal(8,"Set1")[order(table(L[[3]]))]
cf3 <- data.frame(cf = clusterFraction(L[[3]]))
ggJ3 <-
ggplot(data = ggJdat, aes(x = ind, y = values,
color = as.factor(lv3))) +
scale_color_manual(values=ggCol, name="Cluster") +
geom_point(alpha=0.25, position=position_jitterdodge()) +
geom_boxplot(alpha =0.35, outlier.color = 'NA') +
annotate("text", x = levels(ggJdat$ind)[seq(2,22,length=8)], y = 1.05*max(ggJdat$values),
label= cf3[1:8,]) +
#geom_jitter(width=2) +
theme(axis.title.x = element_blank()) +
theme(axis.text.x = element_text(color = ccol[ford],
angle=45,
vjust = 0.5))
print(ggJ3)
corLV3 <- lapply(c(1:8),function(x){cor(dat[L[[3]] == x,])[ford, ford]})
difCor1 <- (corLV3[[1]] - corLV3[[2]])
difCor2 <- (corLV3[[3]] - corLV3[[4]])
difCor3 <- (corLV3[[5]] - corLV3[[6]])
difCor4 <- (corLV3[[7]] - corLV3[[8]])
M <- max(difCor1, difCor2, difCor3, difCor4)
m <- min(difCor1, difCor2, difCor3, difCor4)
layout(matrix(c(1, 2, 3, 3,
4, 5, 6, 6,
7, 8, 9, 9,
10, 11, 12, 12), 8,2, byrow=TRUE))
corrplot(corLV3[[1]],method="color",tl.col=ccol[ford], tl.cex=0.8,
mar=c(0,0,3,0))
title('Cluster 1')
corrplot(corLV3[[2]],method="color",tl.col=ccol[ford], tl.cex=0.8,
mar=c(0,0,3,0))
title('Cluster 2')
corrplot(difCor1,method="color",tl.col=ccol[ford], tl.cex=0.8,
cl.lim=c(m,M),
mar=c(0,0,3,0),
col=colorRampPalette(c("#998ec3",
"white",
"darkorange"))(50))
title('Difference(1,2)')
corrplot(corLV3[[3]],method="color",tl.col=ccol[ford], tl.cex=0.8,
mar=c(0,0,3,0))
title('Cluster 3')
corrplot(corLV3[[4]],method="color",tl.col=ccol[ford], tl.cex=0.8,
mar=c(0,0,3,0))
title('Cluster 4')
corrplot(difCor2,method="color",tl.col=ccol[ford], tl.cex=0.8,
cl.lim= c(m,M),
mar=c(0,0,3,0),
col=colorRampPalette(c("#998ec3",
"white",
"darkorange"))(50))
title('Difference(3,4)')
corrplot(corLV3[[5]],method="color",tl.col=ccol[ford], tl.cex=0.8,
mar=c(0,0,3,0))
title('Cluster 5')
corrplot(corLV3[[6]],method="color",tl.col=ccol[ford], tl.cex=0.8,
mar=c(0,0,3,0))
title('Cluster 6')
corrplot(difCor3,method="color",tl.col=ccol[ford], tl.cex=0.8,
cl.lim= c(m,M),
mar=c(0,0,3,0),
col=colorRampPalette(c("#998ec3",
"white",
"darkorange"))(50))
title('Difference(5,6)')
corrplot(corLV3[[7]],method="color",tl.col=ccol[ford], tl.cex=0.8,
mar=c(0,0,3,0))
title('Cluster 7')
corrplot(corLV3[[8]],method="color",tl.col=ccol[ford], tl.cex=0.8,
mar=c(0,0,3,0))
title('Cluster 8')
corrplot(difCor4,method="color",tl.col=ccol[ford], tl.cex=0.8,
cl.lim= c(m,M),
mar=c(0,0,3,0),
col=colorRampPalette(c("#998ec3",
"white",
"darkorange"))(50))
title('Difference(7,8)')
Using the location data and the results of K-means++ we show a 3d scatter plot colored according to cluster.
set.seed(2^12)
s1 <- sample(dim(loc)[1],5e4)
locs3 <- loc[s1,]
locs3$cluster <- L[[3]][s1]
plot3d(locs3$V1,locs3$V2,locs3$V3,
col=col.pal(16)[-seq(1,8,2)][order(table(L[[3]]))][locs3$cluster],
alpha=0.65,
xlab='x',
ylab='y',
zlab='z'
)
subid <- currentSubscene3d()
rglwidget(elementId="plot3dLocationsLV3", height=720, width=720)