Homepage
The formatted source code for this file is here.
And a raw version here.
Previous work by Youngser Park can be found here.

We now have the following data sets:

featF1: The feature vector looking only at the integrated brightness features.
fs: The feature vector rescaled to the interval \([0,1000]\).

1 Level 0

# Use the scaled feature set `fs` as the working data set `dat`; the chunks
# below read `dat`.
dat <- fs

1.1 Heat maps (Lv 0):

## Formatting data for heatmap
# Mean of every column of `dat`, taken over all rows.
chan_mean <- apply(dat, 2, mean)
# Stack the same mean vector twice to get a two-row matrix, then permute the
# columns with `ford`.
# NOTE(review): presumably duplicated because heatmap.2 needs at least two
# rows -- confirm.
aggp <- rbind(aggp = chan_mean, aggp = chan_mean)[, ford]

The following heatmap shows the overall channel means at level 0, i.e. before any K-means++ clustering is applied.

# Level-0 heatmap of the channel means: no dendrogram, no scaling, no trace
# lines, and blank row labels.
heatmap.2(
  as.matrix(aggp),
  main       = "Heatmap of `fs F1` data.",
  dendrogram = "none",
  Colv       = NA,
  scale      = "none",
  trace      = "none",
  col        = mycol,
  colCol     = ccol[ford],
  srtCol     = 90,
  cexRow     = 0.8,
  keysize    = 1.25,
  symkey     = FALSE,
  symbreaks  = FALSE,
  labRow     = ""
)

**Figure 1: Heatmap of the cluster means vs channels. Rows and columns are rearranged according to synapse type.**

Percentage of data within cluster is presented on the right side of the heatmap.

1.2 Jittered scatter plot: Lv 0

# Draw a reproducible random subset of 1e4 row indices of `dat`.
set.seed(1024)
s2 <- sample(nrow(dat), 1e4)

# Long format (columns `values` and `ind` from stack()) of the sampled rows,
# bound column-wise to the matching entries of `L`.
# NOTE(review): `L` is only assigned in the Level 1 section as shown here;
# presumably it is computed in an earlier chunk -- confirm.
# NOTE(review): assumes `dat` and `L` are data.tables, so that `[s2]` selects
# rows -- TODO confirm.
sampled_long <- stack(dat[s2])
ggJdat <- data.table(cbind(sampled_long, L[s2]))

# Order the channel factor by `ford` so the x axis follows that ordering.
ggJdat$ind <- factor(ggJdat$ind, levels = names(dat)[ford], ordered = TRUE)

# Level-0 jittered scatter plot of the sampled values per channel, with a
# translucent box plot drawn on top of the points.
#
# Only one point layer is drawn: geom_jitter() is itself a jittered
# geom_point(), so combining it with a plain geom_point() plotted every
# sample twice (once unjittered). The horizontal jitter is kept below half a
# category width (the jitter is applied in both directions) so that points
# stay inside their own channel instead of spilling onto the neighbours.
ggJ0 <- 
  ggplot(data = ggJdat, aes(x = ind, y = values)) +
  geom_jitter(width = 0.4, alpha = 0.75) +
  # outlier.color = 'NA' hides the box-plot outliers; the raw points are
  # already shown by the jitter layer.
  geom_boxplot(alpha = 0.35, outlier.color = 'NA') +
  theme(axis.title.x = element_blank()) +
  theme(axis.text.x = element_text(color = ccol[ford],
                                   angle = 45,
                                   vjust = 0.5))

print(ggJ0)

The above scatter plot is a random sample of the data points.

1.3 Correlations: Lv 0

# Correlation matrix of the columns of `fs`, with rows and columns permuted
# by `ford`, shown as a colour-coded correlation plot.
cmat <- cor(fs)
cmat <- cmat[ford, ford]
corrplot(cmat, method = "color", tl.cex = 1, tl.col = ccol[ford])

**Figure 3: Correlation on linearly scaled F1 data, reordered by synapse type.**

2 Level 1: K-means++ for \(K=2\).

We run a Hierarchical K-means++ for \(K=2\) on the fs F1 data with 4 levels.

# Fix the RNG seed so the clustering result is reproducible.
set.seed(2^13)
# Run bhkmpp() on `dat` with blevels = 4. The helper is defined outside this
# view; the chunks below read its result as L[[1]], L[[2]] and L[[3]].
L <- bhkmpp(dat,blevels=4)

2.1 Heat maps (Lv 1):

## Formatting data for heatmap
# Per-cluster column means of `dat`, grouped by the level-1 labels L[[1]].
lv1_means <- aggregate(dat, by = list(lab = L[[1]]), FUN = mean)
# Drop the grouping column and permute the channel columns with `ford`.
aggp <- as.matrix(lv1_means[, -1])[, ford]
# Row labels are whatever clusterFraction() returns for the level-1 labels.
rownames(aggp) <- clusterFraction(L[[1]])

The following are heatmaps generated from clustering via K-means++ (at level 1)

# Level-1 heatmap of the per-cluster channel means, with a row dendrogram and
# no scaling or trace lines.
heatmap.2(
  as.matrix(aggp),
  main       = "Heatmap of `fs F1` data.",
  dendrogram = "row",
  Colv       = NA,
  scale      = "none",
  trace      = "none",
  col        = mycol,
  colCol     = ccol[ford],
  srtCol     = 90,
  cexRow     = 0.8,
  keysize    = 1.25,
  symkey     = FALSE,
  symbreaks  = FALSE
)

**Figure 4: Heatmap of the cluster means vs channels. Rows and columns are rearranged according to synapse type.**

Percentage of data within cluster is presented on the right side of the heatmap.

2.2 Jittered scatter plot: Lv 1

# Colours for the level-1 clusters: Set1 entries picked via order() of the
# cluster sizes.
ggCol <- brewer.pal(4, "Set1")[order(table(L[[1]]))]

# Labels returned by clusterFraction() for the level-1 clusters, written
# above the plot.
cf1 <- data.frame(cf = clusterFraction(L[[1]]))

# Level-1 jittered scatter plot: points and box plots per channel, coloured
# by the `lv1` column of `ggJdat`.
ggJ1 <-
  ggplot(
    data = ggJdat,
    aes(x = ind, y = values, color = as.factor(lv1))
  ) +
  geom_point(alpha = 0.25, position = position_jitterdodge()) +
  geom_boxplot(alpha = 0.35, outlier.color = 'NA') +
  annotate(
    "text",
    x     = levels(ggJdat$ind)[c(2, 20)],
    y     = 1.15 * max(ggJdat$values),
    label = cf1$cf[1:2]
  ) +
  scale_color_manual(values = ggCol, name = "Cluster") +
  theme(
    axis.title.x = element_blank(),
    axis.text.x  = element_text(color = ccol[ford], angle = 45, vjust = 0.5)
  )

print(ggJ1)

2.3 Within cluster correlations (Lv 1)

# Correlation matrices of the rows of `dat` in level-1 cluster 1 and
# cluster 2, each permuted by `ford`, plus their element-wise difference.
corkp1 <- cor(dat[L[[1]] == 1, ])
corkp1 <- corkp1[ford, ford]
corkp2 <- cor(dat[L[[1]] == 2, ])
corkp2 <- corkp2[ford, ford]
difCor12 <- corkp1 - corkp2

# Two panels on the top row, one full-width panel underneath.
layout(matrix(c(1, 2,
                3, 3), nrow = 2, ncol = 2, byrow = TRUE))

corrplot(corkp1, method = "color", tl.cex = 0.8, tl.col = ccol[ford],
         mar = c(0, 0, 3, 0))
title("Cluster 1")

corrplot(corkp2, method = "color", tl.cex = 0.8, tl.col = ccol[ford],
         mar = c(0, 0, 3, 0))
title("Cluster 2")

# The difference is not a correlation matrix, hence is.corr = FALSE and a
# separate purple-white-orange palette.
lv1_dif_pal <- colorRampPalette(c("#998ec3", "white", "darkorange"))(50)
corrplot(difCor12, is.corr = FALSE, method = "color",
         tl.cex = 0.8, tl.col = ccol[ford],
         mar = c(0, 0, 3, 0),
         col = lv1_dif_pal)
title("Difference(1,2)")

**Figure 6: Within cluster correlations, clockwise from top left: Cluster 1, Cluster 2, difference C1 - C2**

Notice that the non-synaptic markers change very little between clusters. Also note that the correlations between (gad, VGAT, PV, Gephyr) and VGlut1 at both times change significantly between clusters.

2.4 Clusters and Spatial Location (Lv 1)

Using the location data and the results of K-means++ we show a 3d scatter plot colored according to cluster.

# Reproducible random subset of 5e4 rows of the location table.
set.seed(2^12)
s1 <- sample(nrow(loc), 5e4)

# Sampled locations together with their level-1 cluster label.
locs1 <- loc[s1, ]
locs1$cluster <- L[[1]][s1]

# One Set1 colour per cluster, picked via order() of the cluster sizes and
# then looked up by each point's cluster label.
lv1_cols <- brewer.pal(4, "Set1")[order(table(L[[1]]))]

plot3d(
  locs1$V1, locs1$V2, locs1$V3,
  col   = lv1_cols[locs1$cluster],
  alpha = 0.75,
  xlab  = 'x',
  ylab  = 'y',
  zlab  = 'z'
)

# Embed the current rgl scene as an HTML widget.
subid <- currentSubscene3d()
rglwidget(elementId = "plot3dLocations", height = 720, width = 720)

3 Level 2: K-means++ for \(K=2\).

3.1 Heat maps (Lv 2):

## Formatting data for heatmap
aggp2 <- aggregate(dat,by=list(lab=L[[2]]),FUN=function(x){mean(x)}) 
aggp2 <- as.matrix(aggp2[,-1])[, ford]
rownames(aggp2) <- clusterFraction(L[[2]])

The following are heatmaps generated from clustering via K-means++

# Level-2 heatmap of the per-cluster channel means, with a row dendrogram and
# no scaling or trace lines.
heatmap.2(
  as.matrix(aggp2),
  main       = "Heatmap of `fs F1` data.",
  dendrogram = "row",
  Colv       = NA,
  scale      = "none",
  trace      = "none",
  col        = mycol,
  colCol     = ccol[ford],
  srtCol     = 90,
  cexRow     = 0.8,
  keysize    = 1.25,
  symkey     = FALSE,
  symbreaks  = FALSE
)

**Figure 7: Heatmap of the cluster means vs channels. Rows and columns are rearranged according to synapse type.**

Percentage of data within cluster is presented on the right side of the heatmap.

3.2 Jittered scatter plot: Lv 2

# Colours for the level-2 clusters: Set1 entries picked via order() of the
# cluster sizes.
ggCol <- brewer.pal(8, "Set1")[order(table(L[[2]]))]
# Labels returned by clusterFraction() for the level-2 clusters.
cf2 <- data.frame(cf = clusterFraction(L[[2]]))

# Level-2 jittered scatter plot: points and box plots per channel, coloured
# by the `lv2` column of `ggJdat`, with the four labels written on top.
ggJ2 <-
  ggplot(
    data = ggJdat,
    aes(x = ind, y = values, color = as.factor(lv2))
  ) +
  geom_point(alpha = 0.25, position = position_jitterdodge()) +
  geom_boxplot(alpha = 0.35, outlier.color = 'NA') +
  annotate(
    "text",
    x     = levels(ggJdat$ind)[c(2, 8, 14, 20)],
    y     = 1.15 * max(ggJdat$values),
    label = cf2$cf[1:4]
  ) +
  scale_color_manual(values = ggCol, name = "Cluster") +
  theme(
    axis.title.x = element_blank(),
    axis.text.x  = element_text(color = ccol[ford], angle = 45, vjust = 0.5)
  )

print(ggJ2)

The fraction of data points within each cluster is given at the top of the plot window.

3.3 Within cluster correlations (Lv 2)

# One correlation matrix per level-2 cluster (labels 1..4), each permuted by
# `ford`.
corLV2 <- lapply(1:4, function(x) {
  cor(dat[L[[2]] == x, ])[ford, ford]
})

# Differences between the two clusters of each pair.
difCor1112 <- corLV2[[1]] - corLV2[[2]]
difCor2122 <- corLV2[[3]] - corLV2[[4]]

# Both difference plots share one colour-legend range and one palette.
# NOTE(review): unlike the level-1 difference plot, is.corr = FALSE is not
# passed here; corrplot's default is.corr = TRUE expects values in [-1, 1] --
# confirm the differences stay inside that range.
lv2_dif_lim <- range(difCor1112, difCor2122)
lv2_dif_pal <- colorRampPalette(c("#998ec3", "white", "darkorange"))(100)

# Per pair: two panels side by side, then one full-width difference panel.
layout(matrix(c(1, 2,
                3, 3,
                4, 5,
                6, 6), nrow = 4, ncol = 2, byrow = TRUE))

corrplot(corLV2[[1]], method = "color", tl.cex = 0.8, tl.col = ccol[ford],
         mar = c(0, 0, 3, 0))
title("Cluster 1")
corrplot(corLV2[[2]], method = "color", tl.cex = 0.8, tl.col = ccol[ford],
         mar = c(0, 0, 3, 0))
title("Cluster 2")
corrplot(difCor1112, method = "color", tl.cex = 0.8, tl.col = ccol[ford],
         mar = c(0, 0, 3, 0),
         cl.lim = lv2_dif_lim,
         col = lv2_dif_pal)
title("Difference(1,2)")

corrplot(corLV2[[3]], method = "color", tl.cex = 0.8, tl.col = ccol[ford],
         mar = c(0, 0, 3, 0))
title("Cluster 3")
corrplot(corLV2[[4]], method = "color", tl.cex = 0.8, tl.col = ccol[ford],
         mar = c(0, 0, 3, 0))
title("Cluster 4")
corrplot(difCor2122, method = "color", tl.cex = 0.8, tl.col = ccol[ford],
         mar = c(0, 0, 3, 0),
         cl.lim = lv2_dif_lim,
         col = lv2_dif_pal)
title("Difference(3,4)")

**Figure 9: Within cluster correlations for level 2. (c11, c12, c21, c22) with differences**

3.4 Clusters and Spatial Location (Lv 2)

Using the location data and the results of K-means++ we show a 3d scatter plot colored according to cluster.

# Reproducible random subset of 5e4 rows of the location table.
set.seed(2^12)
s1 <- sample(nrow(loc), 5e4)

# Sampled locations together with their level-2 cluster label.
locs2 <- loc[s1, ]
locs2$cluster <- L[[2]][s1]

# Yellow-orange-brown ramp; `col.pal` is also used by the level-3 plot.
YlOrBr <- c("#FFFFD4", "#FED98E", "#FE9929", "#D95F0E", "#993404")
col.pal <- colorRampPalette(YlOrBr)

# Keep every second colour of an 8-step ramp (entries 2, 4, 6, 8), pick them
# via order() of the cluster sizes, then look up each point's cluster label.
# Earlier alternative, kept for reference:
#   col=colorpanel(8,"brown","blue")[order(table(L[[2]]))][locs2$cluster]
lv2_shades <- col.pal(8)[seq(2, 8, by = 2)]
lv2_cols <- lv2_shades[order(table(L[[2]]))]

plot3d(
  locs2$V1, locs2$V2, locs2$V3,
  col   = lv2_cols[locs2$cluster],
  alpha = 0.75,
  xlab  = 'x',
  ylab  = 'y',
  zlab  = 'z'
)

# Embed the current rgl scene as an HTML widget.
subid <- currentSubscene3d()
rglwidget(elementId = "plot3dLocationsLV2", height = 720, width = 720)

4 Level 3: K-means++ for \(K=2\).

4.1 Heat maps (Lv 3):

## Formatting data for heatmap
aggp3 <- aggregate(dat,by=list(lab=L[[3]]),FUN=function(x){mean(x)})
aggp3 <- as.matrix(aggp3[,-1])[, ford]
rownames(aggp3) <- clusterFraction(L[[3]])

The following are heatmaps generated from clustering via K-means++

# Level-3 heatmap of the per-cluster channel means, with a row dendrogram and
# no scaling or trace lines.
heatmap.2(
  as.matrix(aggp3),
  main       = "Heatmap of `fs F1` data.",
  dendrogram = "row",
  Colv       = NA,
  scale      = "none",
  trace      = "none",
  col        = mycol,
  colCol     = ccol[ford],
  srtCol     = 90,
  cexRow     = 0.8,
  keysize    = 1.25,
  symkey     = FALSE,
  symbreaks  = FALSE
)

**Figure 10: Heatmap of the cluster means vs channels. Rows and columns are rearranged according to synapse type.**

Percentage of data within cluster is presented on the right side of the heatmap.

4.2 Jittered scatter plot: Lv 3

# Colours for the level-3 clusters: Set1 entries picked via order() of the
# cluster sizes.
ggCol <- brewer.pal(8, "Set1")[order(table(L[[3]]))]
# Labels returned by clusterFraction() for the level-3 clusters.
cf3 <- data.frame(cf = clusterFraction(L[[3]]))

# Level-3 jittered scatter plot: points and box plots per channel, coloured
# by the `lv3` column of `ggJdat`, with the eight labels written on top.
ggJ3 <-
  ggplot(
    data = ggJdat,
    aes(x = ind, y = values, color = as.factor(lv3))
  ) +
  geom_point(alpha = 0.25, position = position_jitterdodge()) +
  geom_boxplot(alpha = 0.35, outlier.color = 'NA') +
  # Eight label positions spread over channels 2..22; the fractional values
  # produced by seq() are truncated when used as indices.
  annotate(
    "text",
    x     = levels(ggJdat$ind)[seq(2, 22, length.out = 8)],
    y     = 1.05 * max(ggJdat$values),
    label = cf3$cf[1:8]
  ) +
  # Disabled layer, kept for reference: geom_jitter(width=2)
  scale_color_manual(values = ggCol, name = "Cluster") +
  theme(
    axis.title.x = element_blank(),
    axis.text.x  = element_text(color = ccol[ford], angle = 45, vjust = 0.5)
  )

print(ggJ3)

4.3 Within cluster correlations (Lv 3)

# One correlation matrix per level-3 cluster (labels 1..8), each permuted by
# `ford`.
corLV3 <- lapply(1:8, function(x) {
  cor(dat[L[[3]] == x, ])[ford, ford]
})

# Differences between the two clusters of each consecutive pair.
difCor1 <- corLV3[[1]] - corLV3[[2]]
difCor2 <- corLV3[[3]] - corLV3[[4]]
difCor3 <- corLV3[[5]] - corLV3[[6]]
difCor4 <- corLV3[[7]] - corLV3[[8]]
lv3_difs <- list(difCor1, difCor2, difCor3, difCor4)

# Largest and smallest difference over all four pairs; used as the shared
# colour-legend range of the difference plots.
M <- max(difCor1, difCor2, difCor3, difCor4)
m <- min(difCor1, difCor2, difCor3, difCor4)

# Per pair: two panels side by side, then one full-width difference panel.
layout(matrix(c( 1,  2,
                 3,  3,
                 4,  5,
                 6,  6,
                 7,  8,
                 9,  9,
                10, 11,
                12, 12), nrow = 8, ncol = 2, byrow = TRUE))

lv3_dif_pal <- colorRampPalette(c("#998ec3", "white", "darkorange"))(50)

# Plot order per pair: first cluster, second cluster, their difference.
# NOTE(review): the difference plots do not pass is.corr = FALSE (the level-1
# one does); corrplot's default expects values in [-1, 1] -- confirm.
for (pair in seq_along(lv3_difs)) {
  first  <- 2 * pair - 1
  second <- 2 * pair

  for (cl in c(first, second)) {
    corrplot(corLV3[[cl]], method = "color",
             tl.cex = 0.8, tl.col = ccol[ford],
             mar = c(0, 0, 3, 0))
    title(paste("Cluster", cl))
  }

  corrplot(lv3_difs[[pair]], method = "color",
           tl.cex = 0.8, tl.col = ccol[ford],
           mar = c(0, 0, 3, 0),
           cl.lim = c(m, M),
           col = lv3_dif_pal)
  title(paste0("Difference(", first, ",", second, ")"))
}

**Figure 12: Within cluster correlations for level 3. (c111, c112, c121, c122, c211, c212, c221, c222)**

4.4 Clusters and Spatial Location (Lv 3)

Using the location data and the results of K-means++ we show a 3d scatter plot colored according to cluster.

# Reproducible random subset of 5e4 rows of the location table.
set.seed(2^12)
s1 <- sample(dim(loc)[1],5e4)

# Sampled locations together with their level-3 cluster label.
locs3 <- loc[s1,]
locs3$cluster <- L[[3]][s1]

# Keep every second colour of a 16-step ramp, giving one evenly spaced shade
# for each of the 8 level-3 clusters (mirrors the level-2 plot, which keeps
# every second colour of an 8-step ramp). The previous index,
# -seq(1,8,2), only thinned the first half of the ramp: it left 12 colours,
# of which the 8 used were unevenly spaced and never reached the dark end.
# `col.pal` is the ramp function defined in the level-2 chunk.
lv3_shades <- col.pal(16)[-seq(1,16,2)]

plot3d(locs3$V1,locs3$V2,locs3$V3,
       col=lv3_shades[order(table(L[[3]]))][locs3$cluster],
       alpha=0.65,
       xlab='x', 
       ylab='y', 
       zlab='z'
       )

# Embed the current rgl scene as an HTML widget.
subid <- currentSubscene3d()
rglwidget(elementId="plot3dLocationsLV3", height=720, width=720)

Synapse Stats: Synapse Exploration on Lin F1

JLP

2016-09-22

1 Level 0

1.1 Heat maps (Lv 0):

1.2 Jittered scatter plot: Lv 0

1.3 Correlations: Lv 0

2 Level 1: K-means++ for \(K=2\).

2.1 Heat maps (Lv 1):

2.2 Jittered scatter plot: Lv 1

2.3 Within cluster correlations (Lv 1)

2.4 Clusters and Spatial Location (Lv 1)

3 Level 2: K-means++ for \(K=2\).

3.1 Heat maps (Lv 2):

3.2 Jittered scatter plot: Lv 2

3.3 Within cluster correlations (Lv 2)

3.4 Clusters and Spatial Location (Lv 2)

4 Level 3: K-means++ for \(K=2\).

4.1 Heat maps (Lv 3):

4.2 Jittered scatter plot: Lv 3

4.3 Within cluster correlations (Lv 3)

4.4 Clusters and Spatial Location (Lv 3)