Loading the class labels sent by Forrest
# Packages used throughout; meda (assumed here) provides mlocation, d1heat,
# cumvar, outliers, pairhex, medacor, hmc, plotDend, and stackM.
library(meda); library(mclust); library(foreach); library(doParallel)
library(viridis); library(ggplot2); library(plotly)
library(data.table); library(randomForest)
registerDoParallel()  # backend for %dopar%
cl <- read.csv("cleft_class.csv")[-18,]
fmax <- read.csv("collman15v2_Fmax_20171219.csv")[,-c(1:3)]
sfmax <- scale(fmax, center = TRUE, scale = TRUE)
loc <- read.csv("locations_collman15v2_Fmax_20171219.csv")
table(cl$gaba)
##
## 0 1
## 214 22
table(cl$postgaba)
##
## 0 1
## 212 24
#ccol <- read.csv("params.csv")[1,-c(1,14)]
ccol <- c('blue', 'blue', 'blue', 'red', 'red', 'red', 'black', 'black', 'green', 'green', 'green', 'green')
ccol <- ccol[-c(1:3)]
set.seed(1234)
L <- list()
L[[1]] <- mlocation(fmax, ccol = ccol)  # marginal location estimates
L[[2]] <- d1heat(sfmax, ccol = ccol)    # 1-d heatmap
L[[3]] <- cumvar(sfmax)                 # cumulative variance of PCs
L[[4]] <- outliers(sfmax)               # outlier detection
L[[5]] <- pairhex(sfmax, ccol = ccol)   # hex-binned pairs plot
L[[6]] <- medacor(fmax, ccol = ccol)    # feature correlation matrix
set.seed(1234)
L[[7]] <- hmc(sfmax, ccol = ccol,       # hierarchical GMM clustering
              maxDepth = 2, modelNames = "VVV")
#plot(L[[7]])
#plotDend(L[[7]])
#stackM(L[[7]], ccol = ccol, centered = TRUE)
Note that we are using VVV (ellipsoidal components with varying volume, shape, and orientation) as the covariance model.
## 1 2
## 0.5636 0.4364
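The three-letter mclust model codes can be decoded with a quick lookup; this check is not part of the original pipeline:
# "VVV" = ellipsoidal components, each with its own volume, shape, and orientation
mclust::mclustModelNames("VVV")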
Here we restrict our hierarchical GMM to a single split (maxDepth = 2, this time with the VVE model) and compare the resulting two clusters with the true GABA/non-GABA class labels via the adjusted Rand index (ARI), using a permutation test to assess whether the agreement exceeds chance.
set.seed(1030)
#set.seed(1234)
h2 <- hmc(sfmax, ccol = ccol, maxDepth = 2, modelNames = 'VVE')
l2 <- h2$dat$labels$col - 1
p0 <- mclust::adjustedRandIndex(l2, cl$gaba)
# Null distribution of the ARI under random reassignment of cluster labels
perms <- foreach(i = 1:1e4, .combine = c) %dopar% {
  set.seed(i * 2)
  mclust::adjustedRandIndex(sample(l2), cl$gaba)
}
tmp <- h2$dat$labels$col
pairs(h2$dat$data,
      col = viridis(max(tmp))[tmp],
      pch = 19, cex = 0.5, main = "colored by prediction")
pairs(h2$dat$data,
      col = c('darkblue', 'violet')[cl$gaba + 1],
      pch = 19, cex = 0.5, main = "colored by truth")
plotDend(h2)
## 1 2
## 0.5975 0.4025
stackM(h2, depth = 2, centered = TRUE, ccol = ccol)
hist(perms, xlim = c(min(perms), p0 + 0.25 * p0),
     main = "permutation test of ARI values", probability = TRUE)
#hist(perms, probability = TRUE)
abline(v = p0, col = 'red')
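The red line marks the observed ARI against the permutation null. The original code stops at the histogram; an empirical one-sided p-value follows directly from perms:
# Add-one empirical p-value: fraction of permuted ARIs at least as
# large as the observed value p0
(pval <- (sum(perms >= p0) + 1) / (length(perms) + 1))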
df1 <- data.frame(loc)
df1$gaba <- as.factor(cl$gaba)
df1$classification <- as.factor(tmp - 1)
df1$correct_classification <- (df1$classification == df1$gaba)
# Per-synapse confusion outcomes
df1$TN <- (df1$classification == 0 & df1$gaba == 0)
df1$FN <- (df1$classification == 0 & df1$gaba == 1)
df1$FP <- (df1$classification == 1 & df1$gaba == 0)
df1$TP <- (df1$classification == 1 & df1$gaba == 1)
#df1$text <- links
p1 <- ggplot(df1, aes(x = x, y = y, z = z, col = gaba,
                      shape = correct_classification)) +
  facet_wrap(~ z, ncol = 6) +
  geom_point()
p1
ggplotly(p1)
#p2 <- plot_ly(df1, x = ~x, y = ~y, color = ~gaba,
# hoverinfo = 'text',
# text = ~links)
#p2
#
#htmlwidgets::saveWidget(as_widget(p2), "links.html")
(ta <- table(classification = tmp - 1, df1$gaba))
##
## classification 0 1
## 0 140 1
## 1 74 21
The above table shows that, of the 22 true GABA synapses, one was misclassified as non-GABA.
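For completeness, sensitivity and specificity can be read off ta; a small added sketch, using the table as printed:
# Rows of ta are cluster labels, columns are true gaba labels
sens <- ta["1", "1"] / sum(ta[, "1"])  # 21/22: true GABA recovered
spec <- ta["0", "0"] / sum(ta[, "0"])  # 140/214: non-GABA recovered
c(sensitivity = sens, specificity = spec)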
Note that the centroids are calculated from the annotations, which means that a centroid might not lie in an area with annotated pixels.
| FalseNegatives |
| --- |
| 914, 3286, 19 |
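The location above can also be recovered programmatically from df1; a convenience sketch, assuming loc supplied the x, y, z columns used in the plots:
# Synapses the clustering called non-GABA that are truly GABA
df1[df1$FN, c("x", "y", "z")]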
set.seed(317)
rfdat <- data.table(gaba = as.factor(cl$gaba), sfmax)
rf1 <- randomForest(gaba ~ ., data = rfdat)
#set.seed(317)
set.seed(2^13)
train <- sample(nrow(rfdat), 100)
test <- setdiff(1:nrow(rfdat), train)
table(rfdat[test,]$gaba)
##
## 0 1
## 123 13
rf2 <- randomForest(gaba ~ ., data = rfdat[train,], importance = TRUE)
print(rf2)
##
## Call:
## randomForest(formula = gaba ~ ., data = rfdat[train, ], importance = TRUE)
## Type of random forest: classification
## Number of trees: 500
## No. of variables tried at each split: 3
##
## OOB estimate of error rate: 9%
## Confusion matrix:
## 0 1 class.error
## 0 90 1 0.01098901
## 1 8 1 0.88888889
rf2.pred <- predict(rf2, newdata = rfdat[test, -c('gaba')])  # drop the response column
table(rf2.pred, rfdat[test,]$gaba)
##
## rf2.pred 0 1
## 0 123 7
## 1 0 6
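A compact summary of the held-out performance, derived from the confusion table above (added here for reference):
cm <- table(rf2.pred, rfdat[test, ]$gaba)
c(accuracy    = sum(diag(cm)) / sum(cm),        # (123 + 6) / 136 ~ 0.95
  sensitivity = cm["1", "1"] / sum(cm[, "1"]),  # 6 / 13 ~ 0.46
  specificity = cm["0", "0"] / sum(cm[, "0"]))  # 123 / 123 = 1
The near-perfect specificity and low sensitivity mirror the class imbalance visible in the OOB confusion matrix.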
varImpPlot(rf2)