1 MEDA run

Loading the class labels sent by Forrest

# Class labels provided by Forrest; row 18 is dropped
# (presumably an excluded annotation -- TODO confirm why).
cl <- read.csv("cleft_class.csv")[-18, ]

# Tight-annotation F0 feature matrix, plus a standardized copy
# (center = TRUE, scale = TRUE are scale()'s defaults).
f0 <- read.csv("collman15v2_tightAnnotationsF0.csv")
sf0 <- scale(f0)

# Synapse spatial coordinates.
loc <- read.csv("locationstest20171211.csv")
# Class balance for the gaba and postgaba labels.
table(cl$gaba)
## 
##   0   1 
## 214  22
table(cl$postgaba)
## 
##   0   1 
## 212  24

# Channel colors for the MEDA plots; previously read from params.csv:
#ccol <- read.csv("params.csv")[1,-c(1,14)]
ccol <- c(rep('blue', 3), rep('red', 3), rep('black', 2), rep('green', 4))

set.seed(1030)
L <- runAll(sf0, ccol = ccol)
# mlocation is re-run on the raw (unscaled) features so column means are
# reported in the original units.
L[[1]] <- mlocation(f0, ccol = ccol)

1.1 mlocation

1.2 d1heat

1.3 cumvar

1.4 outliers

1.5 pairhex

1.6 correlation

1.7 dendrogram

##      1     21     22 
## 0.7712 0.1314 0.0975

1.8 pairs plots

1.9 stacked means

1.10 cluster means

2 Set \(K = 2\)

Here we restrict our hierarchical GMM function to one split only and then compare with the true gaba/non-gaba class labels.

# Restrict the hierarchical GMM to a single split (maxDepth = 2) so the
# leaves form a binary partition comparable to the gaba/non-gaba labels.
set.seed(1030)
h2 <- hmc(sf0, maxDepth = 2)
# Leaf labels are coded 1/2; shift to 0/1 to match the gaba coding.
l2 <- h2$dat$labels$col - 1
# Observed adjusted Rand index between the clustering and the truth.
p0 <- mclust::adjustedRandIndex(l2, cl$gaba)
# Null ARI distribution: permute the predicted labels 10,000 times.
# Each iteration sets its own deterministic seed (i*2) so results are
# reproducible under %dopar% -- NOTE(review): doRNG::%dorng% would be the
# parallel-safe alternative; confirm a backend is registered upstream.
perms <- foreach(i = 1:1e4, .combine = c) %dopar% {
  set.seed(i*2)
  mclust::adjustedRandIndex(sample(l2), cl$gaba)
}
# Leaf labels (1/2) from the depth-2 hierarchical clustering; reused below.
tmp <- h2$dat$labels$col

# Pairs plot colored by the predicted cluster label.
predColors <- viridis(max(tmp))[tmp]
pairs(h2$dat$data, col = predColors, pch = 19, cex = 0.5,
      main = "colored by prediction")

# Same pairs plot, colored by the true gaba label instead.
truthColors <- c('darkblue', 'violet')[cl$gaba + 1]
pairs(h2$dat$data, col = truthColors, pch = 19, cex = 0.5,
      main = "colored by truth")

# Dendrogram of the (single) hierarchical split.
plotDend(h2)

##      1      2 
## 0.8093 0.1907
# Centered per-cluster means, stacked by depth-2 leaf.
stackM(h2, depth = 2, centered = TRUE, ccol = ccol)

# Permutation null of the ARI; the upper xlim leaves 25% headroom past the
# observed value so the red reference line stays visible.
xupper <- p0 + 0.25*p0
hist(perms, xlim = c(min(perms), xupper),
     main = "permutation test of ARI values")
abline(v = p0, col = 'red')

# Assemble locations, truth, and prediction into one data frame for plotting.
df1 <- data.frame(loc)
df1$gaba <- as.factor(cl$gaba)
# Leaf labels recoded from 1/2 to 0/1. NOTE(review): this assumes leaf 2
# corresponds to gaba -- label alignment is by convention, confirm.
df1$classification <- as.factor(tmp - 1)
df1$correct_classification <- (df1$classification == df1$gaba)

# Confusion-cell indicators (both factors have levels "0"/"1").
predPos <- df1$classification == 1
truePos <- df1$gaba == 1
df1$TN <- !predPos & !truePos
df1$FN <- !predPos & truePos
df1$FP <- predPos & !truePos
df1$TP <- predPos & truePos
#df1$text <- links

# x-y scatter facetted by z-slice; shape flags misclassified points.
p1 <- ggplot(df1,
             aes(x = x, y = y, z = z, col = gaba,
                 shape = correct_classification)) +
  geom_point() +
  facet_wrap(~ z, ncol = 6)

p1

2.0.1 Same as above, with interactivity

# Interactive version of the facetted scatter plot above.
ggplotly(p1)
# Earlier experiment: a plotly scatter with per-point hover links
# (requires a `links` vector not defined in this chunk); kept for reference.
#p2 <- plot_ly(df1, x = ~x, y = ~y, color = ~gaba,
#             hoverinfo = 'text',
#               text = ~links)
#p2
#
#htmlwidgets::saveWidget(as_widget(p2), "links.html")

2.0.2 Confusion matrix (classification on rows)

# Confusion matrix: rows = hmc prediction (recoded to 0/1), columns = truth.
# Outer parentheses both assign and print the table.
(ta <- table(classification = tmp-1, df1$gaba))
##               
## classification   0   1
##              0 189   2
##              1  25  20

The above table shows that, of the 22 true gaba synapses, 2 were misclassified as non-gaba synapses.

4 RF

# Random forest on the standardized features with gaba as the response.
set.seed(317)
rfdat <- data.table(gaba = as.factor(cl$gaba), sf0)
rf1 <- randomForest(gaba ~ ., data = rfdat)

# Hold-out evaluation: 100 rows for training, the remainder for testing.
#set.seed(317)
set.seed(2^13)
train <- sample(nrow(rfdat), 100)
# seq_len() instead of 1:nrow(): identical here, but safe if rfdat were
# ever empty (1:0 would yield c(1, 0)).
test <- setdiff(seq_len(nrow(rfdat)), train)
# Class balance in the held-out set.
table(rfdat[test,]$gaba)
## 
##   0   1 
## 123  13
# Refit on the training subset only, keeping variable importance for the
# varImpPlot below.
rf2 <- randomForest(gaba ~ ., data = rfdat[train,], importance = TRUE)
print(rf2)
## 
## Call:
##  randomForest(formula = gaba ~ ., data = rfdat[train, ], importance = TRUE) 
##                Type of random forest: classification
##                      Number of trees: 500
## No. of variables tried at each split: 3
## 
##         OOB estimate of  error rate: 3%
## Confusion matrix:
##    0 1 class.error
## 0 91 0   0.0000000
## 1  3 6   0.3333333
# Predict on the held-out rows; -c('gaba') drops the response column via
# data.table's column exclusion in j.
rf2.pred <- predict(rf2, newdata = rfdat[test,-c('gaba')])

# Hold-out confusion matrix: rows = prediction, columns = truth.
table(rf2.pred, rfdat[test,]$gaba)
##         
## rf2.pred   0   1
##        0 122   2
##        1   1  11
# Variable importance (mean decrease in accuracy / Gini) for rf2.
varImpPlot(rf2)