1 MEDA run

Loading the class labels sent by Forrest

# Class labels: read the CSV, then discard row 18.
# NOTE(review): the reason for excluding row 18 is not recorded here -- confirm.
cl <- read.csv("cleft_class.csv")
cl <- cl[-18, ]

# Feature table without its first three columns, and a column-wise
# standardised copy (scale() centres and scales by default).
fmax <- read.csv("collman15v2_Fmax_20171219.csv")[, -(1:3)]
sfmax <- scale(fmax)

# Location table; used further down as the base of the plotting data frame.
loc <- read.csv("locations_collman15v2_Fmax_20171219.csv")

# Class balance of the two label columns.
table(cl$gaba)
## 
##   0   1 
## 214  22
table(cl$postgaba)
## 
##   0   1 
## 212  24
#ccol <- read.csv("params.csv")[1,-c(1,14)]
# Colour vector handed to the plotting helpers as `ccol`: a 12-entry palette
# (3 blue, 3 red, 2 black, 4 green) with its first three entries removed.
ccol <- rep(c("blue", "red", "black", "green"), times = c(3, 3, 2, 4))
ccol <- tail(ccol, -3)
# Run the exploratory steps in a fixed order and keep each result in the
# list L (slots 1-7). The RNG is seeded once before the first step and again
# immediately before hmc(), so the order of these statements matters for
# reproducibility.
set.seed(1234)

L <- list()
# Steps 1 and 6 are given the raw features (fmax); the others are given the
# standardised features (sfmax).
# NOTE(review): mlocation/d1heat/cumvar/outliers/pairhex/medacor/hmc are not
# defined in this file -- presumably they come from an attached package;
# confirm which one and what each returns.
L[[1]] <- mlocation(fmax, ccol = ccol)
L[[2]] <- d1heat(sfmax, ccol = ccol)
L[[3]] <- cumvar(sfmax)
L[[4]] <- outliers(sfmax)
L[[5]] <- pairhex(sfmax, ccol = ccol)
L[[6]] <- medacor(fmax, ccol = ccol)
# Re-seed so the clustering result does not depend on how much randomness the
# preceding steps consumed.
set.seed(1234)
# Hierarchical clustering with maxDepth = 2 and the "VVV" model.
L[[7]] <- hmc(sfmax, ccol = ccol,
              maxDepth = 2, modelNames = "VVV")
# Disabled plotting calls for the clustering result, kept for reference.
#plot(L[[7]])
#plotDend(L[[7]])
#stackM(L[[7]], ccol = ccol, centered = TRUE)

Note that the hierarchical clustering above uses VVV as the model (`modelNames = "VVV"`).

1.1 mlocation

1.2 d1heat

1.3 cumvar

1.4 outliers

1.5 pairhex

1.6 correlation

1.7 dendrogram

##      1      2 
## 0.5636 0.4364

1.8 pairs plots

1.9 stacked means

1.10 cluster means

2 Set \(K = 2\)

Here we restrict our hierarchical GMM function to one split only and then compare with the true gaba/non-gaba class labels.

# Fit the depth-2 hierarchical model with the "VVE" model under a fixed seed.
set.seed(1030)
#set.seed(1234)
h2 <- hmc(sfmax, ccol = ccol, maxDepth = 2, modelNames = "VVE")

# Cluster labels as stored on the fit (tmp) and the same labels shifted down
# by one (l2) for comparison against the 0/1 gaba labels.
tmp <- h2$dat$labels$col
l2 <- tmp - 1

# Observed adjusted Rand index between the shifted labels and the truth.
p0 <- mclust::adjustedRandIndex(l2, cl$gaba)

# Permutation distribution of the ARI: shuffle the predicted labels 1e4
# times, seeding each iteration separately (seed = 2 * i).
perms <- foreach(i = seq_len(1e4), .combine = c) %dopar% {
  set.seed(2 * i)
  mclust::adjustedRandIndex(sample(l2), cl$gaba)
}

# Pairs plot of the clustered data, coloured by predicted cluster.
pairs(h2$dat$data,
      main = "colored by prediction",
      col = viridis(max(tmp))[tmp],
      pch = 19, cex = 0.5)

# The same pairs plot, coloured by the true gaba label (0 -> darkblue,
# 1 -> violet).
pairs(h2$dat$data,
      main = "colored by truth",
      col = c("darkblue", "violet")[cl$gaba + 1],
      pch = 19, cex = 0.5)

plotDend(h2)

##      1      2 
## 0.5975 0.4025
stackM(h2, depth = 2, centered = TRUE, ccol = ccol)

# Histogram of the permuted ARI values with the observed ARI (p0) marked by a
# red vertical line.
# The x-range is taken over the permuted values together with a point 25% of
# |p0| to the right of p0. This matches c(min(perms), 1.25 * p0) whenever p0
# is positive and above every permuted value, and it still yields a valid,
# increasing axis that shows all bars and the red line when p0 <= 0 or when
# some permuted ARI exceeds 1.25 * p0.
hist(perms, xlim = range(c(perms, p0 + 0.25 * abs(p0))),
     main = "permutation test of ARI values", probability = TRUE)
#hist(perms, probability = TRUE)
abline(v = p0, col = "red")

# Per-synapse table: the location columns plus the true label, the predicted
# label (cluster label minus one) and whether the two agree.
df1 <- data.frame(loc)
df1[["gaba"]] <- as.factor(cl$gaba)
df1[["classification"]] <- as.factor(tmp - 1)
df1[["correct_classification"]] <-
  df1[["classification"]] == df1[["gaba"]]

# Outcome flags, treating label 1 as the positive class.
df1[["TN"]] <- with(df1, classification == 0 & gaba == 0)
df1[["FN"]] <- with(df1, classification == 0 & gaba == 1)
df1[["FP"]] <- with(df1, classification == 1 & gaba == 0)
df1[["TP"]] <- with(df1, classification == 1 & gaba == 1)
#df1$text <- links

# Scatter of x against y, one facet per z value (six facets per row); colour
# encodes the true label and shape encodes whether the prediction agreed.
p1 <- ggplot(
  df1,
  aes(x = x, y = y, z = z, col = gaba, shape = correct_classification)
) +
  geom_point() +
  facet_wrap(~ z, ncol = 6)

p1

2.0.1 same as above with interactivity

# Interactive version of the faceted scatter plot p1.
ggplotly(p1)
# Disabled alternative: a plot_ly scatter with hover text taken from `links`,
# saved as a standalone HTML widget. `links` appears only in commented-out
# code in this part of the file.
#p2 <- plot_ly(df1, x = ~x, y = ~y, color = ~gaba,
#             hoverinfo = 'text',
#               text = ~links)
#p2
#
#htmlwidgets::saveWidget(as_widget(p2), "links.html")

2.0.2 Confusion matrix (classification on rows)

# Cross-tabulate the predicted label (rows) against the true gaba label
# (columns), keep the table as `ta`, and print it.
ta <- table(classification = tmp - 1, df1$gaba)
ta
##               
## classification   0   1
##              0 140   1
##              1  74  21

The above table shows that, out of 22 true gaba synapses, 1 was misclassified as a non-gaba synapse; of the 214 true non-gaba synapses, 74 were classified as gaba.

4 RF

# Modelling table: the gaba label as a factor response next to the
# standardised features, and a forest fitted on all rows.
# rf1 is not referenced again in this section.
set.seed(317)
rfdat <- data.table(gaba = as.factor(cl$gaba), sfmax)
rf1 <- randomForest(gaba ~ ., data = rfdat)

# Hold-out split: 100 randomly chosen rows for training, the rest for testing.
#set.seed(317)
set.seed(2^13)
train <- sample(nrow(rfdat), 100)
test <- setdiff(seq_len(nrow(rfdat)), train)

# Class balance of the held-out rows.
table(rfdat$gaba[test])
## 
##   0   1 
## 123  13

# Forest fitted on the training rows only, with variable importance recorded.
rf2 <- randomForest(gaba ~ ., data = rfdat[train, ], importance = TRUE)
print(rf2)
## 
## Call:
##  randomForest(formula = gaba ~ ., data = rfdat[train, ], importance = TRUE) 
##                Type of random forest: classification
##                      Number of trees: 500
## No. of variables tried at each split: 3
## 
##         OOB estimate of  error rate: 9%
## Confusion matrix:
##    0 1 class.error
## 0 90 1  0.01098901
## 1  8 1  0.88888889

# Predict the held-out rows (response column removed) and cross-tabulate the
# predictions (rows) against the true labels (columns).
rf2.pred <- predict(rf2, newdata = rfdat[test, -c("gaba")])

table(rf2.pred, rfdat$gaba[test])
##         
## rf2.pred   0   1
##        0 123   7
##        1   0   6
varImpPlot(rf2)