Load the class labels sent by Forrest.
# Per-synapse class labels; row 18 is dropped (presumably an excluded
# synapse — TODO confirm with the label provider).
cl <- read.csv("cleft_class.csv")[-18,]
# Feature matrix of tight synapse annotations (collman15v2 dataset).
f0 <- read.csv("collman15v2_tightAnnotationsF0.csv")
# Standardize every feature to mean 0 / sd 1 before clustering.
sf0 <- scale(f0, center = TRUE, scale = TRUE)
# Synapse centroid locations (columns x, y, z used in plots below).
loc <- read.csv("locationstest20171211.csv")
# Class balance of the two candidate label columns.
table(cl$gaba)
##
## 0 1
## 214 22
table(cl$postgaba)
##
## 0 1
## 212 24
#ccol <- read.csv("params.csv")[1,-c(1,14)]
# One color per feature column, grouping related channels for plotting.
ccol <- c('blue', 'blue', 'blue', 'red', 'red', 'red', 'black', 'black', 'green', 'green', 'green', 'green')
# Seed before the stochastic clustering pipeline for reproducibility.
set.seed(1030)
# runAll / mlocation are project helpers (not shown in this file);
# L[[1]] is replaced by the location plot on the unscaled features.
L <- runAll(sf0, ccol = ccol)
L[[1]] <- mlocation(f0, ccol = ccol)
## 1 21 22
## 0.7712 0.1314 0.0975
Here we restrict our hierarchical GMM function to one split only and then compare with the true gaba/non-gaba class labels.
# Reproducible fit: hierarchical GMM restricted to a single split
# (maxDepth = 2 gives exactly two leaf clusters).
set.seed(1030)
h2 <- hmc(sf0, maxDepth = 2)
# Recode the leaf labels to 0/1 so they align with the binary gaba coding.
l2 <- h2$dat$labels$col - 1
# Observed agreement between the 2-cluster solution and the gaba truth.
p0 <- mclust::adjustedRandIndex(l2, cl$gaba)
# Permutation null distribution: ARI of randomly shuffled cluster labels,
# 10,000 replicates. NOTE(review): %dopar% requires a registered parallel
# backend (e.g. doParallel) — confirm one is set up earlier in the project.
perms <- foreach(i = 1:1e4, .combine = c) %dopar% {
# Per-iteration seed so each parallel worker permutes reproducibly.
set.seed(i*2)
mclust::adjustedRandIndex(sample(l2), cl$gaba)
}
# Cluster assignments from the depth-2 fit (used as color indices below).
tmp <- h2$dat$labels$col
# Pairs plot of the clustered features, colored by predicted cluster.
pairs(h2$dat$data,
col = viridis(max(tmp))[tmp],
pch = 19, cex = 0.5, main = "colored by prediction")
# Same pairs plot, colored by the true gaba label for visual comparison.
pairs(h2$dat$data,
col = c('darkblue', 'violet')[cl$gaba +1],
pch = 19, cex = 0.5, main = "colored by truth")
# Dendrogram of the hierarchical fit (project helper).
plotDend(h2)
## 1 2
## 0.8093 0.1907
# Stacked cluster-mean display at depth 2 (project helper).
stackM(h2, depth = 2, centered = TRUE, ccol = ccol)
# Permutation null distribution of ARI with the observed value in red;
# xlim is padded 25% past p0 so the observed line stays visible.
hist(perms, xlim = c(min(perms), p0 + 0.25*p0),
main = "permutation test of ARI values")
abline(v = p0, col = 'red')
# Combine synapse centroid locations with the truth labels and the
# hierarchical-GMM predictions, then visualize agreement per z-slice.
df1 <- data.frame(loc)
df1$gaba <- as.factor(cl$gaba)
df1$classification <- as.factor(tmp - 1)
df1$correct_classification <- df1$classification == df1$gaba
# Confusion-matrix indicator columns; gaba == 1 is the positive class.
df1$TN <- df1$classification == 0 & df1$gaba == 0
df1$FN <- df1$classification == 0 & df1$gaba == 1
df1$FP <- df1$classification == 1 & df1$gaba == 0
df1$TP <- df1$classification == 1 & df1$gaba == 1
#df1$text <- links
# Centroid scatter faceted by z-slice; shape marks correct predictions.
p1 <- ggplot(df1,
             aes(x = x, y = y, z = z,
                 col = gaba, shape = correct_classification)) +
  facet_wrap(~ z, ncol = 6) +
  geom_point()
p1
ggplotly(p1)
# Interactive plotly version with per-point hover links (kept for reference).
#p2 <- plot_ly(df1, x = ~x, y = ~y, color = ~gaba,
# hoverinfo = 'text',
# text = ~links)
#p2
#
#htmlwidgets::saveWidget(as_widget(p2), "links.html")
# Confusion table: predicted cluster (rows) vs true gaba label (columns).
# Outer parentheses print the assigned table.
(ta <- table(classification = tmp-1, df1$gaba))
##
## classification 0 1
## 0 189 2
## 1 25 20
The above table shows that out of 22 true gaba synapses 2 were mis-classified as non-gaba synapses.
Note that the centroids are calculated from the annotations, which means that a centroid might not lie in an area with annotated pixels.
FalseNegatives |
---|
3826, 2554, 18 |
4065, 2605, 15 |
# Random-forest classification of gaba from the scaled features.
set.seed(317)
rfdat <- data.table(gaba = as.factor(cl$gaba), sf0)
# Fit on the full data set. NOTE(review): rf1 is never used below —
# confirm whether this full-data fit is still needed or can be removed.
rf1 <- randomForest(gaba ~ ., data = rfdat)
#set.seed(317)
# Fresh seed for the hold-out split: 100 training rows, the rest test.
set.seed(2^13)
train <- sample(nrow(rfdat), 100)
test <- setdiff(1:nrow(rfdat), train)
# Class balance of the held-out test set.
table(rfdat[test,]$gaba)
##
## 0 1
## 123 13
# Refit on the training subset only, keeping variable importances.
rf2 <- randomForest(gaba ~ ., data = rfdat[train,], importance = TRUE)
print(rf2)
##
## Call:
## randomForest(formula = gaba ~ ., data = rfdat[train, ], importance = TRUE)
## Type of random forest: classification
## Number of trees: 500
## No. of variables tried at each split: 3
##
## OOB estimate of error rate: 3%
## Confusion matrix:
## 0 1 class.error
## 0 91 0 0.0000000
## 1 3 6 0.3333333
# Held-out predictions; -c('gaba') drops the response column
# (data.table column-drop syntax).
rf2.pred <- predict(rf2, newdata = rfdat[test,-c('gaba')])
# Test-set confusion matrix: predictions (rows) vs truth (columns).
table(rf2.pred, rfdat[test,]$gaba)
##
## rf2.pred 0 1
## 0 122 2
## 1 1 11
# Variable-importance plot for the trained forest.
varImpPlot(rf2)