Loading the class labels sent by Forrest
# Packages used throughout; meda (assumed here) provides mlocation, d1heat,
# cumvar, outliers, pairhex, medacor, hmc, plotDend, and stackM.
library(meda); library(mclust); library(foreach); library(doParallel)
library(viridis); library(ggplot2); library(plotly)
library(data.table); library(randomForest)
registerDoParallel()  # backend for %dopar%
cl <- read.csv("cleft_class.csv")[-18,]
fmax <- read.csv("collman15v2_Fmax_20171219.csv")[,-c(1:3)]
sfmax <- scale(fmax, center = TRUE, scale = TRUE)
loc <- read.csv("locations_collman15v2_Fmax_20171219.csv")
table(cl$gaba)
##
## 0 1
## 214 22
table(cl$postgaba)
##
## 0 1
## 212 24
#ccol <- read.csv("params.csv")[1,-c(1,14)]
ccol <- c('blue', 'blue', 'blue', 'red', 'red', 'red', 'black', 'black', 'green', 'green', 'green', 'green')
ccol <- ccol[-c(1:3)]
set.seed(1234)
L <- list()
L[[1]] <- mlocation(fmax, ccol = ccol)  # marginal location estimates
L[[2]] <- d1heat(sfmax, ccol = ccol)    # 1-d heatmap
L[[3]] <- cumvar(sfmax)                 # cumulative variance of PCs
L[[4]] <- outliers(sfmax)               # outlier detection
L[[5]] <- pairhex(sfmax, ccol = ccol)   # hex-binned pairs plot
L[[6]] <- medacor(fmax, ccol = ccol)    # feature correlation matrix
set.seed(1234)
L[[7]] <- hmc(sfmax, ccol = ccol,       # hierarchical GMM clustering
              maxDepth = 2, modelNames = "VVV")
#plot(L[[7]])
#plotDend(L[[7]])
#stackM(L[[7]], ccol = ccol, centered = TRUE)
Note that we are using VVV (ellipsoidal components with varying volume, shape, and orientation) as the covariance model.
## 1 2
## 0.5636 0.4364
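The three-letter mclust model codes can be decoded with a quick lookup; this check is not part of the original pipeline:
# "VVV" = ellipsoidal components, each with its own volume, shape, and orientation
mclust::mclustModelNames("VVV")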
Here we restrict our hierarchical GMM to a single split (maxDepth = 2, this time with the VVE model) and compare the resulting two clusters with the true GABA/non-GABA class labels via the adjusted Rand index (ARI), using a permutation test to assess whether the agreement exceeds chance.
set.seed(1030)
#set.seed(1234)
h2 <- hmc(sfmax, ccol = ccol, maxDepth = 2, modelNames = 'VVE')
l2 <- h2$dat$labels$col - 1
p0 <- mclust::adjustedRandIndex(l2, cl$gaba)
# Null distribution of the ARI under random reassignment of cluster labels
perms <- foreach(i = 1:1e4, .combine = c) %dopar% {
  set.seed(i * 2)
  mclust::adjustedRandIndex(sample(l2), cl$gaba)
}
tmp <- h2$dat$labels$col
pairs(h2$dat$data,
      col = viridis(max(tmp))[tmp],
      pch = 19, cex = 0.5, main = "colored by prediction")
pairs(h2$dat$data,
      col = c('darkblue', 'violet')[cl$gaba + 1],
      pch = 19, cex = 0.5, main = "colored by truth")
plotDend(h2)
## 1 2
## 0.5975 0.4025
stackM(h2, depth = 2, centered = TRUE, ccol = ccol)
hist(perms, xlim = c(min(perms), p0 + 0.25 * p0),
     main = "permutation test of ARI values", probability = TRUE)
#hist(perms, probability = TRUE)
abline(v = p0, col = 'red')
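The red line marks the observed ARI against the permutation null. The original code stops at the histogram; an empirical one-sided p-value follows directly from perms:
# Add-one empirical p-value: fraction of permuted ARIs at least as
# large as the observed value p0
(pval <- (sum(perms >= p0) + 1) / (length(perms) + 1))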
df1 <- data.frame(loc)
df1$gaba <- as.factor(cl$gaba)
df1$classification <- as.factor(tmp - 1)
df1$correct_classification <- (df1$classification == df1$gaba)
# Per-synapse confusion outcomes
df1$TN <- (df1$classification == 0 & df1$gaba == 0)
df1$FN <- (df1$classification == 0 & df1$gaba == 1)
df1$FP <- (df1$classification == 1 & df1$gaba == 0)
df1$TP <- (df1$classification == 1 & df1$gaba == 1)
#df1$text <- links
p1 <- ggplot(df1, aes(x = x, y = y, z = z, col = gaba,
                      shape = correct_classification)) +
  facet_wrap(~ z, ncol = 6) +
  geom_point()
p1
ggplotly(p1)
#p2 <- plot_ly(df1, x = ~x, y = ~y, color = ~gaba,
# hoverinfo = 'text',
# text = ~links)
#p2
#
#htmlwidgets::saveWidget(as_widget(p2), "links.html")
(ta <- table(classification = tmp - 1, df1$gaba))
##
## classification 0 1
## 0 140 1
## 1 74 21
The above table shows that, of the 22 true GABA synapses, one was misclassified as non-GABA.
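For completeness, sensitivity and specificity can be read off ta; a small added sketch, using the table as printed:
# Rows of ta are cluster labels, columns are true gaba labels
sens <- ta["1", "1"] / sum(ta[, "1"])  # 21/22: true GABA recovered
spec <- ta["0", "0"] / sum(ta[, "0"])  # 140/214: non-GABA recovered
c(sensitivity = sens, specificity = spec)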
Note that the centroids are calculated from the annotations, which means that a centroid might not lie in an area with annotated pixels.
| FalseNegatives |
| --- |
| 914, 3286, 19 |
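The location above can also be recovered programmatically from df1; a convenience sketch, assuming loc supplied the x, y, z columns used in the plots:
# Synapses the clustering called non-GABA that are truly GABA
df1[df1$FN, c("x", "y", "z")]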
set.seed(317)
rfdat <- data.table(gaba = as.factor(cl$gaba), sfmax)
rf1 <- randomForest(gaba ~ ., data = rfdat)
#set.seed(317)
set.seed(2^13)
train <- sample(nrow(rfdat), 100)
test <- setdiff(1:nrow(rfdat), train)
table(rfdat[test,]$gaba)
##
## 0 1
## 123 13
rf2 <- randomForest(gaba ~ ., data = rfdat[train,], importance = TRUE)
print(rf2)
##
## Call:
## randomForest(formula = gaba ~ ., data = rfdat[train, ], importance = TRUE)
## Type of random forest: classification
## Number of trees: 500
## No. of variables tried at each split: 3
##
## OOB estimate of error rate: 9%
## Confusion matrix:
## 0 1 class.error
## 0 90 1 0.01098901
## 1 8 1 0.88888889
rf2.pred <- predict(rf2, newdata = rfdat[test, -c('gaba')])  # drop the response column
table(rf2.pred, rfdat[test,]$gaba)
##
## rf2.pred 0 1
## 0 123 7
## 1 0 6
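A compact summary of the held-out performance, derived from the confusion table above (added here for reference):
cm <- table(rf2.pred, rfdat[test, ]$gaba)
c(accuracy    = sum(diag(cm)) / sum(cm),        # (123 + 6) / 136 ~ 0.95
  sensitivity = cm["1", "1"] / sum(cm[, "1"]),  # 6 / 13 ~ 0.46
  specificity = cm["0", "0"] / sum(cm[, "0"]))  # 123 / 123 = 1
The near-perfect specificity and low sensitivity mirror the class imbalance visible in the OOB confusion matrix.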
varImpPlot(rf2)