From: joshua vogelstein jovo@jhu.edu Mon, Oct 8, 2018 at 6:19 PM To: Carey E Priebe cep@jhu.edu, Jesse Patsolic studiojlp@gmail.com, Benjamin Falk falk.ben@jhu.edu, joshua vogelstein jovo@jhu.edu
cep - EM provides “gold standard” labels in images; several different AT channels provide markers. Somebody ran an algorithm to detect synapses, and it missed many. AFAICT, it would be literally impossible for any algorithm to detect them, because the “information content” is not there. The data are (X_i, Y_i), i = 1, …, n, where Y_i is binary and X_i natively lives in L x W x H x C space (or something close to it: a few voxels per plane, a few planes, and about 10 channels). We have ways that we like to reduce this to merely C, that is, the number of channels. We have ways to reduce C to 2. We have about 1000 Y_i = 1 and can obtain, say, 1M Y_i = 0. Sometimes, however, when Y_i = 1, meaning the X_i’s should be relatively large, they are zero.
…, it suggests the following analysis: centered at each centroid, just sum up all the voxels within a cube, and call that t_i. Then, let’s plot the fraction of detected synapses with t_i > t, and vary t from 0 to max t_i. Let’s do that for each of the 2 “important channels”, and/or all the channels. If the magnitude is not large enough, there is no function of the cube that could possibly detect the synapse.
# First pass: every channel of every table gets its own threshold grid, running
# from 0 up to that column's maximum in steps of 0.1.
tsseq <- seq(from = 0, to = max(dats$synapsin), by = 0.1)
tpseq <- seq(from = 0, to = max(dats$psd), by = 0.1)
ntsseq <- seq(from = 0, to = max(datns$synapsin), by = 0.1)
ntpseq <- seq(from = 0, to = max(datns$psd), by = 0.1)

# For each threshold, the share of rows whose column value strictly exceeds it.
# tSyn and tPSD are reassigned further down on a grid shared by both channels.
tSyn <- vapply(
  tsseq,
  function(thr) sum(dats$synapsin > thr) / nrow(dats),
  numeric(1)
)
tPSD <- vapply(
  tpseq,
  function(thr) sum(dats$psd > thr) / nrow(dats),
  numeric(1)
)
tnSyn <- vapply(
  ntsseq,
  function(thr) sum(datns$synapsin > thr) / nrow(datns),
  numeric(1)
)
tnPSD <- vapply(
  ntpseq,
  function(thr) sum(datns$psd > thr) / nrow(datns),
  numeric(1)
)
#tsseq <- seq(0, max(dats$synapsin), by = 0.1)
#tpseq <- seq(0, max(dats$psd), by = 0.1)
# Curves for the `dats` table on ONE threshold grid shared by all channels, so
# the three curves can be drawn against a common t axis.
# NOTE(review): the grid stops at the larger single-channel maximum, so the
# "synapsin + PSD95" curve may be cut off before it reaches zero -- confirm
# that this truncation is intended.
tseq <- seq(0, max(dats$psd, dats$synapsin), by = 0.1)

# Fraction of rows whose value strictly exceeds each threshold. vapply() pins
# the result to one double per threshold.
tSyn <- vapply(
  tseq,
  function(x) sum(dats$synapsin > x) / nrow(dats),
  numeric(1)
)
tPSD <- vapply(
  tseq,
  function(x) sum(dats$psd > x) / nrow(dats),
  numeric(1)
)
# The column is spelled out in full: `dats$syn` only reached `synapsin` through
# partial matching of `$`, which returns NULL on a tibble and would silently
# turn this curve into all zeros.
tSP <- vapply(
  tseq,
  function(x) sum((dats$psd + dats$synapsin) > x) / nrow(dats),
  numeric(1)
)

# Long-format pieces, one per channel, for plotting.
d1 <- data.frame(t = tseq, ti = tSyn, Channel = "synapsin")
d2 <- data.frame(t = tseq, ti = tPSD, Channel = "PSD95")
d3 <- data.frame(t = tseq, ti = tSP, Channel = "synapsin + PSD95")
# Curves for the `datns` table on one threshold grid shared by all channels.
# The upper bound is taken from `datns` only; it previously mixed in
# `dats$synapsin`, so the grid length depended on the other table's maximum.
# NOTE(review): the grid stops at the larger single-channel maximum, so the
# "synapsin + PSD95" curve may be cut off before it reaches zero -- confirm
# that this truncation is intended.
ntseq <- seq(0, max(datns$psd, datns$synapsin), by = 0.1)

# Fraction of rows whose value strictly exceeds each threshold. vapply() pins
# the result to one double per threshold.
ntSyn <- vapply(
  ntseq,
  function(x) sum(datns$synapsin > x) / nrow(datns),
  numeric(1)
)
ntPSD <- vapply(
  ntseq,
  function(x) sum(datns$psd > x) / nrow(datns),
  numeric(1)
)
# The column is spelled out in full: `datns$syn` only reached `synapsin`
# through partial matching of `$`, which returns NULL on a tibble and would
# silently turn this curve into all zeros.
ntSP <- vapply(
  ntseq,
  function(x) sum((datns$psd + datns$synapsin) > x) / nrow(datns),
  numeric(1)
)

# Long-format pieces, one per channel, for plotting.
d4 <- data.frame(t = ntseq, ti = ntSyn, Channel = "synapsin")
d5 <- data.frame(t = ntseq, ti = ntPSD, Channel = "PSD95")
d6 <- data.frame(t = ntseq, ti = ntSP, Channel = "synapsin + PSD95")
# Stack the three per-channel pieces of each group and tag every row with the
# group it came from; `type` drives the facet rows below.
d0s <- data.table(
  rbind(d1, d2, d3),
  type = "synapse"
)
d0ns <- data.table(
  rbind(d4, d5, d6),
  type = "non-synapse"
)
d0 <- data.table(rbind(d0s, d0ns))

# One line per channel, one facet row per group. The y breaks are placed every
# 0.1 up to the largest plotted value.
# NOTE(review): `ti` holds the fraction of rows exceeding t, while the y label
# reads t_i -- confirm the label is what is wanted.
# NOTE(review): the dashed reference lines at 2.65 and 4.11 are hard-coded;
# where these values come from is not visible here.
ggplot(d0, aes(x = t, y = ti, colour = Channel, group = Channel)) +
  scale_y_continuous(breaks = seq(0, max(d0$ti), by = 0.1)) +
  geom_line(alpha = 0.75) +
  geom_vline(xintercept = 2.65, colour = "salmon", linetype = 2) +
  geom_vline(xintercept = 4.11, colour = "darkgreen", linetype = 2) +
  ylab(TeX("$t_i$")) +
  facet_grid(type ~ .)
#ggplot(data = d0, aes(x = t, y = ti, group = Channel, col = Channel)) +
# scale_y_continuous(breaks = seq(0, max(d0$ti), by = 0.1)) +
# geom_line(alpha = 0.75) +
# geom_vline(xintercept = 2.65, col = 'salmon', lty = 2) +
# geom_vline(xintercept = 4.11, col = 'darkgreen', lty = 2) +
# ylab(TeX("$t_i$"))
#
#ggplot(data = d0n, aes(x = t, y = ti, group = Channel, col = Channel)) +
# scale_y_continuous(breaks = seq(0, max(d0$ti), by = 0.1)) +
# geom_line(alpha = 0.75) +
# geom_vline(xintercept = 2.65, col = 'salmon', lty = 2) +
# geom_vline(xintercept = 4.11, col = 'darkgreen', lty = 2) +
# ylab(TeX("$t_i$"))
# Density-scaled histogram of the synapsin column of `dats`; `freq = FALSE` is
# the canonical spelling of the `probability = TRUE` alias.
hist(dats$synapsin, freq = FALSE)