In this notebook, we compare the performance of LOL to that of several other linear embedding techniques across the problems from the UCI and PMLB repositories that qualify as high-dimensional, low sample size (HDLSS), with \(d > 100\). The data below were collected with \(k=50\)-fold cross-validation: the testing set was rotated across all folds, with the training set comprising the remaining \(k-1=49\) folds. As only a handful of the problems natively satisfy the HDLSS condition \(n < d\), we subsample each training set to \(\textrm{min}\left(n\frac{k-1}{k}, d-1\right)\) samples, where \(n\) is the number of provided samples, \(\frac{k-1}{k} = \frac{49}{50}\) is the fraction of samples available for training, and \(d\) is the native dimensionality of the data. This ensures that all examples shown below are on HDLSS data. The datasets were run using the real data driver script.
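As a concrete sketch of the subsampling rule (the values of \(n\), \(d\), and \(k\) below are hypothetical, not drawn from any of the datasets):

# hypothetical problem: n = 2000 provided samples, d = 300 native dimensions, k = 50 folds
n.hyp <- 2000; d.hyp <- 300; k.hyp <- 50
ntrain.fold <- n.hyp * (k.hyp - 1) / k.hyp  # samples available for training per fold: 1960
ntrain.sub <- min(ntrain.fold, d.hyp - 1)   # subsampled training size: 299, so ntrain.sub < d.hyp (HDLSS)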

require(lolR)
require(ggplot2)
require(latex2exp)
require(MASS)
require(gridExtra)
require(data.table)
require(reshape2)
require(R.matlab)
require(grid)
require(plyr)
require(slb)
require(scales)
require(stringr)
require(ggbeeswarm)
classifier.name <- "lda"
opath <- './data/real_data'
repo.name <- "uci"

# compute the cutoff for the particular trial to get an approximate elbow
# by computing the smallest r with an associated lhat within 5%
# of the global minimum lhat
compute_cutoff <- function(rs, lhats, t=0.05) {
  # keep only entries where both r and lhat are observed
  keep <- complete.cases(rs) & complete.cases(lhats)
  rs <- rs[keep]; lhats <- lhats[keep]
  # compute minimum value
  min.lhat <- min(lhats)
  # compute minimum value + 5%
  lhat.thresh <- (1 + t)*min.lhat
  # find which indices are all below this
  lhat.below <- which(lhats <= lhat.thresh)
  rs.below <- rs[lhat.below]; lhats.below <- lhats[lhat.below]
  # take the smallest r among those within the threshold
  tmin.ix <- which.min(rs.below)
  return(list(r=rs.below[tmin.ix], lhat=lhats.below[tmin.ix]))
}
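As a quick sanity check of this rule on made-up values (not results from the benchmark), the cutoff returns the smallest \(r\) whose error is within \(5\%\) of the best:

compute_cutoff(rs=c(1, 10, 40, 80), lhats=c(0.40, 0.21, 0.20, 0.22))
# the global minimum is 0.20, so the threshold is 0.21; r=10 is the smallest
# embedding dimension meeting it, giving list(r=10, lhat=0.21)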

w=.8
h=.2
plot_sim_lhats <- function(data_sub, data_sub.optimalr, cols, linetype, shape, title="", from=10, ylab=TeX("$\\hat{L}$"),
                           xlab="Embedded Dimensions", fsize=12, length.out=3) {
  lims <- c(floor(10*min(data_sub.optimalr$lhat.alg, na.rm=TRUE))/10, ceiling(10*max(data_sub.optimalr$lhat.alg, na.rm=TRUE))/10)
  if (is.na(sum(lims))) {
    return(ggplot())
  }
  tryCatch({
    breaks = unique(round(seq(from=lims[1], to=lims[2], length.out = length.out), digits=1))
    xlims <- c(min(data_sub$r.alg, na.rm=TRUE), max(data_sub$r.alg, na.rm=TRUE))
    xbreaks <- round(seq(from=from, to=xlims[2], length.out=length.out))
    plot_sims <- ggplot(data_sub, aes(x=r.alg, y=lhat.alg, linetype=alg, shape=alg, fill=alg, color=alg)) +
      stat_summary(size=.95, fun.y="mean", geom="line") +
      stat_summary(fun.data = mean_cl_normal, geom = "errorbar", fun.args = list(mult = 1)) +
      geom_point(data=data_sub.optimalr, size=2) +
      scale_color_manual(values=cols, limits=names(cols),
                         guide=guide_legend(nrow=2, byrow=TRUE), name="Algorithm") +
      scale_fill_manual(values=cols, limits=names(cols),
                        guide=guide_legend(nrow=2, byrow=TRUE), name="Algorithm") +
      scale_shape_manual(values=shape, limits=names(cols),
                         guide=guide_legend(nrow=2, byrow=TRUE), name="Algorithm") +
      scale_linetype_manual(values=linetype, limits=names(cols),
                         guide=guide_legend(nrow=2, byrow=TRUE), name="Algorithm") +
      xlab(xlab) +
      ylab(ylab) +
      ggtitle(title) +
      theme_bw() +
      coord_cartesian(ylim=lims) +
      scale_x_continuous(limits=xlims, breaks=xbreaks) +
      theme(plot.margin = unit(c(h,w,h,h), "cm")) +
      theme(legend.position="bottom", text=element_text(size=fsize))
    return(plot_sims)
  }, error=function(e) {return(ggplot())})
}

plot_sim_scatter <- function(data.sub.foldwise, cols, shape, title="",
                             ylab=TeX("$\\frac{\\hat{L}_{LOL} - \\hat{L}_{alg}}{\\hat{L}_{chance}}$"), psize=1,
                             xlab="Algorithm", fsize=12) {
  tryCatch({
      plot_sims <- ggplot(data=subset(data.sub.foldwise, alg != "LOL"), aes(x=alg, y=lhat.norm, color=alg, group=alg)) +
        geom_beeswarm(alpha=0.5, size=psize) +
        xlab("Algorithm") +
        scale_color_manual(values=cols, limits=names(cols),
                           guide=guide_legend(nrow=2, byrow=TRUE), name="Algorithm") +
        scale_fill_manual(values=cols, limits=names(cols),
                          guide=guide_legend(nrow=2, byrow=TRUE), name="Algorithm") +
        scale_shape_manual(values=shape, limits=names(cols),
                           guide=guide_legend(nrow=2, byrow=TRUE), name="Algorithm") +
        xlab(xlab) +
        ylab(ylab) +
        ggtitle(title) +
        theme_bw() +
        theme(plot.margin = unit(c(h,w,h,h), "cm")) +
        theme(legend.position="bottom", text=element_text(size=fsize))
      return(plot_sims)
  }, error=function(e) {return(ggplot())})
}



g_legend<-function(a.gplot){
  tmp <- ggplot_gtable(ggplot_build(a.gplot))
  leg <- which(sapply(tmp$grobs, function(x) x$name) == "guide-box")
  legend <- tmp$grobs[[leg]]
  return(legend)
}

Results Loading

results <- readRDS(file.path(opath, paste(classifier.name, "_results.rds", sep="")))
# filter out bad rows
results <- results[complete.cases(results$lhat) & !(is.infinite(results$lhat)) & complete.cases(results),]
exp_names <- unique(as.character(results$exp))
# make sure columns of interest are numeric
numeric.cols <- c("d", "n", "ntrain", "K", "fold", "r", "lhat")
for (col in numeric.cols) {
  results[[col]] <- as.numeric(results[[col]])
  if (col == "ntrain") {  # make sure that ntrain is constant across experiments, due to situation where one fold has fewer examples than another
    for (exp in exp_names) {
      results[results$exp == exp,]$ntrain = median(results[results$exp == exp,]$ntrain, na.rm=TRUE)
    }    
  }
}
results <- results[results$d > 100 & results$ntrain > 50,]
# filter out duplicate experiments
results <- results[!(results$exp %in% c("splice", "promoters", "molecular-biology_promoters")),]
nan.mean <- function(x) {mean(x[!is.infinite(x)], na.rm=TRUE)}
results.means <- aggregate(lhat ~ exp + alg + r + d + n + ntrain + K, data = results, FUN = nan.mean)
random.results <- aggregate(lhat ~ exp + alg, data=subset(results, alg == "RandomGuess"), FUN=mean)
#algs <-  c("LOL", "QOQ", "PLS", "PLSOL", "PLSOLK", "CCA", "LDA", "PCA", "RP")
#acols <- c("#00FF00", "#00FF00", "#990000", "#990000", "#990000", "#AAAA55", "#000099", "#000099", "#000099")
#linestyle <- c("solid", "dashed", "solid", "dashed", "dotted", "solid", "solid", "dashed", "dotted")
algs <-  c("LOL", "PLS", "CCA", "LRLDA", "PCA", "RP")
acols <- c("#00FF00", "#990000", "#AAAA55", "#000099", "#d06900", "#800080")
linestyle <- c("solid", "dashed", "solid","solid", "solid", "solid")
names(linestyle) <- algs
names(algs) <- acols
names(acols) <- algs
#shapes <- c(21, 24, 21, 24, 23, 23, 21, 24, 23)
shapes <- c(21, 24, 21, 24, 23, 23)
names(shapes) <- algs

Analysis

WRT LOL

#nan.median <- function(x) median(x, na.rm=TRUE)  
lhat.mean <- 1
#results.medians <- aggregate(lhat ~ exp + alg + r + d + n + K, data = results, FUN = nan.median)
results.optimalr <- data.frame()
results.overall <- data.frame()
for (i in 1:length(exp_names)) {
  r.max <- max(results.means[results.means$exp == as.character(exp_names[i]),]$r)
  ss.chance <- results[results$alg == "RandomGuess" & results$exp == as.character(exp_names[i]),]
  colnames(ss.chance)[colnames(ss.chance) == "lhat"] = "lhat.chance"
  for (j in 1:length(algs)) {
    tryCatch({
      alg <- as.character(algs[j])
      ss <- results[results$exp == as.character(exp_names[i]) & results$alg == algs[j],]
      ss.means <- aggregate(lhat ~ r + n + d, data=ss, FUN = nan.mean)
      rs <- ss.means$r; lhats <- ss.means$lhat
      min.result <- compute_cutoff(rs, lhats)
      r.star <- min.result$r
      ss.optimalr <- results[results$exp == exp_names[i] & results$alg == algs[j] & results$r == r.star,]
      if (alg == 'LOL') {
        r.lol <- r.star
        lol.ss <- results[results$exp == exp_names[i] & results$alg == algs[j],]
        colnames(lol.ss)[colnames(lol.ss) == "lhat"] = "lhat.lol"
        lol.optimalr <- ss.optimalr
        colnames(lol.optimalr)[colnames(lol.optimalr) == "lhat"] = "lhat.lol"
        colnames(lol.optimalr)[colnames(lol.optimalr) == "rstar.norm"] = "rstar.norm.lol"
      }
      colnames(ss)[colnames(ss) == "lhat"] = "lhat.alg"
      colnames(ss.optimalr)[colnames(ss.optimalr) == "lhat"] = "lhat.alg"
      ss.merged <- merge(ss, lol.ss, by=c("exp", "fold", "n", "d", "K", "ntrain", "repo", "xv"))
      ss.merged <- merge(ss.merged, ss.chance, by=c("exp", "fold", "n", "d", "K", "ntrain", "repo", "xv"), all=TRUE)
      
      ss.optimalr <- merge(ss.optimalr, lol.optimalr, by=c("exp", "fold", "n", "d", "K", "ntrain", "repo", "xv"))
      ss.optimalr <- merge(ss.optimalr, ss.chance, by=c("exp", "fold", "n", "d", "K", "ntrain", "repo", "xv"), all=TRUE)
      
      
      colnames(ss.merged)[colnames(ss.merged) == "r.x"] = "r.alg"
      colnames(ss.optimalr)[colnames(ss.optimalr) == "r.x"] = "r.alg"
      colnames(ss.merged)[colnames(ss.merged) == "r.y"] = "r.lol"
      colnames(ss.optimalr)[colnames(ss.optimalr) == "r.y"] = "r.lol"
      colnames(ss.merged)[colnames(ss.merged) == "alg"] <- "chance"
      colnames(ss.merged)[colnames(ss.merged) == "alg.x"] <- "alg"
      colnames(ss.optimalr)[colnames(ss.optimalr) == "alg"] <- "chance"
      colnames(ss.optimalr)[colnames(ss.optimalr) == "alg.x"] <- "alg"
      ss.merged$lhat.norm <- (ss.merged$lhat.lol - ss.merged$lhat.alg)/ss.merged$lhat.chance
      ss.merged$r.max <- r.max
      
      ss.optimalr$lhat.norm <- (ss.optimalr$lhat.lol - ss.optimalr$lhat.alg)/ss.optimalr$lhat.chance
      ss.optimalr$rstar.norm <- (ss.optimalr$r.lol - ss.optimalr$r.alg)/r.max
      ss.optimalr$r.max <- r.max
      results.overall <- rbind(results.overall, ss.merged)
      results.optimalr <- rbind(results.optimalr, ss.optimalr)
    }, error=function(e) {print(NULL)})
  }
}
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
results.exp.overall <- aggregate(list(lhat.norm=results.overall$lhat.norm, lhat.alg=results.overall$lhat.alg,
                                       lhat.lol=results.overall$lhat.lol, lhat.chance=results.overall$lhat.chance, r.max=results.overall$r.max),
                                  by=list(exp=results.overall$exp, n=results.overall$n, K=results.overall$K,
                                          ntrain=results.overall$ntrain, d=results.overall$d,
                                          repo=results.overall$repo, r.alg=results.overall$r.alg,
                                          alg=results.overall$alg), FUN=nan.mean)

results.exp.optimalr <- aggregate(list(lhat.norm=results.optimalr$lhat.norm, lhat.alg=results.optimalr$lhat.alg,
                                       lhat.lol=results.optimalr$lhat.lol, lhat.chance=results.optimalr$lhat.chance,
                                       rstar.norm=results.optimalr$rstar.norm, r.max=results.optimalr$r.max),
                                  by=list(exp=results.optimalr$exp, n=results.optimalr$n, K=results.optimalr$K,
                                          ntrain=results.optimalr$ntrain, d=results.optimalr$d,
                                          repo=results.optimalr$repo, r.alg=results.optimalr$r.alg,
                                          alg=results.optimalr$alg), FUN=nan.mean)

Per-Dataset Plots

Given algorithm \(a\), let \(L_{a, j, i}(r)\) denote the \(i^{th}\) fold's misclassification rate on dataset \(j\) when the data are embedded into \(r \in [1, ..., p]\) dimensions, where \(p\) is the native dimensionality:

\[\begin{align*} \bar{L}_{a, j}(r) = \textrm{mean}_{i \in [1, ..., k]}\left\{L_{a, j, i}(r)\right\} \end{align*}\]

A single dot is produced where:

\[\begin{align*} r^*_{a, j} = \textrm{optimal argmin}_r\left\{\bar{L}_{a, j}(r)\right\} \end{align*}\]

which is defined as the optimal number of embedding dimensions. We use \(\textrm{optimal argmin}\) here to indicate the smallest \(r\) for which \(\bar{L}_{a, j}(r)\) is within \(5\%\) of the global minimum. The optimal misclassification rate is then \(\bar{L}_{a, j}\left(r^*_{a, j}\right)\).
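As a hypothetical worked example (mirroring the toy call to compute_cutoff above): if \(\bar{L}_{a, j}(r)\) takes the values \(0.40, 0.21, 0.20, 0.22\) at \(r = 1, 10, 40, 80\), then the global minimum is \(0.20\), the \(5\%\) threshold is \(1.05 \times 0.20 = 0.21\), and the smallest \(r\) within the threshold is \(r^*_{a, j} = 10\), with optimal misclassification rate \(\bar{L}_{a, j}\left(r^*_{a, j}\right) = 0.21\).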

plots.curves <- list()
plots.scatters <- list()
for (i in 1:length(exp_names)) {
  exp <- exp_names[i]
  data_sub <- results.overall[results.overall$exp == exp,]
  data_sub.optimalr <- results.exp.optimalr[results.exp.optimalr$exp == exp,]
  data_sub.optimalr.foldwise <- results.optimalr[results.optimalr$exp == exp,]
  tryCatch({
    plots.curves[[i]] <- plot_sim_lhats(data_sub, data_sub.optimalr, acols, linestyle, shapes, ylab="",
                                 title=sprintf("Exp %d, K=%d, n=%d, p=%d", i, data_sub[1,]$K, data_sub[1,]$n, data_sub[1,]$d), 
                                 from=1, fsize = 7)
    plots.scatters[[i]] <- plot_sim_scatter(data_sub.optimalr.foldwise, acols, shapes,
                                 title=sprintf("Exp %d, K=%d, n=%d, p=%d", i, data_sub[1,]$K, data_sub[1,]$n, data_sub[1,]$d),
                                 fsize=12)
    
  }, error=function(e){NaN})
}

curves.plot_leg <- g_legend(plots.curves[[1]])
scatters.plot_leg <- g_legend(plots.scatters[[1]])
plots.curves <- lapply(plots.curves, function(plot) plot + theme(legend.position="none") + xlab("") + ylab(""))
plots.curves[[1]] <- plots.curves[[1]] + xlab("Embedded Dimensions") + ylab("Misclassification Rate")
plots.scatters <- lapply(plots.scatters, function(plot) plot + theme(legend.position="none") + xlab("") + ylab(""))
plots.scatters[[1]] <- plots.scatters[[1]] + xlab("Algorithm") +
  ylab(TeX("Normalized Misclassification Rate"))

grid.arrange(arrangeGrob(grobs=plots.curves, nrow=floor(sqrt(length(plots.curves)))), curves.plot_leg, nrow=2, heights=c(0.8, .15))

grid.arrange(arrangeGrob(grobs=plots.scatters, nrow=floor(sqrt(length(plots.scatters)))), scatters.plot_leg, nrow=2, heights=c(0.8, .15))

Quadrant Plot

For algorithm \(a\) and problem \(j\), the normalized embedding dimension:

\[\begin{align*} \left|\left|r^*_{a, j}\right|\right| = \frac{r^*_{LOL, j} - r^*_{a, j}}{p} \end{align*}\]

with normalized misclassification rate per-dataset:

\[\begin{align*} \left|\left|\bar{L}_{a, j}\right|\right| = \textrm{mean}_{i \in [1, ..., k]}\left\{\frac{L_{LOL, j, i}\left(r^*_{LOL, j}\right) - L_{a, j, i}\left(r^*_{a, j}\right)}{L_{chance, j, i}}\right\} \end{align*}\]

where the \(chance\) classifier is simply the classifier that guesses the “most-present” class (the class with the highest prior) in the particular fold.
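As a hypothetical numerical illustration of these normalizations: if \(r^*_{LOL, j} = 10\), \(r^*_{a, j} = 40\), and \(p = 200\), then \(\left|\left|r^*_{a, j}\right|\right| = (10 - 40)/200 = -0.15\); and if, on one fold, the LOL, competitor, and chance error rates are \(0.20\), \(0.25\), and \(0.50\), that fold contributes \((0.20 - 0.25)/0.50 = -0.1\) to \(\left|\left|\bar{L}_{a, j}\right|\right|\). Points falling below and to the left of the origin therefore indicate that LOL attains a lower misclassification rate using fewer embedding dimensions than the competitor.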

make_marginal_2d <- function(data, xlims, ylims, plot.title="", xl="", yl="", leg.title="",
                             legend.style=guide_legend(ncol=2, byrow=TRUE)) {
  data$exp <- factor(data$exp)
  box <- data.frame(x=c(max(xlims), mean(xlims), mean(xlims), max(xlims)),
                    y=c(max(ylims), max(ylims), mean(ylims), mean(ylims)))
  center <- ggplot(data, aes(x=rstar.norm, y=lhat.norm)) +
    geom_polygon(data=box, aes(x=x, y=y), fill='red', alpha=0.15) +
    geom_point(aes(x=rstar.norm, y=lhat.norm, shape=alg, fill=alg), alpha=0.6, size=1.2) +
    scale_fill_manual(values=acols, guide=legend.style, name=leg.title) +
    scale_shape_manual(values=shapes, guide=legend.style, name=leg.title) +
    ylab(yl) +
    xlab(xl) +
    labs(shape="Simulation", color="Algorithm") +
    ggtitle(plot.title) +
    scale_y_continuous(limits=ylims) +
    scale_x_continuous(limits=xlims) +
    theme_bw()
  center_leg <- ggplot(data, aes(x=rstar.norm, y=lhat.norm)) +
    geom_polygon(data=box, aes(x=x, y=y), fill='red', alpha=0.15) +
    geom_point(aes(x=rstar.norm, y=lhat.norm, shape=alg, fill=alg), size=2) +
    scale_fill_manual(values=acols, guide=legend.style, name=leg.title) +
    scale_shape_manual(values=shapes, guide=legend.style, name=leg.title) +
    ylab(yl) +
    xlab(xl) +
    labs(shape="Simulation", color="Algorithm") +
    ggtitle(plot.title) +
    scale_y_continuous(limits=ylims) +
    scale_x_continuous(limits=xlims) +
    theme_bw()
  leg <- g_legend(center_leg)
  center <- center + theme(legend.position="none")
  right <- ggplot(data, aes(x=lhat.norm, y=..scaled.., color=alg, linetype=alg)) +
    geom_density() +
    scale_color_manual(values=acols, guide=legend.style, name=leg.title) +
    scale_fill_manual(values=acols, guide=legend.style, name=leg.title) +
    scale_linetype_manual(values=linestyle, guide=legend.style, name=leg.title) +
    scale_x_continuous(limits=ylims) +
    ylab("") +
    xlab("") +
    ggtitle("") +
    theme_bw() +
    theme(legend.position="none") +
    coord_flip()
  top <- ggplot(data, aes(x=rstar.norm, y=..scaled.., color=alg, linetype=alg)) +
    geom_density() +
    scale_color_manual(values=acols, guide=legend.style, name=leg.title) +
    scale_linetype_manual(values=linestyle, guide=legend.style, name=leg.title) +
    scale_x_continuous(limits=xlims) +
    ylab("") +
    xlab("") +
    ggtitle("") +
    theme_bw() + 
    theme(legend.position="none")
  return(arrangeGrob(top, leg, center + theme(legend.position="none"), right, ncol=2, nrow=2, widths=c(4,1.5), heights=c(2,4)))
}

grid.arrange(make_marginal_2d(subset(results.exp.optimalr, !(alg %in% c("LOL"))),
                              c(-1, 1), c(-.15, .15), plot.title="Real Data Results", leg.title="Algorithm",
                              xl="Normalized Embedding Dimension", yl="Normalized Misclassification Rate"))

Aggregated over Datasets Plot

The average misclassification rate per-algorithm aggregated over all problems at \(r_{a, j}^*\):

results.exp.optimalr.means <- aggregate(list(lhat.alg=results.exp.optimalr$lhat.alg, r.alg=results.exp.optimalr$r.alg),
                                        by=list(alg=results.exp.optimalr$alg), FUN=nan.mean)
print("Optimal")
## [1] "Optimal"
for (j in 1:length(algs)) {
  print(sprintf("Mean Lhat for %s: %.3f", algs[j], results.exp.optimalr.means[results.exp.optimalr.means$alg == algs[j],]$lhat.alg))
  print(sprintf("Mean rstar for %s: %.3f", algs[j], results.exp.optimalr.means[results.exp.optimalr.means$alg == algs[j],]$r.alg))
}
## [1] "Mean Lhat for LOL: 0.179"
## [1] "Mean rstar for LOL: 33.647"
## [1] "Mean Lhat for PLS: 0.177"
## [1] "Mean rstar for PLS: 13.235"
## [1] "Mean Lhat for CCA: 0.261"
## [1] "Mean rstar for CCA: 131.765"
## [1] "Mean Lhat for LRLDA: 0.209"
## [1] "Mean rstar for LRLDA: 150.706"
## [1] "Mean Lhat for PCA: 0.177"
## [1] "Mean rstar for PCA: 57.235"
## [1] "Mean Lhat for RP: 0.231"
## [1] "Mean rstar for RP: 80.176"
ggplot(results.exp.optimalr.means, aes(x=r.alg, y=lhat.alg, shape=alg, fill=alg)) +
    geom_point(size=2) +
    scale_fill_manual(values=acols, guide=guide_legend(ncol=2, byrow=TRUE), name="Algorithm") +
    scale_shape_manual(values=shapes, guide=guide_legend(ncol=2, byrow=TRUE), name="Algorithm") +
    ylab("Misclassification Rate") +
    xlab("Embedding Dimension") +
    labs(shape="Simulation", color="Algorithm") +
    ggtitle("Real Data Average Results") +
    theme_bw()

Heatmaps

We use a one-sided Wilcoxon signed-rank test for the following two hypotheses for algorithms \(u\) and \(v\) per dataset \(j\):

\[\begin{align*} H_0^{r, u, v}: r^*_{u, j} \geq r^*_{v, j} \\ H_A^{r, u, v}: r^*_{u, j} < r^*_{v, j} \end{align*}\]

that the number of embedding dimensions on a single dataset is lower for algorithm \(u\) than algorithm \(v\), and:

\[\begin{align*} H_0^{L, u, v}: \bar{L}_{u, j}\left(r^*_{u, j}\right) \geq \bar{L}_{v, j}\left(r^*_{v, j}\right) \\ H_A^{L, u, v}: \bar{L}_{u, j}\left(r^*_{u, j}\right) < \bar{L}_{v, j}\left(r^*_{v, j}\right) \end{align*}\]

that the misclassification rate on a single dataset is lower for algorithm \(u\) than algorithm \(v\).
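A minimal sketch of how one such one-sided paired comparison is computed (the per-dataset values below are made up for illustration, not taken from the results):

# hypothetical optimal embedding dimensions for algorithms u and v across 8 datasets
rstar.u <- c(5, 12, 20, 8, 15, 30, 10, 25)
rstar.v <- c(40, 36, 22, 50, 19, 90, 33, 61)
# alternative="less" encodes H_A that r*_u is systematically smaller than r*_v
wilcox.test(rstar.u, rstar.v, alternative="less", paired=TRUE)$p.value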

rhat.test <- data.frame(x=c(), y=c(), p=c())
lhat.test <- data.frame(x=c(), y=c(), p=c())
for (i in 1:length(algs)) {
  i.ss <- results.exp.optimalr[results.exp.optimalr$alg == algs[i],]
  for (j in 1:length(algs)) {
    tryCatch({
      if (algs[i] == algs[j]) {
        rhat.test <- rbind(rhat.test, data.frame(x=algs[i], y=algs[j], p=NaN))
        lhat.test <- rbind(lhat.test, data.frame(x=algs[i], y=algs[j], p=NaN))
      } else {
        j.ss <- results.exp.optimalr[results.exp.optimalr$alg == algs[j],]
        cmp <- merge(i.ss, j.ss, by=c("exp"), all=TRUE)
        rhat.test <- rbind(rhat.test, data.frame(x=algs[j], y=algs[i], p=wilcox.test(cmp$r.alg.x, cmp$r.alg.y,
                                                                                     alternative = "less", paired=TRUE)$p.value))
        lhat.test <- rbind(lhat.test, data.frame(x=algs[j], y=algs[i], p=wilcox.test(cmp$lhat.alg.x/cmp$lhat.chance.x, cmp$lhat.alg.y/cmp$lhat.chance.y,
                                                                                     alternative = "less", paired=TRUE)$p.value))
      }
    }, error=function(e){NaN})
  }
}

lhat.test$x <- factor(lhat.test$x, levels = algs); lhat.test$y <- factor(lhat.test$y, levels = algs)
rhat.test$x <- factor(rhat.test$x, levels = algs); rhat.test$y <- factor(rhat.test$y, levels = algs)
rhat.test$p[rhat.test$p < .001] = .001
lhat.test$p[lhat.test$p < .001] = .001
lhat.hmap <- ggplot(lhat.test, aes(x=x, y=y, fill=p)) +
  geom_tile() +
  geom_text(aes(label = round(p, 3))) +
  scale_fill_gradientn(name=TeX("$p$-value"), trans="log", breaks=c(0.001, 0.01, 0.1, 1),
                       colours=rev(c("#f2f0f7", "#cbc9e2", "#9e9ac8", "#6a51a3")),
                       limits=c(0.001, 1)) +
  ggtitle(TeX("Test of whether $\\hat{L}_{u, j}^* < \\hat{L}_{v, j}^*$")) +
  xlab("Algorithm u") +
  ylab("Algorithm v")

rhat.hmap <- ggplot(rhat.test, aes(x=x, y=y, fill=p)) +
  geom_tile() +
  geom_text(aes(label = round(p, 3))) +
  scale_fill_gradientn(name=TeX("$p$-value"), trans="log", breaks=c(0.001, 0.01, 0.1, 1),
                       colours=rev(c("#f2f0f7", "#cbc9e2", "#9e9ac8", "#6a51a3")),
                       limits=c(0.001, 1)) +
  ggtitle(TeX("Test of whether $r^*_{u, j} < r^*_{v, j}$")) +
  xlab("Algorithm u") +
  ylab("Algorithm v")

print(lhat.hmap)

print(rhat.hmap)

Per-Dataset

The normalized misclassification rate per-dataset in a beeswarm plot:

legend.style=guide_legend(ncol=2, byrow=TRUE)
ggplot(data=subset(results.exp.optimalr, alg != "LOL"), aes(x=alg, y=lhat.norm, color=alg, group=alg)) +
  geom_beeswarm(alpha=0.5, size=2) +
  xlab("Algorithm") +
  scale_y_continuous(limits=c(-0.2, .15)) +
  scale_color_manual(values=acols, guide=legend.style, name="Algorithm") +
  ylab(TeX("$Mean\\left(\\frac{\\hat{L}_{LOL} - \\hat{L}_{Alg}}{\\hat{L}_{chance}}\\right)$")) +
  theme_bw()

And the difference between the normalized misclassification rate of LOL and that of PLS, as a function of several different variables:

pls.ss <- subset(results.exp.optimalr, alg == "PLS")
# per-dataset mean of the LOL and PLS misclassification rates
pls.ss$lhat.mean <- rowMeans(pls.ss[, c("lhat.alg", "lhat.lol")], na.rm=TRUE)
chancep <- ggplot(pls.ss, aes(x=lhat.chance, y=lhat.norm)) +
  geom_point(size=1.5) +
  geom_smooth(method = "loess", size = 1.5) +
  xlab(TeX("$\\hat{L}_{chance}$")) +
  ylab("")

pp <- ggplot(pls.ss, aes(x=d, y=lhat.norm)) +
  geom_point(size=1.5) +
  geom_smooth(method = "loess", size = 1.5) +
  xlab(TeX("p")) +
  ylab("") +
  scale_x_continuous(trans=log10_trans())

np <- ggplot(pls.ss, aes(x=n, y=lhat.norm)) +
  geom_point(size=1.5) +
  geom_smooth(method = "loess", size = 1.5) +
  xlab(TeX("n")) +
  ylab("") +
  scale_x_continuous(trans=log10_trans())

npp <- ggplot(pls.ss, aes(x=n/d, y=lhat.norm)) +
  geom_point(size=1.5) +
  geom_smooth(method = "loess", size = 1.5) +
  xlab(TeX("n/p")) +
  ylab("") +
  scale_x_continuous(trans=log10_trans())


kp <- ggplot(pls.ss, aes(x=K, y=lhat.norm)) +
  geom_point(size=1.5) +
  geom_smooth(method = "loess", size = 1.5) +
  ylab("") +
  xlab(TeX("K"))

mp <- ggplot(pls.ss, aes(x=lhat.mean, y=lhat.norm)) +
  geom_point(size=1.5) +
  geom_smooth(method = "loess", size = 1.5) +
  ylab("") +
  xlab(TeX("$Mean\\left(\\hat{L}_{LOL}, \\hat{L}_{PLS}\\right)$"))

pls.ss$alpha <- ifelse(pls.ss$lhat.norm < 0, 1, 0)
pls.ss$alpha <- factor(pls.ss$alpha, levels=c(1, 0))
acols <- c("#00ff00", "#ff0000")
names(acols) <- c(1, 0)
labs.alpha <- lapply(c("$\\hat{L}_{LOL} < \\hat{L}_{PLS}$", "$\\hat{L}_{LOL} > \\hat{L}_{PLS}$"), TeX)

np_diffp <- ggplot(pls.ss, aes(x=n, y=d, color=alpha)) +
  geom_point(size=1.5) +
  scale_x_continuous(trans=log10_trans()) +
  scale_color_manual(values=acols, labels=labs.alpha,
                     name=TeX("Normalized $Mean\\left(\\frac{\\hat{L}_{LOL} - \\hat{L}_{PLS}}{\\hat{L}_{chance}}\\right)$"))

layt.mtx <- rbind(c(1, 2, 3, 7, 7, 7),
                  c(4, 5, 6, 7, 7, 7))
grid.arrange(arrangeGrob(chancep, pp, np, npp, kp, mp, np_diffp,
             left=TeX("$Mean\\left(\\frac{\\hat{L}_{LOL} - \\hat{L}_{PLS}}{\\hat{L}_{chance}}\\right)$"),
             layout_matrix=layt.mtx))