diff --git a/NAMESPACE b/NAMESPACE index 154e70a..07d456e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,210 +1,208 @@ S3method("+",ggList) S3method(print,ggList) +export("%++%") +#export("%+%") export( "aggregate.bootstrap", "aggregate.bootstrap.list", "aggregate.challenge", "aggregate.ranked.list", "aggregateThenRank", "as.challenge", "as.relation.challenge.incidence", "as.relation.ranked.list", "bootstrap", "bootstrap.ranked.list", #"bootstrap.default", "boxplot.ranked.list", "boxplot.bootstrap.list", "boxplot.comparedRanks.list", #"check_strict_preference", "compareRanks", "compareRanks.ranked", "compareRanks.ranked.list", # "compareRanks.default", "consensus", "consensus.ranked.list", #"consensus.default", "Decision", "decision.challenge", "default_colors", "density.bootstrap.list", "dendrogram", "dendrogram.ranked.list", "extract.workflow", "kendall", "kendall.bootstrap.list", # "merge.list", "lineplot.challenge", "methodsplot","methodsplot.challenge", "network", "plot.network", "podium","podium.ranked.list", "podium.challenge",#"podium.default", "print.aggregated", "print.aggregated.list", "print.comparedRanks", "print.ranked", "print.ranked.list", #"quickmerge.list", "rank", "rank.aggregated.list", "rank.aggregatedRanks.list", "rank.challenge", #"rank.default", "rankFrequencies", "rankFrequencies.bootstrap", "rankFrequencies.bootstrap.list", #"rankFrequencies.default", #"rankNA2", "rankThenAggregate", "rankingHeatmap", "rankingHeatmap.ranked.list", "relation_dissimilarity.ranked.list", "report", "report.bootstrap.list", "second", "select.if", "select.if.aggregated.list", "select.if.comparedRanks.list", "select.if.list", "select.if.ranked.list", "significanceMap", "spearmansFootrule", "spearmansWeightedFootrule", "splitby", - "stability", "stabilityByAlgorithm", "stabilityByAlgorithmStacked","stabilityByTask", + "stability", "stabilityByAlgorithm", "stabilityByTask", "stability.ranked.list", "relation_dissimilarity", "stabilityByAlgorithm.bootstrap.list", - "stabilityByAlgorithmStacked.bootstrap.list", "stabilityByTask.bootstrap.list", "subset", "subset.aggregated.list", "subset.comparedRanks.list", "subset.list", "subset.ranked.list", "subset.bootstrap.list", #"which.top", "test", "test.challenge", "test.default", "testThenRank", "violin", "violin.bootstrap.list", "winner", "winner.bootstrap", "winner.bootstrap.list", "winner.default", "winner.ranked", "winner.ranked.list", "winnerFrequencies", "winnerFrequencies.bootstrap", "winnerFrequencies.bootstrap.list", "winnerFrequencies.default" ) importFrom("dplyr", "bind_rows","group_by","summarise","select_if","filter","mutate","right_join","anti_join","ungroup","arrange","desc") importFrom("rlang",":=",".data","!!") importFrom("reshape2","melt", "acast") importFrom("utils", "capture.output", "methods") importFrom("plyr", "llply") importFrom("knitr", "kable") importFrom("tidyr", "complete","expand") importFrom("purrr", "%>%") importFrom("rmarkdown", "render","word_document","pdf_document","html_document") importFrom("viridisLite", "viridis","cividis") importFrom("ggplot2", "aes","aes_string","geom_abline", "geom_bar", "geom_boxplot", "geom_count", "geom_density", "geom_jitter", "geom_line", "geom_point", "geom_raster", "geom_step", "geom_text", "geom_violin","annotate","guide_legend", "geom_vline", "ggplot", "ggtitle","vars","xlab","ylab","scale_size_area","theme_get","rel","geom_hline","ggplot_build","scale_fill_manual", "scale_y_continuous","coord_cartesian", "element_text", "facet_wrap", "position_jitter", "stat", "stat_summary", "theme", "unit","guides","scale_fill_viridis_c", "theme_set", "theme_light", "scale_color_manual", "element_blank") importFrom("grDevices", "col2rgb", "gray", "rgb", "grey") importFrom("graphics", "abline", "axis", "barplot", "box", "layout", "legend", "par", "plot", "points", "segments","boxplot", "stripchart", "title", "grconvertX", "plot.new") importFrom("stats", "as.dist", "as.formula", "median", "p.adjust", "density", "quantile", "aggregate", "cor", "wilcox.test", "terms.formula", "complete.cases") importFrom("methods", "new") importFrom("relations","relation","as.relation", "relation_domain", "relation_incidence", "relation_is_asymmetric","relation_consensus","relation_ensemble", "relation_is_irreflexive", "relation_is_negatively_transitive", "relation_is_transitive", "relation_is_trichotomous", "relation_scores", "relation_violations","relation_dissimilarity") importFrom("graph", "addEdge") S3method(print, comparedRanks) S3method(print, aggregated) S3method(print, ranked) S3method(print, aggregated.list) S3method(print, ranked.list) S3method(aggregate, challenge) S3method(aggregate, ranked.list) S3method(aggregate, bootstrap.list) S3method(aggregate, bootstrap) S3method(test, default) S3method(test, challenge) S3method(Aggregate, default) S3method(Aggregate, list) S3method(Rank, default) S3method(Rank, list) S3method(rank, default) S3method(rank, challenge) S3method(rank, aggregated.list) S3method(rank, aggregatedRanks.list) S3method(bootstrap, default) S3method(bootstrap, ranked.list) S3method(dendrogram, default) S3method(dendrogram, ranked.list) S3method(winner, default) S3method(winner, ranked) S3method(winner, ranked.list) S3method(winner, bootstrap) S3method(winner, bootstrap.list) S3method(rankFrequencies, default) S3method(rankFrequencies, bootstrap) S3method(rankFrequencies, bootstrap.list) S3method(winnerFrequencies, default) S3method(winnerFrequencies, bootstrap) S3method(winnerFrequencies, bootstrap.list) S3method(compareRanks,default) S3method(compareRanks,ranked) S3method(compareRanks,ranked.list) S3method(merge,list) S3method(melt,ranked.list) S3method(melt,aggregated.list) S3method(boxplot,ranked.list) S3method(boxplot,comparedRanks.list) S3method(boxplot,bootstrap.list) S3method(select.if,default) S3method(select.if,list) S3method(select.if,aggregated.list) S3method(select.if,ranked.list) S3method(select.if,comparedRanks.list) S3method(subset,list) S3method(subset,bootstrap.list) S3method(subset,aggregated.list) S3method(subset,ranked.list) S3method(subset,comparedRanks.list) S3method(podium,default) S3method(podium,challenge) S3method(podium,ranked.list) S3method(network,default) S3method(network,ranked.list) S3method(network,dist) S3method(plot,network) S3method(density,bootstrap.list) S3method(as.relation,challenge.incidence) S3method(as.relation,ranked.list) S3method(subset,bootstrap.list) S3method(subset,ranked.list) S3method(subset,list) S3method(subset,comparedRanks.list) S3method(subset,aggregated.list) S3method(decision,challenge) S3method(decision,default) S3method(lineplot,challenge) S3method(lineplot,default) S3method(methodsplot,challenge) S3method(methodsplot,default) S3method(significanceMap,data.frame) S3method(significanceMap,ranked.list) S3method(significanceMap,default) S3method(violin,bootstrap.list) S3method(violin,default) S3method(rankingHeatmap,ranked.list) S3method(rankingHeatmap,default) S3method(relation_dissimilarity,ranked.list) S3method(relation_dissimilarity,default) S3method(stabilityByTask,bootstrap.list) S3method(stabilityByTask,default) S3method(stability,default) S3method(stability,ranked.list) S3method(stabilityByAlgorithm,bootstrap.list) S3method(stabilityByAlgorithm,default) -S3method(stabilityByAlgorithmStacked,bootstrap.list) -S3method(stabilityByAlgorithmStacked,default) - S3method(consensus,ranked.list) S3method(consensus,default) S3method(report,bootstrap.list) S3method(report,ranked.list) S3method(report,default) diff --git a/R/visualization.R b/R/visualization.R index 8df79f5..d37032d 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -1,273 +1,301 @@ stability <- function(x,...) UseMethod("stability") stability.default <- function(x, ...) stop("not implemented for this class") stabilityByAlgorithm <- function(x,...) UseMethod("stabilityByAlgorithm") stabilityByAlgorithm.default <- function(x, ...) stop("not implemented for this class") -stabilityByAlgorithmStacked <- function(x,...) UseMethod("stabilityByAlgorithmStacked") -stabilityByAlgorithmStacked.default <- function(x, ...) stop("not implemented for this class") stabilityByTask <- function(x,...) UseMethod("stabilityByTask") stabilityByTask.default <- function(x, ...) stop("not implemented for this class") stability.ranked.list=function(x, ordering, probs=c(.025,.975), max_size=6, freq=FALSE, shape=4,...) { if (length(x$data) < 2) { stop("The stability of rankings across tasks cannot be computed for less than two tasks.") } dd=melt(x, measure.vars="rank", value.name="rank") %>% dplyr::rename(task="L1") if (!missing(ordering)) { dd=dd%>%mutate(algorithm=factor(.data$algorithm, levels=ordering)) } else dd=dd%>%mutate(algorithm=factor(.data$algorithm)) if (!freq) { p = ggplot(dd)+ geom_count(aes(algorithm , rank, color=algorithm , size = stat(prop*100))) } else { p=ggplot(dd)+ geom_count(aes(algorithm, rank ,color=algorithm )) } p+scale_size_area(max_size = max_size)+ stat_summary(aes(algorithm ,rank ), geom="point", shape=shape, fun.data=function(x) data.frame(y=median(x)),...)+ stat_summary(aes(algorithm ,rank ), geom="linerange", fun.data=function(x) data.frame(ymin=quantile(x,probs[1]), ymax=quantile(x,probs[2])))+ geom_abline(slope=1, color="gray", linetype="dotted")+ theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))+ guides(size = guide_legend(title="%"))+ scale_y_continuous(minor_breaks=NULL, limits=c(1,max(5,max(dd$rank))), breaks=c(1,seq(5,max(5,max(dd$rank)),by=5)))+ xlab("Algorithm")+ ylab("Rank") } rankdist.bootstrap.list=function(x,...){ rankDist=melt(lapply(x$bootsrappedRanks,t), value.name="rank") %>% dplyr::rename(algorithm="Var2",task="L1") rankDist } stabilityByAlgorithm.bootstrap.list=function(x, ordering, - probs=c(.025,.975), - max_size=3, - shape=4, + stacked = FALSE, + probs=c(.025,.975),#only for !stacked + max_size=3,#only for !stacked + shape=4,#only for !stacked + freq=FALSE, #only for stacked single=FALSE,...) { - - if (length(x$data) < 2) { - stop("The stability of rankings by algorithm cannot be computed for less than two tasks.") - } - - rankDist=rankdist.bootstrap.list(x) - -if (!missing(ordering)) rankDist=rankDist%>%mutate(algorithm=factor(.data$algorithm, - levels=ordering)) - -if (single==FALSE){ - ggplot(rankDist)+ - geom_count(aes(task , - rank, - color=algorithm, - size = stat(prop*100), - group = task ))+ - scale_size_area(max_size = max_size)+ - stat_summary(aes(task ,rank ), - geom="point", - shape=shape, - fun.data=function(x) data.frame(y=median(x)),...)+ - stat_summary(aes(task ,rank ), - geom="linerange", - fun.data=function(x) data.frame(ymin=quantile(x,probs[1]), - ymax=quantile(x,probs[2])))+ - facet_wrap(vars(algorithm))+ - theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))+ - guides(size = guide_legend(title="%"))+ - scale_y_continuous(minor_breaks=NULL, - limits=c(1,max(5,max(rankDist$rank))), - breaks=c(1,seq(5,max(5,max(rankDist$rank)),by=5)))+ - xlab("Task")+ - ylab("Rank") - -} else { - pl=list() - for (alg in ordering){ - rankDist.alg=subset(rankDist, - rankDist$algorithm==alg) - pl[[alg]]=ggplot(rankDist.alg)+ - geom_count(aes(task , - rank, - color=algorithm, - size = stat(prop*100), - group = task ))+ - scale_size_area(max_size = max_size)+ - stat_summary(aes(task , - rank ), - geom="point", - shape=shape, - fun.data=function(x) data.frame(y=median(x)),...)+ - stat_summary(aes(task ,rank ), - geom="linerange", - fun.data=function(x) data.frame(ymin=quantile(x,probs[1]), - ymax=quantile(x,probs[2])))+ - facet_wrap(vars(algorithm))+ - theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))+ - guides(size = guide_legend(title="%"))+ - scale_y_continuous(minor_breaks=NULL, - limits=c(1,max(5,max(rankDist$rank))), - breaks=c(1,seq(5,max(5,max(rankDist$rank)),by=5)))+ - xlab("Task")+ - ylab("Rank") - - - } - names(pl) = names(x$matlist) - class(pl) <- "ggList" - pl -} - -} - - - - -stabilityByAlgorithmStacked.bootstrap.list=function(x, - ordering, - freq=FALSE,...) { if (length(x$data) < 2) { stop("The stability of rankings by algorithm cannot be computed for less than two tasks.") } - + rankDist=rankdist.bootstrap.list(x) + if (!missing(ordering)) rankDist=rankDist%>%mutate(algorithm=factor(.data$algorithm, levels=ordering)) - rankDist=rankDist%>%group_by(task)%>%dplyr::count(.data$algorithm, - .data$rank) - rankDist=rankDist%>%group_by(.data$algorithm)%>%mutate(prop=.data$n/sum(.data$n))%>%ungroup - rankDist=rankDist%>%data.frame%>%mutate(rank=as.factor(.data$rank)) - + + if (!stacked){ + if (single==FALSE){ + pl <- ggplot(rankDist)+ + geom_count(aes(task , + rank, + color=algorithm, + size = stat(prop*100), + group = task ))+ + scale_size_area(max_size = max_size)+ + stat_summary(aes(task ,rank ), + geom="point", + shape=shape, + fun.data=function(x) data.frame(y=median(x)),...)+ + stat_summary(aes(task ,rank ), + geom="linerange", + fun.data=function(x) data.frame(ymin=quantile(x,probs[1]), + ymax=quantile(x,probs[2])))+ + facet_wrap(vars(algorithm))+ + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))+ + guides(size = guide_legend(title="%"))+ + scale_y_continuous(minor_breaks=NULL, + limits=c(1,max(5,max(rankDist$rank))), + breaks=c(1,seq(5,max(5,max(rankDist$rank)),by=5)))+ + xlab("Task")+ + ylab("Rank") + + } else { + pl=list() + for (alg in ordering){ + rankDist.alg=subset(rankDist, + rankDist$algorithm==alg) + pl[[alg]]=ggplot(rankDist.alg)+ + geom_count(aes(task , + rank, + color=algorithm, + size = stat(prop*100), + group = task ))+ + scale_size_area(max_size = max_size)+ + stat_summary(aes(task , + rank ), + geom="point", + shape=shape, + fun.data=function(x) data.frame(y=median(x)),...)+ + stat_summary(aes(task ,rank ), + geom="linerange", + fun.data=function(x) data.frame(ymin=quantile(x,probs[1]), + ymax=quantile(x,probs[2])))+ + facet_wrap(vars(algorithm))+ + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))+ + guides(size = guide_legend(title="%"))+ + scale_y_continuous(minor_breaks=NULL, + limits=c(1,max(5,max(rankDist$rank))), + breaks=c(1,seq(5,max(5,max(rankDist$rank)),by=5)))+ + xlab("Task")+ + ylab("Rank") + } + names(pl) = names(x$matlist) + class(pl) <- "ggList" + } + + } else { #stacked + rankDist=rankDist%>% + group_by(task)%>% + dplyr::count(.data$algorithm, + .data$rank)%>% + group_by(.data$algorithm)%>% + mutate(prop=.data$n/sum(.data$n)*100)%>% + ungroup%>% + data.frame%>% + mutate(rank=as.factor(.data$rank)) + + results= melt.ranked.list(x, + measure.vars="rank", + value.name="rank") %>% + dplyr::select(-.data$variable) + colnames(results)[3]="task" + if (!missing(ordering)) results=results%>%mutate(algorithm=factor(.data$algorithm, + levels=ordering)) + + if (single==FALSE){ + pl<- ggplot(rankDist) + + facet_wrap(vars(algorithm))+ + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) + + if (freq){ + pl <- pl + geom_bar(aes(rank, + n, + fill=task ), + position = "stack", + stat = "identity") + + ylab("Frequency") + } else { + pl <- pl + geom_bar(aes(rank, + prop, + fill=task ), + position = "stack", + stat = "identity")+ + ylab("Proportion (%)") + } + + pl <- pl + + geom_vline(aes(xintercept=rank, + color=task), + size=.4, + linetype="dotted", + data=results) + + xlab("Rank") + } else { + pl=list() + for (alg in ordering){ + rankDist.alg=subset(rankDist, + rankDist$algorithm==alg) + results.alg=subset(results, + results$algorithm==alg) + pl[[alg]]=ggplot(rankDist.alg)+ + facet_wrap(vars(algorithm))+ + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) + + if (freq){ + pl[[alg]] <- pl[[alg]] + geom_bar(aes(rank, + n, + fill=task ), + position = "stack", + stat = "identity") + + ylab("Frequency") + } else { + pl[[alg]] <- pl[[alg]] + geom_bar(aes(rank, + prop, + fill=task ), + position = "stack", + stat = "identity")+ + ylab("Proportion (%)") + } + + pl[[alg]] <- pl[[alg]] + + geom_vline(aes(xintercept=rank, + color=task), + size=.4, + linetype="dotted", + data=results.alg) + + xlab("Rank") + } + names(pl) = names(x$matlist) + class(pl) <- "ggList" + } + } + pl +} - results= melt.ranked.list(x, - measure.vars="rank", - value.name="rank") %>%dplyr::select(-.data$variable) - colnames(results)[3]="task" - if (!missing(ordering)) results=results%>%mutate(algorithm=factor(.data$algorithm, - levels=ordering)) - if (freq) - ggplot(rankDist) + - geom_bar(aes(rank, - n, - fill=task ), - position = "stack", - stat = "identity") + - facet_wrap(vars(algorithm))+ - geom_vline(aes(xintercept=rank, - color=task), - size=.6, - linetype="dotted", - data=results )+ - theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))+ - xlab("Rank") - else - ggplot(rankDist)+ - geom_bar(aes(rank, - prop, - fill=task ), - position = "stack", - stat = "identity")+ - facet_wrap(vars(algorithm))+ - geom_vline(aes(xintercept=rank, - color=task), - size=.4, - linetype="dotted", - data=results)+ - theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))+ - xlab("Rank") -} stabilityByTask.bootstrap.list=function(x, ordering, probs=c(.025,.975), max_size=3, size.ranks=.3*theme_get()$text$size, shape=4, showLabelForSingleTask=FALSE,...){ rankDist=rankdist.bootstrap.list(x) ranks=melt.ranked.list(x, measure.vars="rank", value.name = "full.rank") colnames(ranks)[4]="task" if (!missing(ordering)) { ranks$algorithm=factor(ranks$algorithm, levels=ordering) rankDist=rankDist%>%mutate(algorithm=factor(.data$algorithm, levels=ordering)) } blobPlot <- ggplot(rankDist)+ geom_count(aes(algorithm , rank, color=algorithm, size = stat(prop*100), group = algorithm ))+ scale_size_area(max_size = max_size)+ geom_abline(slope=1, color="gray", linetype="dotted")+ stat_summary(aes(algorithm ,rank ), geom="point", shape=shape, fun.data=function(x) data.frame(y=median(x)),...)+ stat_summary(aes(algorithm ,rank ), geom="linerange", fun.data=function(x) data.frame(ymin=quantile(x,probs[1]), ymax=quantile(x,probs[2])))+ geom_text(aes(x=algorithm,y=1,label=full.rank), nudge_y=-.6, vjust = 0, size=size.ranks, fontface="plain", family="sans", data=ranks) + coord_cartesian(clip = 'off')+ theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))+ guides(size = guide_legend(title="%"))+ scale_y_continuous(minor_breaks=NULL, limits=c(.4,max(5,max(rankDist$rank))), breaks=c(1,seq(5,max(5,max(rankDist$rank)),by=5)))+ xlab("Algorithm")+ ylab("Rank") # Create multi-panel plot with task names as labels for multi-task data set or single-task data set when explicitly specified if (length(x$data) > 1 || showLabelForSingleTask == TRUE) { blobPlot <- blobPlot + facet_wrap(vars(task)) } return(blobPlot) } diff --git a/inst/appdir/characterizationOfAlgorithmsBootstrapping.Rmd b/inst/appdir/characterizationOfAlgorithmsBootstrapping.Rmd index db8f9dd..9ba7e57 100644 --- a/inst/appdir/characterizationOfAlgorithmsBootstrapping.Rmd +++ b/inst/appdir/characterizationOfAlgorithmsBootstrapping.Rmd @@ -1,57 +1,69 @@ ### Ranking stability: Ranking variability via bootstrap approach Blob plot of bootstrap results over the different tasks separated by algorithm allows another perspective on the assessment data. This gives deeper insights into the characteristics of tasks and the ranking uncertainty of the algorithms in each task. \bigskip ```{r blobplot_bootstrap_byAlgorithm,fig.width=7,fig.height = 5} #stabilityByAlgorithm.bootstrap.list if (length(boot_object$matlist)<=6 &nrow((boot_object$matlist[[1]]))<=10 ){ stabilityByAlgorithm(boot_object, ordering=ordering_consensus, max_size = 9, size=4, shape=4, single = F) + scale_color_manual(values=cols) } else { pl=stabilityByAlgorithm(boot_object, ordering=ordering_consensus, max_size = 9, size=4, shape=4, single = T) - for (i in 1:length(pl)) print(pl[[i]] + + for (i in 1:length(pl)) print(pl[[i]] + scale_color_manual(values=cols) + guides(size = guide_legend(title="%"),color="none") ) } ``` An alternative representation is provided by a stacked frequency plot of the observed ranks, separated by algorithm. Observed ranks across bootstrap samples are displayed with colouring according to task. For algorithms that achieve the same rank in different tasks for the full assessment data set, vertical lines are on top of each other. Vertical lines allow to compare the achieved rank of each algorithm over different tasks. \bigskip + ```{r stackedFrequencies_bootstrap_byAlgorithm,fig.width=7,fig.height = 5} -#stabilityByAlgorithmStacked.bootstrap.list -stabilityByAlgorithmStacked(boot_object,ordering=ordering_consensus) -``` +#stabilityByAlgorithm.bootstrap.list +if (length(boot_object$matlist)<=6 &nrow((boot_object$matlist[[1]]))<=10 ){ + stabilityByAlgorithm(boot_object, + ordering=ordering_consensus, + stacked = TRUE, + single = F) +} else { + pl=stabilityByAlgorithm(boot_object, + ordering=ordering_consensus, + stacked = TRUE, + single = T) + print(pl) +} +``` diff --git a/inst/appdir/characterizationOfTasksBootstrapping.Rmd b/inst/appdir/characterizationOfTasksBootstrapping.Rmd index c761603..51f6438 100644 --- a/inst/appdir/characterizationOfTasksBootstrapping.Rmd +++ b/inst/appdir/characterizationOfTasksBootstrapping.Rmd @@ -1,49 +1,49 @@ ### Visualizing bootstrap results To investigate which tasks separate algorithms well (i.e., lead to a stable ranking), a blob plot is recommended. Bootstrap results can be shown in a blob plot showing one plot for each task. In this view, the spread of the blobs for each algorithm can be compared across tasks. Deviations from the diagonal indicate deviations from the consensus ranking (over tasks). Specifically, if rank distribution of an algorithm is consistently below the diagonal, the algorithm performed better in this task than on average across tasks, while if the rank distribution of an algorithm is consistently above the diagonal, the algorithm performed worse in this task than on average across tasks. At the bottom of each panel, ranks for each algorithm in the tasks is provided. Same as in Section \ref{blobByTask} but now ordered according to consensus. \bigskip ```{r blobplot_bootstrap_byTask,fig.width=9, fig.height=9} #stabilityByTask.bootstrap.list if (length(boot_object$matlist)<=6 &nrow((boot_object$matlist[[1]]))<=10 ){ stabilityByTask(boot_object, ordering=ordering_consensus, max_size = 9, size=4, shape=4) + scale_color_manual(values=cols) } else { pl=list() for (subt in names(boot_object$bootsrappedRanks)){ a=list(bootsrappedRanks=list(boot_object$bootsrappedRanks[[subt]]), matlist=list(boot_object$matlist[[subt]])) names(a$bootsrappedRanks)=names(a$matlist)=subt class(a)="bootstrap.list" r=boot_object$matlist[[subt]] pl[[subt]]=stabilityByTask(a, max_size = 9, ordering=ordering_consensus, size.ranks=.25*theme_get()$text$size, size=4, shape=4) + scale_color_manual(values=cols) + ggtitle(subt) } - for (i in 1:length(pl)) print(pl[[i]]) + print(pl) } ``` \ No newline at end of file diff --git a/inst/appdir/visualizationBlobPlots.Rmd b/inst/appdir/visualizationBlobPlots.Rmd index b730e57..bb36915 100644 --- a/inst/appdir/visualizationBlobPlots.Rmd +++ b/inst/appdir/visualizationBlobPlots.Rmd @@ -1,39 +1,39 @@ ## *Blob plot* for visualizing ranking stability based on bootstrap sampling \label{blobByTask} Algorithms are color-coded, and the area of each blob at position $\left( A_i, \text{rank } j \right)$ is proportional to the relative frequency $A_i$ achieved rank $j$ across $b=$ `r ncol(boot_object$bootsrappedRanks[[1]])` bootstrap samples. The median rank for each algorithm is indicated by a black cross. 95\% bootstrap intervals across bootstrap samples are indicated by black lines. \bigskip ```{r blobplot_bootstrap,fig.width=9, fig.height=9} showLabelForSingleTask <- FALSE if (length(names(boot_object$bootsrappedRanks)) > 1) { showLabelForSingleTask <- TRUE } pl=list() for (subt in names(boot_object$bootsrappedRanks)){ a=list(bootsrappedRanks=list(boot_object$bootsrappedRanks[[subt]]), matlist=list(boot_object$matlist[[subt]])) names(a$bootsrappedRanks)=names(a$matlist)=subt class(a)="bootstrap.list" r=boot_object$matlist[[subt]] pl[[subt]]=stabilityByTask(a, max_size =8, ordering=rownames(r[order(r$rank),]), size.ranks=.25*theme_get()$text$size, size=8, shape=4, showLabelForSingleTask=showLabelForSingleTask) + scale_color_manual(values=cols) } # if (length(boot_object$matlist)<=6 &nrow((boot_object$matlist[[1]]))<=10 ){ # ggpubr::ggarrange(plotlist = pl) # } else { - for (i in 1:length(pl)) print(pl[[i]]) + print(pl) #} ``` diff --git a/tests/testthat/test-stackedBarPlotStabilityByAlgorithm.R b/tests/testthat/test-stackedBarPlotStabilityByAlgorithm.R index 507dfa2..c87aace 100644 --- a/tests/testthat/test-stackedBarPlotStabilityByAlgorithm.R +++ b/tests/testthat/test-stackedBarPlotStabilityByAlgorithm.R @@ -1,52 +1,52 @@ test_that("stacked bar plot for visualizing ranking stability by algorithm raises error for single-task data set", { data <- rbind( data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A2", value=0.6, case="C1"), data.frame(algo="A3", value=0.4, case="C1"), data.frame(algo="A1", value=0.2, case="C2"), data.frame(algo="A2", value=0.1, case="C2"), data.frame(algo="A3", value=0.0, case="C2")) challenge <- as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter=FALSE) ranking <- challenge%>%aggregateThenRank(FUN=median, ties.method="min") set.seed(1) rankingBootstrapped <- ranking%>%bootstrap(nboot=10) - expect_error(stabilityByAlgorithmStacked(rankingBootstrapped), + expect_error(stabilityByAlgorithm(rankingBootstrapped, stacked =TRUE), "The stability of rankings by algorithm cannot be computed for less than two tasks.", fixed=TRUE) }) test_that("stacked bar plot for visualizing ranking stability by algorithm returns one plot for multi-task data set", { dataTask1 <- cbind(task="T1", rbind( data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A2", value=0.6, case="C1"), data.frame(algo="A3", value=0.4, case="C1"), data.frame(algo="A1", value=0.2, case="C2"), data.frame(algo="A2", value=0.1, case="C2"), data.frame(algo="A3", value=0.0, case="C2") )) dataTask2 <- cbind(task="T2", rbind( data.frame(algo="A1", value=0.2, case="C1"), data.frame(algo="A2", value=0.3, case="C1"), data.frame(algo="A3", value=0.4, case="C1"), data.frame(algo="A1", value=0.7, case="C2"), data.frame(algo="A2", value=0.8, case="C2"), data.frame(algo="A3", value=0.9, case="C2") )) data <- rbind(dataTask1, dataTask2) challenge <- as.challenge(data, by="task", algorithm="algo", case="case", value="value", smallBetter=FALSE) ranking <- challenge%>%aggregateThenRank(FUN=median, ties.method="min") set.seed(1) rankingBootstrapped <- ranking%>%bootstrap(nboot=10) - actualPlot <- stabilityByAlgorithmStacked(rankingBootstrapped) + actualPlot <- stabilityByAlgorithm(rankingBootstrapped, stacked =TRUE) expect_is(actualPlot, "ggplot") })