diff --git a/R/Bootstrap.R b/R/Bootstrap.R index 8d172ea..aa29937 100644 --- a/R/Bootstrap.R +++ b/R/Bootstrap.R @@ -1,222 +1,222 @@ # Copyright (c) German Cancer Research Center (DKFZ) # All rights reserved. # # This file is part of challengeR. # # challengeR is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 2 of the License, or # (at your option) any later version. # # challengeR is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with challengeR. If not, see . bootstrap <- function(object,...) UseMethod("bootstrap") bootstrap.default <- function(object, ...) stop("not implemented for this class") #' Performs bootstrapping #' #' Performs bootstrapping on a ranked assessment data set and applies the ranking method to each bootstrap sample. One bootstrap sample of #' a task with \code{n} cases consists of \code{n} cases randomly drawn with replacement from this task. #' A total of \code{nboot} of these bootstrap samples are drawn. #' #' To ensure reproducibility, please use the doRNG package on Windows or RNG kind = "L'Ecuyer-CMRG" in set.seed(), e.g. set.seed(1, kind = "L'Ecuyer-CMRG"). #' #' @param object The ranked assessment data set. #' @param nboot The number of bootstrap samples. #' @param parallel A boolean specifying whether parallel processing should be enabled. #' @param progress A string specifying the type of progress indication. #' @param ... Further arguments passed to or from other functions. #' #' @return An S3 object of class "bootstrap.list" to represent a bootstrapped, ranked assessment data set. #' #' @examples #' #' \dontrun{ #' # perform bootstrapping with 1000 bootstrap samples using one CPU #' set.seed(123, kind="L'Ecuyer-CMRG") #' ranking_bootstrapped <- bootstrap(ranking, nboot = 1000) #' } #' #' \dontrun{ #' # perform bootstrapping using multiple CPUs (here: 8 CPUs) #' library(doParallel) #' library(doRNG) #' registerDoParallel(cores = 8) #' registerDoRNG(123) #' ranking_bootstrapped <- bootstrap(ranking, nboot = 1000, parallel = TRUE, progress = "none") #' stopImplicitCluster() #' } #' #' @export bootstrap.ranked.list=function(object, nboot, parallel=FALSE, progress="text", ...){ algorithm=attr(object$data,"algorithm") by=attr(object$data,"case") # exclude if only 1 test case or only 1 algorithm tidy.data.id=sapply(object$data, function(data.subset) { ifelse((length(unique(data.subset[[by]]))==1 | length(unique(data.subset[[algorithm]]))<=1 ), yes=FALSE, no=TRUE) }) if (sum(tidy.data.id)==0) { if (length(object$matlist)>1) stop("All tasks only contained 1 test case. Bootstrapping with 1 test case not sensible.") else stop("Only 1 test case included. Bootstrapping with 1 test case not sensible.") } if (sum(tidy.data.id). test <- function(x,...) UseMethod("test") test.default <- function(x, ...) stop("not implemented for this class") test.challenge=function(x,...) aggregate.challenge(x=x, FUN="significance",...) #' Title #' #' @param x #' @param FUN #' @param na.treat #' @param alpha #' @param p.adjust.method #' @param parallel #' @param progress #' @param ... 
#' #' @return #' @export #' #' @examples aggregate.challenge=function(x, FUN=mean, na.treat, #either "na.rm", numeric value or function alpha=0.05, p.adjust.method="none",# only needed for significance parallel=FALSE, progress="none",...){ call=as.list(match.call()) if (missing(na.treat) && !is.null(attr(x,"na.treat"))) na.treat <- attr(x, "na.treat") if (missing(na.treat)){ #na.treat only optional if no missing values in data set if (!inherits(x,"list")){ if (!any(is.na(x[,attr(x, "value")]))) na.treat="na.rm" # there are no missings so set na.treat by dummy "na.rm" has no effect } else { if (!any(sapply(x, function(task) any(is.na(task[,attr(x, "value")]))))) na.treat="na.rm" # there are no missings so set na.treat by dummy "na.rm" has no effect } } else attr(x,"na.treat") <- na.treat res1=do.call("Aggregate",list(object=x, x=attr(x,"value"), algorithm=attr(x,"algorithm"), FUN=call$FUN, na.treat=na.treat, parallel=parallel, progress=progress, case=attr(x,"case"), alpha=alpha, p.adjust.method=p.adjust.method, smallBetter=attr(x,"smallBetter") # only needed for significance )) call2=call("Aggregate", object=call$x, x=attr(x,"value"), algorithm=attr(x,"algorithm"), FUN=call$FUN, na.treat=na.treat, parallel=parallel,progress=progress, case=attr(x,"case"), alpha=alpha, p.adjust.method=p.adjust.method, smallBetter=attr(x,"smallBetter") # only needed for significance ) if (inherits(x,"list")){ res=list(FUN = . %>% (call2), call=list(call2), FUN.list=list(FUN), data=x, matlist=res1$matlist, isSignificance=res1$isSignificance) class(res)=c("aggregated.list",class(res)) } else { res=list(FUN = . %>% (call2), call=list(call2), FUN.list=list(FUN), data=x, mat=res1$mat, isSignificance=res1$isSignificance) class(res)=c("aggregated",class(res)) } res } aggregate.ranked.list <-function(x, FUN=mean, ...){ call=match.call(expand.dots = F) call=call("aggregate.ranked.list", x=call$x, FUN=FUN) algorithm=attr(x$data,"algorithm") resmatlist=Aggregate.list(x$matlist, x="rank", algorithm=algorithm, FUN=FUN,...)$matlist resmatlist=lapply(resmatlist, function(z) as.data.frame(z)) res=list(matlist=resmatlist, call=c(x$call,call), data=x$data, FUN = . %>% (x$FUN) %>% (call), FUN.list=c(x$FUN.list,FUN) ) class(res)=c("aggregatedRanks.list",class(res)) res } aggregate.bootstrap.list <-function(x, what="metric", FUN=mean, ...){ call=match.call(expand.dots = T) if (is.character(FUN)) FUN=try(eval(parse(text=FUN)), silent = T) FUNname=as.character(call$FUN) if (!is.function(FUN)) stop("FUN has to be a function (possibly as character)") - matlist=llply(1:length(x$bootsrappedRank), + matlist=llply(1:length(x$bootstrappedRank), function(i.piece){ - if (what=="ranks") xmean <- as.data.frame(apply(x$bootsrappedRank[[i.piece]],1,FUN=FUN)) - else xmean <- as.data.frame(apply(x$bootsrappedAggregate[[i.piece]],1,FUN=FUN)) + if (what=="ranks") xmean <- as.data.frame(apply(x$bootstrappedRank[[i.piece]],1,FUN=FUN)) + else xmean <- as.data.frame(apply(x$bootstrappedAggregate[[i.piece]],1,FUN=FUN)) names(xmean)=paste0(what,"_",FUNname) xmean }) - names(matlist)=names(x$bootsrappedRank) + names(matlist)=names(x$bootstrappedRank) res=list(FUN = . %>% (call), call=list(call), data=x, matlist=matlist) class(res)=c("aggregated.list",class(res)) res } aggregate.bootstrap<-function(x,what="metric",FUN=mean, ... 
){ call=match.call(expand.dots = T) if (is.character(FUN)) FUN=try(eval(parse(text=FUN)),silent = T) FUNname=as.character(call$FUN) if (!is.function(FUN)) stop("FUN has to be a function (possibly as character)") - if (what=="ranks") xmean <- as.data.frame(apply(x$bootsrappedRank, + if (what=="ranks") xmean <- as.data.frame(apply(x$bootstrappedRank, 1, FUN=FUN)) - else xmean <- as.data.frame(apply(x$bootsrappedAggregate, + else xmean <- as.data.frame(apply(x$bootstrappedAggregate, 1, FUN=FUN)) names(xmean)=paste0(what,"_",FUNname) res=list(FUN = . %>% (call), call=list(call), data=x, mat=xmean) class(res)=c("aggregated",class(res)) res } diff --git a/R/boxplot.R b/R/boxplot.R index 576d183..afc9578 100644 --- a/R/boxplot.R +++ b/R/boxplot.R @@ -1,101 +1,101 @@ # Copyright (c) German Cancer Research Center (DKFZ) # All rights reserved. # # This file is part of challengeR. # # challengeR is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 2 of the License, or # (at your option) any later version. # # challengeR is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with challengeR. If not, see . #' Creates dot- and boxplots #' #' Creates dot- and boxplots visualizing the assessment data separately for each algorithm. #' Boxplots representing descriptive statistics for all test cases (median, quartiles and outliers) #' are combined with horizontally jittered dots representing individual test cases. #' #' @param x The ranked assessment data set. #' @param color A string specifying the color of the dots. #' @param jitter.width A numeric value specifying the jitter width of the dots. #' @param ... Further arguments passed to or from other functions. #' #' @return #' #' @examples #' #' @seealso `browseVignettes("challengeR")` #' #' @family functions to visualize assessment data #' @export boxplot.ranked.list=function(x, jitter.width=0.25,...){ algo=attr(x$data,"algorithm") value=attr(x$data,"value") ranking=x x=x$data for (i in names(x)) { x[[i]][[algo]]=factor(x[[i]][[algo]], levels=rownames(ranking$matlist[[i]][order(ranking$matlist[[i]]$rank),])) } a=lapply(1:length(x),function(id){ ggplot(data=x[[id]])+ geom_jitter(aes_string(algo,value,color=algo), position=position_jitter(width=jitter.width, height=0), ...)+ geom_boxplot(aes_string(algo,value), outlier.shape = NA,fill=NA)+ ggtitle(names(x)[id]) + theme(axis.text.x=element_text(angle = -90, hjust = 0), legend.position="none") + xlab("Algorithm") + ylab("Metric value") }) # Remove title for single-task data set if (length(a) == 1) { a[[1]]$labels$title <- NULL return(a[[1]]) } else { names(a) = names(x$matlist) class(a) <- "ggList" return(a) } } boxplot.comparedRanks.list=function(x,...){ tau=sapply(x,function(z) z$tau) boxplot(tau,ylim=c(0,1.0),las=2, outline=FALSE, ylab="Kendall's tau",...) stripchart(tau, vertical = TRUE, method = "jitter", pch = 21, col = "blue", add=TRUE,...) 
} boxplot.bootstrap.list=function(x,...){ winner.noboot=winner.ranked.list(x) x2=winnerFrequencies(x) - n.bootstraps= ncol(x$bootsrappedRanks[[1]]) + n.bootstraps= ncol(x$bootstrappedRanks[[1]]) perc_boot_Winner=lapply(1:length(x2),function(i){ x2.i=x2[[i]] winner.id=which(rownames(x2.i)%in%rownames(winner.noboot[[i]])) #could be multiple winners!!!! 100*x2.i[winner.id,3,drop=F]/n.bootstraps }) boxplot(unlist(perc_boot_Winner),ylim=c(0,100),las=2, outline=FALSE, ylab="% Bootstraps",xlab="Winner ranks 1", sub=paste(n.bootstraps,"Bootstraps"),...) stripchart(unlist(perc_boot_Winner), vertical = TRUE, method = "jitter", pch = 21, col = "blue", add=TRUE,...) } diff --git a/R/stability.R b/R/stability.R index eb9ae9f..9d8c32f 100644 --- a/R/stability.R +++ b/R/stability.R @@ -1,446 +1,446 @@ # Copyright (c) German Cancer Research Center (DKFZ) # All rights reserved. # # This file is part of challengeR. # # challengeR is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 2 of the License, or # (at your option) any later version. # # challengeR is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with challengeR. If not, see . #' @export stability <- function(x,...) UseMethod("stability") #' @export stability.default <- function(x, ...) stop("not implemented for this class") #' @export stabilityByAlgorithm <- function(x,...) UseMethod("stabilityByAlgorithm") #' @export stabilityByAlgorithm.default <- function(x, ...) stop("not implemented for this class") #' @export stabilityByTask <- function(x,...) UseMethod("stabilityByTask") #' @export stabilityByTask.default <- function(x, ...) stop("not implemented for this class") #' Creates a blob plot across tasks #' #' Creates a blob plots visualizing the ranking variability across tasks. #' #' @param x The ranked asssessment data set. #' @param ordering #' @param probs #' @param max_size #' @param freq #' @param shape #' @param ... Further arguments passed to or from other functions. #' #' @return #' #' @examples #' #' @seealso `browseVignettes("challengeR")` #' #' @family functions to visualize cross-task insights #' @export stability.ranked.list=function(x, ordering, probs=c(.025,.975), max_size=6, freq=FALSE, shape=4,...) 
{ if (length(x$data) < 2) { stop("The stability of rankings across tasks cannot be computed for less than two tasks.") } dd=melt(x, measure.vars="rank", value.name="rank") %>% dplyr::rename(task="L1") if (!missing(ordering)) { if (is.numeric(ordering) & !is.null(names(ordering)) ){ ordering <- names(ordering)[order(ordering)] } else if (!is.character(ordering)){ stop("Argument ordering has to be a named vector of ranks or a vector of algorithm names in the ranking order.") } dd=dd%>%mutate(algorithm=factor(.data$algorithm, levels=ordering)) } else dd=dd%>%mutate(algorithm=factor(.data$algorithm)) if (!freq) { p = ggplot(dd)+ geom_count(aes(algorithm, rank, color=algorithm, size = stat(prop*100))) } else { p=ggplot(dd)+ geom_count(aes(algorithm, rank, color=algorithm )) } # Define breaks before creating Blob plot if (max(dd$rank)>5) { breaks = c(1, seq(5, max(dd$rank), by=5)) } else { breaks = seq(1, max(dd$rank)) } p+scale_size_area(max_size = max_size)+ stat_summary(aes(algorithm, rank), geom="point", shape=shape, fun.data=function(x) data.frame(y=median(x)),...)+ stat_summary(aes(algorithm, rank), geom="linerange", fun.data=function(x) data.frame(ymin=quantile(x,probs[1]), ymax=quantile(x,probs[2])))+ geom_abline(slope=1, color="gray", linetype="dotted")+ theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))+ guides(size = guide_legend(title="%"))+ scale_y_continuous(minor_breaks=NULL, limits=c(.4, max(dd$rank)), breaks=breaks)+ xlab("Algorithm")+ ylab("Rank") } rankdist.bootstrap.list=function(x,...){ - rankDist=melt(lapply(x$bootsrappedRanks,t), + rankDist=melt(lapply(x$bootstrappedRanks,t), value.name="rank") %>% dplyr::rename(algorithm="Var2",task="L1") rankDist } #' Creates blob plots or stacked frequency plots stratified by algorithm #' #' Creates blob plots (\code{stacked = FALSE}) or stacked frequency plots (\code{stacked = TRUE}) for each algorithm #' from a bootstrapped, ranked assessment data set. #' #' @param x The bootstrapped, ranked assessment data set. #' @param ordering #' @param stacked A boolean specifying whether a stacked frequency plot (\code{stacked = TRUE}) or blob plot (\code{stacked = FALSE}) should be created. #' @param probs #' @param max_size #' @param shape #' @param freq #' @param single #' @param ... Further arguments passed to or from other functions. #' #' @return #' #' @examples #' #' @seealso `browseVignettes("challengeR")` #' #' @family functions to visualize cross-task insights #' @export stabilityByAlgorithm.bootstrap.list=function(x, ordering, stacked = FALSE, probs=c(.025,.975),#only for !stacked max_size=3,#only for !stacked shape=4,#only for !stacked freq=FALSE, #only for stacked single=FALSE,...) 
{ if (length(x$data) < 2) { stop("The stability of rankings by algorithm cannot be computed for less than two tasks.") } rankDist=rankdist.bootstrap.list(x) if (!missing(ordering)) { if (is.numeric(ordering) & !is.null(names(ordering)) ){ ordering <- names(ordering)[order(ordering)] } else if (!is.character(ordering)){ stop("Argument ordering has to be a named vector of ranks or a vector of algorithm names in the ranking order.") } rankDist=rankDist%>%mutate(algorithm=factor(.data$algorithm, levels=ordering)) } if (!stacked){ if (single==FALSE){ # Define breaks before creating Blob plot if (max(rankDist$rank)>5) { breaks = c(1, seq(5, max(rankDist$rank), by=5)) } else { breaks = seq(1, max(rankDist$rank)) } pl <- ggplot(rankDist)+ geom_count(aes(task , rank, color=algorithm, size = stat(prop*100), group = task ))+ scale_size_area(max_size = max_size)+ stat_summary(aes(task ,rank ), geom="point", shape=shape, fun.data=function(x) data.frame(y=median(x)),...)+ stat_summary(aes(task ,rank ), geom="linerange", fun.data=function(x) data.frame(ymin=quantile(x,probs[1]), ymax=quantile(x,probs[2])))+ facet_wrap(vars(algorithm))+ theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))+ guides(size = guide_legend(title="%"))+ scale_y_continuous(minor_breaks=NULL, limits=c(.4, max(rankDist$rank)), breaks=breaks)+ xlab("Task")+ ylab("Rank") } else { pl=list() for (alg in ordering){ rankDist.alg=subset(rankDist, rankDist$algorithm==alg) # Define breaks before creating Blob plot if (max(rankDist$rank)>5) { breaks = c(1, seq(5, max(rankDist$rank), by=5)) } else { breaks = seq(1, max(rankDist$rank)) } pl[[alg]]=ggplot(rankDist.alg)+ geom_count(aes(task , rank, color=algorithm, size = stat(prop*100), group = task ))+ scale_size_area(max_size = max_size)+ stat_summary(aes(task , rank ), geom="point", shape=shape, fun.data=function(x) data.frame(y=median(x)),...)+ stat_summary(aes(task ,rank ), geom="linerange", fun.data=function(x) data.frame(ymin=quantile(x,probs[1]), ymax=quantile(x,probs[2])))+ facet_wrap(vars(algorithm))+ theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))+ guides(size = guide_legend(title="%"))+ scale_y_continuous(minor_breaks=NULL, limits=c(.4, max(rankDist$rank)), breaks=breaks)+ xlab("Task")+ ylab("Rank") } names(pl) = ordering class(pl) <- "ggList" } } else { #stacked rankDist=rankDist%>% group_by(task)%>% dplyr::count(.data$algorithm, .data$rank)%>% group_by(.data$algorithm)%>% mutate(prop=.data$n/sum(.data$n)*100)%>% ungroup%>% data.frame%>% mutate(rank=as.factor(.data$rank)) results= melt.ranked.list(x, measure.vars="rank", value.name="rank") %>% dplyr::select(-.data$variable) colnames(results)[3]="task" if (!missing(ordering)) { if (is.numeric(ordering) & !is.null(names(ordering)) ){ ordering <- names(ordering)[order(ordering)] } else if (!is.character(ordering)){ stop("Argument ordering has to be a named vector of ranks or a vector of algorithm names in the ranking order.") } results=results%>%mutate(algorithm=factor(.data$algorithm, levels=ordering)) } if (single==FALSE){ pl<- ggplot(rankDist) + facet_wrap(vars(algorithm))+ theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) if (freq){ pl <- pl + geom_bar(aes(rank, n, fill=task ), position = "stack", stat = "identity") + ylab("Frequency") } else { pl <- pl + geom_bar(aes(rank, prop, fill=task ), position = "stack", stat = "identity")+ ylab("Proportion (%)") } pl <- pl + geom_vline(aes(xintercept=rank, color=task), size=.4, linetype="dotted", data=results) + xlab("Rank") } else 
{ pl=list() for (alg in ordering){ rankDist.alg=subset(rankDist, rankDist$algorithm==alg) results.alg=subset(results, results$algorithm==alg) pl[[alg]]=ggplot(rankDist.alg)+ facet_wrap(vars(algorithm))+ theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) if (freq){ pl[[alg]] <- pl[[alg]] + geom_bar(aes(rank, n, fill=task ), position = "stack", stat = "identity") + ylab("Frequency") } else { pl[[alg]] <- pl[[alg]] + geom_bar(aes(rank, prop, fill=task ), position = "stack", stat = "identity")+ ylab("Proportion (%)") } pl[[alg]] <- pl[[alg]] + geom_vline(aes(xintercept=rank, color=task), size=.4, linetype="dotted", data=results.alg) + xlab("Rank") } names(pl) = ordering class(pl) <- "ggList" } } pl } #' Creates blob plots stratified by task #' #' Creates blob plots for each task from a bootstrapped, ranked assessment data set. #' #' @param x The bootstrapped, ranked assessment data set. #' @param ordering #' @param probs #' @param max_size #' @param size.ranks #' @param shape #' @param showLabelForSingleTask A boolean specifying whether the task name should be used as title for a single-task data set. #' @param ... Further arguments passed to or from other functions. #' #' @return #' #' @examples #' #' @seealso `browseVignettes("challengeR")` #' #' @family functions to visualize ranking stability #' @family functions to visualize cross-task insights #' @export stabilityByTask.bootstrap.list=function(x, ordering, probs=c(.025,.975), max_size=3, size.ranks=.3*theme_get()$text$size, shape=4, showLabelForSingleTask=FALSE,...){ rankDist=rankdist.bootstrap.list(x) ranks=melt.ranked.list(x, measure.vars="rank", value.name = "full.rank") colnames(ranks)[4]="task" if (!missing(ordering)) { if (is.numeric(ordering) & !is.null(names(ordering)) ){ ordering <- names(ordering)[order(ordering)] } else if (!is.character(ordering)){ stop("Argument ordering has to be a named vector of ranks or a vector of algorithm names in the ranking order.") } ranks$algorithm=factor(ranks$algorithm, levels=ordering) rankDist=rankDist%>%mutate(algorithm=factor(.data$algorithm, levels=ordering)) } # Define breaks before creating Blob plot if (max(rankDist$rank)>5) { breaks = c(1, seq(5, max(rankDist$rank), by=5)) } else { breaks = seq(1, max(rankDist$rank)) } blobPlot <- ggplot(rankDist)+ geom_count(aes(algorithm , rank, color=algorithm, size = stat(prop*100), group = algorithm ))+ scale_size_area(max_size = max_size)+ geom_abline(slope=1, color="gray", linetype="dotted")+ stat_summary(aes(algorithm ,rank ), geom="point", shape=shape, fun.data=function(x) data.frame(y=median(x)),...)+ stat_summary(aes(algorithm ,rank ), geom="linerange", fun.data=function(x) data.frame(ymin=quantile(x,probs[1]), ymax=quantile(x,probs[2])))+ geom_text(aes(x=algorithm,y=1,label=full.rank), nudge_y=-.6, vjust = 0, size=size.ranks, fontface="plain", family="sans", data=ranks) + coord_cartesian(clip = 'off')+ theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))+ guides(size = guide_legend(title="%"))+ scale_y_continuous(minor_breaks=NULL, limits=c(.4, max(rankDist$rank)), breaks=breaks)+ xlab("Algorithm")+ ylab("Rank") # Create multi-panel plot with task names as labels for multi-task data set or single-task data set when explicitly specified if (length(x$data) > 1 || showLabelForSingleTask == TRUE) { blobPlot <- blobPlot + facet_wrap(vars(task)) } return(blobPlot) } diff --git a/R/subset.R b/R/subset.R index 94bd710..3625955 100644 --- a/R/subset.R +++ b/R/subset.R @@ -1,201 +1,201 @@ # Copyright (c) German 
Cancer Research Center (DKFZ) # All rights reserved. # # This file is part of challengeR. # # challengeR is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 2 of the License, or # (at your option) any later version. # # challengeR is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with challengeR. If not, see . subset <- function(x,...) UseMethod("subset") subset.default <- function(x, ...) base::subset(x, ...) subset.comparedRanks.list=function(x, tasks,...){ res=x[tasks] class(res)="comparedRanks.list" res } subset.list=function(x, tasks,...){ x[tasks] } subset.aggregated.list=function(x, tasks,...){ call=match.call(expand.dots = T) if (!is.null(as.list(call$top))) stop("Subset of algorithms only sensible for single task challenges.") matlist=x$matlist[tasks] res=list(matlist=matlist, call=list(x$call,call), data=x$data, FUN = . %>% (x$FUN) %>% (call) ) class(res)=class(x) res } which.top=function(object, top){ mat=object$mat[object$mat$rank<=top,] rownames(mat)#[order(mat$rank)] } #' Extracts a subset of algorithms or tasks #' #' Extracts the top performing algorithms or a subset of tasks. #' #' @section Reports for subsets (top list) of algorithms: #' If ties are present in the ranking, the subset will consist of more than \code{top} algorithms. #' Line plots for ranking robustness can be used to check whether algorithms performing well in other #' ranking methods are excluded. Bootstrapping still takes entire uncertainty into account. #' Podium plots and ranking heatmaps neglect excluded algorithms. Only available for single-task challenges #' (for multi-task challenges not sensible because each task would contain a different set of algorithms). #' #' @section Reports for subsets of tasks: #' You may want to recompute the consensus ranking after creating the subset. An error will be raised #' if a task identifier is not contained in the assessment data set to avoid subsequent errors. #' #' #' @param x The ranked asssessment data set. #' @param top A positive integer specifying the amount of top performing algorithms to be retrieved. #' @param tasks A vector of strings containing the task identifiers that should remain in the subset. #' @param ... Further arguments passed to or from other functions. #' #' @return An S3 object of class "ranked.list" to represent a ranked assessment data set. #' #' @examples #' #' \dontrun{ #' # only show the top 3 algorithms according to the chosen ranking method #' subset(ranking, top = 3) %>% report(...) #' } #' #' \dontrun{ #' # restrict report to tasks "task1" and "task2" #' subset(ranking, tasks=c("task1", "task2")) %>% report(...) #' } #' #' @export subset.ranked.list <- function(x, top, tasks,...) { if (!missing(top) & length(x$matlist) != 1) stop("Subset of algorithms only sensible for single-task challenges. 
Otherwise no consensus ranking is possible.") if (!missing(top)){ taskMat <- x$matlist[[1]] taskData <- x$data[[1]] objectTop=x objectTop$matlist[[1]]=taskMat[taskMat$rank<=top,] taskMatRowNames <- rownames(objectTop$matlist[[1]]) attribute <- attr(objectTop$data,"algorithm") selectedRowNames <- taskData[[attribute]] %in% taskMatRowNames objectTop$data[[1]] <- taskData[selectedRowNames,] if (is.factor(objectTop$data[[1]][[attribute]])) objectTop$data[[1]][[attribute]] <- droplevels(objectTop$data[[1]][[attribute]]) objectTop$fulldata=x$data return(objectTop) } else if (!missing(tasks)){ if (is.character(tasks) && any(!tasks%in%names(x$matlist))) { stop("There is/are no task(s) called ",paste(tasks[!tasks%in%names(x$matlist)],collapse = " and "),".") } res=list(matlist=x$matlist[tasks], data=x$data[tasks], call=x$call, FUN=x$FUN, FUN.list=x$FUN.list ) attrib=attributes(x$data) attrib$names=attr(res$data,"names") attributes(res$data)=attrib class(res)=c("ranked.list","list") return(res) } } #' Extracts a subset of algorithms or tasks #' #' Extracts the top performing algorithms or a subset of tasks. #' #' @section Reports for subsets (top list) of algorithms: #' If ties are present in the ranking, the subset will consist of more than \code{top} algorithms. #' Line plots for ranking robustness can be used to check whether algorithms performing well in other #' ranking methods are excluded. Bootstrapping still takes entire uncertainty into account. #' Podium plots and ranking heatmaps neglect excluded algorithms. Only available for single-task challenges #' (for multi-task challenges not sensible because each task would contain a different set of algorithms). #' #' @section Reports for subsets of tasks: #' You may want to recompute the consensus ranking after creating the subset. An error will be raised #' if a task identifier is not contained in the assessment data set to avoid subsequent errors. #' #' #' @param x The bootstrapped, ranked asssessment data set. #' @param top A positive integer specifying the amount of top performing algorithms to be retrieved. #' @param tasks A vector of strings containing the task identifiers that should remain in the subset. #' @param ... Further arguments passed to or from other functions. #' #' @return An S3 object of class "bootstrap.list" to represent a bootstrapped, ranked assessment data set. #' #' @examples #' #' \dontrun{ #' # only show the top 3 algorithms according to the chosen ranking method #' subset(ranking_bootstrapped, top = 3) %>% report(...) #' } #' #' \dontrun{ #' # restrict report to tasks "task1" and "task2" and recompute consensus ranking #' meanRanks <- subset(ranking, tasks = c("task1", "task2")) %>% consensus(method = "euclidean") #' } #' #' @export subset.bootstrap.list=function(x, top, tasks, ...) { if (!missing(top) & length(x$matlist) != 1) stop("Subset of algorithms only sensible for single-task challenges. 
Otherwise no consensus ranking is possible.") if (!missing(top)){ objectTop <- subset.ranked.list(x, top = top) - objectTop$bootsrappedRanks[[1]] <- objectTop$bootsrappedRanks[[1]][rownames(objectTop$matlist[[1]]),] - objectTop$bootsrappedAggregate[[1]] <- objectTop$bootsrappedAggregate[[1]][rownames(objectTop$matlist[[1]]),] + objectTop$bootstrappedRanks[[1]] <- objectTop$bootstrappedRanks[[1]][rownames(objectTop$matlist[[1]]),] + objectTop$bootstrappedAggregate[[1]] <- objectTop$bootstrappedAggregate[[1]][rownames(objectTop$matlist[[1]]),] return(objectTop) } else if (!missing(tasks)){ if (is.character(tasks) && any(!tasks%in%names(x$matlist))) { stop("There is/are no task(s) called ",paste(tasks[!tasks%in%names(x$matlist)],collapse = " and "),".") } - res=list(bootsrappedRanks=x$bootsrappedRanks[tasks], - bootsrappedAggregate=x$bootsrappedAggregate[tasks], + res=list(bootstrappedRanks=x$bootstrappedRanks[tasks], + bootstrappedAggregate=x$bootstrappedAggregate[tasks], matlist=x$matlist[tasks], data=x$data[tasks], FUN=x$FUN ) attrib=attributes(x$data) attrib$names=attr(res$data,"names") attributes(res$data)=attrib class(res)="bootstrap.list" return(res) } } diff --git a/R/violin.R b/R/violin.R index b759d63..81b2452 100644 --- a/R/violin.R +++ b/R/violin.R @@ -1,124 +1,124 @@ # Copyright (c) German Cancer Research Center (DKFZ) # All rights reserved. # # This file is part of challengeR. # # challengeR is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 2 of the License, or # (at your option) any later version. # # challengeR is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with challengeR. If not, see . #' @export violin <- function(x,...) UseMethod("violin") #' @export violin.default <- function(x, ...) stop("not implemented for this class") #' Creates a violin plot #' #' Creates a violin plot from a bootstrapped, ranked assessment data set. #' #' @param x The bootstrapped, ranked assessment data set. #' @param ... Further arguments passed to or from other functions. #' #' @return #' #' @examples #' #' @seealso `browseVignettes("challengeR")` #' #' @family functions to visualize ranking stability #' @export violin.bootstrap.list=function(x,...){ ken=melt(kendall.bootstrap.list(x)) colnames(ken)[2]="Task" cat("\n\nSummary Kendall's tau:\n") ss=ken%>%group_by(Task)%>% summarise(mean=mean(value,na.rm=T), median=median(value,na.rm=T), q25=quantile(value,probs = .25,na.rm=T), q75=quantile(value,probs = .75,na.rm=T))%>% arrange(desc(median)) print(knitr::kable(as.data.frame(ss))) # drop task if no kendall could be computed noResults <- sapply(split(ss,ss$Task), function(x) all(is.na(x[,-1]))) if (any(noResults)) { cat("\nNo Kendall's tau could be calculated for any bootstrap sample in task ", names(noResults)[noResults], " because of missing variability. 
Task dropped from figure.",fill=F) ken <- ken %>% filter(Task %in% names(noResults)[!noResults]) } xAxisText <- element_blank() # Show task names as tick mark labels only for multi-task data set if (length(x$data) > 1) { xAxisText <- element_text(angle = 90, vjust = 0.5, hjust = 1) } ken%>%mutate(Task=factor(.data$Task, levels=ss$Task))%>% ggplot(aes(Task,value))+ geom_violin(alpha=.3, color=NA, na.rm=TRUE, fill="blue")+ geom_boxplot(width=0.1, na.rm=TRUE, fill="white")+ theme(axis.text.x = xAxisText, legend.position = "none")+ ylab("Kendall's tau")+ scale_y_continuous(limits=c(min(min(ken$value),0), max(max(ken$value),1))) } kendall.bootstrap.list=function(x){ - ken=lapply(1:length(x$bootsrappedRanks),function(Task){ - id=match(rownames( x$bootsrappedRanks[[Task]]), + ken=lapply(1:length(x$bootstrappedRanks),function(Task){ + id=match(rownames( x$bootstrappedRanks[[Task]]), rownames(x$matlist[[Task]]) ) - sapply(x$bootsrappedRanks[[Task]], + sapply(x$bootstrappedRanks[[Task]], function(bootSample) suppressWarnings(kendall(bootSample, x$matlist[[Task]]$rank[id]))) } ) - names(ken)=names((x$bootsrappedRanks)) + names(ken)=names((x$bootstrappedRanks)) if (sum(is.na(x))>0){ cat("Bootstrap samples without variability in rankings (all algorithms ranked 1) excluded.\n Frequency of such samples by task:\n",fill = T) sapply(ken,function(x) sum(is.na(x))) } return(ken) } density.bootstrap.list=function(x,...){ ken=melt(kendall.bootstrap.list(x)) colnames(ken)[2]="Task" cat("\n\nSummary Kendall's tau\n") ss=ken%>%group_by(Task)%>% summarise(mean=mean(value,na.rm=T), median=median(value,na.rm=T), q25=quantile(value,probs = .25,na.rm=T), q75=quantile(value,probs = .75,na.rm=T))%>% arrange(desc(median)) print(as.data.frame(ss)) ggplot(ken)+ geom_density(aes(value,fill=Task),alpha=.3,color=NA) } diff --git a/inst/appdir/characterizationOfTasksBootstrapping.Rmd b/inst/appdir/characterizationOfTasksBootstrapping.Rmd index 2ae651d..1c7d936 100644 --- a/inst/appdir/characterizationOfTasksBootstrapping.Rmd +++ b/inst/appdir/characterizationOfTasksBootstrapping.Rmd @@ -1,55 +1,55 @@ ### Visualizing bootstrap results To investigate which tasks separate algorithms well (i.e., lead to a stable ranking), a blob plot is recommended. Bootstrap results can be shown in a blob plot showing one plot for each task. In this view, the spread of the blobs for each algorithm can be compared across tasks. Deviations from the diagonal indicate deviations from the consensus ranking (over tasks). Specifically, if rank distribution of an algorithm is consistently below the diagonal, the algorithm performed better in this task than on average across tasks, while if the rank distribution of an algorithm is consistently above the diagonal, the algorithm performed worse in this task than on average across tasks. At the bottom of each panel, ranks for each algorithm in the tasks are provided. Same as in Section \ref{blobByTask} but now ordered according to consensus. 
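The chunk below assumes an `ordering_consensus` vector holding the algorithm names sorted by a consensus ranking across tasks. As a minimal sketch only (under the assumption that `consensus()`, referenced in the `subset()` documentation of this package, returns a named numeric vector of consensus ranks), such an ordering could be derived as follows:

```{r consensus_ordering_sketch, eval=FALSE}
# 'ranking' is assumed to be the ranked assessment data set; consensus() is
# assumed to return a named numeric vector of consensus ranks (one per algorithm).
meanRanks <- ranking %>% consensus(method = "euclidean")

# stabilityByTask() accepts either a named vector of ranks or the algorithm
# names in ranking order (see the ordering check in stability.R), so either works:
ordering_consensus <- names(meanRanks)[order(meanRanks)]
```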
\bigskip ```{r blobplot_bootstrap_byTask,fig.width=9, fig.height=9, results='hide'} #stabilityByTask.bootstrap.list if (n.tasks<=6 & n.algorithms<=10 ){ stabilityByTask(boot_object, ordering=ordering_consensus, max_size = 9, size=4, shape=4) + scale_color_manual(values=cols) + guides(color = 'none') } else { pl=list() - for (subt in names(boot_object$bootsrappedRanks)){ - a=list(bootsrappedRanks=list(boot_object$bootsrappedRanks[[subt]]), + for (subt in names(boot_object$bootstrappedRanks)){ + a=list(bootstrappedRanks=list(boot_object$bootstrappedRanks[[subt]]), matlist=list(boot_object$matlist[[subt]])) - names(a$bootsrappedRanks)=names(a$matlist)=subt + names(a$bootstrappedRanks)=names(a$matlist)=subt class(a)="bootstrap.list" r=boot_object$matlist[[subt]] pl[[subt]]=stabilityByTask(a, max_size = 9, ordering=ordering_consensus, size.ranks=.25*theme_get()$text$size, size=4, shape=4) + scale_color_manual(values=cols) + guides(color = 'none') + ggtitle(subt)+ theme(legend.position = "bottom") } print(pl) } ``` \ No newline at end of file diff --git a/inst/appdir/visualizationBlobPlots.Rmd b/inst/appdir/visualizationBlobPlots.Rmd index 43ed934..7a916f4 100644 --- a/inst/appdir/visualizationBlobPlots.Rmd +++ b/inst/appdir/visualizationBlobPlots.Rmd @@ -1,41 +1,41 @@ ## *Blob plot* for visualizing ranking stability based on bootstrap sampling \label{blobByTask} -Algorithms are color-coded, and the area of each blob at position $\left( A_i, \text{rank } j \right)$ is proportional to the relative frequency $A_i$ achieved rank $j$ across $b=$ `r ncol(boot_object$bootsrappedRanks[[1]])` bootstrap samples. The median rank for each algorithm is indicated by a black cross. 95\% bootstrap intervals across bootstrap samples are indicated by black lines. +Algorithms are color-coded, and the area of each blob at position $\left( A_i, \text{rank } j \right)$ is proportional to the relative frequency $A_i$ achieved rank $j$ across $b=$ `r ncol(boot_object$bootstrappedRanks[[1]])` bootstrap samples. The median rank for each algorithm is indicated by a black cross. 95\% bootstrap intervals across bootstrap samples are indicated by black lines. 
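To make the encoded quantities explicit, here is a minimal sketch (not part of the report pipeline) of how they could be computed directly. It assumes `boot_object$bootstrappedRanks[[task]]` stores one row per algorithm and one column per bootstrap sample, consistent with `ncol()` giving the number of samples above and with the transposition in `rankdist.bootstrap.list()`:

```{r blob_quantities_sketch, eval=FALSE}
ranksMat <- boot_object$bootstrappedRanks[[1]]  # rows = algorithms, cols = bootstrap samples
b <- ncol(ranksMat)

# relative frequency (in %) with which each algorithm achieved each rank -> blob area
rankFreq <- t(apply(ranksMat, 1,
                    function(r) 100 * table(factor(r, levels = seq_len(nrow(ranksMat)))) / b))

# median rank (black cross) and central 95% bootstrap interval (black lines)
medianRank   <- apply(ranksMat, 1, median)
bootInterval <- apply(ranksMat, 1, quantile, probs = c(0.025, 0.975))
```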
\bigskip ```{r blobplot_bootstrap,fig.width=9, fig.height=9, results='hide'} showLabelForSingleTask <- FALSE if (n.tasks > 1) { showLabelForSingleTask <- TRUE } pl=list() -for (subt in names(boot_object$bootsrappedRanks)){ - a=list(bootsrappedRanks=list(boot_object$bootsrappedRanks[[subt]]), +for (subt in names(boot_object$bootstrappedRanks)){ + a=list(bootstrappedRanks=list(boot_object$bootstrappedRanks[[subt]]), matlist=list(boot_object$matlist[[subt]])) - names(a$bootsrappedRanks)=names(a$matlist)=subt + names(a$bootstrappedRanks)=names(a$matlist)=subt class(a)="bootstrap.list" r=boot_object$matlist[[subt]] pl[[subt]]=stabilityByTask(a, max_size =8, ordering=rownames(r[order(r$rank),]), size.ranks=.25*theme_get()$text$size, size=8, shape=4, showLabelForSingleTask=showLabelForSingleTask) + scale_color_manual(values=cols) + guides(color = 'none') } # if (length(boot_object$matlist)<=6 &nrow((boot_object$matlist[[1]]))<=10 ){ # ggpubr::ggarrange(plotlist = pl) # } else { print(pl) #} ``` diff --git a/inst/appdir/visualizationViolinPlots.Rmd b/inst/appdir/visualizationViolinPlots.Rmd index 0c77a29..cb3bee0 100644 --- a/inst/appdir/visualizationViolinPlots.Rmd +++ b/inst/appdir/visualizationViolinPlots.Rmd @@ -1,13 +1,13 @@ \newpage ## *Violin plot* for visualizing ranking stability based on bootstrapping \label{violin} -The ranking list based on the full assessment data is pairwise compared with the ranking lists based on the individual bootstrap samples (here $b=$ `r ncol(boot_object$bootsrappedRanks[[1]])` samples). For each pair of rankings, Kendall's $\tau$ correlation is computed. Kendall’s $\tau$ is a scaled index determining the correlation between the lists. It is computed by evaluating the number of pairwise concordances and discordances between ranking lists and produces values between $-1$ (for inverted order) and $1$ (for identical order). A violin plot, which simultaneously depicts a boxplot and a density plot, is generated from the results. +The ranking list based on the full assessment data is pairwise compared with the ranking lists based on the individual bootstrap samples (here $b=$ `r ncol(boot_object$bootstrappedRanks[[1]])` samples). For each pair of rankings, Kendall's $\tau$ correlation is computed. Kendall’s $\tau$ is a scaled index determining the correlation between the lists. It is computed by evaluating the number of pairwise concordances and discordances between ranking lists and produces values between $-1$ (for inverted order) and $1$ (for identical order). A violin plot, which simultaneously depicts a boxplot and a density plot, is generated from the results. \bigskip ```{r violin, results='asis'} violin(boot_object) ``` \newpage diff --git a/tests/testthat/test-subset.R b/tests/testthat/test-subset.R index eb45345..c62be78 100644 --- a/tests/testthat/test-subset.R +++ b/tests/testthat/test-subset.R @@ -1,244 +1,244 @@ # Copyright (c) German Cancer Research Center (DKFZ) # All rights reserved. # # This file is part of challengeR. # # challengeR is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 2 of the License, or # (at your option) any later version. # # challengeR is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
# # You should have received a copy of the GNU General Public License # along with challengeR. If not, see . test_that("top 2 performing algorithms are extracted and data set is reduced respectively", { data <- rbind( data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A2", value=0.6, case="C1"), data.frame(algo="A3", value=0.4, case="C1"), data.frame(algo="A1", value=0.2, case="C2"), data.frame(algo="A2", value=0.1, case="C2"), data.frame(algo="A3", value=0.0, case="C2")) challenge <- as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter=FALSE) ranking <- challenge%>%aggregateThenRank(FUN=mean, ties.method="min") rankingSubset <- subset(ranking, top=2) expectedRankingSubset <- rbind( "A1" = data.frame(value_mean = 0.5, rank = 1), "A2" = data.frame(value_mean = 0.35, rank = 2)) expect_equal(rankingSubset$matlist$T1, expectedRankingSubset) expect_equal(as.vector(rankingSubset$data$T1$algo), c("A1", "A2", "A1", "A2")) expect_equal(as.vector(rankingSubset$data$T1$value), c(0.8, 0.6, 0.2, 0.1)) expect_equal(as.vector(rankingSubset$data$T1$case), c("C1", "C1", "C2", "C2")) expect_equal(as.vector(rankingSubset$data$T1$task), c("T1", "T1", "T1", "T1")) # check that full data set is preserved expect_equal(rankingSubset$fulldata$T1, challenge$T1) }) test_that("extraction of subset raises error for multi-task data set", { dataTask1 <- cbind(task="T1", rbind( data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A2", value=0.6, case="C1"), data.frame(algo="A3", value=0.4, case="C1"), data.frame(algo="A1", value=0.2, case="C2"), data.frame(algo="A2", value=0.1, case="C2"), data.frame(algo="A3", value=0.0, case="C2") )) dataTask2 <- cbind(task="T2", rbind( data.frame(algo="A1", value=0.2, case="C1"), data.frame(algo="A2", value=0.3, case="C1"), data.frame(algo="A3", value=0.4, case="C1"), data.frame(algo="A1", value=0.7, case="C2"), data.frame(algo="A2", value=0.8, case="C2"), data.frame(algo="A3", value=0.9, case="C2") )) data <- rbind(dataTask1, dataTask2) challenge <- as.challenge(data, by="task", algorithm="algo", case="case", value="value", smallBetter=FALSE) ranking <- challenge%>%aggregateThenRank(FUN=mean, ties.method="min") expect_error(subset(ranking, top=2), "Subset of algorithms only sensible for single-task challenges.", fixed=TRUE) }) test_that("extraction of subset returns all algorithms even when more are requested", { data <- rbind( data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A2", value=0.6, case="C1"), data.frame(algo="A3", value=0.4, case="C1"), data.frame(algo="A1", value=0.2, case="C2"), data.frame(algo="A2", value=0.1, case="C2"), data.frame(algo="A3", value=0.0, case="C2")) challenge <- as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter=FALSE) ranking <- challenge%>%aggregateThenRank(FUN=mean, ties.method="min") rankingSubset <- subset(ranking, top=4) expectedRankingSubset <- rbind( "A1" = data.frame(value_mean = 0.5, rank = 1), "A2" = data.frame(value_mean = 0.35, rank = 2), "A3" = data.frame(value_mean = 0.2, rank = 3)) expect_equal(rankingSubset$matlist$T1, expectedRankingSubset) }) test_that("extraction of subset returns more algorithms then requested when ties are present", { data <- rbind( data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A2", value=0.8, case="C1"), data.frame(algo="A3", value=0.8, case="C1"), data.frame(algo="A1", value=0.2, case="C2"), data.frame(algo="A2", value=0.2, case="C2"), data.frame(algo="A3", value=0.2, case="C2")) 
challenge <- as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter=FALSE) ranking <- challenge%>%aggregateThenRank(FUN=mean, ties.method="min") rankingSubset <- subset(ranking, top=2) expectedRankingSubset <- rbind( "A1" = data.frame(value_mean = 0.5, rank = 1), "A2" = data.frame(value_mean = 0.5, rank = 1), "A3" = data.frame(value_mean = 0.5, rank = 1)) expect_equal(rankingSubset$matlist$T1, expectedRankingSubset) }) test_that("top 2 performing algorithms are extracted from bootstrap ranking and data set is reduced respectively", { data <- rbind( data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A2", value=0.6, case="C1"), data.frame(algo="A3", value=0.4, case="C1"), data.frame(algo="A1", value=0.2, case="C2"), data.frame(algo="A2", value=0.1, case="C2"), data.frame(algo="A3", value=0.0, case="C2")) challenge <- as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter=FALSE) ranking <- challenge%>%aggregateThenRank(FUN=mean, ties.method="min") set.seed(1) rankingBootstrapped <- ranking%>%bootstrap(nboot=10) rankingBootstrappedSubset <- subset(rankingBootstrapped, top=2) expectedRankingSubset <- rbind( "A1" = data.frame(value_mean = 0.5, rank = 1), "A2" = data.frame(value_mean = 0.35, rank = 2)) expect_equal(rankingBootstrappedSubset$matlist$T1, expectedRankingSubset) expect_equal(as.vector(rankingBootstrappedSubset$data$T1$algo), c("A1", "A2", "A1", "A2")) expect_equal(as.vector(rankingBootstrappedSubset$data$T1$value), c(0.8, 0.6, 0.2, 0.1)) expect_equal(as.vector(rankingBootstrappedSubset$data$T1$case), c("C1", "C1", "C2", "C2")) expect_equal(as.vector(rankingBootstrappedSubset$data$T1$task), c("T1", "T1", "T1", "T1")) - expect_equal(dim(rankingBootstrappedSubset$bootsrappedRanks$T1), c(2, 10)) - expect_equal(dim(rankingBootstrappedSubset$bootsrappedAggregate$T1), c(2, 10)) + expect_equal(dim(rankingBootstrappedSubset$bootstrappedRanks$T1), c(2, 10)) + expect_equal(dim(rankingBootstrappedSubset$bootstrappedAggregate$T1), c(2, 10)) # check that full data set is preserved expect_equal(rankingBootstrappedSubset$fulldata$T1, challenge$T1) }) test_that("extraction of bootstrap ranking subset raises error for multi-task data set", { dataTask1 <- cbind(task="T1", rbind( data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A2", value=0.6, case="C1"), data.frame(algo="A3", value=0.4, case="C1"), data.frame(algo="A1", value=0.2, case="C2"), data.frame(algo="A2", value=0.1, case="C2"), data.frame(algo="A3", value=0.0, case="C2") )) dataTask2 <- cbind(task="T2", rbind( data.frame(algo="A1", value=0.2, case="C1"), data.frame(algo="A2", value=0.3, case="C1"), data.frame(algo="A3", value=0.4, case="C1"), data.frame(algo="A1", value=0.7, case="C2"), data.frame(algo="A2", value=0.8, case="C2"), data.frame(algo="A3", value=0.9, case="C2") )) data <- rbind(dataTask1, dataTask2) challenge <- as.challenge(data, by="task", algorithm="algo", case="case", value="value", smallBetter=FALSE) ranking <- challenge%>%aggregateThenRank(FUN=mean, ties.method="min") set.seed(1) rankingBootstrapped <- ranking%>%bootstrap(nboot=10) expect_error(subset(subset(rankingBootstrapped, top=2), top=2), "Subset of algorithms only sensible for single-task challenges.", fixed=TRUE) }) test_that("extraction of bootstrap ranking subset returns all algorithms even when more are requested", { data <- rbind( data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A2", value=0.6, case="C1"), data.frame(algo="A3", value=0.4, case="C1"), 
data.frame(algo="A1", value=0.2, case="C2"), data.frame(algo="A2", value=0.1, case="C2"), data.frame(algo="A3", value=0.0, case="C2")) challenge <- as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter=FALSE) ranking <- challenge%>%aggregateThenRank(FUN=mean, ties.method="min") set.seed(1) rankingBootstrapped <- ranking%>%bootstrap(nboot=10) rankingBootstrappedSubset <- subset(rankingBootstrapped, top=4) expectedRankingSubset <- rbind( "A1" = data.frame(value_mean = 0.5, rank = 1), "A2" = data.frame(value_mean = 0.35, rank = 2), "A3" = data.frame(value_mean = 0.2, rank = 3)) expect_equal(rankingBootstrappedSubset$matlist$T1, expectedRankingSubset) }) test_that("extraction of bootstrap ranking subset returns more algorithms then requested when ties are present", { data <- rbind( data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A2", value=0.8, case="C1"), data.frame(algo="A3", value=0.8, case="C1"), data.frame(algo="A1", value=0.2, case="C2"), data.frame(algo="A2", value=0.2, case="C2"), data.frame(algo="A3", value=0.2, case="C2")) challenge <- as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter=FALSE) ranking <- challenge%>%aggregateThenRank(FUN=mean, ties.method="min") set.seed(1) rankingBootstrapped <- ranking%>%bootstrap(nboot=10) rankingBootstrappedSubset <- subset(rankingBootstrapped, top=2) expectedRankingSubset <- rbind( "A1" = data.frame(value_mean = 0.5, rank = 1), "A2" = data.frame(value_mean = 0.5, rank = 1), "A3" = data.frame(value_mean = 0.5, rank = 1)) expect_equal(rankingBootstrappedSubset$matlist$T1, expectedRankingSubset) }) diff --git a/tests/testthat/test-taskSubset.R b/tests/testthat/test-taskSubset.R index 7a7b566..ed94ec0 100644 --- a/tests/testthat/test-taskSubset.R +++ b/tests/testthat/test-taskSubset.R @@ -1,188 +1,188 @@ # Copyright (c) German Cancer Research Center (DKFZ) # All rights reserved. # # This file is part of challengeR. # # challengeR is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 2 of the License, or # (at your option) any later version. # # challengeR is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with challengeR. If not, see . 
test_that("extraction of task subset works for multi-task data set", { dataTask1 <- cbind(task="T1", rbind( data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A2", value=0.6, case="C1"), data.frame(algo="A3", value=0.4, case="C1"), data.frame(algo="A1", value=0.2, case="C2"), data.frame(algo="A2", value=0.1, case="C2"), data.frame(algo="A3", value=0.0, case="C2") )) dataTask2 <- cbind(task="T2", rbind( data.frame(algo="A1", value=0.2, case="C1"), data.frame(algo="A2", value=0.3, case="C1"), data.frame(algo="A3", value=0.4, case="C1"), data.frame(algo="A1", value=0.7, case="C2"), data.frame(algo="A2", value=0.8, case="C2"), data.frame(algo="A3", value=0.9, case="C2") )) data <- rbind(dataTask1, dataTask2) challenge <- as.challenge(data, by="task", algorithm="algo", case="case", value="value", smallBetter=FALSE) ranking <- challenge%>%aggregateThenRank(FUN=mean, ties.method="min") rankingSubset <- subset(ranking, tasks=c("T2")) expect_equal(length(rankingSubset$matlist), 1) expect_is(rankingSubset$matlist$T2, "data.frame") expect_equal(length(rankingSubset$data), 1) expect_is(rankingSubset$data$T2, "data.frame") }) test_that("extraction of task subset works for single-task data set", { data <- rbind( data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A2", value=0.6, case="C1"), data.frame(algo="A3", value=0.4, case="C1"), data.frame(algo="A1", value=0.2, case="C2"), data.frame(algo="A2", value=0.1, case="C2"), data.frame(algo="A3", value=0.0, case="C2")) challenge <- as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter=FALSE) ranking <- challenge%>%aggregateThenRank(FUN=mean, ties.method="min") rankingSubset <- subset(ranking, tasks=c("T1")) expect_equal(length(rankingSubset$matlist), 1) expect_is(rankingSubset$matlist$T1, "data.frame") expect_equal(length(rankingSubset$data), 1) expect_is(rankingSubset$data$T1, "data.frame") }) test_that("extraction of task subset raises an error for invalid task name", { data <- rbind( data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A2", value=0.6, case="C1"), data.frame(algo="A3", value=0.4, case="C1"), data.frame(algo="A1", value=0.2, case="C2"), data.frame(algo="A2", value=0.1, case="C2"), data.frame(algo="A3", value=0.0, case="C2")) challenge <- as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter=FALSE) ranking <- challenge%>%aggregateThenRank(FUN=mean, ties.method="min") expect_error(subset(ranking, tasks=c("T1x")), "There is/are no task(s) called T1x.", fixed=TRUE) }) test_that("extraction of task subset from bootstrap ranking works for multi-task data set", { dataTask1 <- cbind(task="T1", rbind( data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A2", value=0.6, case="C1"), data.frame(algo="A3", value=0.4, case="C1"), data.frame(algo="A1", value=0.2, case="C2"), data.frame(algo="A2", value=0.1, case="C2"), data.frame(algo="A3", value=0.0, case="C2") )) dataTask2 <- cbind(task="T2", rbind( data.frame(algo="A1", value=0.2, case="C1"), data.frame(algo="A2", value=0.3, case="C1"), data.frame(algo="A3", value=0.4, case="C1"), data.frame(algo="A1", value=0.7, case="C2"), data.frame(algo="A2", value=0.8, case="C2"), data.frame(algo="A3", value=0.9, case="C2") )) data <- rbind(dataTask1, dataTask2) challenge <- as.challenge(data, by="task", algorithm="algo", case="case", value="value", smallBetter=FALSE) ranking <- challenge%>%aggregateThenRank(FUN=mean, ties.method="min") set.seed(1) rankingBootstrapped <- 
ranking%>%bootstrap(nboot=10) rankingBootstrappedSubset <- subset(rankingBootstrapped, tasks=c("T2")) expect_equal(length(rankingBootstrappedSubset$matlist), 1) expect_is(rankingBootstrappedSubset$matlist$T2, "data.frame") expect_equal(length(rankingBootstrappedSubset$data), 1) expect_is(rankingBootstrappedSubset$data$T2, "data.frame") - expect_equal(length(rankingBootstrappedSubset$bootsrappedRanks), 1) - expect_is(rankingBootstrappedSubset$bootsrappedRanks$T2, "data.frame") + expect_equal(length(rankingBootstrappedSubset$bootstrappedRanks), 1) + expect_is(rankingBootstrappedSubset$bootstrappedRanks$T2, "data.frame") - expect_equal(length(rankingBootstrappedSubset$bootsrappedAggregate), 1) - expect_is(rankingBootstrappedSubset$bootsrappedAggregate$T2, "data.frame") + expect_equal(length(rankingBootstrappedSubset$bootstrappedAggregate), 1) + expect_is(rankingBootstrappedSubset$bootstrappedAggregate$T2, "data.frame") }) test_that("extraction of task subset from bootstrap ranking works for single-task data set", { data <- rbind( data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A2", value=0.6, case="C1"), data.frame(algo="A3", value=0.4, case="C1"), data.frame(algo="A1", value=0.2, case="C2"), data.frame(algo="A2", value=0.1, case="C2"), data.frame(algo="A3", value=0.0, case="C2")) challenge <- as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter=FALSE) ranking <- challenge%>%aggregateThenRank(FUN=mean, ties.method="min") set.seed(1) rankingBootstrapped <- ranking%>%bootstrap(nboot=10) rankingBootstrappedSubset <- subset(rankingBootstrapped, tasks=c("T1")) expect_equal(length(rankingBootstrappedSubset$matlist), 1) expect_is(rankingBootstrappedSubset$matlist$T1, "data.frame") expect_equal(length(rankingBootstrappedSubset$data), 1) expect_is(rankingBootstrappedSubset$data$T1, "data.frame") - expect_equal(length(rankingBootstrappedSubset$bootsrappedRanks), 1) - expect_is(rankingBootstrappedSubset$bootsrappedRanks$T1, "data.frame") + expect_equal(length(rankingBootstrappedSubset$bootstrappedRanks), 1) + expect_is(rankingBootstrappedSubset$bootstrappedRanks$T1, "data.frame") - expect_equal(length(rankingBootstrappedSubset$bootsrappedAggregate), 1) - expect_is(rankingBootstrappedSubset$bootsrappedAggregate$T1, "data.frame") + expect_equal(length(rankingBootstrappedSubset$bootstrappedAggregate), 1) + expect_is(rankingBootstrappedSubset$bootstrappedAggregate$T1, "data.frame") }) test_that("extraction of task subset from bootstrap ranking raises an error for invalid task name", { data <- rbind( data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A2", value=0.6, case="C1"), data.frame(algo="A3", value=0.4, case="C1"), data.frame(algo="A1", value=0.2, case="C2"), data.frame(algo="A2", value=0.1, case="C2"), data.frame(algo="A3", value=0.0, case="C2")) challenge <- as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter=FALSE) ranking <- challenge%>%aggregateThenRank(FUN=mean, ties.method="min") set.seed(1) rankingBootstrapped <- ranking%>%bootstrap(nboot=10) expect_error(subset(rankingBootstrapped, tasks=c("T1x")), "There is/are no task(s) called T1x.", fixed=TRUE) })
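As a closing note on the violin plots described above: Kendall's tau compares two ranking lists via pairwise concordances and discordances. A small self-contained example in base R (the package's internal `kendall()` helper is assumed to agree with `cor(..., method = "kendall")` for rankings without ties):

```r
# Full ranking of four hypothetical algorithms and one bootstrap ranking in
# which A2 and A3 have swapped places.
fullRank <- c(A1 = 1, A2 = 2, A3 = 3, A4 = 4)
bootRank <- c(A1 = 1, A2 = 3, A3 = 2, A4 = 4)

# Of the 6 algorithm pairs, 5 are concordant and 1 is discordant:
# tau = (5 - 1) / 6 = 0.667
cor(fullRank, bootRank, method = "kendall")

cor(fullRank, fullRank,      method = "kendall")  # identical order ->  1
cor(fullRank, rev(fullRank), method = "kendall")  # inverted order  -> -1
```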