diff --git a/R/boxplot.R b/R/boxplot.R index d119132..789812c 100644 --- a/R/boxplot.R +++ b/R/boxplot.R @@ -1,80 +1,81 @@ boxplot.ranked.list=function(x, color="blue", jitter.width=0.25,...){ algo=attr(x$data,"algorithm") value=attr(x$data,"value") ranking=x x=x$data for (i in names(x)) { x[[i]][[algo]]=factor(x[[i]][[algo]], levels=rownames(ranking$matlist[[i]][order(ranking$matlist[[i]]$rank),])) } a=lapply(1:length(x),function(id){ ggplot(aes_string(algo,value),data=x[[id]])+ geom_jitter(position=position_jitter(width=jitter.width, height=0), color=color,...)+ geom_boxplot(outlier.shape = NA,fill=NA)+ ggtitle(names(x)[id]) + - theme(axis.text.x=element_text(angle = -90, hjust = 0)) - + theme(axis.text.x=element_text(angle = -90, hjust = 0)) + + xlab("Algorithm") + + ylab("Metric value") }) - names(a) = names(x) # Remove title for single-task data set if (length(a) == 1) { a[[1]]$labels$title <- NULL + } else { + names(a) = names(x$matlist) + class(a) <- "ggList" } - if (length(a) >1) class(a) <- "ggList" - else a <- a[[1]] - + a } boxplot.comparedRanks.list=function(x,...){ tau=sapply(x,function(z) z$tau) boxplot(tau,ylim=c(0,1.0),las=2, outline=FALSE, ylab="Kendall's tau",...) stripchart(tau, vertical = TRUE, method = "jitter", pch = 21, col = "blue", add=TRUE,...) } boxplot.bootstrap.list=function(x,...){ winner.noboot=winner.ranked.list(x) x2=winnerFrequencies(x) n.bootstraps= ncol(x$bootsrappedRanks[[1]]) perc_boot_Winner=lapply(1:length(x2),function(i){ x2.i=x2[[i]] winner.id=which(rownames(x2.i)%in%rownames(winner.noboot[[i]])) #could be multiple winners!!!! 100*x2.i[winner.id,3,drop=F]/n.bootstraps }) boxplot(unlist(perc_boot_Winner),ylim=c(0,100),las=2, outline=FALSE, ylab="% Bootstraps",xlab="Winner ranks 1", sub=paste(n.bootstraps,"Bootstraps"),...) stripchart(unlist(perc_boot_Winner), vertical = TRUE, method = "jitter", pch = 21, col = "blue", add=TRUE,...) } # winnerFrequencies(bb) # winner.datax=winner(datax, largeBetter=TRUE) #no bootstrap # x2=numberRank1(datax,originalranking.datax,datax_boot) # boot_W_1=x2[which(x2$algorithm_id==winner.datax),3] # boot_NW_1=sum(x2[-which(x2$algorithm_id==winner.datax),3]>9) # perc_boot_Winner=boot_W_1/Data$N_Bootstraps # perc_boot_NotWinner=Data$Bootstrap_Rank1_NotWinner/(Data$N_Algo-1) diff --git a/R/challengeR.R b/R/challengeR.R index 4af1170..362ef5a 100644 --- a/R/challengeR.R +++ b/R/challengeR.R @@ -1,149 +1,149 @@ #' Title #' #' @param object #' @param value #' @param algorithm #' @param case #' @param taskName Optional for single-task data set that does not contain a task column. #' @param by The name of the column that contains the task identifiers. Required for multi-task data set. #' @param annotator #' @param smallBetter #' @param na.treat #' @param check #' #' @return #' @export #' #' @examples as.challenge=function(object, value, algorithm , case=NULL, taskName=NULL, by=NULL, annotator=NULL, smallBetter=FALSE, na.treat=NULL, # optional check=TRUE) { object=as.data.frame(object[,c(value, algorithm, case, by, annotator)]) # sanity checks if (check) { if (!is.null(by) && !is.null(taskName)) { warning("Argument 'taskName' is ignored for multi-task data set.") } # Add task column for data set without task column by using the specified task name. if (is.null(by) && !is.null(taskName)) { taskName <- trimws(taskName) if (taskName == "") { stop("Argument 'taskName' is empty.") } object <- cbind(task=taskName, object) by = "task" } # Add task column for data set without task column by using a dummy task name. 
if (is.null(by) && is.null(taskName)) { object <- cbind(task="dummyTask", object) by = "task" } object=splitby(object,by=by) object=lapply(object,droplevels) missingData = n.missing = list() for (task in names(object)) { if (!all(is.numeric(object[[task]][[value]]))) stop("Performance values must be numeric.") - + n.missing[[task]] <- sum(is.na(object[[task]][[value]])) # already missing before na.treat; for report if (n.missing[[task]]>0) message("Note: ", n.missing, " missing(s) present in the data set.") # check for missing cases missingData[[task]]=object[[task]] %>% expand(!!as.symbol(algorithm), !!as.symbol(case))%>% anti_join(object[[task]], by=c( algorithm,case)) if (nrow(missingData[[task]])>0) { - if (length(object) == 1 ) { # single task - message("Note: Performance of not all algorithms is observed for all cases. Inserted as missings in following cases:") + if (length(object) == 1 ) { # single task + message("Performance of not all algorithms is observed for all cases. Inserted as missings in following cases:") } else { # multi task - message("Note: Performance of not all algorithms is observed for all cases in task '", + message("Performance of not all algorithms is observed for all cases in task '", task, "'. Inserted as missings in following cases:") - + } print(as.data.frame(missingData[[task]])) object[[task]]=as.data.frame(object[[task]] %>% complete(!!as.symbol(algorithm), !!as.symbol(case))) } # check duplicate cases all1=apply(table(object[[task]][[algorithm]], object[[task]][[case]]), 2, function(x) all(x==1)) if (!all(all1)) { n.duplicated <- sum(all1!=1) - - if (length(object) == 1 ) { # single task + + if (length(object) == 1 ) { # single task if (n.duplicated/length(all1) >= 1/5) { # at least a quarter of the cases is duplicated - stop ("The following cases appear more than once. Please revise. ", + stop ("The following case(s) appear(s) more than once for the same algorithm. Please revise. ", "Or are you considering a multi-task challenge and forgot to specify argument 'by'?\n", - "Cases: ", + "Case(s): ", paste(names(which(all1!=1)), collapse=", ") ) } else { - stop ("The following case(s) appear(s) more than once. Please revise.\n", + stop ("The following case(s) appear(s) more than once for the same algorithm. Please revise.\n", "Case(s): ", paste(names(which(all1!=1)), collapse=", ") ) } } else { # multi task stop ("The following case(s) appear(s) more than once for the same algorithm in task '", task, "'. Please revise.\n", "Case(s): ", paste(names(which(all1!=1)), collapse=", ") ) } } if (!is.null(na.treat)) { if (is.numeric(na.treat)) object[[task]][,value][is.na(object[[task]][,value])]=na.treat else if (is.function(na.treat)) object[[task]][,value][is.na(object[[task]][,value])]=na.treat(object[[task]][,value][is.na(object[[task]][,value])]) else if (is.character(na.treat) && na.treat=="na.rm") object[[task]]=object[[task]][!is.na(object[[task]][,value]),] } } } if (check==TRUE && (any(sapply(missingData, function(x) nrow(x))>0) |any(n.missing>0))) { if (is.null(na.treat)) message("For aggregate-then-rank, na.treat will have to be specified. ", "For rank-then-aggregate, missings will implicitly lead to the algorithm ranked last for the missing test case." 
) else if (is.numeric(na.treat)) message("All missings have been replaced by the value ", na.treat,".\n") else if (is.character(na.treat) && na.treat=="na.rm") message("All missings have been removed.") else if (is.function(na.treat)) { message("Missings have been replaced using function ") print(na.treat) } } - + if (check==TRUE){ attr(object,"n.missing")=n.missing attr(object,"missingData")=missingData } attr(object,"na.treat")=na.treat - + attr(object,"algorithm")=algorithm attr(object,"value")=value attr(object,"case")=case attr(object,"annotator")=annotator attr(object,"by")=by attr(object,"largeBetter")=!smallBetter attr(object,"check")=check class(object)=c("challenge", class(object)) object } diff --git a/inst/appdir/reportMultiple.Rmd b/inst/appdir/reportMultiple.Rmd index 88cfb45..8dab4d1 100644 --- a/inst/appdir/reportMultiple.Rmd +++ b/inst/appdir/reportMultiple.Rmd @@ -1,381 +1,381 @@ --- params: object: NA colors: NA name: NULL consensus: NA isMultiTask: NA bootstrappingEnabled: NA fig.format: NULL title: "Benchmarking report for `r params$name` " author: "created by challengeR v`r packageVersion('challengeR')` \nWiesenfarth, Reinke, Landman, Cardoso, Maier-Hein & Kopp-Schneider (2019)" date: "`r Sys.setlocale('LC_TIME', 'English'); format(Sys.time(), '%d %B, %Y')`" editor_options: chunk_output_type: console --- ```{r setup, include=FALSE} options(width=80) #out.format <- knitr::opts_knit$get("out.format") out.format <- knitr::opts_knit$get("rmarkdown.pandoc.to") img_template <- switch( out.format, docx = list("img-params"=list(dpi=150, fig.width=6, fig.height=6, out.width="504px", out.height="504px")), { # default list("img-params"=list( fig.width=7,fig.height = 3,dpi=300)) } ) knitr::opts_template$set( img_template ) knitr::opts_chunk$set(echo = F) # ,#fig.width=7,fig.height = 3,dpi=300, if (out.format != "docx") knitr::opts_chunk$set(fig.align = "center") if (!is.null(params$fig.format)) knitr::opts_chunk$set(dev = params$fig.format) # can be vector, e.g. fig.format=c('jpeg','png', 'pdf') theme_set(theme_light()) isMultiTask = params$isMultiTask bootstrappingEnabled = params$bootstrappingEnabled ``` ```{r } object = params$object if (isMultiTask) { ordering_consensus=names(params$consensus) } else { ordering_consensus=names(sort(t(object$matlist[[1]][,"rank",drop=F])["rank",])) } color.fun=params$colors ``` ```{r } challenge_multiple=object$data ranking.fun=object$FUN cols_numbered=cols=color.fun(length(ordering_consensus)) names(cols)=ordering_consensus names(cols_numbered)= paste(1:length(cols),names(cols)) if (bootstrappingEnabled) { boot_object = params$object challenge_multiple=boot_object$data ranking.fun=boot_object$FUN object=challenge_multiple%>%ranking.fun object$FUN.list = boot_object$FUN.list object$fulldata=boot_object$fulldata # only not NULL if subset of algorithms used cols_numbered=cols=color.fun(length(ordering_consensus)) names(cols)=ordering_consensus names(cols_numbered)= paste(1:length(cols),names(cols)) } ``` This document presents a systematic report on a benchmark study. Input data comprises raw metric values for all algorithms and test cases. 
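For orientation (editorial note, not part of the patch): the report operates on a challenge object built from long-format data with one row per algorithm/test-case pair. Below is a minimal sketch using the as.challenge() constructor changed in this patch; the column names and metric values are purely illustrative.

```r
library(challengeR)

# Toy single-task data set: one row per algorithm/test-case pair (illustrative values).
data <- rbind(
  data.frame(algo = "A1", value = 0.80, case = "C1"),
  data.frame(algo = "A2", value = 0.65, case = "C1"),
  data.frame(algo = "A1", value = 0.70, case = "C2"),
  data.frame(algo = "A2", value = 0.90, case = "C2"))

# taskName is optional for single-task data; 'by' names the task column for multi-task data.
challenge <- as.challenge(data,
                          taskName    = "T1",
                          algorithm   = "algo",
                          case        = "case",
                          value       = "value",
                          smallBetter = FALSE)
```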
Generated plots are: ```{r, child=if (!isMultiTask && !bootstrappingEnabled) system.file("appdir", "overviewSingleTaskNoBootstrapping.Rmd", package="challengeR")} ``` ```{r, child=if (!isMultiTask && bootstrappingEnabled) system.file("appdir", "overviewSingleTaskBootstrapping.Rmd", package="challengeR")} ``` ```{r, child=if (isMultiTask && !bootstrappingEnabled) system.file("appdir", "overviewMultiTaskNoBootstrapping.Rmd", package="challengeR")} ``` ```{r, child=if (isMultiTask && bootstrappingEnabled) system.file("appdir", "overviewMultiTaskBootstrapping.Rmd", package="challengeR")} ``` ```{r,results='asis'} if (!isMultiTask && !is.null(object$fulldata[[1]])) { cat("Only top ", length(levels(challenge_multiple[[1]][[attr(challenge_multiple,"algorithm")]])), " out of ", length(levels(object$fulldata[[1]][[attr(challenge_multiple,"algorithm")]])), " algorithms visualized.\n") } ``` Ranking of algorithms within tasks according to the following chosen ranking scheme: ```{r,results='asis'} a=( lapply(object$FUN.list[1:2],function(x) { if (!is.character(x)) return(paste0("aggregate using function ", paste(gsub("UseMethod","", deparse(functionBody(x))), collapse=" ") )) else if (x=="rank") return(x) else return(paste0("aggregate using function ",x)) })) cat("    *",paste0(a,collapse=" then "),"*",sep="") if (is.character(object$FUN.list[[1]]) && object$FUN.list[[1]]=="significance") cat("\n\n Column 'prop.sign' is equal to the number of pairwise significant test results for a given algorithm divided by the number of algorithms.") ``` ```{r,results='asis'} if (isMultiTask) { cat("Ranking list for each task:\n") for (t in 1:length(object$matlist)){ cat("\n",names(object$matlist)[t],": ") n.cases=nrow(challenge_multiple[[t]])/length(unique(challenge_multiple[[t]][[attr(challenge_multiple,"algorithm")]])) cat("\nAnalysis based on", n.cases, - "test cases.", + "cases.", attr(object$data,"n.missing")[[t]], "observations had been missing. ") if (nrow(attr(object$data,"missingData")[[t]])>0) { cat("Performance of not all algorithms is observed for all cases in task '", names(object$matlist)[t], "'. Therefore, missings have been inserted in the following cases:") print(knitr::kable(as.data.frame(attr(object$data,"missingData")[[t]]))) } if (nrow(attr(object$data,"missingData")[[t]])>0 | attr(object$data,"n.missing")[[t]]>0) { if (is.numeric(attr(object$data,"na.treat"))) cat("All missings have been replaced by values of", attr(object$data,"na.treat"),".\n") else if (is.character(attr(object$data,"na.treat")) && attr(object$data,"na.treat")=="na.rm") cat("All missings have been removed.") else if (is.function(attr(object$data,"na.treat"))) { cat("Missings have been replaced using function ") print(attr(object$data,"na.treat")) } else if (is.character(object$FUN.list[[1]]) && object$FUN.list[[1]]=="rank") cat("Missings lead to the algorithm ranked last for the missing test case.") } x=object$matlist[[t]] print(knitr::kable(x[order(x$rank),])) } } else { n.cases=nrow(challenge_multiple[[1]])/length(unique(challenge_multiple[[1]][[attr(challenge_multiple,"algorithm")]])) cat("\nAnalysis based on", n.cases, - "test cases.", + "cases.", attr(object$data,"n.missing")[[1]], "observations had been missing. ") if (nrow(attr(object$data,"missingData")[[1]])>0) { cat("Performance of not all algorithms has been observed for all cases. 
Therefore, missings have been inserted in the following cases:") print(knitr::kable(as.data.frame(attr(object$data,"missingData")[[1]]))) } if (nrow(attr(object$data,"missingData")[[1]])>0 | attr(object$data,"n.missing")[[1]]>0) { if (is.numeric(attr(object$data,"na.treat"))) cat("All missings have been replaced by values of", attr(object$data,"na.treat"),".\n") else if (is.character(attr(object$data,"na.treat")) && attr(object$data,"na.treat")=="na.rm") cat("All missings have been removed.") else if (is.function(attr(object$data,"na.treat"))) { cat("Missings have been replaced using function ") print(attr(object$data,"na.treat")) } else if (is.character(object$FUN.list[[1]]) && object$FUN.list[[1]]=="rank") cat("Missings lead to the algorithm ranked last for the missing test case.") } cat("\n\nRanking list:") x=object$matlist[[1]] print(knitr::kable(x[order(x$rank),])) } ``` \bigskip ```{r, child=if (isMultiTask) system.file("appdir", "consensusRanking.Rmd", package="challengeR")} ``` # Visualization of raw assessment data ```{r,results='asis'} if (isMultiTask) { cat("Algorithms are ordered according to chosen ranking scheme for each task.") } ``` ## Dot- and boxplots *Dot- and boxplots* for visualizing raw assessment data separately for each algorithm. Boxplots representing descriptive statistics over all test cases (median, quartiles and outliers) are combined with horizontally jittered dots representing individual test cases. \bigskip ```{r boxplots} -boxplot(object, size=.8) +xlab("Algorithm")+ylab("Metric value") +boxplot(object, size=.8) ``` ## Podium plots *Podium plots* (see also Eugster et al, 2008) for visualizing raw assessment data. Upper part (spaghetti plot): Participating algorithms are color-coded, and each colored dot in the plot represents a metric value achieved with the respective algorithm. The actual metric value is encoded by the y-axis. Each podium (here: $p$=`r length(ordering_consensus)`) represents one possible rank, ordered from best (1) to last (here: `r length(ordering_consensus)`). The assignment of metric values (i.e. colored dots) to one of the podiums is based on the rank that the respective algorithm achieved on the corresponding test case. Note that the plot part above each podium place is further subdivided into $p$ "columns", where each column represents one participating algorithm (here: $p=$ `r length(ordering_consensus)`). Dots corresponding to identical test cases are connected by a line, leading to the shown spaghetti structure. Lower part: Bar charts represent the relative frequency for each algorithm to achieve the rank encoded by the podium place. 
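The per-case ranks encoded in the podium plot (and in the ranking heatmap below) come from the chosen ranking scheme. As a hedged, self-contained sketch of the two schemes exercised by the updated tests in this patch (toy data; the column names rank_mean and value_mean follow the new test expectations):

```r
library(challengeR)

# Toy single-task data set (illustrative values; smaller is better).
data <- rbind(
  data.frame(algo = "A1", value = 0.6, case = "C1"),
  data.frame(algo = "A1", value = 0.4, case = "C2"),
  data.frame(algo = "A2", value = 0.8, case = "C1"),
  data.frame(algo = "A2", value = 1.0, case = "C2"))
challenge <- as.challenge(data, taskName = "T1", algorithm = "algo",
                          case = "case", value = "value", smallBetter = TRUE)

# Rank within each test case first, then aggregate the ranks (column "rank_mean"):
rankThenAggregate(challenge, FUN = mean)$matlist$T1

# Aggregate the metric values first, then rank the aggregates (column "value_mean"):
aggregateThenRank(challenge, FUN = mean, ties.method = "min")$matlist$T1
```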
```{r, include=FALSE, fig.keep="none",dev=NULL} plot.new() algs=ordering_consensus l=legend("topright", paste0(1:length(algs),": ",algs), lwd = 1, cex=1.4,seg.len=1.1, title="Rank: Alg.", plot=F) w <- grconvertX(l$rect$w, to='ndc') - grconvertX(0, to='ndc') h<- grconvertY(l$rect$h, to='ndc') - grconvertY(0, to='ndc') addy=max(grconvertY(l$rect$h,"user","inches"),6) ``` ```{r podium,eval=T,fig.width=12, fig.height=addy} #c(bottom, left, top, right op<-par(pin=c(par()$pin[1],6), omd=c(0, 1-w, 0, 1), mar=c(par('mar')[1:3], 0)+c(-.5,0.5,-.5,0), cex.axis=1.5, cex.lab=1.5, cex.main=1.7) oh=grconvertY(l$rect$h,"user","lines")-grconvertY(6,"inches","lines") if (oh>0) par(oma=c(oh,0,0,0)) set.seed(38) podium(object, col=cols, lines.show = T, lines.alpha = .4, dots.cex=.9, ylab="Metric value", layout.heights=c(1,.35), legendfn = function(algs, cols) { legend(par('usr')[2], par('usr')[4], xpd=NA, paste0(1:length(algs),": ",algs), lwd = 1, col = cols, bg = NA, cex=1.4, seg.len=1.1, title="Rank: Alg.") } ) par(op) ``` ## Ranking heatmaps *Ranking heatmaps* for visualizing raw assessment data. Each cell $\left( i, A_j \right)$ shows the absolute frequency of test cases in which algorithm $A_j$ achieved rank $i$. \bigskip ```{r rankingHeatmap,fig.width=9, fig.height=9,out.width='70%'} rankingHeatmap(object) ``` # Visualization of ranking stability ```{r, child=if (bootstrappingEnabled) system.file("appdir", "visualizationBlobPlots.Rmd", package="challengeR")} ``` ```{r, child=if (bootstrappingEnabled) system.file("appdir", "visualizationViolinPlots.Rmd", package="challengeR")} ``` ## *Significance maps* for visualizing ranking stability based on statistical significance *Significance maps* depict incidence matrices of pairwise significant test results for the one-sided Wilcoxon signed rank test at a 5\% significance level with adjustment for multiple testing according to Holm. Yellow shading indicates that metric values of the algorithm on the x-axis were significantly superior to those from the algorithm on the y-axis, blue color indicates no significant difference. \bigskip ```{r significancemap,fig.width=6, fig.height=6,out.width='200%'} significanceMap(object,alpha=0.05,p.adjust.method="holm") ``` ## Ranking robustness to ranking methods *Line plots* for visualizing rankings robustness across different ranking methods. Each algorithm is represented by one colored line. For each ranking method encoded on the x-axis, the height of the line represents the corresponding rank. Horizontal lines indicate identical ranks for all methods. \bigskip ```{r lineplot,fig.width=8, fig.height=6,out.width='95%'} if (length(object$matlist)<=6 &nrow((object$matlist[[1]]))<=10 ){ methodsplot(challenge_multiple, ordering = ordering_consensus, na.treat=object$call[[1]][[1]]$na.treat) + scale_color_manual(values=cols) }else { x=challenge_multiple for (subt in names(challenge_multiple)){ dd=as.challenge(x[[subt]], value=attr(x,"value"), algorithm=attr(x,"algorithm") , case=attr(x,"case"), annotator = attr(x,"annotator"), by=attr(x,"by"), smallBetter = !attr(x,"largeBetter"), na.treat=object$call[[1]][[1]]$na.treat ) print(methodsplot(dd, ordering = ordering_consensus) + scale_color_manual(values=cols) ) } } ``` ```{r, child=if (isMultiTask) system.file("appdir", "visualizationAcrossTasks.Rmd", package="challengeR")} ``` # References Wiesenfarth, M., Reinke, A., Landman, B.A., Cardoso, M.J., Maier-Hein, L. and Kopp-Schneider, A. (2019). Methods and open-source toolkit for analyzing and visualizing challenge results. 
*arXiv preprint arXiv:1910.05121* M. J. A. Eugster, T. Hothorn, and F. Leisch, “Exploratory and inferential analysis of benchmark experiments,” Institut fuer Statistik, Ludwig-Maximilians-Universitaet Muenchen, Germany, Technical Report 30, 2008. [Online]. Available: http://epub.ub.uni-muenchen. de/4134/. diff --git a/tests/testthat/test-challenge.R b/tests/testthat/test-challenge.R index e0ba197..a90bce0 100644 --- a/tests/testthat/test-challenge.R +++ b/tests/testthat/test-challenge.R @@ -1,574 +1,574 @@ test_that("empty attribute 'taskName' raises error for single-task challenge", { data <- rbind( data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A2", value=0.6, case="C1")) expect_error(as.challenge(data, taskName="", algorithm="algo", case="case", value="value", smallBetter=FALSE), "Argument 'taskName' is empty.", fixed=TRUE) }) test_that("only whitespaces in attribute 'taskName' raises error for single-task challenge", { data <- rbind( data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A2", value=0.6, case="C1")) expect_error(as.challenge(data, taskName=" ", algorithm="algo", case="case", value="value", smallBetter=FALSE), "Argument 'taskName' is empty.", fixed=TRUE) }) test_that("attributes are set for single-task challenge with specified task name", { data <- rbind( data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A2", value=0.6, case="C1")) actualChallenge <- as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter=FALSE) expect_equal(attr(actualChallenge, "annotator"), NULL) expect_equal(attr(actualChallenge, "by"), "task") expect_equal(attr(actualChallenge, "largeBetter"), TRUE) expect_equal(attr(actualChallenge, "check"), TRUE) expect_equal(as.vector(actualChallenge$T1$algo), c("A1", "A2")) expect_equal(as.vector(actualChallenge$T1$value), c(0.8, 0.6)) expect_equal(as.vector(actualChallenge$T1$case), c("C1", "C1")) expect_equal(as.vector(actualChallenge$T1$task), c("T1", "T1")) # expect that there's no attribute "task" expect_equal(attr(actualChallenge, "task"), NULL) expect_equal(attr(actualChallenge$T1, "task"), NULL) expect_equal(attr(actualChallenge$T2, "task"), NULL) }) test_that("attributes are set for single-task challenge with dummy task name", { data <- rbind( data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A2", value=0.6, case="C1")) actualChallenge <- as.challenge(data, algorithm="algo", case="case", value="value", smallBetter=FALSE) expect_equal(attr(actualChallenge, "annotator"), NULL) expect_equal(attr(actualChallenge, "by"), "task") expect_equal(attr(actualChallenge, "largeBetter"), TRUE) expect_equal(attr(actualChallenge, "check"), TRUE) expect_equal(as.vector(actualChallenge$dummyTask$algo), c("A1", "A2")) expect_equal(as.vector(actualChallenge$dummyTask$value), c(0.8, 0.6)) expect_equal(as.vector(actualChallenge$dummyTask$case), c("C1", "C1")) expect_equal(as.vector(actualChallenge$dummyTask$task), c("dummyTask", "dummyTask")) # expect that there's no attribute "task" expect_equal(attr(actualChallenge, "task"), NULL) expect_equal(attr(actualChallenge$dummyTask, "task"), NULL) expect_equal(attr(actualChallenge$dummyTask, "task"), NULL) }) test_that("leading and trailing whitespaces are trimmed for attribute 'taskName'", { data <- rbind( data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A2", value=0.6, case="C1")) actualChallenge <- as.challenge(data, taskName=" T1 ", algorithm="algo", case="case", value="value", smallBetter=FALSE) 
expect_equal(as.vector(actualChallenge$T1$algo), c("A1", "A2")) }) test_that("attributes are set for multi-task challenge", { dataTask1 <- cbind(task="T1", rbind( data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A2", value=0.6, case="C1") )) dataTask2 <- cbind(task="T2", rbind( data.frame(algo="A1", value=0.2, case="C1"), data.frame(algo="A2", value=0.3, case="C1") )) data <- rbind(dataTask1, dataTask2) actualChallenge <- as.challenge(data, by="task", algorithm="algo", case="case", value="value", smallBetter=TRUE) expect_equal(attr(actualChallenge, "annotator"), NULL) expect_equal(attr(actualChallenge, "by"), "task") expect_equal(attr(actualChallenge, "largeBetter"), FALSE) expect_equal(attr(actualChallenge, "check"), TRUE) expect_equal(as.vector(actualChallenge$T1$algo), c("A1", "A2")) expect_equal(as.vector(actualChallenge$T1$value), c(0.8, 0.6)) expect_equal(as.vector(actualChallenge$T1$case), c("C1", "C1")) expect_equal(as.vector(actualChallenge$T1$task), c("T1", "T1")) expect_equal(as.vector(actualChallenge$T2$algo), c("A1", "A2")) expect_equal(as.vector(actualChallenge$T2$value), c(0.2, 0.3)) expect_equal(as.vector(actualChallenge$T2$case), c("C1", "C1")) expect_equal(as.vector(actualChallenge$T2$task), c("T2", "T2")) # expect that there's no attribute "task" expect_equal(attr(actualChallenge, "task"), NULL) expect_equal(attr(actualChallenge$T1, "task"), NULL) expect_equal(attr(actualChallenge$T2, "task"), NULL) }) test_that("attributes are set for multi-task challenge with sanity check disabled", { dataTask1 <- cbind(task="T1", rbind( data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A2", value=0.6, case="C1") )) dataTask2 <- cbind(task="T2", rbind( data.frame(algo="A1", value=0.2, case="C1"), data.frame(algo="A2", value=0.3, case="C1") )) data <- rbind(dataTask1, dataTask2) actualChallenge <- as.challenge(data, by="task", algorithm="algo", case="case", value="value", smallBetter=TRUE, check=FALSE) expect_equal(attr(actualChallenge, "annotator"), NULL) expect_equal(attr(actualChallenge, "by"), "task") expect_equal(attr(actualChallenge, "largeBetter"), FALSE) expect_equal(attr(actualChallenge, "check"), FALSE) expect_equal(as.vector(actualChallenge$algo), c("A1", "A2", "A1", "A2")) expect_equal(as.vector(actualChallenge$value), c(0.8, 0.6, 0.2, 0.3)) expect_equal(as.vector(actualChallenge$case), c("C1", "C1", "C1", "C1")) expect_equal(as.vector(actualChallenge$task), c("T1", "T1", "T2", "T2")) }) test_that("attribute 'taskName' is ignored for multi-task challenge", { dataTask1 <- cbind(task="T1", rbind( data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A2", value=0.6, case="C1") )) dataTask2 <- cbind(task="T2", rbind( data.frame(algo="A1", value=0.2, case="C1"), data.frame(algo="A2", value=0.3, case="C1") )) data <- rbind(dataTask1, dataTask2) expect_warning(as.challenge(data, taskName="T1", by="task", algorithm="algo", case="case", value="value", smallBetter=TRUE), "Argument 'taskName' is ignored for multi-task data set.", fixed=TRUE) }) test_that("missing algorithm performances are added as NAs for single-task challenge", { data <- rbind( data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A2", value=0.6, case="C2")) expect_message(actualChallenge <- as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter=FALSE), - "Performance of not all algorithms is observed for all cases in task 'T1'. 
Inserted as missings in following cases:", fixed=TRUE) + "Performance of not all algorithms is observed for all cases. Inserted as missings in following cases:", fixed=TRUE) expect_equal(as.vector(actualChallenge$T1$algo), c("A1", "A1", "A2", "A2")) expect_equal(as.vector(actualChallenge$T1$value), c(0.8, NA, NA, 0.6)) expect_equal(as.vector(actualChallenge$T1$case), c("C1", "C2", "C1", "C2")) }) test_that("multi-task data set containing one task is interpreted as single-task data set, missing algorithm performances are added", { data <- cbind(task="T1", rbind( data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A2", value=0.6, case="C2") )) # do not specify parameter "by" to interpret multi-task data set as single-task data set expect_message(actualChallenge <- as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter=FALSE), - "Performance of not all algorithms is observed for all cases in task 'T1'. Inserted as missings in following cases:", fixed=TRUE) + "Performance of not all algorithms is observed for all cases. Inserted as missings in following cases:", fixed=TRUE) expect_equal(as.vector(actualChallenge$T1$algo), c("A1", "A1", "A2", "A2")) expect_equal(as.vector(actualChallenge$T1$value), c(0.8, NA, NA, 0.6)) expect_equal(as.vector(actualChallenge$T1$case), c("C1", "C2", "C1", "C2")) }) test_that("missing algorithm performances are added as NAs for multi-task challenge (2 tasks in data set)", { dataTask1 <- cbind(task="T1", rbind( data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A2", value=0.6, case="C2") )) dataTask2 <- cbind(task="T2", rbind( data.frame(algo="A1", value=0.2, case="C1"), data.frame(algo="A1", value=0.3, case="C2"), data.frame(algo="A2", value=0.4, case="C1") )) data <- rbind(dataTask1, dataTask2) expect_message(actualChallenge <- as.challenge(data, by="task", algorithm="algo", case="case", value="value", smallBetter=FALSE), "Performance of not all algorithms is observed for all cases in task 'T1'. 
Inserted as missings in following cases:", fixed=TRUE) expect_equal(as.vector(actualChallenge$T1$algo), c("A1", "A1", "A2", "A2")) expect_equal(as.vector(actualChallenge$T1$value), c(0.8, NA, NA, 0.6)) expect_equal(as.vector(actualChallenge$T1$case), c("C1", "C2", "C1", "C2")) expect_equal(as.vector(actualChallenge$T2$algo), c("A1", "A1", "A2", "A2")) expect_equal(as.vector(actualChallenge$T2$value), c(0.2, 0.3, 0.4, NA)) expect_equal(as.vector(actualChallenge$T2$case), c("C1", "C2", "C1", "C2")) }) test_that("missing algorithm performances are not added as NA with sanity check disabled for single-task challenge", { data <- rbind( data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A2", value=0.6, case="C2")) actualChallenge <- as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter=FALSE, check=FALSE) expect_equal(as.vector(actualChallenge$algo), c("A1", "A2")) expect_equal(as.vector(actualChallenge$value), c(0.8, 0.6)) expect_equal(as.vector(actualChallenge$case), c("C1", "C2")) }) test_that("missing algorithm performances are not added as NA with sanity check disabled for multi-task challenge (2 tasks in data set)", { dataTask1 <- cbind(task="T1", rbind( data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A2", value=0.6, case="C2") )) dataTask2 <- cbind(task="T2", rbind( data.frame(algo="A1", value=0.2, case="C1"), data.frame(algo="A1", value=0.3, case="C2"), data.frame(algo="A2", value=0.4, case="C1") )) data <- rbind(dataTask1, dataTask2) actualChallenge <- as.challenge(data, by="task", algorithm="algo", case="case", value="value", smallBetter=FALSE, check=FALSE) expect_equal(as.vector(actualChallenge$algo), c("A1", "A2", "A1", "A1", "A2")) expect_equal(as.vector(actualChallenge$value), c(0.8, 0.6, 0.2, 0.3, 0.4)) expect_equal(as.vector(actualChallenge$case), c("C1", "C2", "C1", "C2", "C1")) }) test_that("case cannot appear more than once per algorithm for single-task challenge", { data <- rbind( data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A1", value=0.8, case="C1")) expect_error(as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter=FALSE), - "Case(s) (C1) appear(s) more than once for the same algorithm in task 'T1'.", fixed=TRUE) + "The following case(s) appear(s) more than once for the same algorithm. Please revise. Or are you considering a multi-task challenge and forgot to specify argument 'by'?\nCase(s): C1", fixed=TRUE) }) test_that("multi-task data set containing one task is interpreted as single-task data set, case cannot appear more than once per algorithm", { data <- cbind(task="T1", rbind( data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A1", value=0.8, case="C1") )) # do not specify parameter "by" to interpret multi-task data set as single-task data set expect_error(as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter=FALSE), - "Case(s) (C1) appear(s) more than once for the same algorithm in task 'T1'.", fixed=TRUE) + "The following case(s) appear(s) more than once for the same algorithm. Please revise. 
Or are you considering a multi-task challenge and forgot to specify argument 'by'?\nCase(s): C1", fixed=TRUE) }) test_that("case cannot appear more than once per algorithm for multi-task challenge (1 task in data set)", { data <- cbind(task="T1", rbind( data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A1", value=0.8, case="C1") )) expect_error(as.challenge(data, by="task", algorithm="algo", case="case", value="value", smallBetter=FALSE), - "Case(s) (C1) appear(s) more than once for the same algorithm in task 'T1'.", fixed=TRUE) + "The following case(s) appear(s) more than once for the same algorithm. Please revise. Or are you considering a multi-task challenge and forgot to specify argument 'by'?\nCase(s): C1", fixed=TRUE) }) test_that("cases cannot appear more than once per algorithm for single-task challenge", { data <- rbind( data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A2", value=0.7, case="C1"), data.frame(algo="A1", value=0.5, case="C2"), data.frame(algo="A2", value=0.6, case="C2"), data.frame(algo="A2", value=0.6, case="C2")) expect_error(as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter=FALSE), - "Case(s) (C1, C2) appear(s) more than once for the same algorithm in task 'T1'.", fixed=TRUE) + "The following case(s) appear(s) more than once for the same algorithm. Please revise. Or are you considering a multi-task challenge and forgot to specify argument 'by'?\nCase(s): C1, C2", fixed=TRUE) }) test_that("cases cannot appear more than once per algorithm for multi-task challenge (1 task in data set)", { data <- cbind(task="T1", rbind( data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A2", value=0.7, case="C1"), data.frame(algo="A1", value=0.5, case="C2"), data.frame(algo="A2", value=0.6, case="C2"), data.frame(algo="A2", value=0.6, case="C2") )) expect_error(as.challenge(data, by="task", algorithm="algo", case="case", value="value", smallBetter=FALSE), - "Case(s) (C1, C2) appear(s) more than once for the same algorithm in task 'T1'.", fixed=TRUE) + "The following case(s) appear(s) more than once for the same algorithm. Please revise. Or are you considering a multi-task challenge and forgot to specify argument 'by'?\nCase(s): C1, C2", fixed=TRUE) }) test_that("cases cannot appear more than once per algorithm for multi-task challenge (2 tasks in data set)", { dataTask1 <- cbind(task="T1", rbind( data.frame(algo="A1", value=0.8, case="C1") # let T1 pass )) dataTask2 <- cbind(task="T2", rbind( data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A2", value=0.7, case="C1"), data.frame(algo="A1", value=0.5, case="C2"), data.frame(algo="A2", value=0.6, case="C2"), data.frame(algo="A2", value=0.6, case="C2") )) data <- rbind(dataTask1, dataTask2) expect_error(as.challenge(data, by="task", algorithm="algo", case="case", value="value", smallBetter=FALSE), - "Case(s) (C1, C2) appear(s) more than once for the same algorithm in task 'T2'.", fixed=TRUE) + "The following case(s) appear(s) more than once for the same algorithm in task 'T2'. 
Please revise.\nCase(s): C1, C2", fixed=TRUE) }) test_that("cases cannot appear more than once per algorithm when missing data was added for single-task challenge", { data <- rbind( data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A2", value=0.6, case="C2"), data.frame(algo="A2", value=0.6, case="C2")) - #expect_error(as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter=FALSE), - # "Case(s) (C1, C2) appear(s) more than once for the same algorithm in task 'T1'.", fixed=TRUE) + expect_error(as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter=FALSE), + "The following case(s) appear(s) more than once for the same algorithm. Please revise. Or are you considering a multi-task challenge and forgot to specify argument 'by'?\nCase(s): C1, C2", fixed=TRUE) }) test_that("user is notified of duplicate cases when multi-task data set is interpreted as single-task data set (2 tasks in data set)", { dataTask1 <- cbind(task="T1", rbind( data.frame(algo="A1", value=0.8, case="C1") )) dataTask2 <- cbind(task="T2", rbind( data.frame(algo="A1", value=0.8, case="C1") )) data <- rbind(dataTask1, dataTask2) # do not specify parameter "by" to interpret multi-task data set as single-task data set expect_error(as.challenge(data, taskName="New task", algorithm="algo", case="case", value="value", smallBetter=FALSE), - "Case(s) (C1) appear(s) more than once for the same algorithm in task 'New task'.", fixed=TRUE) + "The following case(s) appear(s) more than once for the same algorithm. Please revise. Or are you considering a multi-task challenge and forgot to specify argument 'by'?\nCase(s): C1", fixed=TRUE) }) test_that("user is notified of missing algorithm performance when multi-task data set is interpreted as single-task data set (2 tasks in data set)", { dataTask1 <- cbind(task="T1", rbind( data.frame(algo="A1", value=0.8, case="C1") )) dataTask2 <- cbind(task="T2", rbind( data.frame(algo="A2", value=0.6, case="C2") )) data <- rbind(dataTask1, dataTask2) # do not specify parameter "by" to interpret multi-task data set as single-task data set expect_message(as.challenge(data, taskName="New task", algorithm="algo", case="case", value="value", smallBetter=FALSE), - "Performance of not all algorithms is observed for all cases in task 'New task'. Inserted as missings in following cases:", fixed=TRUE) + "Performance of not all algorithms is observed for all cases. 
Inserted as missings in following cases:", fixed=TRUE) }) test_that("NAs are replaced by numeric value for single-task challenge", { data <- rbind( data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A1", value=NA, case="C2"), data.frame(algo="A2", value=0.6, case="C1"), data.frame(algo="A2", value=NA, case="C2")) actualChallenge <- as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter=FALSE, na.treat=0) expect_equal(as.vector(actualChallenge$T1$algo), c("A1", "A1", "A2", "A2")) expect_equal(as.vector(actualChallenge$T1$value), c(0.8, 0.0, 0.6, 0.0)) expect_equal(as.vector(actualChallenge$T1$case), c("C1", "C2", "C1", "C2")) }) test_that("NAs are replaced by numeric value for multi-task challenge", { dataTask1 <- cbind(task="T1", rbind( data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A1", value=NA, case="C2") )) dataTask2 <- cbind(task="T2", rbind( data.frame(algo="A2", value=NA, case="C1"), data.frame(algo="A2", value=0.5, case="C2") )) data <- rbind(dataTask1, dataTask2) actualChallenge <- as.challenge(data, by="task", algorithm="algo", case="case", value="value", smallBetter=FALSE, na.treat=0) expect_equal(as.vector(actualChallenge$T1$algo), c("A1", "A1")) expect_equal(as.vector(actualChallenge$T1$value), c(0.8, 0.0)) expect_equal(as.vector(actualChallenge$T1$case), c("C1", "C2")) expect_equal(as.vector(actualChallenge$T2$algo), c("A2", "A2")) expect_equal(as.vector(actualChallenge$T2$value), c(0.0, 0.5)) expect_equal(as.vector(actualChallenge$T2$case), c("C1", "C2")) }) test_that("NAs are replaced by function value for single-task challenge", { data <- rbind( data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A1", value=NA, case="C2"), data.frame(algo="A2", value=0.6, case="C1"), data.frame(algo="A2", value=NA, case="C2")) replacementFunction <- function(x) { 2 } actualChallenge <- as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter=FALSE, na.treat=replacementFunction) expect_equal(as.vector(actualChallenge$T1$algo), c("A1", "A1", "A2", "A2")) expect_equal(as.vector(actualChallenge$T1$value), c(0.8, 2.0, 0.6, 2.0)) expect_equal(as.vector(actualChallenge$T1$case), c("C1", "C2", "C1", "C2")) }) test_that("NAs are replaced by function value for multi-task challenge", { dataTask1 <- cbind(task="T1", rbind( data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A1", value=NA, case="C2") )) dataTask2 <- cbind(task="T2", rbind( data.frame(algo="A2", value=NA, case="C1"), data.frame(algo="A2", value=0.5, case="C2") )) data <- rbind(dataTask1, dataTask2) replacementFunction <- function(x) { 2 } actualChallenge <- as.challenge(data, by="task", algorithm="algo", case="case", value="value", smallBetter=FALSE, na.treat=replacementFunction) expect_equal(as.vector(actualChallenge$T1$algo), c("A1", "A1")) expect_equal(as.vector(actualChallenge$T1$value), c(0.8, 2.0)) expect_equal(as.vector(actualChallenge$T1$case), c("C1", "C2")) expect_equal(as.vector(actualChallenge$T2$algo), c("A2", "A2")) expect_equal(as.vector(actualChallenge$T2$value), c(2.0, 0.5)) expect_equal(as.vector(actualChallenge$T2$case), c("C1", "C2")) }) test_that("NAs are removed for single-task challenge", { data <- rbind( data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A1", value=NA, case="C2"), data.frame(algo="A2", value=0.6, case="C1"), data.frame(algo="A2", value=NA, case="C2")) actualChallenge <- as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter=FALSE, 
na.treat="na.rm") expect_equal(as.vector(actualChallenge$T1$algo), c("A1", "A2")) expect_equal(as.vector(actualChallenge$T1$value), c(0.8, 0.6)) expect_equal(as.vector(actualChallenge$T1$case), c("C1", "C1")) }) test_that("NAs are removed for multi-task challenge", { dataTask1 <- cbind(task="T1", rbind( data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A1", value=NA, case="C2") )) dataTask2 <- cbind(task="T2", rbind( data.frame(algo="A2", value=NA, case="C1"), data.frame(algo="A2", value=0.5, case="C2") )) data <- rbind(dataTask1, dataTask2) actualChallenge <- as.challenge(data, by="task", algorithm="algo", case="case", value="value", smallBetter=FALSE, na.treat="na.rm") expect_equal(as.vector(actualChallenge$T1$algo), c("A1")) expect_equal(as.vector(actualChallenge$T1$value), c(0.8)) expect_equal(as.vector(actualChallenge$T1$case), c("C1")) expect_equal(as.vector(actualChallenge$T2$algo), c("A2")) expect_equal(as.vector(actualChallenge$T2$value), c(0.5)) expect_equal(as.vector(actualChallenge$T2$case), c("C2")) }) test_that("automatically added NAs are replaced by numeric value for single-task challenge", { data <- rbind( data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A2", value=0.6, case="C2")) expect_message(actualChallenge <- as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter=FALSE, na.treat=0), - "Performance of not all algorithms is observed for all cases in task 'T1'. Inserted as missings in following cases:", fixed=TRUE) + "Performance of not all algorithms is observed for all cases. Inserted as missings in following cases:", fixed=TRUE) expect_equal(as.vector(actualChallenge$T1$algo), c("A1", "A1", "A2", "A2")) expect_equal(as.vector(actualChallenge$T1$value), c(0.8, 0.0, 0.0, 0.6)) expect_equal(as.vector(actualChallenge$T1$case), c("C1", "C2", "C1", "C2")) }) test_that("automatically added NAs are replaced by numeric value for multi-task challenge", { dataTask1 <- cbind(task="T1", rbind( data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A2", value=0.6, case="C2") )) dataTask2 <- cbind(task="T2", rbind( data.frame(algo="A1", value=0.2, case="C1"), data.frame(algo="A1", value=0.3, case="C2"), data.frame(algo="A2", value=0.4, case="C1") )) data <- rbind(dataTask1, dataTask2) expect_message(actualChallenge <- as.challenge(data, by="task", algorithm="algo", case="case", value="value", smallBetter=FALSE, na.treat=0), "Performance of not all algorithms is observed for all cases in task 'T1'. Inserted as missings in following cases:", fixed=TRUE) expect_equal(as.vector(actualChallenge$T1$algo), c("A1", "A1", "A2", "A2")) expect_equal(as.vector(actualChallenge$T1$value), c(0.8, 0.0, 0.0, 0.6)) expect_equal(as.vector(actualChallenge$T1$case), c("C1", "C2", "C1", "C2")) expect_equal(as.vector(actualChallenge$T2$algo), c("A1", "A1", "A2", "A2")) expect_equal(as.vector(actualChallenge$T2$value), c(0.2, 0.3, 0.4, 0.0)) expect_equal(as.vector(actualChallenge$T2$case), c("C1", "C2", "C1", "C2")) }) test_that("automatically added NAs are removed for single-task challenge", { data <- rbind( data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A2", value=0.6, case="C2")) expect_message(actualChallenge <- as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter=FALSE, na.treat="na.rm"), - "Performance of not all algorithms is observed for all cases in task 'T1'. 
Inserted as missings in following cases:", fixed=TRUE) + "Performance of not all algorithms is observed for all cases. Inserted as missings in following cases:", fixed=TRUE) expect_equal(as.vector(actualChallenge$T1$algo), c("A1", "A2")) expect_equal(as.vector(actualChallenge$T1$value), c(0.8, 0.6)) expect_equal(as.vector(actualChallenge$T1$case), c("C1", "C2")) }) test_that("automatically added NAs are removed for multi-task challenge", { dataTask1 <- cbind(task="T1", rbind( data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A2", value=0.6, case="C2") )) dataTask2 <- cbind(task="T2", rbind( data.frame(algo="A1", value=0.2, case="C1"), data.frame(algo="A1", value=0.3, case="C2"), data.frame(algo="A2", value=0.4, case="C1") )) data <- rbind(dataTask1, dataTask2) expect_message(actualChallenge <- as.challenge(data, by="task", algorithm="algo", case="case", value="value", smallBetter=FALSE, na.treat="na.rm"), "Performance of not all algorithms is observed for all cases in task 'T1'. Inserted as missings in following cases:", fixed=TRUE) expect_equal(as.vector(actualChallenge$T1$algo), c("A1", "A2")) expect_equal(as.vector(actualChallenge$T1$value), c(0.8, 0.6)) expect_equal(as.vector(actualChallenge$T1$case), c("C1", "C2")) expect_equal(as.vector(actualChallenge$T2$algo), c("A1", "A1", "A2")) expect_equal(as.vector(actualChallenge$T2$value), c(0.2, 0.3, 0.4)) expect_equal(as.vector(actualChallenge$T2$case), c("C1", "C2", "C1")) }) diff --git a/tests/testthat/test-rankThenAggregate.R b/tests/testthat/test-rankThenAggregate.R index 8ce5459..b76e52a 100644 --- a/tests/testthat/test-rankThenAggregate.R +++ b/tests/testthat/test-rankThenAggregate.R @@ -1,286 +1,286 @@ test_that("rank-then-aggregate by mean works with two algorithms for one case, small values are better", { data <- rbind( data.frame(algo="A1", value=0.6, case="C1"), data.frame(algo="A2", value=0.8, case="C1")) challenge <- as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter = TRUE) ranking <- challenge%>%rankThenAggregate(FUN = mean) expectedRanking <- rbind( - "A1" = data.frame(rank_FUN = 1, rank = 1), - "A2" = data.frame(rank_FUN = 2, rank = 2)) + "A1" = data.frame(rank_mean = 1, rank = 1), + "A2" = data.frame(rank_mean = 2, rank = 2)) expect_equal(ranking$matlist$T1, expectedRanking) }) test_that("rank-then-aggregate by mean works with two algorithms (reverse order) for one case, small values are better", { data <- rbind( data.frame(algo = "A2", value = 0.8, case = "C1"), data.frame(algo = "A1", value = 0.6, case = "C1")) challenge <- as.challenge(data, taskName="T1", algorithm = "algo", case = "case", value = "value", smallBetter = TRUE) ranking <- challenge%>%rankThenAggregate(FUN = mean) - expectedRanking <- rbind("A2" = data.frame(rank_FUN = 2, rank = 2), - "A1" = data.frame(rank_FUN = 1, rank = 1)) + expectedRanking <- rbind("A2" = data.frame(rank_mean = 2, rank = 2), + "A1" = data.frame(rank_mean = 1, rank = 1)) expect_equal(ranking$matlist$T1, expectedRanking) }) test_that("rank-then-aggregate by mean works with two algorithms for one case, large values are better", { data <- rbind( data.frame(algo="A1", value=0.6, case="C1"), data.frame(algo="A2", value=0.8, case="C1")) challenge <- as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter = FALSE) ranking <- challenge%>%rankThenAggregate(FUN = mean) expectedRanking <- rbind( - "A1" = data.frame(rank_FUN = 2, rank = 2), - "A2" = data.frame(rank_FUN = 1, rank = 1)) + "A1" = 
data.frame(rank_mean = 2, rank = 2), + "A2" = data.frame(rank_mean = 1, rank = 1)) expect_equal(ranking$matlist$T1, expectedRanking) }) test_that("rank-then-aggregate by mean works with two algorithms (reverse order) for one case, large values are better", { data <- rbind( data.frame(algo = "A2", value = 0.8, case = "C1"), data.frame(algo = "A1", value = 0.6, case = "C1")) challenge <- as.challenge(data, taskName="T1", algorithm = "algo", case = "case", value = "value", smallBetter = FALSE) ranking <- challenge%>%rankThenAggregate(FUN = mean) - expectedRanking <- rbind("A2" = data.frame(rank_FUN = 1, rank = 1), - "A1" = data.frame(rank_FUN = 2, rank = 2)) + expectedRanking <- rbind("A2" = data.frame(rank_mean = 1, rank = 1), + "A1" = data.frame(rank_mean = 2, rank = 2)) expect_equal(ranking$matlist$T1, expectedRanking) }) test_that("rank-then-aggregate raises error for invalid aggregation function", { data <- rbind( data.frame(algo="A1", value=0.6, case="C1"), data.frame(algo="A2", value=0.6, case="C1")) challenge <- as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter = TRUE) expect_error(challenge%>%rankThenAggregate(FUN = meanx), "object 'meanx' not found", fixed = TRUE) }) test_that("rank-then-aggregate by mean works with two algorithms for one case and 'min' as ties method", { data <- rbind( data.frame(algo="A1", value=0.6, case="C1"), data.frame(algo="A2", value=0.6, case="C1")) challenge <- as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter = TRUE) ranking <- challenge%>%rankThenAggregate(FUN = mean, ties.method = "min") expectedRanking <- rbind( - "A1" = data.frame(rank_FUN = 1, rank = 1), - "A2" = data.frame(rank_FUN = 1, rank = 1)) + "A1" = data.frame(rank_mean = 1, rank = 1), + "A2" = data.frame(rank_mean = 1, rank = 1)) expect_equal(ranking$matlist$T1, expectedRanking) }) test_that("rank-then-aggregate by mean works with two algorithms for one case and 'max' as ties method", { data <- rbind( data.frame(algo="A1", value=0.6, case="C1"), data.frame(algo="A2", value=0.6, case="C1")) challenge <- as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter = TRUE) ranking <- challenge%>%rankThenAggregate(FUN = mean, ties.method = "max") expectedRanking <- rbind( - "A1" = data.frame(rank_FUN = 2, rank = 2), - "A2" = data.frame(rank_FUN = 2, rank = 2)) + "A1" = data.frame(rank_mean = 2, rank = 2), + "A2" = data.frame(rank_mean = 2, rank = 2)) expect_equal(ranking$matlist$T1, expectedRanking) }) test_that("rank-then-aggregate raises error for invalid ties method", { data <- rbind( data.frame(algo="A1", value=0.6, case="C1"), data.frame(algo="A2", value=0.6, case="C1")) challenge <- as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter = TRUE) expect_error(challenge%>%rankThenAggregate(FUN = mean, ties.method = "maxx"), "'arg' should be one of \"average\", \"first\", \"last\", \"random\", \"max\", \"min\"", fixed = TRUE) }) test_that("rank-then-aggregate raises error for invalid ties method even when no ties present", { data <- rbind( data.frame(algo="A1", value=0.6, case="C1"), data.frame(algo="A2", value=0.8, case="C1")) challenge <- as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter = TRUE) expect_error(challenge%>%rankThenAggregate(FUN = mean, ties.method = "maxx"), "'arg' should be one of \"average\", \"first\", \"last\", \"random\", \"max\", \"min\"", fixed = TRUE) }) 
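Editorial note: the ties-method tests above encode the following behavior; a compact, self-contained restatement with toy data mirroring those tests (illustrative only):

```r
library(challengeR)

# Two algorithms with identical metric values on the single test case.
data <- rbind(
  data.frame(algo = "A1", value = 0.6, case = "C1"),
  data.frame(algo = "A2", value = 0.6, case = "C1"))
challenge <- as.challenge(data, taskName = "T1", algorithm = "algo",
                          case = "case", value = "value", smallBetter = TRUE)

# ties.method = "min" gives both algorithms rank 1; "max" gives both rank 2.
rankThenAggregate(challenge, FUN = mean, ties.method = "min")$matlist$T1
rankThenAggregate(challenge, FUN = mean, ties.method = "max")$matlist$T1
```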
test_that("rank-then-aggregate by mean works with two algorithms for two cases", { data <- rbind( data.frame(algo="A1", value=0.6, case="C1"), data.frame(algo="A1", value=0.4, case="C2"), data.frame(algo="A2", value=0.8, case="C1"), data.frame(algo="A2", value=1.0, case="C2")) challenge <- as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter = TRUE) ranking <- challenge%>%rankThenAggregate(FUN = mean) expectedRanking <- rbind( - "A1" = data.frame(rank_FUN = 1, rank = 1), - "A2" = data.frame(rank_FUN = 2, rank = 2)) + "A1" = data.frame(rank_mean = 1, rank = 1), + "A2" = data.frame(rank_mean = 2, rank = 2)) expect_equal(ranking$matlist$T1, expectedRanking) }) test_that("rank-then-aggregate by median works with two algorithms for two cases", { data <- rbind( data.frame(algo="A1", value=0.6, case="C1"), data.frame(algo="A1", value=0.4, case="C2"), data.frame(algo="A2", value=0.8, case="C1"), data.frame(algo="A2", value=1.0, case="C2")) challenge <- as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter = TRUE) ranking <- challenge%>%rankThenAggregate(FUN = median) expectedRanking <- rbind( - "A1" = data.frame(rank_FUN = 1, rank = 1), - "A2" = data.frame(rank_FUN = 2, rank = 2)) + "A1" = data.frame(rank_median = 1, rank = 1), + "A2" = data.frame(rank_median = 2, rank = 2)) expect_equal(ranking$matlist$T1, expectedRanking) }) test_that("rank-then-aggregate by mean works with one algorithm for one case", { data <- rbind( data.frame(algo="A1", value=0.6, case="C1")) challenge <- as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter = TRUE) ranking <- challenge%>%rankThenAggregate(FUN = mean) expectedRanking <- rbind( - "A1" = data.frame(rank_FUN = 1, rank = 1)) + "A1" = data.frame(rank_mean = 1, rank = 1)) expect_equal(ranking$matlist$T1, expectedRanking) }) test_that("rank-then-aggregate assigns worst rank for NA", { data <- rbind( data.frame(algo="A1", value=NA, case="C1"), data.frame(algo="A2", value=0.8, case="C1")) challenge <- as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter = FALSE) ranking <- challenge%>%rankThenAggregate(FUN = mean) expectedRanking <- rbind( - "A1" = data.frame(rank_FUN = 2, rank = 2), - "A2" = data.frame(rank_FUN = 1, rank = 1)) + "A1" = data.frame(rank_mean = 2, rank = 2), + "A2" = data.frame(rank_mean = 1, rank = 1)) expect_equal(ranking$matlist$T1, expectedRanking) }) test_that("rank-then-aggregate raises error for unused NA treatment argument", { data <- rbind( data.frame(algo="A1", value=NA, case="C1"), data.frame(algo="A2", value=0.8, case="C1")) challenge <- as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter = FALSE) expect_error(challenge%>%rankThenAggregate(FUN = mean, na.treat = 0), "unused argument (na.treat = 0)", fixed = TRUE) }) test_that("rank-then-aggregate by mean works for multi-task challenge (2 tasks in data set), no missing data", { dataTask1 <- cbind(task="T1", rbind( data.frame(algo="A1", value=0.6, case="C1"), data.frame(algo="A2", value=0.8, case="C1") )) dataTask2 <- cbind(task="T2", rbind( data.frame(algo="A1", value=0.5, case="C1"), data.frame(algo="A2", value=0.4, case="C1") )) data <- rbind(dataTask1, dataTask2) challenge <- as.challenge(data, by="task", algorithm="algo", case="case", value="value", smallBetter = TRUE) ranking <- challenge%>%rankThenAggregate(FUN = mean) expectedRankingTask1 <- rbind( - "A1" = data.frame(rank_FUN = 1, rank = 1), - 
"A2" = data.frame(rank_FUN = 2, rank = 2)) + "A1" = data.frame(rank_mean = 1, rank = 1), + "A2" = data.frame(rank_mean = 2, rank = 2)) expectedRankingTask2 <- rbind( - "A1" = data.frame(rank_FUN = 2, rank = 2), - "A2" = data.frame(rank_FUN = 1, rank = 1)) + "A1" = data.frame(rank_mean = 2, rank = 2), + "A2" = data.frame(rank_mean = 1, rank = 1)) expect_equal(ranking$matlist$T1, expectedRankingTask1) expect_equal(ranking$matlist$T2, expectedRankingTask2) }) test_that("rank-then-aggregate assigns worst rank for NA in multi-task challenge (2 tasks in data set)", { dataTask1 <- cbind(task="T1", rbind( data.frame(algo="A1", value=0.6, case="C1"), data.frame(algo="A2", value=0.8, case="C1") )) dataTask2 <- cbind(task="T2", rbind( data.frame(algo="A1", value=NA, case="C1"), data.frame(algo="A2", value=0.4, case="C1") )) data <- rbind(dataTask1, dataTask2) challenge <- as.challenge(data, by="task", algorithm="algo", case="case", value="value", smallBetter = TRUE) ranking <- challenge%>%rankThenAggregate(FUN = mean) expectedRankingTask1 <- rbind( - "A1" = data.frame(rank_FUN = 1, rank = 1), - "A2" = data.frame(rank_FUN = 2, rank = 2)) + "A1" = data.frame(rank_mean = 1, rank = 1), + "A2" = data.frame(rank_mean = 2, rank = 2)) expectedRankingTask2 <- rbind( - "A1" = data.frame(rank_FUN = 2, rank = 2), - "A2" = data.frame(rank_FUN = 1, rank = 1)) + "A1" = data.frame(rank_mean = 2, rank = 2), + "A2" = data.frame(rank_mean = 1, rank = 1)) expect_equal(ranking$matlist$T1, expectedRankingTask1) expect_equal(ranking$matlist$T2, expectedRankingTask2) }) test_that("rank-then-aggregate raises error for unused NA treatment argument in multi-task challenge (2 tasks in data set)", { dataTask1 <- cbind(task="T1", rbind( data.frame(algo="A1", value=0.6, case="C1"), data.frame(algo="A2", value=0.8, case="C1") )) dataTask2 <- cbind(task="T2", rbind( data.frame(algo="A1", value=NA, case="C1"), data.frame(algo="A2", value=0.4, case="C1") )) data <- rbind(dataTask1, dataTask2) challenge <- as.challenge(data, by="task", algorithm="algo", case="case", value="value", smallBetter = TRUE) expect_error(challenge%>%rankThenAggregate(FUN = mean, na.treat = "na.rm"), "unused argument (na.treat = \"na.rm\")", fixed = TRUE) }) diff --git a/tests/testthat/test-subset.R b/tests/testthat/test-subset.R index 31622fb..8f031c2 100644 --- a/tests/testthat/test-subset.R +++ b/tests/testthat/test-subset.R @@ -1,220 +1,220 @@ test_that("top 2 performing algorithms are extracted and data set is reduced respectively", { data <- rbind( data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A2", value=0.6, case="C1"), data.frame(algo="A3", value=0.4, case="C1"), data.frame(algo="A1", value=0.2, case="C2"), data.frame(algo="A2", value=0.1, case="C2"), data.frame(algo="A3", value=0.0, case="C2")) challenge <- as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter=FALSE) ranking <- challenge%>%aggregateThenRank(FUN=mean, ties.method="min") rankingSubset <- subset(ranking, top=2) expectedRankingSubset <- rbind( - "A1" = data.frame(value_FUN = 0.5, rank = 1), - "A2" = data.frame(value_FUN = 0.35, rank = 2)) + "A1" = data.frame(value_mean = 0.5, rank = 1), + "A2" = data.frame(value_mean = 0.35, rank = 2)) expect_equal(rankingSubset$matlist$T1, expectedRankingSubset) expect_equal(as.vector(rankingSubset$data$T1$algo), c("A1", "A2", "A1", "A2")) expect_equal(as.vector(rankingSubset$data$T1$value), c(0.8, 0.6, 0.2, 0.1)) expect_equal(as.vector(rankingSubset$data$T1$case), c("C1", "C1", "C2", "C2")) 
  expect_equal(as.vector(rankingSubset$data$T1$task), c("T1", "T1", "T1", "T1"))
})

test_that("extraction of subset raises error for multi-task data set", {
  dataTask1 <- cbind(task="T1", rbind(
    data.frame(algo="A1", value=0.8, case="C1"),
    data.frame(algo="A2", value=0.6, case="C1"),
    data.frame(algo="A3", value=0.4, case="C1"),
    data.frame(algo="A1", value=0.2, case="C2"),
    data.frame(algo="A2", value=0.1, case="C2"),
    data.frame(algo="A3", value=0.0, case="C2")
  ))
  dataTask2 <- cbind(task="T2", rbind(
    data.frame(algo="A1", value=0.2, case="C1"),
    data.frame(algo="A2", value=0.3, case="C1"),
    data.frame(algo="A3", value=0.4, case="C1"),
    data.frame(algo="A1", value=0.7, case="C2"),
    data.frame(algo="A2", value=0.8, case="C2"),
    data.frame(algo="A3", value=0.9, case="C2")
  ))
  data <- rbind(dataTask1, dataTask2)
  challenge <- as.challenge(data, by="task", algorithm="algo", case="case", value="value", smallBetter=FALSE)
  ranking <- challenge%>%aggregateThenRank(FUN=mean, ties.method="min")
  expect_error(subset(ranking, top=2),
               "Subset of algorithms only sensible for single-task challenges.", fixed=TRUE)
})

test_that("extraction of subset returns all algorithms even when more are requested", {
  data <- rbind(
    data.frame(algo="A1", value=0.8, case="C1"),
    data.frame(algo="A2", value=0.6, case="C1"),
    data.frame(algo="A3", value=0.4, case="C1"),
    data.frame(algo="A1", value=0.2, case="C2"),
    data.frame(algo="A2", value=0.1, case="C2"),
    data.frame(algo="A3", value=0.0, case="C2"))
  challenge <- as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter=FALSE)
  ranking <- challenge%>%aggregateThenRank(FUN=mean, ties.method="min")
  rankingSubset <- subset(ranking, top=4)
  expectedRankingSubset <- rbind(
-    "A1" = data.frame(value_FUN = 0.5, rank = 1),
-    "A2" = data.frame(value_FUN = 0.35, rank = 2),
-    "A3" = data.frame(value_FUN = 0.2, rank = 3))
+    "A1" = data.frame(value_mean = 0.5, rank = 1),
+    "A2" = data.frame(value_mean = 0.35, rank = 2),
+    "A3" = data.frame(value_mean = 0.2, rank = 3))
  expect_equal(rankingSubset$matlist$T1, expectedRankingSubset)
})

test_that("extraction of subset returns more algorithms than requested when ties are present", {
  data <- rbind(
    data.frame(algo="A1", value=0.8, case="C1"),
    data.frame(algo="A2", value=0.8, case="C1"),
    data.frame(algo="A3", value=0.8, case="C1"),
    data.frame(algo="A1", value=0.2, case="C2"),
    data.frame(algo="A2", value=0.2, case="C2"),
    data.frame(algo="A3", value=0.2, case="C2"))
  challenge <- as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter=FALSE)
  ranking <- challenge%>%aggregateThenRank(FUN=mean, ties.method="min")
  rankingSubset <- subset(ranking, top=2)
  expectedRankingSubset <- rbind(
-    "A1" = data.frame(value_FUN = 0.5, rank = 1),
-    "A2" = data.frame(value_FUN = 0.5, rank = 1),
-    "A3" = data.frame(value_FUN = 0.5, rank = 1))
+    "A1" = data.frame(value_mean = 0.5, rank = 1),
+    "A2" = data.frame(value_mean = 0.5, rank = 1),
+    "A3" = data.frame(value_mean = 0.5, rank = 1))
  expect_equal(rankingSubset$matlist$T1, expectedRankingSubset)
})

test_that("top 2 performing algorithms are extracted from bootstrap ranking and data set is reduced respectively", {
  data <- rbind(
    data.frame(algo="A1", value=0.8, case="C1"),
    data.frame(algo="A2", value=0.6, case="C1"),
    data.frame(algo="A3", value=0.4, case="C1"),
    data.frame(algo="A1", value=0.2, case="C2"),
    data.frame(algo="A2", value=0.1, case="C2"),
    data.frame(algo="A3", value=0.0, case="C2"))
  challenge <- as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter=FALSE)
  ranking <- challenge%>%aggregateThenRank(FUN=mean, ties.method="min")
  set.seed(1)
  rankingBootstrapped <- ranking%>%bootstrap(nboot=10)
  rankingBootstrappedSubset <- subset(rankingBootstrapped, top=2)
  expectedRankingSubset <- rbind(
-    "A1" = data.frame(value_FUN = 0.5, rank = 1),
-    "A2" = data.frame(value_FUN = 0.35, rank = 2))
+    "A1" = data.frame(value_mean = 0.5, rank = 1),
+    "A2" = data.frame(value_mean = 0.35, rank = 2))
  expect_equal(rankingBootstrappedSubset$matlist$T1, expectedRankingSubset)
  expect_equal(as.vector(rankingBootstrappedSubset$data$T1$algo), c("A1", "A2", "A1", "A2"))
  expect_equal(as.vector(rankingBootstrappedSubset$data$T1$value), c(0.8, 0.6, 0.2, 0.1))
  expect_equal(as.vector(rankingBootstrappedSubset$data$T1$case), c("C1", "C1", "C2", "C2"))
  expect_equal(as.vector(rankingBootstrappedSubset$data$T1$task), c("T1", "T1", "T1", "T1"))
  expect_equal(dim(rankingBootstrappedSubset$bootsrappedRanks$T1), c(2, 10))
  expect_equal(dim(rankingBootstrappedSubset$bootsrappedAggregate$T1), c(2, 10))
})

test_that("extraction of bootstrap ranking subset raises error for multi-task data set", {
  dataTask1 <- cbind(task="T1", rbind(
    data.frame(algo="A1", value=0.8, case="C1"),
    data.frame(algo="A2", value=0.6, case="C1"),
    data.frame(algo="A3", value=0.4, case="C1"),
    data.frame(algo="A1", value=0.2, case="C2"),
    data.frame(algo="A2", value=0.1, case="C2"),
    data.frame(algo="A3", value=0.0, case="C2")
  ))
  dataTask2 <- cbind(task="T2", rbind(
    data.frame(algo="A1", value=0.2, case="C1"),
    data.frame(algo="A2", value=0.3, case="C1"),
    data.frame(algo="A3", value=0.4, case="C1"),
    data.frame(algo="A1", value=0.7, case="C2"),
    data.frame(algo="A2", value=0.8, case="C2"),
    data.frame(algo="A3", value=0.9, case="C2")
  ))
  data <- rbind(dataTask1, dataTask2)
  challenge <- as.challenge(data, by="task", algorithm="algo", case="case", value="value", smallBetter=FALSE)
  ranking <- challenge%>%aggregateThenRank(FUN=mean, ties.method="min")
  set.seed(1)
  rankingBootstrapped <- ranking%>%bootstrap(nboot=10)
  expect_error(subset(subset(rankingBootstrapped, top=2), top=2),
               "Subset of algorithms only sensible for single-task challenges.", fixed=TRUE)
})

test_that("extraction of bootstrap ranking subset returns all algorithms even when more are requested", {
  data <- rbind(
    data.frame(algo="A1", value=0.8, case="C1"),
    data.frame(algo="A2", value=0.6, case="C1"),
    data.frame(algo="A3", value=0.4, case="C1"),
    data.frame(algo="A1", value=0.2, case="C2"),
    data.frame(algo="A2", value=0.1, case="C2"),
    data.frame(algo="A3", value=0.0, case="C2"))
  challenge <- as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter=FALSE)
  ranking <- challenge%>%aggregateThenRank(FUN=mean, ties.method="min")
  set.seed(1)
  rankingBootstrapped <- ranking%>%bootstrap(nboot=10)
  rankingBootstrappedSubset <- subset(rankingBootstrapped, top=4)
  expectedRankingSubset <- rbind(
-    "A1" = data.frame(value_FUN = 0.5, rank = 1),
-    "A2" = data.frame(value_FUN = 0.35, rank = 2),
-    "A3" = data.frame(value_FUN = 0.2, rank = 3))
+    "A1" = data.frame(value_mean = 0.5, rank = 1),
+    "A2" = data.frame(value_mean = 0.35, rank = 2),
+    "A3" = data.frame(value_mean = 0.2, rank = 3))
  expect_equal(rankingBootstrappedSubset$matlist$T1, expectedRankingSubset)
})

test_that("extraction of bootstrap ranking subset returns more algorithms than requested when ties are present", {
  data <- rbind(
    data.frame(algo="A1", value=0.8, case="C1"),
    data.frame(algo="A2", value=0.8, case="C1"),
    data.frame(algo="A3", value=0.8, case="C1"),
    data.frame(algo="A1", value=0.2, case="C2"),
    data.frame(algo="A2", value=0.2, case="C2"),
    data.frame(algo="A3", value=0.2, case="C2"))
  challenge <- as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter=FALSE)
  ranking <- challenge%>%aggregateThenRank(FUN=mean, ties.method="min")
  set.seed(1)
  rankingBootstrapped <- ranking%>%bootstrap(nboot=10)
  rankingBootstrappedSubset <- subset(rankingBootstrapped, top=2)
  expectedRankingSubset <- rbind(
-    "A1" = data.frame(value_FUN = 0.5, rank = 1),
-    "A2" = data.frame(value_FUN = 0.5, rank = 1),
-    "A3" = data.frame(value_FUN = 0.5, rank = 1))
+    "A1" = data.frame(value_mean = 0.5, rank = 1),
+    "A2" = data.frame(value_mean = 0.5, rank = 1),
+    "A3" = data.frame(value_mean = 0.5, rank = 1))
  expect_equal(rankingBootstrappedSubset$matlist$T1, expectedRankingSubset)
})