diff --git a/R/Aggregate.R b/R/Aggregate.R index 23b9d59..2bf027c 100644 --- a/R/Aggregate.R +++ b/R/Aggregate.R @@ -1,96 +1,96 @@ Aggregate <- function(object,...) UseMethod("Aggregate") Aggregate.default <- function(object, ...) aggregate(object,...) #stats::aggregate Aggregate.list <-function(object, x, algorithm, FUN = mean, na.treat = "na.rm", parallel = FALSE, progress = "none", case, alpha = 0.05, p.adjust.method = "none", alternative = "one.sided", test.fun = function(x, y) wilcox.test(x, y, alternative = alternative, exact = FALSE, paired = TRUE)$p.value, - largeBetter = TRUE, # only needed for significance + smallBetter = FALSE, # only needed for significance ... ) { call=match.call(expand.dots = T) if (is.character(FUN) && FUN=="significance"){ - if(missing(case)| missing(largeBetter)| missing(alpha)) stop("If FUN='significance' arguments case, largeBetter and alpha need to be given") + if(missing(case)| missing(smallBetter)| missing(alpha)) stop("If FUN='significance' arguments case, smallBetter and alpha need to be given") matlist=llply(1:length(object), function(id){ piece=object[[id]] if (length(unique(piece[[algorithm]]))<=1){ warning("Only one algorithm available in task '", names(object)[id], "'.") return(data.frame("prop_significance"=rep(NA,length(unique(piece[[algorithm]]))), row.names = unique(piece[[algorithm]]))) } if (is.numeric(na.treat)) piece[,x][is.na(piece[,x])]=na.treat else if (is.function(na.treat)) piece[,x][is.na(piece[,x])]=na.treat(piece[,x][is.na(piece[,x])]) else if (na.treat=="na.rm") piece=piece[!is.na(piece[,x]),] else stop("Argument 'na.treat' is invalid. It can be 'na.rm', numeric value or function.") xmean <- significance(piece, x, algorithm, case, alpha, p.adjust.method=p.adjust.method, - largeBetter, + smallBetter, alternative=alternative, ...) class(xmean)=c("aggregated", class(xmean)) xmean }, .parallel=parallel, .progress=progress ) isSignificance=TRUE } else { if (is.function(FUN)) FUNname <-gsub('\")',"",gsub('UseMethod(\"',"",deparse(functionBody(FUN)),fixed = T),fixed=T) else if (is.character(FUN)) FUNname=FUN if (is.character(FUN)) FUN=try(eval(parse(text=FUN)), silent = T) if (!is.function(FUN)) stop("FUN has to be a function (possibly as character) or 'significance'") matlist=llply(object, function(piece){ if (is.numeric(na.treat)) piece[,x][is.na(piece[,x])]=na.treat else if (is.function(na.treat)) piece[,x][is.na(piece[,x])]=na.treat(piece[,x][is.na(piece[,x])]) else if (na.treat=="na.rm") piece=piece[!is.na(piece[,x]),] else stop("Argument 'na.treat' is invalid. It can be 'na.rm', numeric value or function.") xmean <- aggregate(piece[,x], by=list(piece[,algorithm]), FUN=function(z) do.call(FUN,args=list(x=z))) names(xmean)=c(algorithm, paste0(x,"_",FUNname)) rownames(xmean)=xmean[,1] xmean=xmean[,-1,drop=F] xmean }, .parallel=parallel, .progress=progress ) isSignificance=FALSE } names(matlist)=names(object) res=list(FUN = . %>% (call), FUN.list=list(FUN), call=list(call), data=object, matlist=matlist, isSignificance=isSignificance ) class(res)=c("aggregated.list",class(res)) res } diff --git a/R/Rank.R b/R/Rank.R index d7119eb..68cc837 100644 --- a/R/Rank.R +++ b/R/Rank.R @@ -1,74 +1,74 @@ Rank <- function(object,...) UseMethod("Rank") Rank.default <- function(object, ...) rank(object,...) 
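The `na.treat` dispatch inside `Aggregate.list` above accepts a numeric replacement value, a replacement function, or the string `"na.rm"`. A minimal, self-contained sketch of that behaviour (the `piece` data frame, its column names and the helper `treat_na` are illustrative only, not part of the package API):

```r
# Illustrative sketch of the na.treat dispatch used in Aggregate.list (toy data)
piece <- data.frame(algorithm = c("A1", "A2", "A3"),
                    x = c(0.8, NA, 0.6))

treat_na <- function(piece, x, na.treat) {
  if (is.numeric(na.treat))
    piece[, x][is.na(piece[, x])] <- na.treat                      # replace NAs by a constant
  else if (is.function(na.treat))
    piece[, x][is.na(piece[, x])] <- na.treat(piece[, x][is.na(piece[, x])])
  else if (identical(na.treat, "na.rm"))
    piece <- piece[!is.na(piece[, x]), ]                           # drop incomplete cases
  else
    stop("Argument 'na.treat' is invalid. It can be 'na.rm', a numeric value or a function.")
  piece
}

treat_na(piece, "x", 0)                                 # NA replaced by 0
treat_na(piece, "x", "na.rm")                           # row with NA removed
treat_na(piece, "x", function(z) rep(0.5, length(z)))   # function-based replacement
```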
#base::rank Rank.list <- function(object, x, annotator, ties.method="min", - largeBetter=FALSE, + smallBetter=TRUE, ...){ call=match.call(expand.dots = T) annotator.missing=missing(annotator) if (any(sapply(object, function(task) { (attr(object,"check") && - largeBetter && + smallBetter && any(is.na(task[[x]])) && min(task[[x]], na.rm=TRUE)==0) }) )) { message("There are missing metric values and metric values exactly equal to zero. Have some actually missing values been entered as zero in some instances? If yes, specify optional argument na.treat=0 in as.challenge().") } matlist=lapply(object, function(task){ if (annotator.missing){ res=bind_rows( lapply(split(task, task[[attr(object,"case")]]), function(task.case) cbind(task.case, rank=rankNA2(task.case[[x]], ties.method = ties.method, - largeBetter = largeBetter) + smallBetter = smallBetter) ) ) ) class(res)[2]="ranked" res } else { byAnnotator=split(task, as.list(task[,annotator])) temp=bind_rows( lapply(byAnnotator, function(annotator){ bind_rows( lapply(split(annotator, annotator[[attr(object,"case")]]), function(annotator.case) cbind(annotator.case, rank=rankNA2(annotator.case[[x]], ties.method = ties.method, - largeBetter = largeBetter) + smallBetter = smallBetter) ) ) ) } ) ) class(temp)[2]="ranked" temp } } ) res=list(FUN = . %>% (call), call=list(call), data=object, matlist=matlist) class(res)=c("ranked.list",class(res)) res } diff --git a/R/Rank.aggregated.list.R b/R/Rank.aggregated.list.R index 6d64223..d5d9637 100644 --- a/R/Rank.aggregated.list.R +++ b/R/Rank.aggregated.list.R @@ -1,73 +1,73 @@ rank.aggregated.list <-function(object, ties.method="min", - largeBetter, + smallBetter, ...){ - + call=match.call(expand.dots = F) - if (missing(largeBetter)){ - if (!is.null(attr(object$data,"largeBetter"))) largeBetter=attr(object$data,"largeBetter") - else stop("largeBetter has to be provided either in as.challenge() or rank()") + if (missing(smallBetter)){ + if (!is.null(attr(object$data,"smallBetter"))) smallBetter=attr(object$data,"smallBetter") + else stop("smallBetter has to be provided either in as.challenge() or rank()") + + if (object$isSignificance) smallBetter=FALSE # smallBetter already taken care of by one-sided test nature of significance + } - if (object$isSignificance) largeBetter=TRUE # smallBetter (largeBetter) already taken care of by one-sided test nature of signficance - } - call=call("rank.aggregated.list", object=call$object, ties.method=ties.method, - largeBetter=largeBetter) - + smallBetter=smallBetter) + matlist=object$matlist - matlist=lapply(matlist, + matlist=lapply(matlist, function(y){ if (nrow(y)>0) r=rankNA2(y[,ncol(y)], ties.method=ties.method, - largeBetter=largeBetter) + smallBetter=smallBetter) else r=NULL res=cbind(y,rank=r) res }) - + res=list(matlist=matlist, data=object$data, call=list(object$call,call), FUN = . %>% (object$FUN) %>% (call), FUN.list=c(object$FUN.list, "rank") ) class(res)=c("ranked.list",class(res)) res } rank.aggregatedRanks.list <-function(object, ties.method="min", ...){ - - call=match.call(expand.dots = F) + + call=match.call(expand.dots = F) call=call("rank.aggregatedRanks.list", object=call$object, ties.method=ties.method) matlist=object$matlist matlist=lapply(matlist, function(y){ if (nrow(y)>0) r=rankNA2(y[,ncol(y)], ties.method=ties.method, - largeBetter=FALSE) + smallBetter=TRUE) else r=NULL res=cbind(y,rank=r) res }) - + res=list(matlist=matlist, data=object$data, call=list(object$call,call), FUN = .
%>% (object$FUN) %>% (call), FUN.list=c(object$FUN.list, "rank") ) class(res)=c("ranked.list",class(res)) res res } diff --git a/R/aaggregate.R b/R/aaggregate.R index d1e8839..58f6628 100644 --- a/R/aaggregate.R +++ b/R/aaggregate.R @@ -1,167 +1,167 @@ test <- function(x,...) UseMethod("test") test.default <- function(x, ...) stop("not implemented for this class") test.challenge=function(x,...) aggregate.challenge(x=x, FUN="significance",...) #' Title #' #' @param x #' @param FUN #' @param na.treat #' @param alpha #' @param p.adjust.method #' @param parallel #' @param progress #' @param ... #' #' @return #' @export #' #' @examples aggregate.challenge=function(x, FUN=mean, na.treat, #either "na.rm", numeric value or function alpha=0.05, p.adjust.method="none",# only needed for significance parallel=FALSE, progress="none",...){ call=as.list(match.call()) if (missing(na.treat) && !is.null(attr(x,"na.treat"))) na.treat <- attr(x, "na.treat") - + if (missing(na.treat)){ #na.treat only optional if no missing values in data set if (!inherits(x,"list")){ if (!any(is.na(x[,attr(x, "value")]))) na.treat="na.rm" # there are no missings so set na.treat by dummy "na.rm" has no effect } else { if (!any(sapply(x, function(task) any(is.na(task[,attr(x, "value")]))))) na.treat="na.rm" # there are no missings so set na.treat by dummy "na.rm" has no effect } } else attr(x,"na.treat") <- na.treat res1=do.call("Aggregate",list(object=x, x=attr(x,"value"), algorithm=attr(x,"algorithm"), FUN=call$FUN, na.treat=na.treat, parallel=parallel, progress=progress, case=attr(x,"case"), alpha=alpha, p.adjust.method=p.adjust.method, - largeBetter=attr(x,"largeBetter") # only needed for significance + smallBetter=attr(x,"smallBetter") # only needed for significance )) call2=call("Aggregate", object=call$x, x=attr(x,"value"), algorithm=attr(x,"algorithm"), FUN=call$FUN, na.treat=na.treat, parallel=parallel,progress=progress, case=attr(x,"case"), alpha=alpha, p.adjust.method=p.adjust.method, - largeBetter=attr(x,"largeBetter") # only needed for significance + smallBetter=attr(x,"smallBetter") # only needed for significance ) if (inherits(x,"list")){ res=list(FUN = . %>% (call2), call=list(call2), FUN.list=list(FUN), data=x, matlist=res1$matlist, isSignificance=res1$isSignificance) class(res)=c("aggregated.list",class(res)) } else { res=list(FUN = . %>% (call2), call=list(call2), FUN.list=list(FUN), data=x, mat=res1$mat, isSignificance=res1$isSignificance) class(res)=c("aggregated",class(res)) } res } aggregate.ranked.list <-function(x, FUN=mean, ...){ call=match.call(expand.dots = F) call=call("aggregate.ranked.list", x=call$x, FUN=FUN) algorithm=attr(x$data,"algorithm") resmatlist=Aggregate.list(x$matlist, x="rank", algorithm=algorithm, FUN=FUN,...)$matlist resmatlist=lapply(resmatlist, function(z) as.data.frame(z)) res=list(matlist=resmatlist, call=c(x$call,call), data=x$data, FUN = . 
%>% (x$FUN) %>% (call), FUN.list=c(x$FUN.list,FUN) ) class(res)=c("aggregatedRanks.list",class(res)) res } aggregate.bootstrap.list <-function(x, what="metric", FUN=mean, ...){ call=match.call(expand.dots = T) if (is.character(FUN)) FUN=try(eval(parse(text=FUN)), silent = T) FUNname=as.character(call$FUN) if (!is.function(FUN)) stop("FUN has to be a function (possibly as character)") matlist=llply(1:length(x$bootsrappedRank), function(i.piece){ if (what=="ranks") xmean <- as.data.frame(apply(x$bootsrappedRank[[i.piece]],1,FUN=FUN)) else xmean <- as.data.frame(apply(x$bootsrappedAggregate[[i.piece]],1,FUN=FUN)) names(xmean)=paste0(what,"_",FUNname) xmean }) names(matlist)=names(x$bootsrappedRank) res=list(FUN = . %>% (call), call=list(call), data=x, matlist=matlist) class(res)=c("aggregated.list",class(res)) res } aggregate.bootstrap<-function(x,what="metric",FUN=mean, ... ){ call=match.call(expand.dots = T) if (is.character(FUN)) FUN=try(eval(parse(text=FUN)),silent = T) FUNname=as.character(call$FUN) if (!is.function(FUN)) stop("FUN has to be a function (possibly as character)") if (what=="ranks") xmean <- as.data.frame(apply(x$bootsrappedRank, 1, FUN=FUN)) else xmean <- as.data.frame(apply(x$bootsrappedAggregate, 1, FUN=FUN)) names(xmean)=paste0(what,"_",FUNname) res=list(FUN = . %>% (call), call=list(call), data=x, mat=xmean) class(res)=c("aggregated",class(res)) res } diff --git a/R/boxplot.R b/R/boxplot.R index 6fa8eab..e39f20c 100644 --- a/R/boxplot.R +++ b/R/boxplot.R @@ -1,80 +1,68 @@ boxplot.ranked.list=function(x, color="blue", jitter.width=0.25,...){ algo=attr(x$data,"algorithm") value=attr(x$data,"value") ranking=x x=x$data for (i in names(x)) { x[[i]][[algo]]=factor(x[[i]][[algo]], levels=rownames(ranking$matlist[[i]][order(ranking$matlist[[i]]$rank),])) } a=lapply(1:length(x),function(id){ ggplot(aes_string(algo,value),data=x[[id]])+ geom_jitter(position=position_jitter(width=jitter.width, height=0), color=color,...)+ geom_boxplot(outlier.shape = NA,fill=NA)+ ggtitle(names(x)[id]) + theme(axis.text.x=element_text(angle = -90, hjust = 0)) + xlab("Algorithm") + ylab("Metric value") }) # Remove title for single-task data set if (length(a) == 1) { a[[1]]$labels$title <- NULL } else { names(a) = names(x$matlist) } class(a) <- "ggList" a } boxplot.comparedRanks.list=function(x,...){ tau=sapply(x,function(z) z$tau) boxplot(tau,ylim=c(0,1.0),las=2, outline=FALSE, ylab="Kendall's tau",...) stripchart(tau, vertical = TRUE, method = "jitter", pch = 21, col = "blue", add=TRUE,...) } boxplot.bootstrap.list=function(x,...){ winner.noboot=winner.ranked.list(x) x2=winnerFrequencies(x) n.bootstraps= ncol(x$bootsrappedRanks[[1]]) perc_boot_Winner=lapply(1:length(x2),function(i){ x2.i=x2[[i]] winner.id=which(rownames(x2.i)%in%rownames(winner.noboot[[i]])) #could be multiple winners!!!! 100*x2.i[winner.id,3,drop=F]/n.bootstraps }) boxplot(unlist(perc_boot_Winner),ylim=c(0,100),las=2, outline=FALSE, ylab="% Bootstraps",xlab="Winner ranks 1", sub=paste(n.bootstraps,"Bootstraps"),...) stripchart(unlist(perc_boot_Winner), vertical = TRUE, method = "jitter", pch = 21, col = "blue", add=TRUE,...) 
} - - - - -# winnerFrequencies(bb) - # winner.datax=winner(datax, largeBetter=TRUE) #no bootstrap - # x2=numberRank1(datax,originalranking.datax,datax_boot) - # boot_W_1=x2[which(x2$algorithm_id==winner.datax),3] - # boot_NW_1=sum(x2[-which(x2$algorithm_id==winner.datax),3]>9) -# perc_boot_Winner=boot_W_1/Data$N_Bootstraps -# perc_boot_NotWinner=Data$Bootstrap_Rank1_NotWinner/(Data$N_Algo-1) - diff --git a/R/challengeR.R b/R/challengeR.R index fdd3a0a..c30253e 100644 --- a/R/challengeR.R +++ b/R/challengeR.R @@ -1,150 +1,150 @@ #' Title #' #' @param object #' @param value #' @param algorithm #' @param case #' @param taskName Optional for single-task data set that does not contain a task column. #' @param by The name of the column that contains the task identifiers. Required for multi-task data set. #' @param annotator #' @param smallBetter #' @param na.treat #' @param check #' #' @return #' @export #' #' @examples as.challenge=function(object, value, algorithm , case=NULL, taskName=NULL, by=NULL, annotator=NULL, smallBetter=FALSE, na.treat=NULL, # optional check=TRUE) { object=as.data.frame(object[,c(value, algorithm, case, by, annotator)]) # sanity checks if (check) { if (!is.null(by) && !is.null(taskName)) { warning("Argument 'taskName' is ignored for multi-task data set.") } # Add task column for data set without task column by using the specified task name. if (is.null(by) && !is.null(taskName)) { taskName <- trimws(taskName) if (taskName == "") { stop("Argument 'taskName' is empty.") } object <- cbind(task=taskName, object) by = "task" } # Add task column for data set without task column by using a dummy task name. if (is.null(by) && is.null(taskName)) { object <- cbind(task="dummyTask", object) by = "task" } object=splitby(object,by=by) object=lapply(object,droplevels) missingData = n.missing = list() for (task in names(object)) { if (!all(is.numeric(object[[task]][[value]]))) stop("Performance values must be numeric.") n.missing[[task]] <- sum(is.na(object[[task]][[value]])) # already missing before na.treat; for report if (n.missing[[task]]>0) message("Note: ", n.missing, " missing cases have been found in the data set.") # check for missing cases missingData[[task]]=object[[task]] %>% expand(!!as.symbol(algorithm), !!as.symbol(case))%>% anti_join(object[[task]], by=c( algorithm,case)) if (nrow(missingData[[task]])>0) { if (length(object) == 1 ) { # single task message("Performance of not all algorithms has been observed for all cases. Therefore, missings have been inserted in the following cases:") } else { # multi task message("Performance of not all algorithms has been observed for all cases in task '", task, "'. Therefore, missings have been inserted in the following cases:") } print(as.data.frame(missingData[[task]])) object[[task]]=as.data.frame(object[[task]] %>% complete(task, !!as.symbol(algorithm), !!as.symbol(case))) } # check duplicate cases all1=apply(table(object[[task]][[algorithm]], object[[task]][[case]]), 2, function(x) all(x==1)) if (!all(all1)) { n.duplicated <- sum(all1!=1) if (length(object) == 1 ) { # single task if (n.duplicated/length(all1) >= 1/5) { # at least a quarter of the cases is duplicated stop ("The following case(s) appear(s) more than once for the same algorithm. Please revise. ", "Or are you considering a multi-task challenge and forgot to specify argument 'by'?\n", "Case(s): ", paste(names(which(all1!=1)), collapse=", ") ) } else { stop ("The following case(s) appear(s) more than once for the same algorithm. 
Please revise.\n", "Case(s): ", paste(names(which(all1!=1)), collapse=", ") ) } } else { # multi task stop ("The following case(s) appear(s) more than once for the same algorithm in task '", task, "'. Please revise.\n", "Case(s): ", paste(names(which(all1!=1)), collapse=", ") ) } } if (!is.null(na.treat)) { if (is.numeric(na.treat)) object[[task]][,value][is.na(object[[task]][,value])]=na.treat else if (is.function(na.treat)) object[[task]][,value][is.na(object[[task]][,value])]=na.treat(object[[task]][,value][is.na(object[[task]][,value])]) else if (is.character(na.treat) && na.treat=="na.rm") object[[task]]=object[[task]][!is.na(object[[task]][,value]),] } } } if (check==TRUE && (any(sapply(missingData, function(x) nrow(x))>0) |any(n.missing>0))) { if (is.null(na.treat)) message("For aggregate-then-rank, na.treat will have to be specified. ", "For rank-then-aggregate, missings will implicitly lead to the algorithm ranked last for the missing test case." ) else if (is.numeric(na.treat)) message("All missings have been replaced by the value ", na.treat,".\n") else if (is.character(na.treat) && na.treat=="na.rm") message("All missings have been removed.") else if (is.function(na.treat)) { message("Missings have been replaced using function ") print(na.treat) } } if (check==TRUE){ attr(object,"n.missing")=n.missing attr(object,"missingData")=missingData } attr(object,"na.treat")=na.treat attr(object,"algorithm")=algorithm attr(object,"value")=value attr(object,"case")=case attr(object,"annotator")=annotator attr(object,"by")=by - attr(object,"largeBetter")=!smallBetter + attr(object,"smallBetter")=smallBetter attr(object,"check")=check class(object)=c("challenge", class(object)) object } diff --git a/R/methodsplot.R b/R/methodsplot.R index 5ab2757..c52cb6f 100644 --- a/R/methodsplot.R +++ b/R/methodsplot.R @@ -1,108 +1,108 @@ methodsplot <- function(x,...) UseMethod("methodsplot") methodsplot.default <- function(x, ...) stop("not implemented for this class") methodsplot.challenge=function(x, na.treat=NULL, methods=list(testBased=.%>%test() %>% rank(ties.method = "min"), meanThenRank= .%>% aggregate( FUN="mean") %>% rank(ties.method = "min"), medianThenRank=.%>% aggregate( FUN="median") %>% rank(ties.method = "min"), rankThenMean= .%>%rank(ties.method = "min") %>% aggregate( FUN="mean") %>%rank(ties.method = "min"), rankThenMedian=.%>%rank(ties.method = "min") %>% aggregate( FUN="median") %>%rank(ties.method = "min") ), ordering, ...) 
{ if (any(sapply(x, function(task) any(is.na(task[,attr(x, "value")]))))) { # only if missings present, else do nothing if (is.null(na.treat)) { warning("Please specify na.treat in as.challenge()") return(NULL) } else { xx = melt(x, id.vars=c(attr(x,"value"), attr(x,"algorithm") , attr(x,"case"), attr(x,"annotator"), attr(x,"by") )) x=as.challenge(xx, value=attr(x,"value"), algorithm=attr(x,"algorithm") , case=attr(x,"case"), by=attr(x,"by"), annotator = attr(x,"annotator"), - smallBetter = !attr(x,"largeBetter"), + smallBetter = attr(x,"smallBetter"), na.treat=na.treat) } } a=lapply(methods,function(fun) fun(x)) dat=melt(a,measure.vars="rank") colnames(dat)[4:5]=c("task","rankingMethod") if (missing(ordering)){ lev=sort(unique(dat$algorithm)) lab=lev } else { lev=ordering lab=lev } dat=dat%>% dplyr::rename(rank=.data$value)%>% mutate(rank=factor(.data$rank))%>% mutate(task=factor(.data$task))%>% mutate(algorithm=factor(.data$algorithm, levels=lev,labels = lab)) linePlot <- ggplot(data = dat) + aes(x = rankingMethod, y = rank, color=algorithm, group=algorithm ) + geom_line(size=1)+ xlab("Ranking method") + ylab("Rank")+ theme( strip.placement = "outside", axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1) ) # Create multi-panel plot with task names as titles for multi-task data set if (length(x) > 1) { linePlot <- linePlot + facet_wrap(~ task) } return(linePlot) } # methodsplot.ranked.list does not exist, use methodpsplot.challenge instead since consonsus ranking needed for ordering (or alphabetical ordering instead) #similar plot to methods plot, instead of across ranking methods across tasks lineplot <- function(x,...) UseMethod("lineplot") lineplot.default <- function(x, ...) stop("not implemented for this class") lineplot.challenge=function(x, ordering,...){ if (inherits(x,"list")) { dat=melt(x,measure.vars="rank") colnames(dat)[4]=c("task") if (missing(ordering)){ lev=sort(unique(dat$algorithm)) lab=lev } else { lev=ordering lab=paste(1:length(ordering),ordering) } dat=dat%>% dplyr::rename(rank=.data$value)%>% mutate(rank=factor(.data$rank))%>% mutate(task=factor(.data$task))%>% mutate(algorithm=factor(.data$algorithm, levels=lev,labels = lab)) ggplot(data = dat) + aes(x = task, y = rank, color=algorithm, group=algorithm ) + geom_line(size=1)+ theme( axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1) ) } else stop("Only applicable to multiple tasks") } diff --git a/R/podium.R b/R/podium.R index 66109ba..e5d769d 100644 --- a/R/podium.R +++ b/R/podium.R @@ -1,163 +1,163 @@ podium <- function(object,...) UseMethod("podium") podium.default <- function(object, ...) 
stop("not implemented for this class") podium.ranked.list=function(object, xlab = NULL, ylab = NULL, lines.show = TRUE, lines.alpha = 0.2, lines.lwd = 1, col, lines.col = col, dots.pch = 19, dots.cex = 1, places.lty = 2, places.col = 1, legendfn = function(algs, cols) { legend("topleft", algs, lwd = 1, col = cols, bg = "white") }, layout.heights=c(1,0.4), ...){ if (is.null(xlab)) xlab <- "Podium" if (is.null(ylab)) ylab <- "Performance" x=object$data podiumPlots <- length(names(x)) for (subt in names(x)) { ordering=t(object$matlist[[subt]][,"rank",drop=F])["rank",] if (missing(col)) col=default_colors(length(ordering), algorithms = names(ordering)) dd=as.challenge(x[[subt]], value=attr(x,"value"), algorithm=attr(x,"algorithm"), case=attr(x,"case"), by=attr(x, "by"), annotator = attr(x,"annotator"), - smallBetter = !attr(x,"largeBetter"), + smallBetter = attr(x,"smallBetter"), na.treat=object$call[[1]][[1]]$na.treat) podiumPlot <- podium(dd, ordering=ordering, xlab = xlab, ylab = ylab, lines.show = lines.show, lines.alpha = lines.alpha, lines.lwd = lines.lwd, col=col, lines.col = lines.col, dots.pch = dots.pch, dots.cex = dots.cex, places.lty = places.lty, places.col = places.col, legendfn = legendfn, layout.heights=layout.heights, ...) if (length(names(x)) > 1) { title(subt,outer=T,line=-3) } append(podiumPlots, podiumPlot) } } podium.challenge=function(object, ordering, xlab = NULL, ylab = NULL, lines.show = FALSE, lines.alpha = 0.2, lines.lwd = 1, col,lines.col = col, dots.pch = 19, dots.cex = 1, places.lty = 2, places.col = 1, legendfn = function(algs, cols) { legend("topleft", algs, lwd = 1, col = cols, bg = "white") }, layout.heights=c(1,0.4), ...) { ranking=object%>%rank( ties.method = "random" ) task <- ranking$matlist[[1]] dat=as.data.frame(table(task[[attr(object, "algorithm")]], task$rank, dnn=c("algorithm","rank")), responseName = "Count") form=as.formula(paste(attr(object,"case"), attr(object,"algorithm"), sep="~")) ranks=acast(task, form, value.var="rank") values=acast(task, form, value.var=attr(object, "value")) nranks=acast(dat, algorithm~rank, value.var="Count") nalgs <- ncol(ranks) algs <- colnames(ranks) barorder <- order(ordering) orderedAlgorithms= names(ordering)[barorder] ylim=range(task[[attr(object,"value")]], na.rm = TRUE) dotplotborders <- (0:nalgs) * nalgs dotplaces <- (1:nalgs) - 0.5 names(dotplaces) <- orderedAlgorithms linecols <- sapply(lines.col, function(c) { r <- col2rgb(c) rgb(r[1], r[2], r[3], alpha = round(255 * lines.alpha), maxColorValue = 255) }) opar <- par(no.readonly = TRUE) layout(matrix(c(1, 2), nrow = 2, byrow = TRUE), heights =layout.heights) mar <- par("mar") par(mar = c(0, mar[2], mar[3], mar[4])) plot(dotplotborders, rep(ylim[2], nalgs + 1), type = "n", ylim = ylim, ylab = ylab, xlab = "", axes = F) axis(1, at = dotplotborders, labels = NA, lwd = par("lwd")) axis(2, lwd = par("lwd")) box() abline(v = dotplotborders, lty = places.lty, col = places.col) linesegments <- function(x, y, ...) { n <- length(x) segments(x[-n], y[-n], x[-1], y[-1], ...) } drawthe <- function(fn, col, ...) { for (i in 1:nrow(values)) { r <- ranks[i, ] o <- order(r) performances <- (values[i, ])[o] places <- (dotplaces[names(r)] + ((r - 1) * nalgs))[o] fn(places, performances, col = col[names(r)[o]], ...) 
} } if (lines.show) drawthe(linesegments, linecols, lwd = lines.lwd) drawthe(points, col, pch = dots.pch, cex = dots.cex) legendfn(orderedAlgorithms, col[orderedAlgorithms]) par(mar = c(mar[1], mar[2], 0, mar[4])) barplot(nranks[barorder,], beside = TRUE, width = 1, axes = F, space = c(0, 0), border = NA, ylim = c(0, nrow(ranks)), names.arg = paste(1:nalgs, ".", sep = ""), col = col[orderedAlgorithms], xlab = xlab) axis(1, at = c(0, dotplotborders), labels = NA, lwd = par("lwd")) box() par(opar) } diff --git a/R/rankNA2.R b/R/rankNA2.R index be2120b..7c6828a 100644 --- a/R/rankNA2.R +++ b/R/rankNA2.R @@ -1,10 +1,10 @@ rankNA2 <- -function(x,ties.method="min",largeBetter=F){ - r=rank((-1)^(largeBetter)*x,ties.method=ties.method,na.last="keep") #xtfrm maybe faster alternative +function(x,ties.method="min",smallBetter=TRUE){ + r=rank((-1)^(!smallBetter)*x,ties.method=ties.method,na.last="keep") #xtfrm maybe faster alternative if (any(is.na(x))){ maxrank=ifelse(all(is.na(x)), yes=0, no=max(r,na.rm=TRUE)) - if (ties.method%in%c("min","random")) r[is.na(x)]<-maxrank+1 + if (ties.method%in%c("min","random")) r[is.na(x)]<-maxrank+1 if (ties.method=="average") r[is.na(x)]<-maxrank+mean(1:sum(is.na(x))) } r } diff --git a/R/rankingHeatmap.R b/R/rankingHeatmap.R index 588a550..82bfbcf 100644 --- a/R/rankingHeatmap.R +++ b/R/rankingHeatmap.R @@ -1,63 +1,63 @@ rankingHeatmap <- function(x,...) UseMethod("rankingHeatmap") rankingHeatmap.default <- function(x, ...) stop("not implemented for this class") rankingHeatmap.ranked.list=function (x,ties.method="min",...) { xx=x$data a=lapply(names(x$matlist),function(subt){ ordering=rownames(x$matlist[[subt]])[order(x$matlist[[subt]]$rank)] dd=as.challenge(xx[[subt]], value=attr(xx,"value"), algorithm=attr(xx,"algorithm") , case=attr(xx,"case"), by=attr(xx, "by"), annotator = attr(xx,"annotator"), - smallBetter = !attr(xx,"largeBetter"), + smallBetter = attr(xx,"smallBetter"), na.treat=x$call[[1]][[1]]$na.treat) rankingHeatmap(dd, ordering=ordering, ties.method=ties.method,...) + ggtitle(subt) }) # Remove title for single-task data set if (length(a) == 1) { a[[1]]$labels$title <- NULL } else { names(a) = names(x$matlist) } class(a) <- "ggList" a } rankingHeatmap.challenge=function(x, ordering, ties.method="min",...) { ranking=x%>%rank( ties.method = ties.method ) task <- ranking$matlist[[1]] dat=as.data.frame(table(task[[attr(x,"algorithm")]], task$rank, dnn=c("algorithm","rank")), responseName = "Count") dat$algorithm=factor(dat$algorithm, levels=ordering) ncases=length(unique(task[[attr(x,"case")]])) ggplot(dat)+ geom_raster(aes(algorithm, rank, fill= Count))+ geom_hline(yintercept = seq(1.5,max(task$rank)-.5,by=1), color=grey(.8),size=.3)+ geom_vline(xintercept = seq(1.5,length(unique(dat$algorithm))-.5,by=1), color=grey(.8),size=.3)+ scale_fill_viridis_c(direction = -1, limits=c(0,ncases) )+ theme(axis.text.x = element_text(angle = 90), aspect.ratio=1)+ xlab("Algorithm")+ ylab("Rank") } diff --git a/R/rrank.R b/R/rrank.R index 020d373..ce5ff6c 100644 --- a/R/rrank.R +++ b/R/rrank.R @@ -1,48 +1,46 @@ rank <- function(object,...) UseMethod("rank") rank.default <- function(object, ...) base::rank(object,...) 
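To make the new direction handling concrete: `rankNA2()` ranks the raw values when `smallBetter = TRUE`, negates them via `(-1)^(!smallBetter)` when `smallBetter = FALSE`, and pushes missing values to the worst rank. A small self-contained sketch mirroring that logic for `ties.method = "min"` (the values and the helper name `rank_sketch` are made up for illustration):

```r
# Illustrative check of the (-1)^(!smallBetter) sign flip used in rankNA2()
x <- c(0.2, 0.5, NA, 0.1)

rank_sketch <- function(x, smallBetter = TRUE, ties.method = "min") {
  r <- rank((-1)^(!smallBetter) * x, ties.method = ties.method, na.last = "keep")
  if (any(is.na(x))) {
    maxrank <- ifelse(all(is.na(x)), 0, max(r, na.rm = TRUE))
    r[is.na(x)] <- maxrank + 1   # missing values are ranked last
  }
  r
}

rank_sketch(x, smallBetter = TRUE)    # 2 3 4 1  (small metric values win)
rank_sketch(x, smallBetter = FALSE)   # 2 1 4 3  (large metric values win)
```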
#stats::aggregate rank.challenge=function(object, x, ties.method="min",...){ call=as.list(match.call()) if (!is.null(attr(object,"annotator"))) { call2=call("Rank", object=call$object, x=attr(object,"value"), annotator=c(attr(object,"annotator")), ties.method=ties.method, - largeBetter=attr(object,"largeBetter") + smallBetter=attr(object,"smallBetter") ) res1=do.call("Rank",list(object=object, x=attr(object,"value"), annotator=c(attr(object,"annotator")), ties.method=ties.method, - largeBetter=attr(object,"largeBetter") + smallBetter=attr(object,"smallBetter") )) } else { call2=call("Rank", object=call$object, x=attr(object,"value"), ties.method=ties.method, - largeBetter=attr(object,"largeBetter") + smallBetter=attr(object,"smallBetter") ) res1=do.call("Rank",list(object=object, x=attr(object,"value"), ties.method=ties.method, - largeBetter=attr(object,"largeBetter") + smallBetter=attr(object,"smallBetter") )) } res=list(FUN = . %>% (call2), call=list(call2), FUN.list=list("rank"), data=object, matlist=res1$matlist) class(res)=c("ranked.list",class(res)) res } - - diff --git a/R/testBased.R b/R/testBased.R index 897ec79..818e5a7 100644 --- a/R/testBased.R +++ b/R/testBased.R @@ -1,175 +1,175 @@ decision <- function(x,...) UseMethod("decision") decision.default <- function(x, ...) stop("not implemented for this class") decision.challenge=function(x, na.treat=NULL, # it can be 'na.rm', numeric value or function alpha=0.05, p.adjust.method="none", alternative="one.sided", test.fun=function(x,y) wilcox.test(x,y, alternative = alternative,exact=FALSE, paired = TRUE)$p.value, parallel=FALSE, progress="none",...){ if (is.null(na.treat)){ #na.treat only optional if no missing values in data set if (!inherits(x,"list")){ if (!any(is.na(x[,attr(x, "value")]))) na.treat="na.rm" # there are no missings so set na.treat by dummy "na.rm" has no effect else stop("Please specify na.treat in as.challenge()") } else { if (!any(sapply(x, function(task) any(is.na(task[,attr(x, "value")]))))) na.treat="na.rm" # there are no missings so set na.treat by dummy "na.rm" has no effect else stop("Please specify na.treat in as.challenge()") } } - if (alternative!="two.sided") alternative=ifelse(attr(x,"largeBetter"), + if (alternative!="two.sided") alternative=ifelse(!attr(x,"smallBetter"), yes="greater", no="less") call=match.call(expand.dots = T) object=x algorithm=attr(object,"algorithm") case=attr(object,"case") value=attr(object,"value") - largeBetter=attr(object,"largeBetter") - if(missing(case)| missing(largeBetter)) stop("arguments case and alpha need to be given in as.challenge()") + smallBetter=attr(object,"smallBetter") + if(missing(case)| missing(smallBetter)) stop("arguments case and smallBetter need to be given in as.challenge()") if (inherits(object,"list")){ matlist=llply(1:length(object), function(id){ piece=object[[id]] if (length(unique(piece[[algorithm]]))<=1){ warning("only one ", algorithm, " available in element ", names(object)[id]) } if (is.numeric(na.treat)) piece[,value][is.na(piece[,value])]=na.treat else if (is.function(na.treat)) piece[,value][is.na(piece[,value])]=na.treat(piece[,value][is.na(piece[,value])]) else if (na.treat=="na.rm") piece=piece[!is.na(piece[,value]),] - mat=Decision(piece, value, algorithm, case, alpha, largeBetter, + mat=Decision(piece, value, algorithm, case, alpha, smallBetter, p.adjust.method=p.adjust.method, alternative=alternative, test.fun=test.fun) mat=as.data.frame(mat) mat[is.na(mat)]=0 mat=as.matrix(mat) class(mat)=c(class(mat),"challenge.incidence") mat },
.parallel=parallel, .progress=progress ) names(matlist)=names(object) return(matlist) } else { if (length(unique(object[[algorithm]]))<=1){ warning("only one ", algorithm, " available") matlist=(matrix(NA,1,1)) } else { mat=Decision(object, value, algorithm, case, alpha, - largeBetter, + smallBetter, p.adjust.method=p.adjust.method, alternative=alternative, test.fun=test.fun) } mat=as.data.frame(mat) mat[is.na(mat)]=0 mat=as.matrix(mat) class(mat)=c(class(mat),"challenge.incidence") return(mat) } } Decision=function(object, value, by, case, alpha, - largeBetter=FALSE, + smallBetter=TRUE, p.adjust.method="none", alternative="one.sided", test.fun=function(x,y) wilcox.test(x,y, alternative = alternative,exact=FALSE, paired = TRUE)$p.value){ algorithms=unique(object[[by]]) if (length(unique(object[[case]]))==1){ warning("Only one case in task.") } combinations=expand.grid(algorithms,algorithms)[,2:1] combinations=combinations[apply(combinations,1,function(z) anyDuplicated(z)==0),] # remove i==j pvalues=sapply(1:nrow(combinations), function(it){ dat1=object[object[[by]]==combinations[it,1],] dat2=object[object[[by]]==combinations[it,2],] id=intersect(dat2[,case],dat1[,case]) dat1=dat1[match(id,dat1[,case]),value] dat2=dat2[match(id,dat2[,case]),value] test.fun(dat1,dat2) }) decisions=as.numeric(p.adjust(pvalues, method=p.adjust.method)<= alpha) res=cbind(combinations,decisions) reshape2::acast(res, Var2~Var1, value.var="decisions") } as.relation.challenge.incidence=function(x, verbose = FALSE, ...) { r <- relation(incidence = x, ...) props <- check_strict_preference(r) class <- "strictpref" r$.Meta$is_decreasing <- FALSE r$.Meta <- c(r$.Meta, structure(props, names = sprintf("is_%s", names(props)))) if ( verbose ) { for ( p in names(props) ) { cat(sprintf("%s = %s:\n", p, props[[p]])) print(relation_violations(r, p, TRUE)) } } structure(r, class = c(class, class(r))) } check_strict_preference= function(x) { list(irreflexive = relation_is_irreflexive(x), asymmetric = relation_is_asymmetric(x), transitive = relation_is_transitive(x), negatively_transitive = relation_is_negatively_transitive(x), trichotomous = relation_is_trichotomous(x)) } significance=function(object, value, algorithm, case, alpha, - largeBetter=FALSE,...) { + smallBetter=TRUE,...) { xx=as.challenge(object, value=value, algorithm=algorithm, case=case, - smallBetter = !largeBetter, + smallBetter = smallBetter, check=FALSE) a=decision.challenge(xx,...) prop_significance=rowSums(a)/(ncol(a)-1) return(data.frame("prop_significance"=prop_significance, row.names = names(prop_significance))) } diff --git a/R/wrapper.R b/R/wrapper.R index 14371b1..3a70371 100644 --- a/R/wrapper.R +++ b/R/wrapper.R @@ -1,23 +1,21 @@ aggregateThenRank=function(object,FUN,ties.method = "min",...){ - object %>% - aggregate(FUN=FUN,...) %>% + object %>% + aggregate(FUN=FUN,...) %>% rank(ties.method = ties.method) } testThenRank=function(object,FUN,ties.method = "min",...){ - object %>% - aggregate(FUN="significance",...) %>% + object %>% + aggregate(FUN="significance",...) %>% rank(ties.method = ties.method) } rankThenAggregate=function(object, FUN, ties.method = "min" ){ - object %>% - rank(ties.method = ties.method)%>% - aggregate(FUN=FUN) %>% - rank(ties.method = ties.method) #small rank is always best, i.e. largeBetter always FALSE + object %>% + rank(ties.method = ties.method)%>% + aggregate(FUN=FUN) %>% + rank(ties.method = ties.method) # small rank is always best, i.e. 
smallBetter always TRUE } - - diff --git a/inst/appdir/reportMultiple.Rmd b/inst/appdir/reportMultiple.Rmd index 54bfa9e..ca545c7 100644 --- a/inst/appdir/reportMultiple.Rmd +++ b/inst/appdir/reportMultiple.Rmd @@ -1,409 +1,409 @@ --- params: object: NA colors: NA name: NULL consensus: NA isMultiTask: NA bootstrappingEnabled: NA fig.format: NULL dpi: NULL title: "Benchmarking report for `r params$name` " author: "created by challengeR v`r packageVersion('challengeR')`" date: "`r Sys.setlocale('LC_TIME', 'English'); format(Sys.time(), '%d %B, %Y')`" editor_options: chunk_output_type: console --- ```{r setup, include=FALSE} options(width=80) #out.format <- knitr::opts_knit$get("out.format") out.format <- knitr::opts_knit$get("rmarkdown.pandoc.to") img_template <- switch( out.format, docx = list("img-params"=list(dpi=150, fig.width=6, fig.height=6, out.width="504px", out.height="504px")), { # default list("img-params"=list( fig.width=7, fig.height = 3, dpi=300)) } ) knitr::opts_template$set( img_template ) knitr::opts_chunk$set(echo = F) # ,#fig.width=7,fig.height = 3,dpi=300, if (out.format != "docx") knitr::opts_chunk$set(fig.align = "center") if (!is.null(params$fig.format)) knitr::opts_chunk$set(dev = params$fig.format) # can be vector, e.g. fig.format=c('jpeg','png', 'pdf') if (!is.null(params$dpi)) knitr::opts_chunk$set(dpi = params$dpi) theme_set(theme_light()) isMultiTask = params$isMultiTask bootstrappingEnabled = params$bootstrappingEnabled ``` ```{r } object = params$object if (isMultiTask) { ordering_consensus=names(params$consensus) } else { ordering_consensus=names(sort(t(object$matlist[[1]][,"rank",drop=F])["rank",])) } color.fun=params$colors ``` ```{r } challenge_multiple=object$data ranking.fun=object$FUN cols_numbered=cols=color.fun(length(ordering_consensus)) names(cols)=ordering_consensus names(cols_numbered)= paste(1:length(cols),names(cols)) if (bootstrappingEnabled) { boot_object = params$object challenge_multiple=boot_object$data ranking.fun=boot_object$FUN object=challenge_multiple%>%ranking.fun object$FUN.list = boot_object$FUN.list object$fulldata=boot_object$fulldata # only not NULL if subset of algorithms used cols_numbered=cols=color.fun(length(ordering_consensus)) names(cols)=ordering_consensus names(cols_numbered)= paste(1:length(cols),names(cols)) } ``` This document presents a systematic report on the benchmark study "`r params$name`". Input data comprises raw metric values for all algorithms and cases. Generated plots are: ```{r, child=if (!isMultiTask && !bootstrappingEnabled) system.file("appdir", "overviewSingleTaskNoBootstrapping.Rmd", package="challengeR")} ``` ```{r, child=if (!isMultiTask && bootstrappingEnabled) system.file("appdir", "overviewSingleTaskBootstrapping.Rmd", package="challengeR")} ``` ```{r, child=if (isMultiTask && !bootstrappingEnabled) system.file("appdir", "overviewMultiTaskNoBootstrapping.Rmd", package="challengeR")} ``` ```{r, child=if (isMultiTask && bootstrappingEnabled) system.file("appdir", "overviewMultiTaskBootstrapping.Rmd", package="challengeR")} ``` Details can be found in Wiesenfarth et al. (2019). 
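For orientation, a toy end-to-end call of the renamed interface that produces the objects such a report is rendered from (the data values and column names are made up; `as.challenge()` and `aggregateThenRank()` are the package functions shown in the diff above):

```r
# Toy sketch of the smallBetter workflow behind the report (illustrative data)
library(challengeR)

data <- rbind(
  data.frame(algo = "A1", value = 0.80, case = "C1"),
  data.frame(algo = "A2", value = 0.60, case = "C1"),
  data.frame(algo = "A1", value = 0.75, case = "C2"),
  data.frame(algo = "A2", value = 0.65, case = "C2"))

challenge <- as.challenge(data,
                          taskName = "T1",
                          algorithm = "algo",
                          case = "case",
                          value = "value",
                          smallBetter = FALSE)   # larger metric values are better here

ranking <- aggregateThenRank(challenge, FUN = mean, ties.method = "min")
ranking$matlist$T1   # ranking table for task T1
```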
```{r,results='asis'} if (isMultiTask) { cat("# Rankings\n") } else { cat("# Ranking") } ``` Algorithms within a task are ranked according to the following ranking scheme: ```{r,results='asis'} a=( lapply(object$FUN.list[1:2],function(x) { if (!is.character(x)) return(paste0("aggregate using function ", paste(gsub("UseMethod","", deparse(functionBody(x))), collapse=" ") )) else if (x=="rank") return(x) else return(paste0("aggregate using function ",x)) })) cat("    *",paste0(a,collapse=" then "),"*",sep="") if (is.character(object$FUN.list[[1]]) && object$FUN.list[[1]]=="significance") cat("\n\n Column 'prop_significance' is equal to the number of pairwise significant test results for a given algorithm divided by the number of algorithms.") ``` ```{r,results='asis'} if (isMultiTask) { cat("Ranking for each task:\n") for (t in 1:length(object$matlist)){ cat("\n",names(object$matlist)[t],": ") n.cases=nrow(challenge_multiple[[t]])/length(unique(challenge_multiple[[t]][[attr(challenge_multiple,"algorithm")]])) numberOfAlgorithms <- length(levels(challenge_multiple[[t]][[attr(challenge_multiple, "algorithm")]])) cat("\nThe analysis is based on", numberOfAlgorithms, "algorithms and", n.cases, "cases.", attr(object$data,"n.missing")[[t]], "missing cases have been found in the data set. ") if (nrow(attr(object$data,"missingData")[[t]])>0) { if(attr(object$data,"n.missing")[[t]]==0 ) cat("However, ") else if(attr(object$data,"n.missing")[[t]]>0 ) cat("Additionally, ") cat("performance of not all algorithms has been observed for all cases in task '", names(object$matlist)[t], "'. Therefore, missings have been inserted in the following cases:") print(knitr::kable(as.data.frame(attr(object$data,"missingData")[[t]]))) } if (nrow(attr(object$data,"missingData")[[t]])>0 | attr(object$data,"n.missing")[[t]]>0) { if (is.numeric(attr(object$data,"na.treat"))) cat("All missings have been replaced by values of", attr(object$data,"na.treat"),".\n") else if (is.character(attr(object$data,"na.treat")) && attr(object$data,"na.treat")=="na.rm") cat("All missings have been removed.") else if (is.function(attr(object$data,"na.treat"))) { cat("Missings have been replaced using function ") print(attr(object$data,"na.treat")) } else if (is.character(object$FUN.list[[1]]) && object$FUN.list[[1]]=="rank") cat("Missings lead to the algorithm ranked last for the missing case.") } x=object$matlist[[t]] print(knitr::kable(x[order(x$rank),])) } } else { n.cases=nrow(challenge_multiple[[1]])/length(unique(challenge_multiple[[1]][[attr(challenge_multiple,"algorithm")]])) # Is subset of algorithms used? if (!is.null(object$fulldata[[1]])) { cat("The top ", length(levels(challenge_multiple[[1]][[attr(challenge_multiple, "algorithm")]])), " out of ", length(levels(object$fulldata[[1]][[attr(challenge_multiple, "algorithm")]])), " algorithms are considered.\n") cat("\nThe analysis is based on", n.cases, "cases. ") } else { cat("\nThe analysis is based on", length(levels(challenge_multiple[[1]][[attr(challenge_multiple, "algorithm")]])), "algorithms and", n.cases, "cases. ") } cat(attr(object$data,"n.missing")[[1]], "missing cases have been found in the data set. ") if (nrow(attr(object$data,"missingData")[[1]])>0) { if(attr(object$data,"n.missing")[[1]]==0 ) cat("However, ") else if(attr(object$data,"n.missing")[[1]]>0 ) cat("Additionally, ") cat("performance of not all algorithms has been observed for all cases. 
Therefore, missings have been inserted in the following cases:") print(knitr::kable(as.data.frame(attr(object$data,"missingData")[[1]]))) } if (nrow(attr(object$data,"missingData")[[1]])>0 | attr(object$data,"n.missing")[[1]]>0) { if (is.numeric(attr(object$data,"na.treat"))) cat("All missings have been replaced by values of", attr(object$data,"na.treat"),".\n") else if (is.character(attr(object$data,"na.treat")) && attr(object$data,"na.treat")=="na.rm") cat("All missings have been removed.") else if (is.function(attr(object$data,"na.treat"))) { cat("Missings have been replaced using function ") print(attr(object$data,"na.treat")) } else if (is.character(object$FUN.list[[1]]) && object$FUN.list[[1]]=="rank") cat("Missings lead to the algorithm ranked last for the missing case.") } cat("\n\nRanking:") x=object$matlist[[1]] print(knitr::kable(x[order(x$rank),])) } ``` \bigskip ```{r, child=if (isMultiTask) system.file("appdir", "consensusRanking.Rmd", package="challengeR")} ``` # Visualization of raw assessment data ```{r,results='asis'} if (isMultiTask) { cat("The algorithms are ordered according to the computed ranks for each task.") } ``` ## Dot- and boxplot *Dot- and boxplots* for visualizing raw assessment data separately for each algorithm. Boxplots representing descriptive statistics over all cases (median, quartiles and outliers) are combined with horizontally jittered dots representing individual cases. \bigskip ```{r boxplots} boxplot(object, size=.8) ``` ## Podium plot *Podium plots* (see also Eugster et al., 2008) for visualizing raw assessment data. Upper part (spaghetti plot): Participating algorithms are color-coded, and each colored dot in the plot represents a metric value achieved with the respective algorithm. The actual metric value is encoded by the y-axis. Each podium (here: $p$=`r length(ordering_consensus)`) represents one possible rank, ordered from best (1) to last (here: `r length(ordering_consensus)`). The assignment of metric values (i.e. colored dots) to one of the podiums is based on the rank that the respective algorithm achieved on the corresponding case. Note that the plot part above each podium place is further subdivided into $p$ "columns", where each column represents one participating algorithm (here: $p=$ `r length(ordering_consensus)`). Dots corresponding to identical cases are connected by a line, leading to the shown spaghetti structure. Lower part: Bar charts represent the relative frequency for each algorithm to achieve the rank encoded by the podium place. 
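The per-case ranks that drive this podium assignment can also be inspected directly; a hedged sketch, assuming a `challenge` object created with `as.challenge()` as in the toy example further above:

```r
# Per-case ranks underlying the podium and ranking heatmap plots (sketch)
perCase <- rank(challenge, ties.method = "random")   # rank.challenge: one rank per algorithm and case
head(perCase$matlist$T1)
```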
```{r, include=FALSE, fig.keep="none",dev=NULL} plot.new() algs=ordering_consensus l=legend("topright", paste0(1:length(algs),": ",algs), lwd = 1, cex=1.4,seg.len=1.1, title="Rank: Alg.", plot=F) w <- grconvertX(l$rect$w, to='ndc') - grconvertX(0, to='ndc') h<- grconvertY(l$rect$h, to='ndc') - grconvertY(0, to='ndc') addy=max(grconvertY(l$rect$h,"user","inches"),6) ``` ```{r podium,eval=T,fig.width=12, fig.height=addy} #c(bottom, left, top, right op<-par(pin=c(par()$pin[1],6), omd=c(0, 1-w, 0, 1), mar=c(par('mar')[1:3], 0)+c(-.5,0.5,-.5,0), cex.axis=1.5, cex.lab=1.5, cex.main=1.7) oh=grconvertY(l$rect$h,"user","lines")-grconvertY(6,"inches","lines") if (oh>0) par(oma=c(oh,0,0,0)) set.seed(38) podium(object, col=cols, lines.show = T, lines.alpha = .4, dots.cex=.9, ylab="Metric value", layout.heights=c(1,.35), legendfn = function(algs, cols) { legend(par('usr')[2], par('usr')[4], xpd=NA, paste0(1:length(algs),": ",algs), lwd = 1, col = cols, bg = NA, cex=1.4, seg.len=1.1, title="Rank: Alg.") } ) par(op) ``` ## Ranking heatmap *Ranking heatmaps* for visualizing raw assessment data. Each cell $\left( i, A_j \right)$ shows the absolute frequency of cases in which algorithm $A_j$ achieved rank $i$. \bigskip ```{r rankingHeatmap,fig.width=9, fig.height=9,out.width='70%'} rankingHeatmap(object) ``` # Visualization of ranking stability ```{r, child=if (bootstrappingEnabled) system.file("appdir", "visualizationBlobPlots.Rmd", package="challengeR")} ``` ```{r, child=if (bootstrappingEnabled) system.file("appdir", "visualizationViolinPlots.Rmd", package="challengeR")} ``` ## *Significance maps* for visualizing ranking stability based on statistical significance *Significance maps* depict incidence matrices of pairwise significant test results for the one-sided Wilcoxon signed rank test at a 5\% significance level with adjustment for multiple testing according to Holm. Yellow shading indicates that metric values of the algorithm on the x-axis were significantly superior to those from the algorithm on the y-axis, blue color indicates no significant difference. \bigskip ```{r significancemap,fig.width=6, fig.height=6,out.width='200%'} significanceMap(object,alpha=0.05,p.adjust.method="holm") ``` ## Ranking robustness to ranking methods *Line plots* for visualizing rankings robustness across different ranking methods. Each algorithm is represented by one colored line. For each ranking method encoded on the x-axis, the height of the line represents the corresponding rank. Horizontal lines indicate identical ranks for all methods. \bigskip ```{r lineplot,fig.width=8, fig.height=6,out.width='95%'} if (length(object$matlist)<=6 &nrow((object$matlist[[1]]))<=10 ){ methodsplot(challenge_multiple, ordering = ordering_consensus, na.treat=object$call[[1]][[1]]$na.treat) + scale_color_manual(values=cols) }else { x=challenge_multiple for (subt in names(challenge_multiple)){ dd=as.challenge(x[[subt]], value=attr(x,"value"), algorithm=attr(x,"algorithm") , case=attr(x,"case"), annotator = attr(x,"annotator"), by=attr(x,"by"), - smallBetter = !attr(x,"largeBetter"), + smallBetter = attr(x,"smallBetter"), na.treat=object$call[[1]][[1]]$na.treat ) print(methodsplot(dd, ordering = ordering_consensus) + scale_color_manual(values=cols) ) } } ``` ```{r, child=if (isMultiTask) system.file("appdir", "visualizationAcrossTasks.Rmd", package="challengeR")} ``` # References Wiesenfarth, M., Reinke, A., Landman, B.A., Cardoso, M.J., Maier-Hein, L. and Kopp-Schneider, A. (2019). 
Methods and open-source toolkit for analyzing and visualizing challenge results. *arXiv preprint arXiv:1910.05121* M. J. A. Eugster, T. Hothorn, and F. Leisch, “Exploratory and inferential analysis of benchmark experiments,” Institut fuer Statistik, Ludwig-Maximilians-Universitaet Muenchen, Germany, Technical Report 30, 2008. [Online]. Available: http://epub.ub.uni-muenchen.de/4134/. diff --git a/tests/testthat/test-challenge.R b/tests/testthat/test-challenge.R index ea3a179..89b5643 100644 --- a/tests/testthat/test-challenge.R +++ b/tests/testthat/test-challenge.R @@ -1,578 +1,578 @@ test_that("empty attribute 'taskName' raises error for single-task challenge", { data <- rbind( data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A2", value=0.6, case="C1")) expect_error(as.challenge(data, taskName="", algorithm="algo", case="case", value="value", smallBetter=FALSE), "Argument 'taskName' is empty.", fixed=TRUE) }) test_that("only whitespaces in attribute 'taskName' raises error for single-task challenge", { data <- rbind( data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A2", value=0.6, case="C1")) expect_error(as.challenge(data, taskName=" ", algorithm="algo", case="case", value="value", smallBetter=FALSE), "Argument 'taskName' is empty.", fixed=TRUE) }) test_that("attributes are set for single-task challenge with specified task name", { data <- rbind( data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A2", value=0.6, case="C1")) actualChallenge <- as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter=FALSE) expect_equal(attr(actualChallenge, "annotator"), NULL) expect_equal(attr(actualChallenge, "by"), "task") - expect_equal(attr(actualChallenge, "largeBetter"), TRUE) + expect_equal(attr(actualChallenge, "smallBetter"), FALSE) expect_equal(attr(actualChallenge, "check"), TRUE) expect_equal(as.vector(actualChallenge$T1$algo), c("A1", "A2")) expect_equal(as.vector(actualChallenge$T1$value), c(0.8, 0.6)) expect_equal(as.vector(actualChallenge$T1$case), c("C1", "C1")) expect_equal(as.vector(actualChallenge$T1$task), c("T1", "T1")) # expect that there's no attribute "task" expect_equal(attr(actualChallenge, "task"), NULL) expect_equal(attr(actualChallenge$T1, "task"), NULL) expect_equal(attr(actualChallenge$T2, "task"), NULL) }) test_that("attributes are set for single-task challenge with dummy task name", { data <- rbind( data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A2", value=0.6, case="C1")) actualChallenge <- as.challenge(data, algorithm="algo", case="case", value="value", smallBetter=FALSE) expect_equal(attr(actualChallenge, "annotator"), NULL) expect_equal(attr(actualChallenge, "by"), "task") - expect_equal(attr(actualChallenge, "largeBetter"), TRUE) + expect_equal(attr(actualChallenge, "smallBetter"), FALSE) expect_equal(attr(actualChallenge, "check"), TRUE) expect_equal(as.vector(actualChallenge$dummyTask$algo), c("A1", "A2")) expect_equal(as.vector(actualChallenge$dummyTask$value), c(0.8, 0.6)) expect_equal(as.vector(actualChallenge$dummyTask$case), c("C1", "C1")) expect_equal(as.vector(actualChallenge$dummyTask$task), c("dummyTask", "dummyTask")) # expect that there's no attribute "task" expect_equal(attr(actualChallenge, "task"), NULL) expect_equal(attr(actualChallenge$dummyTask, "task"), NULL) expect_equal(attr(actualChallenge$dummyTask, "task"), NULL) }) test_that("leading and trailing whitespaces are trimmed for attribute 'taskName'", { data <- rbind( data.frame(algo="A1", value=0.8, 
case="C1"), data.frame(algo="A2", value=0.6, case="C1")) actualChallenge <- as.challenge(data, taskName=" T1 ", algorithm="algo", case="case", value="value", smallBetter=FALSE) expect_equal(as.vector(actualChallenge$T1$algo), c("A1", "A2")) }) test_that("attributes are set for multi-task challenge", { dataTask1 <- cbind(task="T1", rbind( data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A2", value=0.6, case="C1") )) dataTask2 <- cbind(task="T2", rbind( data.frame(algo="A1", value=0.2, case="C1"), data.frame(algo="A2", value=0.3, case="C1") )) data <- rbind(dataTask1, dataTask2) actualChallenge <- as.challenge(data, by="task", algorithm="algo", case="case", value="value", smallBetter=TRUE) expect_equal(attr(actualChallenge, "annotator"), NULL) expect_equal(attr(actualChallenge, "by"), "task") - expect_equal(attr(actualChallenge, "largeBetter"), FALSE) + expect_equal(attr(actualChallenge, "smallBetter"), TRUE) expect_equal(attr(actualChallenge, "check"), TRUE) expect_equal(as.vector(actualChallenge$T1$algo), c("A1", "A2")) expect_equal(as.vector(actualChallenge$T1$value), c(0.8, 0.6)) expect_equal(as.vector(actualChallenge$T1$case), c("C1", "C1")) expect_equal(as.vector(actualChallenge$T1$task), c("T1", "T1")) expect_equal(as.vector(actualChallenge$T2$algo), c("A1", "A2")) expect_equal(as.vector(actualChallenge$T2$value), c(0.2, 0.3)) expect_equal(as.vector(actualChallenge$T2$case), c("C1", "C1")) expect_equal(as.vector(actualChallenge$T2$task), c("T2", "T2")) # expect that there's no attribute "task" expect_equal(attr(actualChallenge, "task"), NULL) expect_equal(attr(actualChallenge$T1, "task"), NULL) expect_equal(attr(actualChallenge$T2, "task"), NULL) }) test_that("attributes are set for multi-task challenge with sanity check disabled", { dataTask1 <- cbind(task="T1", rbind( data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A2", value=0.6, case="C1") )) dataTask2 <- cbind(task="T2", rbind( data.frame(algo="A1", value=0.2, case="C1"), data.frame(algo="A2", value=0.3, case="C1") )) data <- rbind(dataTask1, dataTask2) actualChallenge <- as.challenge(data, by="task", algorithm="algo", case="case", value="value", smallBetter=TRUE, check=FALSE) expect_equal(attr(actualChallenge, "annotator"), NULL) expect_equal(attr(actualChallenge, "by"), "task") - expect_equal(attr(actualChallenge, "largeBetter"), FALSE) + expect_equal(attr(actualChallenge, "smallBetter"), TRUE) expect_equal(attr(actualChallenge, "check"), FALSE) expect_equal(as.vector(actualChallenge$algo), c("A1", "A2", "A1", "A2")) expect_equal(as.vector(actualChallenge$value), c(0.8, 0.6, 0.2, 0.3)) expect_equal(as.vector(actualChallenge$case), c("C1", "C1", "C1", "C1")) expect_equal(as.vector(actualChallenge$task), c("T1", "T1", "T2", "T2")) }) test_that("attribute 'taskName' is ignored for multi-task challenge", { dataTask1 <- cbind(task="T1", rbind( data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A2", value=0.6, case="C1") )) dataTask2 <- cbind(task="T2", rbind( data.frame(algo="A1", value=0.2, case="C1"), data.frame(algo="A2", value=0.3, case="C1") )) data <- rbind(dataTask1, dataTask2) expect_warning(as.challenge(data, taskName="T1", by="task", algorithm="algo", case="case", value="value", smallBetter=TRUE), "Argument 'taskName' is ignored for multi-task data set.", fixed=TRUE) }) test_that("missing algorithm performances are added as NAs for single-task challenge", { data <- rbind( data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A2", value=0.6, case="C2")) 
expect_message(actualChallenge <- as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter=FALSE), "Performance of not all algorithms has been observed for all cases. Therefore, missings have been inserted in the following cases:", fixed=TRUE) expect_equal(as.vector(actualChallenge$T1$algo), c("A1", "A1", "A2", "A2")) expect_equal(as.vector(actualChallenge$T1$value), c(0.8, NA, NA, 0.6)) expect_equal(as.vector(actualChallenge$T1$case), c("C1", "C2", "C1", "C2")) expect_equal(as.vector(actualChallenge$T1$task), c("T1", "T1", "T1", "T1")) }) test_that("multi-task data set containing one task is interpreted as single-task data set, missing algorithm performances are added", { data <- cbind(task="T1", rbind( data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A2", value=0.6, case="C2") )) # do not specify parameter "by" to interpret multi-task data set as single-task data set expect_message(actualChallenge <- as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter=FALSE), "Performance of not all algorithms has been observed for all cases. Therefore, missings have been inserted in the following cases:", fixed=TRUE) expect_equal(as.vector(actualChallenge$T1$algo), c("A1", "A1", "A2", "A2")) expect_equal(as.vector(actualChallenge$T1$value), c(0.8, NA, NA, 0.6)) expect_equal(as.vector(actualChallenge$T1$case), c("C1", "C2", "C1", "C2")) }) test_that("missing algorithm performances are added as NAs for multi-task challenge (2 tasks in data set)", { dataTask1 <- cbind(task="T1", rbind( data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A2", value=0.6, case="C2") )) dataTask2 <- cbind(task="T2", rbind( data.frame(algo="A1", value=0.2, case="C1"), data.frame(algo="A1", value=0.3, case="C2"), data.frame(algo="A2", value=0.4, case="C1") )) data <- rbind(dataTask1, dataTask2) expect_message(actualChallenge <- as.challenge(data, by="task", algorithm="algo", case="case", value="value", smallBetter=FALSE), "Performance of not all algorithms has been observed for all cases in task 'T1'. 
Therefore, missings have been inserted in the following cases:", fixed=TRUE) expect_equal(as.vector(actualChallenge$T1$algo), c("A1", "A1", "A2", "A2")) expect_equal(as.vector(actualChallenge$T1$value), c(0.8, NA, NA, 0.6)) expect_equal(as.vector(actualChallenge$T1$case), c("C1", "C2", "C1", "C2")) expect_equal(as.vector(actualChallenge$T1$task), c("T1", "T1", "T1", "T1")) expect_equal(as.vector(actualChallenge$T2$algo), c("A1", "A1", "A2", "A2")) expect_equal(as.vector(actualChallenge$T2$value), c(0.2, 0.3, 0.4, NA)) expect_equal(as.vector(actualChallenge$T2$case), c("C1", "C2", "C1", "C2")) expect_equal(as.vector(actualChallenge$T2$task), c("T2", "T2", "T2", "T2")) }) test_that("missing algorithm performances are not added as NA with sanity check disabled for single-task challenge", { data <- rbind( data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A2", value=0.6, case="C2")) actualChallenge <- as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter=FALSE, check=FALSE) expect_equal(as.vector(actualChallenge$algo), c("A1", "A2")) expect_equal(as.vector(actualChallenge$value), c(0.8, 0.6)) expect_equal(as.vector(actualChallenge$case), c("C1", "C2")) }) test_that("missing algorithm performances are not added as NA with sanity check disabled for multi-task challenge (2 tasks in data set)", { dataTask1 <- cbind(task="T1", rbind( data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A2", value=0.6, case="C2") )) dataTask2 <- cbind(task="T2", rbind( data.frame(algo="A1", value=0.2, case="C1"), data.frame(algo="A1", value=0.3, case="C2"), data.frame(algo="A2", value=0.4, case="C1") )) data <- rbind(dataTask1, dataTask2) actualChallenge <- as.challenge(data, by="task", algorithm="algo", case="case", value="value", smallBetter=FALSE, check=FALSE) expect_equal(as.vector(actualChallenge$algo), c("A1", "A2", "A1", "A1", "A2")) expect_equal(as.vector(actualChallenge$value), c(0.8, 0.6, 0.2, 0.3, 0.4)) expect_equal(as.vector(actualChallenge$case), c("C1", "C2", "C1", "C2", "C1")) }) test_that("case cannot appear more than once per algorithm for single-task challenge", { data <- rbind( data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A1", value=0.8, case="C1")) expect_error(as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter=FALSE), "The following case(s) appear(s) more than once for the same algorithm. Please revise. Or are you considering a multi-task challenge and forgot to specify argument 'by'?\nCase(s): C1", fixed=TRUE) }) test_that("multi-task data set containing one task is interpreted as single-task data set, case cannot appear more than once per algorithm", { data <- cbind(task="T1", rbind( data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A1", value=0.8, case="C1") )) # do not specify parameter "by" to interpret multi-task data set as single-task data set expect_error(as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter=FALSE), "The following case(s) appear(s) more than once for the same algorithm. Please revise. 
Or are you considering a multi-task challenge and forgot to specify argument 'by'?\nCase(s): C1", fixed=TRUE) }) test_that("case cannot appear more than once per algorithm for multi-task challenge (1 task in data set)", { data <- cbind(task="T1", rbind( data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A1", value=0.8, case="C1") )) expect_error(as.challenge(data, by="task", algorithm="algo", case="case", value="value", smallBetter=FALSE), "The following case(s) appear(s) more than once for the same algorithm. Please revise. Or are you considering a multi-task challenge and forgot to specify argument 'by'?\nCase(s): C1", fixed=TRUE) }) test_that("cases cannot appear more than once per algorithm for single-task challenge", { data <- rbind( data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A2", value=0.7, case="C1"), data.frame(algo="A1", value=0.5, case="C2"), data.frame(algo="A2", value=0.6, case="C2"), data.frame(algo="A2", value=0.6, case="C2")) expect_error(as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter=FALSE), "The following case(s) appear(s) more than once for the same algorithm. Please revise. Or are you considering a multi-task challenge and forgot to specify argument 'by'?\nCase(s): C1, C2", fixed=TRUE) }) test_that("cases cannot appear more than once per algorithm for multi-task challenge (1 task in data set)", { data <- cbind(task="T1", rbind( data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A2", value=0.7, case="C1"), data.frame(algo="A1", value=0.5, case="C2"), data.frame(algo="A2", value=0.6, case="C2"), data.frame(algo="A2", value=0.6, case="C2") )) expect_error(as.challenge(data, by="task", algorithm="algo", case="case", value="value", smallBetter=FALSE), "The following case(s) appear(s) more than once for the same algorithm. Please revise. Or are you considering a multi-task challenge and forgot to specify argument 'by'?\nCase(s): C1, C2", fixed=TRUE) }) test_that("cases cannot appear more than once per algorithm for multi-task challenge (2 tasks in data set)", { dataTask1 <- cbind(task="T1", rbind( data.frame(algo="A1", value=0.8, case="C1") # let T1 pass )) dataTask2 <- cbind(task="T2", rbind( data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A2", value=0.7, case="C1"), data.frame(algo="A1", value=0.5, case="C2"), data.frame(algo="A2", value=0.6, case="C2"), data.frame(algo="A2", value=0.6, case="C2") )) data <- rbind(dataTask1, dataTask2) expect_error(as.challenge(data, by="task", algorithm="algo", case="case", value="value", smallBetter=FALSE), "The following case(s) appear(s) more than once for the same algorithm in task 'T2'. Please revise.\nCase(s): C1, C2", fixed=TRUE) }) test_that("cases cannot appear more than once per algorithm when missing data was added for single-task challenge", { data <- rbind( data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A2", value=0.6, case="C2"), data.frame(algo="A2", value=0.6, case="C2")) expect_error(as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter=FALSE), "The following case(s) appear(s) more than once for the same algorithm. Please revise. 
Or are you considering a multi-task challenge and forgot to specify argument 'by'?\nCase(s): C1, C2", fixed=TRUE) }) test_that("user is notified of duplicate cases when multi-task data set is interpreted as single-task data set (2 tasks in data set)", { dataTask1 <- cbind(task="T1", rbind( data.frame(algo="A1", value=0.8, case="C1") )) dataTask2 <- cbind(task="T2", rbind( data.frame(algo="A1", value=0.8, case="C1") )) data <- rbind(dataTask1, dataTask2) # do not specify parameter "by" to interpret multi-task data set as single-task data set expect_error(as.challenge(data, taskName="New task", algorithm="algo", case="case", value="value", smallBetter=FALSE), "The following case(s) appear(s) more than once for the same algorithm. Please revise. Or are you considering a multi-task challenge and forgot to specify argument 'by'?\nCase(s): C1", fixed=TRUE) }) test_that("user is notified of missing algorithm performance when multi-task data set is interpreted as single-task data set (2 tasks in data set)", { dataTask1 <- cbind(task="T1", rbind( data.frame(algo="A1", value=0.8, case="C1") )) dataTask2 <- cbind(task="T2", rbind( data.frame(algo="A2", value=0.6, case="C2") )) data <- rbind(dataTask1, dataTask2) # do not specify parameter "by" to interpret multi-task data set as single-task data set expect_message(as.challenge(data, taskName="New task", algorithm="algo", case="case", value="value", smallBetter=FALSE), "Performance of not all algorithms has been observed for all cases. Therefore, missings have been inserted in the following cases:", fixed=TRUE) }) test_that("NAs are replaced by numeric value for single-task challenge", { data <- rbind( data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A1", value=NA, case="C2"), data.frame(algo="A2", value=0.6, case="C1"), data.frame(algo="A2", value=NA, case="C2")) actualChallenge <- as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter=FALSE, na.treat=0) expect_equal(as.vector(actualChallenge$T1$algo), c("A1", "A1", "A2", "A2")) expect_equal(as.vector(actualChallenge$T1$value), c(0.8, 0.0, 0.6, 0.0)) expect_equal(as.vector(actualChallenge$T1$case), c("C1", "C2", "C1", "C2")) }) test_that("NAs are replaced by numeric value for multi-task challenge", { dataTask1 <- cbind(task="T1", rbind( data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A1", value=NA, case="C2") )) dataTask2 <- cbind(task="T2", rbind( data.frame(algo="A2", value=NA, case="C1"), data.frame(algo="A2", value=0.5, case="C2") )) data <- rbind(dataTask1, dataTask2) actualChallenge <- as.challenge(data, by="task", algorithm="algo", case="case", value="value", smallBetter=FALSE, na.treat=0) expect_equal(as.vector(actualChallenge$T1$algo), c("A1", "A1")) expect_equal(as.vector(actualChallenge$T1$value), c(0.8, 0.0)) expect_equal(as.vector(actualChallenge$T1$case), c("C1", "C2")) expect_equal(as.vector(actualChallenge$T2$algo), c("A2", "A2")) expect_equal(as.vector(actualChallenge$T2$value), c(0.0, 0.5)) expect_equal(as.vector(actualChallenge$T2$case), c("C1", "C2")) }) test_that("NAs are replaced by function value for single-task challenge", { data <- rbind( data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A1", value=NA, case="C2"), data.frame(algo="A2", value=0.6, case="C1"), data.frame(algo="A2", value=NA, case="C2")) replacementFunction <- function(x) { 2 } actualChallenge <- as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter=FALSE, na.treat=replacementFunction) 
expect_equal(as.vector(actualChallenge$T1$algo), c("A1", "A1", "A2", "A2")) expect_equal(as.vector(actualChallenge$T1$value), c(0.8, 2.0, 0.6, 2.0)) expect_equal(as.vector(actualChallenge$T1$case), c("C1", "C2", "C1", "C2")) }) test_that("NAs are replaced by function value for multi-task challenge", { dataTask1 <- cbind(task="T1", rbind( data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A1", value=NA, case="C2") )) dataTask2 <- cbind(task="T2", rbind( data.frame(algo="A2", value=NA, case="C1"), data.frame(algo="A2", value=0.5, case="C2") )) data <- rbind(dataTask1, dataTask2) replacementFunction <- function(x) { 2 } actualChallenge <- as.challenge(data, by="task", algorithm="algo", case="case", value="value", smallBetter=FALSE, na.treat=replacementFunction) expect_equal(as.vector(actualChallenge$T1$algo), c("A1", "A1")) expect_equal(as.vector(actualChallenge$T1$value), c(0.8, 2.0)) expect_equal(as.vector(actualChallenge$T1$case), c("C1", "C2")) expect_equal(as.vector(actualChallenge$T2$algo), c("A2", "A2")) expect_equal(as.vector(actualChallenge$T2$value), c(2.0, 0.5)) expect_equal(as.vector(actualChallenge$T2$case), c("C1", "C2")) }) test_that("NAs are removed for single-task challenge", { data <- rbind( data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A1", value=NA, case="C2"), data.frame(algo="A2", value=0.6, case="C1"), data.frame(algo="A2", value=NA, case="C2")) actualChallenge <- as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter=FALSE, na.treat="na.rm") expect_equal(as.vector(actualChallenge$T1$algo), c("A1", "A2")) expect_equal(as.vector(actualChallenge$T1$value), c(0.8, 0.6)) expect_equal(as.vector(actualChallenge$T1$case), c("C1", "C1")) }) test_that("NAs are removed for multi-task challenge", { dataTask1 <- cbind(task="T1", rbind( data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A1", value=NA, case="C2") )) dataTask2 <- cbind(task="T2", rbind( data.frame(algo="A2", value=NA, case="C1"), data.frame(algo="A2", value=0.5, case="C2") )) data <- rbind(dataTask1, dataTask2) actualChallenge <- as.challenge(data, by="task", algorithm="algo", case="case", value="value", smallBetter=FALSE, na.treat="na.rm") expect_equal(as.vector(actualChallenge$T1$algo), c("A1")) expect_equal(as.vector(actualChallenge$T1$value), c(0.8)) expect_equal(as.vector(actualChallenge$T1$case), c("C1")) expect_equal(as.vector(actualChallenge$T2$algo), c("A2")) expect_equal(as.vector(actualChallenge$T2$value), c(0.5)) expect_equal(as.vector(actualChallenge$T2$case), c("C2")) }) test_that("automatically added NAs are replaced by numeric value for single-task challenge", { data <- rbind( data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A2", value=0.6, case="C2")) expect_message(actualChallenge <- as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter=FALSE, na.treat=0), "Performance of not all algorithms has been observed for all cases. 
Therefore, missings have been inserted in the following cases:", fixed=TRUE) expect_equal(as.vector(actualChallenge$T1$algo), c("A1", "A1", "A2", "A2")) expect_equal(as.vector(actualChallenge$T1$value), c(0.8, 0.0, 0.0, 0.6)) expect_equal(as.vector(actualChallenge$T1$case), c("C1", "C2", "C1", "C2")) }) test_that("automatically added NAs are replaced by numeric value for multi-task challenge", { dataTask1 <- cbind(task="T1", rbind( data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A2", value=0.6, case="C2") )) dataTask2 <- cbind(task="T2", rbind( data.frame(algo="A1", value=0.2, case="C1"), data.frame(algo="A1", value=0.3, case="C2"), data.frame(algo="A2", value=0.4, case="C1") )) data <- rbind(dataTask1, dataTask2) expect_message(actualChallenge <- as.challenge(data, by="task", algorithm="algo", case="case", value="value", smallBetter=FALSE, na.treat=0), "Performance of not all algorithms has been observed for all cases in task 'T1'. Therefore, missings have been inserted in the following cases:", fixed=TRUE) expect_equal(as.vector(actualChallenge$T1$algo), c("A1", "A1", "A2", "A2")) expect_equal(as.vector(actualChallenge$T1$value), c(0.8, 0.0, 0.0, 0.6)) expect_equal(as.vector(actualChallenge$T1$case), c("C1", "C2", "C1", "C2")) expect_equal(as.vector(actualChallenge$T2$algo), c("A1", "A1", "A2", "A2")) expect_equal(as.vector(actualChallenge$T2$value), c(0.2, 0.3, 0.4, 0.0)) expect_equal(as.vector(actualChallenge$T2$case), c("C1", "C2", "C1", "C2")) }) test_that("automatically added NAs are removed for single-task challenge", { data <- rbind( data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A2", value=0.6, case="C2")) expect_message(actualChallenge <- as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter=FALSE, na.treat="na.rm"), "Performance of not all algorithms has been observed for all cases. Therefore, missings have been inserted in the following cases:", fixed=TRUE) expect_equal(as.vector(actualChallenge$T1$algo), c("A1", "A2")) expect_equal(as.vector(actualChallenge$T1$value), c(0.8, 0.6)) expect_equal(as.vector(actualChallenge$T1$case), c("C1", "C2")) }) test_that("automatically added NAs are removed for multi-task challenge", { dataTask1 <- cbind(task="T1", rbind( data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A2", value=0.6, case="C2") )) dataTask2 <- cbind(task="T2", rbind( data.frame(algo="A1", value=0.2, case="C1"), data.frame(algo="A1", value=0.3, case="C2"), data.frame(algo="A2", value=0.4, case="C1") )) data <- rbind(dataTask1, dataTask2) expect_message(actualChallenge <- as.challenge(data, by="task", algorithm="algo", case="case", value="value", smallBetter=FALSE, na.treat="na.rm"), "Performance of not all algorithms has been observed for all cases in task 'T1'. 
Therefore, missings have been inserted in the following cases:", fixed=TRUE) - + expect_equal(as.vector(actualChallenge$T1$algo), c("A1", "A2")) expect_equal(as.vector(actualChallenge$T1$value), c(0.8, 0.6)) expect_equal(as.vector(actualChallenge$T1$case), c("C1", "C2")) expect_equal(as.vector(actualChallenge$T2$algo), c("A1", "A1", "A2")) expect_equal(as.vector(actualChallenge$T2$value), c(0.2, 0.3, 0.4)) expect_equal(as.vector(actualChallenge$T2$case), c("C1", "C2", "C1")) })
diff --git a/vignettes/Overview.Rmd b/vignettes/Overview.Rmd index fae9849..e374a91 100644 --- a/vignettes/Overview.Rmd +++ b/vignettes/Overview.Rmd @@ -1,370 +1,370 @@
---
title: "How to use challengeR"
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{How to use challengeR}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r, include = FALSE}
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
```

# Introduction

This document gives an overview of the classes, methods and steps used in the tutorial scripts and aims to provide a deeper understanding of the analysis and visualization toolkit. The overview is divided into sections that follow the order of usage.

# Ranking configuration

Once the data has been loaded (either manually or from a .csv file), the first step is to create a challenge object. Then, the ranking method is chosen and configured.

## Define challenge object

Challenges can be single- or multi-task. We define a challenge task as a subproblem to be solved in the scope of a challenge for which a dedicated ranking/leaderboard is provided (if any). The assessment method (e.g. metric(s) applied) may vary across different tasks of a challenge.

For example, a segmentation challenge may comprise three tasks:

1) segmentation of the liver
2) segmentation of the kidney
3) segmentation of the spleen

In the context of the visualization toolkit, we differentiate between challenges that comprise only a single task ("single-task challenge") and challenges with multiple tasks, each containing different results and rankings ("multi-task challenge"). In the latter case, the report can be configured directly across all specified tasks by defining a task column in the data matrix.

The first step is to create a challenge object; the file "challengeR.R" is used for this purpose and is analysed in the following. The code below refers to the constructor:

```{r, eval=F, echo=T}
# challengeR.R
as.challenge=function(object, value, algorithm , case=NULL, taskName=NULL, by=NULL, annotator=NULL, smallBetter=FALSE, na.treat=NULL, check=TRUE)
```

Each parameter corresponds to:

- object: the object that will be returned, in this specific case the data set itself
- value: column corresponding to the values of the metric (only one metric is supported)
- algorithm: column corresponding to the algorithm identifiers
- case: column corresponding to the test case identifiers
- taskName: optional task name (string) for single-task challenges; it is displayed in plot titles
- by: (="task"), use it for a multi-task challenge. If the parameter is not specified, the challenge will automatically be interpreted as a single-task challenge.
- annotator: (currently not implemented) specify here if there is more than one annotator
- smallBetter: specify whether small metric values indicate better performance
- na.treat: (optional) specify how missing values (NA) are treated, e.g. set them to the worst possible metric value. It does not need to be specified if the user knows for sure that the data set contains no NAs, or if the data set contains NAs and rank-then-aggregate is applied.
- check: performs a sanity check if TRUE. The sanity check can be performed for both single- and multi-task challenges. It checks for missing algorithm performances and for test cases that appear more than once per algorithm.

An example of how to use it (for a multi-task challenge):

```{r, eval=F, echo=T}
# challengeR.R
challenge=as.challenge(data_matrix, value="value", algorithm="alg_name", case="case", by="task", smallBetter = FALSE)
```

! Take into account that for single-task challenges, the "by" parameter should not be configured !
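As a complement to the multi-task example above, a minimal single-task sketch might look as follows; the object name `data_matrix_single`, the column names and the choice na.treat=0 (which assumes that 0 is the worst possible metric value) are illustrative assumptions, not prescribed values:

```{r, eval=F, echo=T}
# Illustrative single-task sketch: taskName is given instead of "by",
# and NAs are replaced by 0 (assuming 0 is the worst possible metric value)
challenge_single=as.challenge(data_matrix_single,
                              taskName="T1",
                              value="value",
                              algorithm="alg_name",
                              case="case",
                              smallBetter=FALSE,
                              na.treat=0)
```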
For single-task challenges, if the data matrix contains a task column, it is easier to create a subset of the data matrix that only includes the values for that specific task:

```{r, eval=F, echo=T}
dataSubset=subset(data_matrix, task=="TASK_NAME")
```

In this way, "dataSubset" can then be used to create the challenge object.

## Configure ranking method

The files "wrapper.R", "aaggregate.R" and "Rank.aggregated.R" are used. In order to configure the ranking methods, the following parameters are considered:

- FUN: aggregation function, e.g. mean, median, min, max, or e.g. function(x) quantile(x, probs=0.05)
- na.treat: treatment of missing data / null values (it needs to be specified again here because it was an optional parameter when the challenge object was created): either "na.rm" to remove missing data, a numeric value (e.g. 0) to replace missings, or a function, e.g. function(x) min(x)
- ties.method: a character string specifying how ties (two items with the same rank) are treated, see ?base::rank or click on [*Strategies for assigning rankings*](https://en.wikipedia.org/wiki/Ranking#Strategies_for_assigning_rankings) for more details
- alpha: significance level (only for significance ranking)
- p.adjust.method: method for adjustment for multiple testing, see ?p.adjust

Different ranking methods are available:

#### Metric-based aggregation -> aggregateThenRank method

```{r, eval=F, echo=T}
# wrapper.R
aggregateThenRank=function(object,FUN,ties.method = "min",...){
  object %>%
    aggregate(FUN=FUN,...) %>%
    rank(ties.method = ties.method)
}
```

First, (object %>% aggregate), the metric values for each algorithm are aggregated across all cases using the specified aggregation function:

```{r, eval=F, echo=T}
# aaggregate.R
aggregate.challenge=function(x, FUN=mean, na.treat, alpha=0.05, p.adjust.method="none", parallel=FALSE, progress="none", ...)
```

-Second, (aggregate %>% rank), the aggregated metric values are converted into a ranking list, following the largeBetter argument defined above:
+Second, (aggregate %>% rank), the aggregated metric values are converted into a ranking list, following the smallBetter argument defined above:

```{r, eval=F, echo=T}
# Rank.aggregated.R
rank.aggregated <-function(object, ties.method="min",
-                          largeBetter,
+                          smallBetter,
                           ...)
```

An example of "aggregate-then-rank" use (taking the mean for aggregation):

```{r, eval=F, echo=T}
ranking=challenge%>%aggregateThenRank(FUN = mean,
                                      na.treat=0,
                                      ties.method = "min"
                                      )
```
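Other aggregation functions can be plugged in the same way; a sketch using the median and the 5% quantile mentioned in the parameter list above (both choices are purely illustrative):

```{r, eval=F, echo=T}
# Illustrative alternatives to the mean
ranking_median=challenge%>%aggregateThenRank(FUN = median, na.treat=0, ties.method = "min")
ranking_q05=challenge%>%aggregateThenRank(FUN = function(x) quantile(x, probs=0.05), na.treat=0, ties.method = "min")
```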
#### Case-based aggregation -> rankThenAggregate method

```{r, eval=F, echo=T}
# wrapper.R
rankThenAggregate=function(object, FUN, ties.method = "min" ){
  object %>%
    rank(ties.method = ties.method)%>%
    aggregate(FUN=FUN) %>%
    rank(ties.method = ties.method)
}
```

First, (object %>% rank), a ranking is created for each case across all algorithms. Missing values are assigned the worst rank:

```{r, eval=F, echo=T}
# rrank.R
rank.challenge=function(object, x, ties.method="min", ...)
```

Second, (rank %>% aggregate), the ranks per case are aggregated for each algorithm:

```{r, eval=F, echo=T}
# aaggregate.R
aggregate.ranked <-function(x, FUN=mean, ... )
```

Third, (aggregate %>% rank), the previously ranked and aggregated values are converted into a ranking list again:

```{r, eval=F, echo=T}
# Rank.aggregated.R
rank.aggregated <-function(object, ties.method="min",
-                          largeBetter,
+                          smallBetter,
                           ...)
```

An example of "rank-then-aggregate" with arguments as above (taking the mean for aggregation):

```{r, eval=F, echo=T}
ranking=challenge%>%rankThenAggregate(FUN = mean,
                                      ties.method = "min"
                                      )
```

#### Significance ranking -> testThenRank method

This method is similar to "aggregateThenRank", but uses a fixed "significance" aggregation function.

```{r, eval=F, echo=T}
# wrapper.R
testThenRank=function(object,FUN,ties.method = "min",...){
  object %>%
    aggregate(FUN="significance",...) %>%
    rank(ties.method = ties.method)
}
```

First, (object %>% aggregate), the metric values are aggregated across all cases. Here, a pairwise comparison between all algorithms is performed using statistical tests. For each algorithm, it is counted how often the algorithm is significantly superior to others; this count is saved as the aggregated value:

! No need to specify the function again, it is already set as "significance" !

```{r, eval=F, echo=T}
# aaggregate.R
aggregate.challenge=function(x, FUN="significance", na.treat, alpha=0.05, p.adjust.method="none", parallel=FALSE, progress="none", ...)
```

Second, (aggregate %>% rank), the aggregated values are converted into a ranking list:

```{r, eval=F, echo=T}
# Rank.aggregated.R
rank.aggregated <-function(object, ties.method="min",
-                          largeBetter,
+                          smallBetter,
                           ...)
```

An example for test-then-rank based on the Wilcoxon signed rank test:

```{r, eval=F, echo=T}
ranking=challenge%>%testThenRank(alpha=0.05,
                                 p.adjust.method="none",
                                 na.treat=0,
                                 ties.method = "min"
                                 )
```
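If an adjustment for multiple testing is desired, p.adjust.method accepts the methods listed in ?p.adjust; a sketch using the Holm procedure (the choice of "holm" here is illustrative):

```{r, eval=F, echo=T}
# Illustrative: test-then-rank with Holm adjustment for multiple testing
ranking_holm=challenge%>%testThenRank(alpha=0.05,
                                      p.adjust.method="holm",
                                      na.treat=0,
                                      ties.method = "min")
```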
# Uncertainty analysis (bootstrapping)

The assessment of the stability of rankings across different ranking methods with respect to both sampling variability and variability across tasks is of major importance. In order to investigate ranking stability, the bootstrap approach can be used for a given method. The procedure consists of:

1. Using the available data sets to generate N bootstrap datasets
2. Performing the ranking on each bootstrap dataset

The ranking strategy is performed repeatedly on each bootstrap sample. One bootstrap sample of a task with n test cases consists of n test cases randomly drawn with replacement from this task. A total of b of these bootstrap samples are drawn (e.g., b = 1000).

Bootstrap approaches can be evaluated in two ways: either the rankings for each bootstrap sample are evaluated for each algorithm, or the distribution of correlations or pairwise distances between the ranking list based on the full assessment data and the ranking lists based on the individual bootstrap samples is explored.

! Note that this step is optional; it can be omitted and the report generated directly. !

The following method is used to perform ranking on the generated bootstrap datasets:

```{r, eval=F, echo=T}
# Bootstrap.R
bootstrap.ranked=function(object, nboot, parallel=FALSE, progress="text", ...)
```

- nboot: number of bootstrap datasets to generate
- parallel: TRUE when using multiple CPUs
- progress: when set to "text", a progress bar indicating the progress of the bootstrapping is shown

An example of bootstrapping using multiple CPUs (8 CPUs):

```{r, eval=F, echo=T}
library(doParallel)
registerDoParallel(cores=8)
set.seed(1)
ranking_bootstrapped=ranking%>%bootstrap(nboot=1000, parallel=TRUE, progress = "none")
stopImplicitCluster()
```

# Report generation

Finally, the report is generated. For this last step, take into account whether the uncertainty analysis was performed. If the uncertainty analysis was not performed, use:

```{r, eval=F, echo=T}
# Report.R
report.ranked=function(object, file, title="", colors=default_colors, format="PDF", latex_engine="pdflatex", open=TRUE, ...)
```

If the uncertainty analysis was performed, use:

```{r, eval=F, echo=T}
# Report.R
report.bootstrap=function(object, file, title="", colors=default_colors, format="PDF", latex_engine="pdflatex", clean=TRUE, open=TRUE, ...)
```

The report can be generated in different formats; the parameters correspond to:

- file: name of the output file. If the output path is not specified, the working directory is used. If the file is specified but does not have a file extension, an extension is added automatically according to the output format given in *format*. If omitted, the report is created in a temporary folder with file name "report".
- title: title of the report
- colors: color coding of the algorithms across all figures. It can be changed, e.g. to colors=viridisLite::inferno, which is designed to be perceptually uniform, both in regular form and when converted to black-and-white, and to be perceivable by readers with the most common form of color blindness. See the package viridis for further similar functions.
- format: output format ("PDF", "HTML" or "Word")
- latex_engine: LaTeX engine for producing PDF output ("pdflatex", "lualatex", "xelatex")
- clean: optional. TRUE cleans intermediate files that are created during rendering; FALSE retains intermediate files, such as separate files for each figure.
- open: specifies whether the report is opened after generation

An example of how to generate the report for a *single-task* challenge:

```{r, eval=F, echo=T}
ranking_bootstrapped %>%
  report(title="singleTaskChallengeExample",
         file = "filename",
         format = "PDF",
         latex_engine="pdflatex",
         clean=TRUE
         )
```
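If the uncertainty analysis was skipped, a report can be generated directly from the ranking object in the same way, since report() is then expected to dispatch to report.ranked; the title and file name below are illustrative:

```{r, eval=F, echo=T}
# Illustrative: report generation without the optional bootstrap step
ranking %>%
  report(title="singleTaskChallengeExample",
         file = "filename",
         format = "PDF",
         latex_engine="pdflatex"
         )
```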
! Note that the code differs slightly for single- and multi-task challenges. !

For multi-task challenges, a consensus ranking (rank aggregation across tasks) has to be given additionally. Consensus relations “synthesize” the information in the elements of a relation ensemble into a single relation, often by minimizing a criterion function measuring how dissimilar consensus candidates are from the (elements of) the ensemble (the so-called “optimization approach”). The following method is used:

```{r, eval=F, echo=T}
# consensus.R
consensus.ranked.list=function(object, method, ...)
```

- method: consensus ranking method, see ?relation_consensus for the different methods to derive a consensus ranking.

An example of computing the ranking consensus across tasks, here the consensus ranking according to mean ranks across tasks:

```{r, eval=F, echo=T}
meanRanks=ranking%>%consensus(method = "euclidean")
```

Generate the report as above, but with the additional specification of the consensus ranking:

```{r, eval=F, echo=T}
ranking_bootstrapped %>%
  report(consensus=meanRanks,
         title="multiTaskChallengeExample",
         file = "filename",
         format = "PDF",
         latex_engine="pdflatex"
         )
```

# Features

- Reports for subsets (top list) of algorithms: Use e.g. `subset(ranking_bootstrapped, top=3) %>% report(...)` (or `subset(ranking, top=3) %>% report(...)` for a report without bootstrap results) to only show the top 3 algorithms according to the chosen ranking methods, where `ranking_bootstrapped` and `ranking` are objects as defined in the examples above. The line plot for ranking robustness can be used to check whether algorithms performing well under other ranking methods are excluded. Bootstrapping still takes the entire uncertainty into account. Podium plots and ranking heatmaps neglect excluded algorithms. This feature is only available for single-task challenges (for multi-task challenges it is not sensible because each task would contain a different set of algorithms).

- Reports for subsets of tasks: Use e.g. `subset(ranking_bootstrapped, tasks=c("task1", "task2", "task3")) %>% report(...)` to restrict the report to the tasks "task1", "task2" and "task3". You may want to recompute the consensus ranking before, using `meanRanks=subset(ranking, tasks=c("task1", "task2", "task3"))%>%consensus(method = "euclidean")`.

# Terms of use

Licensed under GPL-3. If you use this software for a publication, cite

Wiesenfarth, M., Reinke, A., Landman, B.A., Cardoso, M.J., Maier-Hein, L. and Kopp-Schneider, A. (2019). Methods and open-source toolkit for analyzing and visualizing challenge results. *arXiv preprint arXiv:1910.05121*