diff --git a/.gitignore b/.gitignore index c833a2c..47281dc 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,7 @@ .Rproj.user .Rhistory .RData .Ruserdata inst/doc +doc +Meta diff --git a/DESCRIPTION b/DESCRIPTION index 564ff9f..f66a29b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,32 +1,33 @@ Package: challengeR Type: Package Title: Analyzing assessment data of biomedical image analysis competitions and visualization of results Version: 0.3.3 Date: 2020-04-18 Author: Manuel Wiesenfarth, Annette Kopp-Schneider Maintainer: Manuel Wiesenfarth Description: Analyzing assessment data of biomedical image analysis competitions and visualization of results. License: GPL-3 Depends: R (>= 3.5.2), ggplot2 (>= 3.3.0), purrr (>= 0.3.3) Imports: dplyr (>= 0.8.5), graph (>= 1.64.0), knitr (>= 1.28), methods (>= 3.6.0), plyr (>= 1.8.6), relations (>= 0.6-9), reshape2 (>= 1.4.3), rlang (>= 0.4.5), rmarkdown (>= 2.1), tidyr (>= 1.0.2), viridisLite (>= 0.3.0) Suggests: doParallel (>= 1.0.15), foreach (>= 1.4.8), ggpubr (>= 0.2.5), Rgraphviz (>= 2.30.0), testthat (>= 2.1.0) VignetteBuilder: knitr +RoxygenNote: 7.1.0 diff --git a/R/Bootstrap.R b/R/Bootstrap.R index f85f99c..42ce7ac 100644 --- a/R/Bootstrap.R +++ b/R/Bootstrap.R @@ -1,160 +1,192 @@ bootstrap <- function(object,...) UseMethod("bootstrap") bootstrap.default <- function(object, ...) stop("not implemented for this class") +#' Performs bootstrapping +#' +#' Performs bootstrapping on a ranked assessment data set and applies the ranking method to each bootstrap sample. One bootstrap sample of +#' a task with \code{n} cases consists of \code{n} cases randomly drawn with replacement from this task. +#' A total of \code{nboot} of these bootstrap samples are drawn. +#' +#' @param object The ranked assessment data set. +#' @param nboot The number of bootstrap samples. +#' @param parallel A boolean specifying whether parallel processing should be enabled. +#' @param progress A string specifying the type of progress indication. +#' @param ... Further arguments passed to or from other functions. +#' +#' @return An S3 object of class "bootstrap.list" to represent a bootstrapped, ranked assessment data set. 
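To make the sampling scheme concrete, here is a minimal base-R sketch (illustration only, not part of the package; the toy column names `case`, `alg_name` and `value` are assumptions) of how one bootstrap sample of a single task is drawn: a resampled case index is merged back onto the task data, analogous to what `bootstrap.ranked.list()` does in each of the `nboot` iterations.

```r
# Illustration only: one bootstrap sample of a task with n cases consists of
# n case identifiers drawn with replacement; all algorithm results belonging
# to a drawn case are kept, and cases drawn twice appear twice.
set.seed(1)
toy <- data.frame(case     = rep(1:5, each = 2),             # hypothetical case IDs
                  alg_name = rep(c("A1", "A2"), times = 5),  # hypothetical algorithms
                  value    = runif(10))                      # hypothetical metric values

boot_index <- data.frame(case = sample(unique(toy$case),
                                       size    = length(unique(toy$case)),
                                       replace = TRUE))
boot_sample <- merge(boot_index, toy, by = "case")  # one bootstrap sample of the task
```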
+#' +#' @examples +#' +#' \dontrun{ +#' # perform bootstrapping with 1000 bootstrap samples using one CPU +#' set.seed(1) +#' ranking_bootstrapped <- bootstrap(ranking, nboot = 1000) +#' } +#' +#' \dontrun{ +#' # perform bootstrapping using multiple CPUs (here: 8 CPUs) +#' library(doParallel) +#' registerDoParallel(cores=8) +#' set.seed(1) +#' ranking_bootstrapped <- bootstrap(ranking, nboot = 1000, parallel = TRUE, progress = "none") +#' stopImplicitCluster() +#' } +#' +#' @export bootstrap.ranked.list=function(object, nboot, parallel=FALSE, progress="text", ...){ algorithm=attr(object$data,"algorithm") by=attr(object$data,"case") # exclude if only 1 data set or less than 3 algorithms tidy.data.id=sapply(object$data, function(data.subset) { ifelse((length(unique(data.subset[[by]]))==1 | length(unique(data.subset[[algorithm]]))<=2 ), yes=FALSE, no=TRUE) }) tidy.data=object$data[tidy.data.id] tidy.matlist=object$matlist[tidy.data.id] res= llply(1:nboot, function(it){ # draw 1 sample for each task bootDatalist = lapply(tidy.data, function(data.subset) { index = unique(data.subset[[by]]) # bootIndex=sample(index,size=length(index),replace=TRUE) # bootData=bind_rows(lapply(bootIndex,function(zz) data.subset[data.subset[[by]]==zz,])) # faster: bootIndex = data.frame(sample(index, size = length(index), replace = TRUE)) colnames(bootIndex) = by bootData = merge(bootIndex, data.subset, by = by) bootData }) attr(bootDatalist, "inverseOrder") = attr(object$data, "inverseOrder") attr(bootDatalist, "algorithm") = attr(object$data, "algorithm") attr(bootDatalist, "case") = attr(object$data, "case") attr(bootDatalist, "check") = FALSE object$FUN(bootDatalist)$mat }, .parallel = parallel, .progress = progress) rankmatlist = lapply(res[[1]], function(z) z[, "rank", drop = F] ) for (j in 2:length(res)) { rankmatlist = quickmerge.list(rankmatlist, lapply(res[[j]], function(z) z[, "rank", drop = F])) } aggmatlist = lapply(res[[1]], function(z) z[, -2, drop = F]) for (j in 2:length(res)) { aggmatlist = quickmerge.list(aggmatlist, lapply(res[[j]], function(z) z[, -2, drop = F])) } final=list(bootsrappedRanks=rankmatlist, bootsrappedAggregate=aggmatlist, data=object$data, matlist=tidy.matlist, FUN=object$FUN, FUN.list=object$FUN.list) class(final)=c("bootstrap.list") final } #################################################################################################### # deprecate following functions? rankFrequencies <- function(object,...) UseMethod("rankFrequencies") rankFrequencies.default <- function(object, ...) stop("not implemented for this class") rankFrequencies.bootstrap=function(object, who,...){ if (is.data.frame(who)) who=rownames(who) if (length(who)==1){ res=table(t(object$bootsrappedRanks[rownames(object$bootsrappedRanks)==who,])) cat("\n",who,"\n") print(res) } else { res=lapply(who, function(w){ rr=table(t(object$bootsrappedRanks[rownames(object$bootsrappedRanks)==w,])) cat(w,"\n") print(rr) cat("\n") rr }) } res=c(list(rankFrequencies=res),object) invisible(res) } rankFrequencies.bootstrap.list=function(object, who,...){ if (is.data.frame(who)) who=rownames(who) res=lapply(object$bootsrappedRanks,function(bootMat){ if (length(who)==1){ res=table(t(bootMat[rownames(bootMat)==who,])) cat("\n",who,"\n") print(res) } else { res=lapply(who, function(w){ rr=table(t(bootMat[rownames(bootMat)==w,])) cat(w,"\n") print(rr) cat("\n") rr }) } res }) res=c(list(rankFrequencies=res),object) invisible(res) } winnerFrequencies <- function(object,...) 
UseMethod("winnerFrequencies") winnerFrequencies.default <- function(object, ...) stop("not implemented for this class") # Achtung: bester rank muss ==1 sein und nicht z.B. 1.5 winnerFrequencies.bootstrap=function(object,...){ rankings_dicho=ifelse(object$bootsrappedRanks==1,1,0) winnerFrequencies=data.frame(winnerFrequency=rowSums(rankings_dicho),row.names = rownames(object$bootsrappedRanks)) res=merge(object$mat,winnerFrequencies,by="row.names",...) rownames(res)=res[,1] res=res[,-1] # res=c(res=res,object) # class(res)="bootstrapResults" res } winnerFrequencies.bootstrap.list=function(object,...){ res=lapply(1:length(object$bootsrappedRanks),function(id){ rankings_dicho=ifelse(object$bootsrappedRanks[[id]]==1,1,0) winnerFrequencies=data.frame(winnerFrequency=rowSums(rankings_dicho),row.names = rownames(object$bootsrappedRanks[[id]])) res=merge(object$matlist[[id]],winnerFrequencies,by="row.names",...) rownames(res)=res[,1] res=res[,-1] res }) names(res)=names(object$bootsrappedRanks) res } diff --git a/R/challengeR.R b/R/challengeR.R index c30253e..69117af 100644 --- a/R/challengeR.R +++ b/R/challengeR.R @@ -1,150 +1,188 @@ -#' Title +#' Constructs a challenge object #' -#' @param object -#' @param value -#' @param algorithm -#' @param case -#' @param taskName Optional for single-task data set that does not contain a task column. -#' @param by The name of the column that contains the task identifiers. Required for multi-task data set. -#' @param annotator -#' @param smallBetter -#' @param na.treat -#' @param check +#' Constructs an S3 object to represent the configuration of an assessment data set originating from a benchmarking competition (so-called "challenge"). #' -#' @return -#' @export +#' @section Assessment data set: +#' The toolkit provides visualization approaches for both challenges designed around a single task (single-task challenges) and for challenges comprising multiple tasks (multi-task challenges). +#' For a single-task challenge, the assessment data set (argument \code{object}) requires the following columns: +#' \itemize{ +#' \item test case identifier (string or numeric) +#' \item algorithm identifier (string or numeric) +#' \item performance value (numeric) +#' } +#' +#' For a multi-task challenge, the assessment data set (argument \code{object}) requires the following columns: +#' \itemize{ +#' \item task identifier (string or numeric) +#' \item test case identifier (string or numeric) +#' \item algorithm identifier (string or numeric) +#' \item performance value (numeric) +#' } +#' +#' @section Sanity check: +#' It is highly recommended that the sanity check is not disabled when the data set is provided initially. +#' It checks that: +#' \itemize{ +#' \item performance values are numeric (if not, raises error) +#' \item algorithm performances are observed for all cases (if not, adds them as NA and emits a message) +#' \item cases appear only once for the same algorithm (if not, raises error) +#' } +#' If the argument \code{na.treat} for treatment of NA is specified, NAs will be handled respectively. +#' +#' It might be reasonable to disable the sanity check for further computations (e.g., for performance reasons +#' during bootstrapping (\code{\link{bootstrap.ranked.list}}) where cases are actually allowed to appear more than once for the same algorithm). +#' +#' @param object A data frame containing the assessment data. +#' @param case A string specifying the name of the column that contains the case identifiers. 
+#' @param algorithm A string specifying the name of the column that contains the algorithm identifiers. +#' @param value A string specifying the name of the column that contains the performance values. +#' @param by A string specifying the name of the column that contains the task identifiers. Required for multi-task data set. +#' @param taskName A string specifying the task name for single-task data set that does not contain a task column. +#' This argument is optional for a single-task data set and is ignored for a multi-task data set. +#' @param annotator Not supported +#' @param smallBetter A boolean specifying whether small performance values indicate better algorithm performance. +#' @param na.treat Indicates how missing perfomance values are treated if sanity check is enabled. It can be 'na.rm', numeric value or function. +#' For a numeric value or function, NAs will be replaced by the specified values. For 'na.rm', rows that contain missing values will be removed. +#' @param check A boolean to indicate to perform a sanity check of the specified data set and arguments if set to \code{TRUE}. +#' +#' @return An S3 object to represent the configuration of an assessment data set. #' #' @examples +#' # single-task data set +#' +#' # multi-task data set +#' +#' @export as.challenge=function(object, + case, + algorithm, value, - algorithm , - case=NULL, - taskName=NULL, by=NULL, + taskName=NULL, annotator=NULL, smallBetter=FALSE, na.treat=NULL, # optional check=TRUE) { object=as.data.frame(object[,c(value, algorithm, case, by, annotator)]) # sanity checks if (check) { if (!is.null(by) && !is.null(taskName)) { warning("Argument 'taskName' is ignored for multi-task data set.") } # Add task column for data set without task column by using the specified task name. if (is.null(by) && !is.null(taskName)) { taskName <- trimws(taskName) if (taskName == "") { stop("Argument 'taskName' is empty.") } object <- cbind(task=taskName, object) by = "task" } # Add task column for data set without task column by using a dummy task name. if (is.null(by) && is.null(taskName)) { object <- cbind(task="dummyTask", object) by = "task" } object=splitby(object,by=by) object=lapply(object,droplevels) missingData = n.missing = list() for (task in names(object)) { if (!all(is.numeric(object[[task]][[value]]))) stop("Performance values must be numeric.") n.missing[[task]] <- sum(is.na(object[[task]][[value]])) # already missing before na.treat; for report if (n.missing[[task]]>0) message("Note: ", n.missing, " missing cases have been found in the data set.") # check for missing cases missingData[[task]]=object[[task]] %>% expand(!!as.symbol(algorithm), !!as.symbol(case))%>% anti_join(object[[task]], by=c( algorithm,case)) if (nrow(missingData[[task]])>0) { if (length(object) == 1 ) { # single task message("Performance of not all algorithms has been observed for all cases. Therefore, missings have been inserted in the following cases:") } else { # multi task message("Performance of not all algorithms has been observed for all cases in task '", task, "'. 
Therefore, missings have been inserted in the following cases:") } print(as.data.frame(missingData[[task]])) object[[task]]=as.data.frame(object[[task]] %>% complete(task, !!as.symbol(algorithm), !!as.symbol(case))) } # check duplicate cases all1=apply(table(object[[task]][[algorithm]], object[[task]][[case]]), 2, function(x) all(x==1)) if (!all(all1)) { n.duplicated <- sum(all1!=1) if (length(object) == 1 ) { # single task if (n.duplicated/length(all1) >= 1/5) { # at least a quarter of the cases is duplicated stop ("The following case(s) appear(s) more than once for the same algorithm. Please revise. ", "Or are you considering a multi-task challenge and forgot to specify argument 'by'?\n", "Case(s): ", paste(names(which(all1!=1)), collapse=", ") ) } else { stop ("The following case(s) appear(s) more than once for the same algorithm. Please revise.\n", "Case(s): ", paste(names(which(all1!=1)), collapse=", ") ) } } else { # multi task stop ("The following case(s) appear(s) more than once for the same algorithm in task '", task, "'. Please revise.\n", "Case(s): ", paste(names(which(all1!=1)), collapse=", ") ) } } if (!is.null(na.treat)) { if (is.numeric(na.treat)) object[[task]][,value][is.na(object[[task]][,value])]=na.treat else if (is.function(na.treat)) object[[task]][,value][is.na(object[[task]][,value])]=na.treat(object[[task]][,value][is.na(object[[task]][,value])]) else if (is.character(na.treat) && na.treat=="na.rm") object[[task]]=object[[task]][!is.na(object[[task]][,value]),] } } } if (check==TRUE && (any(sapply(missingData, function(x) nrow(x))>0) |any(n.missing>0))) { if (is.null(na.treat)) message("For aggregate-then-rank, na.treat will have to be specified. ", "For rank-then-aggregate, missings will implicitly lead to the algorithm ranked last for the missing test case." ) else if (is.numeric(na.treat)) message("All missings have been replaced by the value ", na.treat,".\n") else if (is.character(na.treat) && na.treat=="na.rm") message("All missings have been removed.") else if (is.function(na.treat)) { message("Missings have been replaced using function ") print(na.treat) } } if (check==TRUE){ attr(object,"n.missing")=n.missing attr(object,"missingData")=missingData } attr(object,"na.treat")=na.treat attr(object,"algorithm")=algorithm attr(object,"value")=value attr(object,"case")=case attr(object,"annotator")=annotator attr(object,"by")=by attr(object,"smallBetter")=smallBetter attr(object,"check")=check class(object)=c("challenge", class(object)) object } diff --git a/R/report.R b/R/report.R index 2b06c97..c9d8f05 100644 --- a/R/report.R +++ b/R/report.R @@ -1,174 +1,165 @@ report <- function(object,...) UseMethod("report") report.default <- function(object, ...) stop("not implemented for this class") + +#' Generates a benchmarking report with bootstrapping results +#' +#' Generates a benchmarking report in PDF, HTML or Word format with bootstrapping results. +#' It contains the rankings, plots of the raw assessment data and plots of the ranking stability. +#' For multi-task challenges, it also contains plots of cross-task insights. If you are interested in +#' the individual plots as separate files, set argument \code{clean} to \code{FALSE} and specify \code{fig.format}. +#' +#' @param object The ranked (bootstrapped) assessment data set. +#' @param consensus The rank aggregation across tasks (consensus ranking). Only needed for a multi-task data set. +#' @param file A string specifying the file name of the report. 
It allows for specifying the output file path as well, +#' otherwise the working directory is used. If \code{file} does not have a file extension, an extension will be automatically +#' added according to the output format given in \code{format}. If the argument is omitted, the report is created in a +#' temporary folder with file name "report". +#' @param title A string specifying the title of the report. +#' @param colors The color scheme that is applied to the plots. +#' @param format A string specifying the format of the report. The options are "PDF", "HTML" or "Word". +#' @param latex_engine A string specifying the LaTeX engine for producing PDF output. The Options are "pdflatex", "lualatex", and "xelatex". +#' @param clean A boolean indicating whether intermediate files (e.g. individual plots) should be kept. Using \code{TRUE} will clean +#' intermediate files that are created during rendering. +#' @param fig.format A vector of strings containing the file format of the figures that are not removed if \code{clean} is set to \code{FALSE}. +#' The options are "jpeg", "png" and "pdf", e.g. \code{fig.format = c("jpeg", "png", "pdf")}. +#' @param dpi A positive integer specifying the resolution of the generated plot (\code{fig.format} "jpeg" or "png") in dots per inch (DPI). +#' @param open A boolean specifying whether the report should be opened with the default system viewer after generation. +#' @param ... Further arguments passed to or from other functions. +#' +#' @return +#' +#' @examples +#' @export report.bootstrap.list=function(object, consensus, file, title="", colors=default_colors, format="PDF", latex_engine="pdflatex", - fig.format = NULL, # file format of figures if clean=FALSE, can be vector, e.g. fig.format=c('jpeg','png', 'pdf') + clean=TRUE, + fig.format = NULL, # file format of figures if clean==FALSE, can be vector, e.g. fig.format=c('jpeg','png', 'pdf') dpi = 150, # DPI, relevant for bitmaps if clean==FALSE and fig.format specified open=TRUE,...){ # Copy the report file to a temporary directory before processing it, in # case we don't have write permissions to the current working dir (which # can happen when deployed). if (missing(file)) tempReport <- file.path(tempdir(), "report.Rmd") else { a=strsplit(file,"/")[[1]] path=paste0(a[-length(a)],collapse="/") if (path=="") tempReport=file.path(paste0(strsplit(a[length(a)], ".", fixed=T)[[1]][1],".Rmd")) else tempReport=file.path(path,paste0(strsplit(a[length(a)], ".", fixed=T)[[1]][1],".Rmd")) } file.copy(file.path(system.file("appdir", package = "challengeR"), "reportMultiple.Rmd"), tempReport, overwrite = TRUE) if (length(object$matlist) > 1) { consensus = consensus isMultiTask = TRUE } else { consensus = NULL isMultiTask = FALSE } + bootstrappingEnabled = TRUE + + if (is(object, "ranked.list")) { + bootstrappingEnabled = FALSE + } + # Set up parameters to pass to Rmd document if (!is.null(fig.format) & format=="PDF") fig.format=c("pdf",fig.format) if (!is.null(fig.format) && fig.format[1]=="pdf" && format=="Word") fig.format <- c(fig.format[-1], fig.format[1]) # in word avoid use of pdf to be embedded in document params <- list( object=object, consensus=consensus, name=title, colors=colors, isMultiTask=isMultiTask, - bootstrappingEnabled=TRUE, + bootstrappingEnabled=bootstrappingEnabled, fig.format = fig.format, dpi = dpi ) # Knit the document, passing in the `params` list, and eval it in a # child of the global environment (this isolates the code in the document # from the code in this app). 
- # render(tempReport, output_file = file, - # params = params, - # envir = new.env(parent = globalenv()) - # ) out <- render(tempReport, switch( format, PDF = pdf_document(number_sections=T, latex_engine=latex_engine), HTML = html_document(number_sections=T), Word = word_document(df_print="kable") ), params = params, envir = new.env(parent = globalenv()), + clean = clean, ... ) if (!missing(file)){ if (is.na(strsplit(file,".",fixed=T)[[1]][2])) file=paste0(file, ".", strsplit(out,".",fixed=T)[[1]][2]) file.rename(out, file) } else file=out file.remove(tempReport) if (open) system(paste0('open "', file, '"')) } +#' Generates a benchmarking report without bootstrapping results +#' +#' Generates a benchmarking report in PDF, HTML or Word format without bootstrapping results. +#' It contains the rankings, plots of the raw assessment data and plots of the ranking stability. +#' For multi-task challenges, it also contains plots of cross-task insights. If you are interested in +#' the individual plots as separate files, set argument \code{clean} to \code{FALSE} and specify \code{fig.format}. +#' +#' @param object The ranked assessment data set. +#' @param consensus The rank aggregation across tasks (consensus ranking). Only needed for a multi-task data set. +#' @param file A string specifying the file name of the report. It allows for specifying the output file path as well, +#' otherwise the working directory is used. If \code{file} does not have a file extension, an extension will be automatically +#' added according to the output format given in \code{format}. If the argument is omitted, the report is created in a +#' temporary folder with file name "report". +#' @param title A string specifying the title of the report. +#' @param colors The color scheme that is applied to the plots. +#' @param format A string specifying the format of the report. The options are "PDF", "HTML" or "Word". +#' @param latex_engine A string specifying the LaTeX engine for producing PDF output. The Options are "pdflatex", "lualatex", and "xelatex". +#' @param clean A boolean indicating whether intermediate files (e.g. individual plots) should be kept. Using \code{TRUE} will clean +#' intermediate files that are created during rendering. +#' @param fig.format A vector of strings containing the file format of the figures that are not removed if \code{clean} is set to \code{FALSE}. +#' The options are "jpeg", "png" and "pdf", e.g. \code{fig.format = c("jpeg", "png", "pdf")}. +#' @param dpi A positive integer specifying the resolution of the generated plot (\code{fig.format} "jpeg" or "png") in dots per inch (DPI). +#' @param open A boolean specifying whether the report should be opened with the default system viewer after generation. +#' @param ... Further arguments passed to or from other functions. +#' +#' @return +#' +#' @examples +#' @export report.ranked.list=function(object, consensus, file, title="", colors=default_colors, format="PDF", latex_engine="pdflatex", + clean=TRUE, fig.format = NULL, # file format of figures if clean=FALSE, can be vector, e.g. fig.format=c('jpeg','png', 'pdf') dpi = 150, # DPI, relevant for bitmaps if clean==FALSE and fig.format specified open=TRUE, ...){ - - # Copy the report file to a temporary directory before processing it, in - # case we don't have write permissions to the current working dir (which - # can happen when deployed). 
- if (missing(file)) tempReport <- file.path(tempdir(), - "report.Rmd") - else { - a=strsplit(file,"/")[[1]] - path=paste0(a[-length(a)], - collapse="/") - if (path=="") tempReport=file.path(paste0(strsplit(a[length(a)], - ".", - fixed=T)[[1]][1], - ".Rmd")) - else tempReport=file.path(path, - paste0(strsplit(a[length(a)], - ".", - fixed=T)[[1]][1], - ".Rmd")) - } - file.copy(file.path(system.file("appdir", package = "challengeR"), - "reportMultiple.Rmd"), - tempReport, - overwrite = TRUE) - - if (length(object$matlist) > 1) { - consensus = consensus - isMultiTask = TRUE - } - else { - consensus = NULL - isMultiTask = FALSE - } - - # Set up parameters to pass to Rmd document - if (!is.null(fig.format) & format=="PDF") fig.format=c("pdf",fig.format) - if (!is.null(fig.format) && fig.format[1]=="pdf" && format=="Word") fig.format <- c(fig.format[-1], fig.format[1]) # in word avoid use of pdf to be embedded in document - params <- list( - object=object, - consensus=consensus, - name=title, - colors=colors, - isMultiTask=isMultiTask, - bootstrappingEnabled=FALSE, - fig.format = fig.format, - dpi = dpi - ) - - # Knit the document, passing in the `params` list, and eval it in a - # child of the global environment (this isolates the code in the document - # from the code in this app). - out <- render(tempReport, - switch( - format, - PDF = pdf_document(number_sections=T, - latex_engine=latex_engine), - HTML = html_document(number_sections=T), - Word = word_document(df_print="kable") - ), - params = params, - envir = new.env(parent = globalenv()), - ... - ) - - if (!missing(file)){ - if (is.na(strsplit(file,".",fixed=T)[[1]][2])) file=paste0(file, - ".", - strsplit(out,".",fixed=T)[[1]][2]) - file.rename(out, file) - } else file=out - - file.remove(tempReport) - - if (open) system(paste0('open "', file, '"')) + report.bootstrap.list(object, consensus, file, title, colors, format, latex_engine, clean, fig.format, dpi, open, ...) } diff --git a/R/subset.R b/R/subset.R index 9f8285f..6c238e1 100644 --- a/R/subset.R +++ b/R/subset.R @@ -1,119 +1,183 @@ subset <- function(x,...) UseMethod("subset") subset.default <- function(x, ...) base::subset(x, ...) subset.comparedRanks.list=function(x, tasks,...){ res=x[tasks] class(res)="comparedRanks.list" res } subset.list=function(x, tasks,...){ x[tasks] } subset.aggregated.list=function(x, tasks,...){ call=match.call(expand.dots = T) if (!is.null(as.list(call$top))) stop("Subset of algorithms only sensible for single task challenges.") matlist=x$matlist[tasks] res=list(matlist=matlist, call=list(x$call,call), data=x$data, FUN = . %>% (x$FUN) %>% (call) ) class(res)=class(x) res } which.top=function(object, top){ mat=object$mat[object$mat$rank<=top,] rownames(mat)#[order(mat$rank)] } - - +#' Extracts a subset of algorithms or tasks +#' +#' Extracts the top performing algorithms or a subset of tasks. +#' +#' @section Reports for subsets (top list) of algorithms: +#' If ties are present in the ranking, the subset will consist of more than \code{top} algorithms. +#' Line plots for ranking robustness can be used to check whether algorithms performing well in other +#' ranking methods are excluded. Bootstrapping still takes entire uncertainty into account. +#' Podium plots and ranking heatmaps neglect excluded algorithms. Only available for single-task challenges +#' (for multi-task challenges not sensible because each task would contain a different set of algorithms). 
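A quick base-R illustration of the tie behaviour described above (hypothetical aggregated values, not package code): because tied algorithms share the minimum rank, selecting `rank <= top` can keep more than `top` algorithms.

```r
# Illustration only: with ties.method = "min", tied algorithms share a rank,
# so a "top = 3" subset can contain more than 3 algorithms.
aggregated <- c(A1 = 0.91, A2 = 0.87, A3 = 0.87, A4 = 0.87, A5 = 0.70)  # larger = better
ranks <- rank(-aggregated, ties.method = "min")
ranks
#> A1 A2 A3 A4 A5
#>  1  2  2  2  5
names(ranks)[ranks <= 3]  # four algorithms are retained, not three
```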
+#' +#' @section Reports for subsets of tasks: +#' You may want to recompute the consensus ranking after creating the subset. An error will be raised +#' if a task identifier is not contained in the assessment data set to avoid subsequent errors. +#' +#' +#' @param x The ranked assessment data set. +#' @param top A positive integer specifying the number of top performing algorithms to be retrieved. +#' @param tasks A vector of strings containing the task identifiers that should remain in the subset. +#' @param ... Further arguments passed to or from other functions. +#' +#' @return An S3 object of class "ranked.list" to represent a ranked assessment data set. +#' +#' @examples +#' +#' \dontrun{ +#' # only show the top 3 algorithms according to the chosen ranking method +#' subset(ranking, top = 3) %>% report(...) +#' } +#' +#' \dontrun{ +#' # restrict report to tasks "task1" and "task2" +#' subset(ranking, tasks=c("task1", "task2")) %>% report(...) +#' } +#' +#' @export subset.ranked.list <- function(x, top, tasks,...) { - -# if (!missing(tasks) & length(x$matlist) == 1) stop("Subset of tasks only sensible for multi task challenges.") + if (!missing(top) & length(x$matlist) != 1) stop("Subset of algorithms only sensible for single-task challenges. Otherwise no consensus ranking is possible.") - + if (!missing(top)){ taskMat <- x$matlist[[1]] taskData <- x$data[[1]] objectTop=x objectTop$matlist[[1]]=taskMat[taskMat$rank<=top,] - + taskMatRowNames <- rownames(objectTop$matlist[[1]]) attribute <- attr(objectTop$data,"algorithm") - + selectedRowNames <- taskData[[attribute]] %in% taskMatRowNames objectTop$data[[1]] <- taskData[selectedRowNames,] if (is.factor(objectTop$data[[1]][[attribute]])) objectTop$data[[1]][[attribute]] <- droplevels(objectTop$data[[1]][[attribute]]) - + objectTop$fulldata=x$data return(objectTop) } else if (!missing(tasks)){ - + if (is.character(tasks) && any(!tasks%in%names(x$matlist))) { - stop("There is/are no task(s) called ",paste(tasks[!tasks%in%names(x$matlist)],collapse = " and "),".") + stop("There is/are no task(s) called ",paste(tasks[!tasks%in%names(x$matlist)],collapse = " and "),".") } res=list(matlist=x$matlist[tasks], data=x$data[tasks], call=x$call, FUN=x$FUN, FUN.list=x$FUN.list ) - + attrib=attributes(x$data) attrib$names=attr(res$data,"names") attributes(res$data)=attrib class(res)=c("ranked.list","list") return(res) } } - - +#' Extracts a subset of algorithms or tasks +#' +#' Extracts the top performing algorithms or a subset of tasks. +#' +#' @section Reports for subsets (top list) of algorithms: +#' If ties are present in the ranking, the subset will consist of more than \code{top} algorithms. +#' Line plots for ranking robustness can be used to check whether algorithms performing well in other +#' ranking methods are excluded. Bootstrapping still takes entire uncertainty into account. +#' Podium plots and ranking heatmaps neglect excluded algorithms. Only available for single-task challenges +#' (for multi-task challenges not sensible because each task would contain a different set of algorithms). +#' +#' @param x The bootstrapped, ranked assessment data set. +#' @param top A positive integer specifying the number of top performing algorithms to be retrieved.
+#' @param tasks A vector of strings containing the task identifiers that should remain in the subset. +#' @param ... Further arguments passed to or from other functions. +#' +#' @return An S3 object of class "bootstrap.list" to represent a bootstrapped, ranked assessment data set. +#' +#' @examples +#' +#' \dontrun{ +#' # only show the top 3 algorithms according to the chosen ranking method +#' subset(ranking_bootstrapped, top = 3) %>% report(...) +#' } +#' +#' \dontrun{ +#' # restrict report to tasks "task1" and "task2" and recompute consensus ranking +#' meanRanks <- subset(ranking, tasks = c("task1", "task2")) %>% consensus(method = "euclidean") +#' } +#' +#' @export subset.bootstrap.list=function(x, top, tasks, ...) { - - # if (!missing(tasks) & length(x$matlist) == 1) stop("Subset of tasks only sensible for multi task challenges.") + if (!missing(top) & length(x$matlist) != 1) stop("Subset of algorithms only sensible for single-task challenges. Otherwise no consensus ranking is possible.") - + if (!missing(top)){ objectTop <- subset.ranked.list(x, top = top) - + objectTop$bootsrappedRanks[[1]] <- objectTop$bootsrappedRanks[[1]][rownames(objectTop$matlist[[1]]),] objectTop$bootsrappedAggregate[[1]] <- objectTop$bootsrappedAggregate[[1]][rownames(objectTop$matlist[[1]]),] return(objectTop) } else if (!missing(tasks)){ if (is.character(tasks) && any(!tasks%in%names(x$matlist))) { - stop("There is/are no task(s) called ",paste(tasks[!tasks%in%names(x$matlist)],collapse = " and "),".") + stop("There is/are no task(s) called ",paste(tasks[!tasks%in%names(x$matlist)],collapse = " and "),".") } - + res=list(bootsrappedRanks=x$bootsrappedRanks[tasks], bootsrappedAggregate=x$bootsrappedAggregate[tasks], matlist=x$matlist[tasks], data=x$data[tasks], FUN=x$FUN ) - + attrib=attributes(x$data) attrib$names=attr(res$data,"names") attributes(res$data)=attrib class(res)="bootstrap.list" return(res) } } - - diff --git a/R/wrapper.R b/R/wrapper.R index 54435b6..bef8971 100644 --- a/R/wrapper.R +++ b/R/wrapper.R @@ -1,21 +1,84 @@ +#' Performs ranking via aggregate-then-rank +#' +#' Performs ranking by first aggregating performance values across all cases (e.g., with the mean, median or another quantile) for each algorithm. +#' This aggregate is then used to compute a rank for each algorithm. +#' +#' @param object The challenge object. +#' @param FUN The aggregation function, e.g. mean, median, min, max, function(x), quantile(x, probs=0.05). +#' @param ties.method A string specifying how ties are treated, see \code{\link{base::rank}}. +#' @param ... Further arguments passed to or from other functions. +#' +#' @return An S3 object of class "ranked.list" to represent a ranked assessment data set. +#' +#' @examples +#' +#' \dontrun{ +#' aggregateThenRank(challenge, FUN = mean, ties.method = "average", na.treat = 0) +#' } +#' +#' @family ranking functions +#' @export aggregateThenRank=function(object,FUN,ties.method = "min",...){ object %>% aggregate(FUN=FUN,...) %>% rank(ties.method = ties.method) } +#' Performs ranking via test-then-rank +#' +#' Computes statistical hypothesis tests based on Wilcoxon signed rank test for each possible +#' pair of algorithms to assess differences in metric values between the algorithms. +#' Then ranking is performed according to the number of significant one-sided test results. +#' If algorithms have the same number of significant test results, then they obtain the same rank. +#' +#' @param object The challenge object. 
+#' @param ties.method A string specifying how ties are treated, see \code{\link{base::rank}}. +#' @param ... Further arguments passed to or from other functions. +#' +#' @return An S3 object of class "ranked.list" to represent a ranked assessment data set. +#' +#' @examples +#' \dontrun{ +#' testThenRank(challenge, +#' alpha=0.05, # significance level +#' p.adjust.method="none", # method for adjustment for multiple testing, see ?p.adjust +#' na.treat = 0) +#' } +#' +#' @family ranking functions +#' @export testThenRank=function(object, ties.method = "min",...){ object %>% aggregate(FUN="significance",...) %>% rank(ties.method = ties.method) } +#' Performs ranking via rank-then-aggregate +#' +#' Performs ranking by first computing a rank for each case for each algorithm (”rank first”). +#' The final rank is based on the aggregated ranks for the cases. This ranking method handles missing values implicitly +#' by assigning the worst rank to missing algorithm performances. +#' +#' +#' @param object The challenge object. +#' @param FUN The aggregation function, e.g., mean, median, min, max, function(x), quantile(x, probs=0.05). +#' @param ties.method A string specifying how ties are treated, see \code{\link{base::rank}}. +#' +#' @return An S3 object of class "ranked.list" to represent a ranked assessment data set. +#' +#' @examples +#' \dontrun{ +#' rankThenAggregate(challenge, FUN = mean) +#' } +#' +#' @family ranking functions +#' @export rankThenAggregate=function(object, FUN, ties.method = "min" ){ object %>% rank(ties.method = ties.method)%>% aggregate(FUN=FUN) %>% rank(ties.method = ties.method) # small rank is always best, i.e. smallBetter always TRUE } diff --git a/Readme.Rmd b/Readme.Rmd index 41303d5..20849ce 100644 --- a/Readme.Rmd +++ b/Readme.Rmd @@ -1,545 +1,545 @@ --- title: Methods and open-source toolkit for analyzing and visualizing challenge results output: pdf_document: toc: yes toc_depth: '3' github_document: toc: yes toc_depth: 1 editor_options: chunk_output_type: console --- ```{r, echo = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>", # fig.path = "README-", fig.width = 9, fig.height = 5, width=160 ) ``` Note that this is ongoing work (version `r packageVersion("challengeR")`), there may be updates with possibly major changes. *Please make sure that you use the most current version!* Change log at the end of this document. # Introduction The current framework is a tool for analyzing and visualizing challenge results in the field of biomedical image analysis and beyond. Biomedical challenges have become the de facto standard for benchmarking biomedical image analysis algorithms. While the number of challenges is steadily increasing, surprisingly little effort has been invested in ensuring high quality design, execution and reporting for these international competitions. Specifically, results analysis and visualization in the event of uncertainties have been given almost no attention in the literature. Given these shortcomings, the current framework aims to enable fast and wide adoption of comprehensively analyzing and visualizing the results of single-task and multi-task challenges and applying them to a number of simulated and real-life challenges to demonstrate their specific strengths and weaknesses. This approach offers an intuitive way to gain important insights into the relative and absolute performance of algorithms, which cannot be revealed by commonly applied visualization techniques. 
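As a small, package-independent illustration of why the choice of ranking scheme matters (cf. the `aggregateThenRank()` and `rankThenAggregate()` wrappers documented above), the following base-R sketch uses hypothetical toy values; the two schemes pick different winners:

```{r, eval=F, echo=T}
# Illustration only (hypothetical toy values, larger = better)
toy <- data.frame(case  = rep(1:3, times = 2),
                  alg   = rep(c("A1", "A2"), each = 3),
                  value = c(0.90, 0.50, 0.52,   # A1
                            0.60, 0.61, 0.62))  # A2

# aggregate-then-rank: aggregate per algorithm, then rank the aggregates
agg <- tapply(toy$value, toy$alg, mean)
rank(-agg, ties.method = "min")                 # A1 = 1, A2 = 2 (A1 has the better mean)

# rank-then-aggregate: rank algorithms within each case, then aggregate the ranks
case_ranks <- ave(-toy$value, toy$case, FUN = function(x) rank(x, ties.method = "min"))
rank(tapply(case_ranks, toy$alg, mean), ties.method = "min")  # A2 = 1 (A2 wins 2 of 3 cases)
```

In the toolkit itself, this is what `aggregateThenRank()` and `rankThenAggregate()` do on a challenge object, with additional handling of missing values and ties.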
# Installation Requires R version >= 3.5.2 (https://www.r-project.org). Further, a recent version of Pandoc (>= 1.12.3) is required. RStudio (https://rstudio.com) automatically includes this so you do not need to download Pandoc if you plan to use rmarkdown from the RStudio IDE, otherwise you’ll need to install Pandoc for your platform (https://pandoc.org/installing.html). Finally, if you want to generate a pdf report you will need to have LaTeX installed (e.g. MiKTeX, MacTeX or TinyTeX). To get the current development version of the R package from Github: ```{r, eval=F,R.options,} if (!requireNamespace("devtools", quietly = TRUE)) install.packages("devtools") if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager") BiocManager::install("Rgraphviz", dependencies = TRUE) devtools::install_github("wiesenfa/challengeR", dependencies = TRUE) ``` If you are asked whether you want to update installed packages and you type "a" for all, you might need administrator rights to update R core packages. You can also try to type "n" for updating no packages. If you are asked "Do you want to install from sources the packages which need compilation? (Yes/no/cancel)", you can safely type "no". If you get *Warning messages* (in contrast to *Error* messages), these might not be problematic and you can try to proceed. # Terms of use Licenced under GPL-3. If you use this software for a publication, cite Wiesenfarth, M., Reinke, A., Landman, B.A., Cardoso, M.J., Maier-Hein, L. and Kopp-Schneider, A. (2019). Methods and open-source toolkit for analyzing and visualizing challenge results. *arXiv preprint arXiv:1910.05121* # Usage Each of the following steps have to be run to generate the report: (1) Load package, (2) load data, (3) perform ranking, (4) perform bootstrapping and (5) generation of the report ## 1. Load package Load package ```{r, eval=F} library(challengeR) ``` ## 2. Load data ### Data requirements Data requires the following *columns* * a *task identifier* in case of multi-task challenges. * a *test case identifier* * the *algorithm name* * the *metric value* In case of missing metric values, a missing observation has to be provided (either as blank field or "NA"). For example, in a challenge with 2 tasks, 2 test cases and 2 algorithms, where in task "T2", test case "case2", algorithm "A2" didn't give a prediction (and thus NA or a blank field for missing value is inserted), the data set might look like this: ```{r, eval=T, echo=F,results='asis'} set.seed(1) a=cbind(expand.grid(Task=paste0("T",1:2),TestCase=paste0("case",1:2),Algorithm=paste0("A",1:2)),MetricValue=round(c(runif(7,0,1),NA),3)) print(knitr::kable(a[order(a$Task,a$TestCase,a$Algorithm),],row.names=F)) ``` ### Load data If you have assessment data at hand stored in a csv file (if you want to use simulated data skip the following code line) use ```{r, eval=F, echo=T} data_matrix=read.csv(file.choose()) # type ?read.csv for help ``` This allows to choose a file interactively, otherwise replace *file.choose()* by the file path (in style "/path/to/dataset.csv") in quotation marks. For illustration purposes, in the following simulated data is generated *instead* (skip the following code chunk if you have already loaded data). The data is also stored as "data_matrix.csv" in the repository. 
```{r, eval=F, echo=T} if (!requireNamespace("permute", quietly = TRUE)) install.packages("permute") n=50 set.seed(4) strip=runif(n,.9,1) c_ideal=cbind(task="c_ideal", rbind( data.frame(alg_name="A1",value=runif(n,.9,1),case=1:n), data.frame(alg_name="A2",value=runif(n,.8,.89),case=1:n), data.frame(alg_name="A3",value=runif(n,.7,.79),case=1:n), data.frame(alg_name="A4",value=runif(n,.6,.69),case=1:n), data.frame(alg_name="A5",value=runif(n,.5,.59),case=1:n) )) set.seed(1) c_random=data.frame(task="c_random", alg_name=factor(paste0("A",rep(1:5,each=n))), value=plogis(rnorm(5*n,1.5,1)),case=rep(1:n,times=5) ) strip2=seq(.8,1,length.out=5) a=permute::allPerms(1:5) c_worstcase=data.frame(task="c_worstcase", alg_name=c(t(a)), value=rep(strip2,nrow(a)), case=rep(1:nrow(a),each=5) ) c_worstcase=rbind(c_worstcase, data.frame(task="c_worstcase",alg_name=1:5,value=strip2,case=max(c_worstcase$case)+1) ) c_worstcase$alg_name=factor(c_worstcase$alg_name,labels=paste0("A",1:5)) data_matrix=rbind(c_ideal, c_random, c_worstcase) ``` ## 3 Perform ranking ### 3.1 Define challenge object Code differs slightly for single and multi task challenges. In case of a single task challenge use ```{r, eval=F, echo=T} # Use only task "c_random" in object data_matrix dataSubset=subset(data_matrix, task=="c_random") challenge=as.challenge(dataSubset, # Specify how to refer to the task in plots and reports taskName="Task 1", # Specify which column contains the algorithm, # which column contains a test case identifier # and which contains the metric value: algorithm="alg_name", case="case", value="value", # Specify if small metric values are better smallBetter = FALSE) ``` *Instead*, for a multi-task challenge use ```{r, eval=F, echo=T} # Same as above but with 'by="task"' where variable "task" contains the task identifier challenge=as.challenge(data_matrix, by="task", algorithm="alg_name", case="case", value="value", smallBetter = FALSE) ``` ### 3.2 Perform ranking Different ranking methods are available, choose one of them: - for "aggregate-then-rank" use (here: take mean for aggregation) ```{r, eval=F, echo=T} ranking=challenge%>%aggregateThenRank(FUN = mean, # aggregation function, # e.g. mean, median, min, max, # or e.g. function(x) quantile(x, probs=0.05) na.treat=0, # either "na.rm" to remove missing data, # set missings to numeric value (e.g. 0) # or specify a function, # e.g. function(x) min(x) ties.method = "min" # a character string specifying # how ties are treated, see ?base::rank ) ``` - *alternatively*, for "rank-then-aggregate" with arguments as above (here: take mean for aggregation): ```{r, eval=F, echo=T} ranking=challenge%>%rankThenAggregate(FUN = mean, ties.method = "min" ) ``` - *alternatively*, for test-then-rank based on Wilcoxon signed rank test: ```{r, eval=F, echo=T} ranking=challenge%>%testThenRank(alpha=0.05, # significance level p.adjust.method="none", # method for adjustment for # multiple testing, see ?p.adjust na.treat=0, # either "na.rm" to remove missing data, # set missings to numeric value (e.g. 0) # or specify a function, e.g. function(x) min(x) ties.method = "min" # a character string specifying # how ties are treated, see ?base::rank ) ``` ## 4. 
Perform bootstrapping Perform bootstrapping with 1000 bootstrap samples using one CPU ```{r, eval=F, echo=T} set.seed(1) ranking_bootstrapped=ranking%>%bootstrap(nboot=1000) ``` If you want to use multiple CPUs (here: 8 CPUs), use ```{r, eval=F, echo=T} library(doParallel) registerDoParallel(cores=8) set.seed(1) ranking_bootstrapped=ranking%>%bootstrap(nboot=1000, parallel=TRUE, progress = "none") stopImplicitCluster() ``` ## 5. Generate the report Generate the report in PDF, HTML or DOCX format. Code differs slightly for single and multi task challenges. ### 5.1 For single task challenges ```{r, eval=F, echo=T} ranking_bootstrapped %>% report(title="singleTaskChallengeExample", # used for the title of the report file = "filename", format = "PDF", # format can be "PDF", "HTML" or "Word" latex_engine="pdflatex", #LaTeX engine for producing PDF output. Options are "pdflatex", "lualatex", and "xelatex" clean=TRUE #optional. Using TRUE will clean intermediate files that are created during rendering. ) ``` Argument *file* allows for specifying the output file path as well, otherwise the working directory is used. If file is specified but does not have a file extension, an extension will be automatically added according to the output format given in *format*. Using argument *clean=FALSE* allows you to retain intermediate files, such as separate files for each figure. If argument "file" is omitted, the report is created in a temporary folder with file name "report". ### 5.2 For multi task challenges Same as for single task challenges, but additionally a consensus ranking (rank aggregation across tasks) has to be given. Compute ranking consensus across tasks (here: consensus ranking according to mean ranks across tasks): ```{r, eval=F, echo=T} # See ?relation_consensus for different methods to derive consensus ranking meanRanks=ranking%>%consensus(method = "euclidean") meanRanks # note that there may be ties (i.e. some algorithms have identical mean rank) ``` Generate the report as above, but with additional specification of the consensus ranking ```{r, eval=F, echo=T} ranking_bootstrapped %>% report(consensus=meanRanks, title="multiTaskChallengeExample", file = "filename", format = "PDF", # format can be "PDF", "HTML" or "Word" latex_engine="pdflatex"#LaTeX engine for producing PDF output. Options are "pdflatex", "lualatex", and "xelatex" ) ``` # Troubleshooting This section compiles issues that users have reported. ### RStudio specific #### - Warnings while installing the Github repository ##### Error: While trying to install the current version of the repository: ```{r, eval=F, echo=T} devtools::install_github("wiesenfa/challengeR", dependencies = TRUE) ``` The following warning showed up in the output: ```{r, eval=F, echo=T} WARNING: Rtools is required to build R packages, but is not currently installed. ``` Therefore, Rtools was installed via a separate executable: https://cran.r-project.org/bin/windows/Rtools/ and the warning disappeared. ##### Solution: Actually, there is no need to install Rtools; it is not really used in the toolkit. Instead, choose not to install it when asked. See comment in the installation section: “If you are asked whether you want to update installed packages and you type “a” for all, you might need administrator rights to update R core packages. You can also try to type “n” for updating no packages. If you are asked “Do you want to install from sources the packages which need compilation?
(Yes/no/cancel)”, you can safely type “no”.” #### - Unable to install the current version of the tool from Github ##### Error: While trying the current version of the tool from github, it was unable to install. The error message was: ```{r, eval=F, echo=T} byte-compile and prepare package for lazy loading Error: (converted from warning) package 'ggplot2' was built under R version 3.6.3 Execution halted ERROR: lazy loading failed for package 'challengeR' * removing 'C:/Users/.../Documents/R/win-library/3.6/challengeR' * restoring previous 'C:/Users/.../Documents/R/win-library/3.6/challengeR' Error: Failed to install 'challengeR' from GitHub: (converted from warning) installation of package 'C:/Users/.../AppData/Local/Temp/Rtmp615qmV/file4fd419555eb4/challengeR_0.3.1.tar.gz' had non-zero exit status ``` The problem was that some of the packages that were built under R3.6.1 had been updated, but the current installed version was still R3.6.1. ##### Solution: The solution was to update R3.6.1 to R3.6.3. Another way would have been to reset the single packages to the versions built under R3.6.1 #### - Unable to install the toolkit from Github ##### Error: While trying the current version of the tool from github, it was unable to install. ```{r, eval=F, echo=T} devtools::install_github("wiesenfa/challengeR", dependencies = TRUE) ``` The error message was: ```{r, eval=F, echo=T} Error: .onLoad failed in loadNamespace() for 'pkgload', details: call: loadNamespace(i, c(lib.loc, .libPaths()), versionCheck = vI[[i]]) error: there is no package called ‘backports’ ``` The problem was that the packages 'backports' had not been installed. ##### Solution: The solution was to install 'backports' manually. ```{r, eval=F, echo=T} install.packages("backports") ``` #### - Unable to install R ##### Error: While trying to install the package in the R, after running the following commands: ```{r, eval=F, echo=T} if (!requireNamespace("devtools", quietly = TRUE)) install.packages("devtools") if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager") BiocManager::install("Rgraphviz", dependencies = TRUE) devtools::install_github("wiesenfa/challengeR", dependencies = TRUE) ``` The error message was: ```{r, eval=F, echo=T} ERROR: 1: In file(con, "r") : URL 'https://bioconductor.org/config.yaml': status was 'SSL connect error' 2: packages ‘BiocVersion’, ‘Rgraphviz’ are not available (for R version 3.6.1) ``` ##### Solution: The solution was to restart RStudio. #### - Incorrect column order ##### Error: When naming the columns "task" and "case", R was confused because the arguments in the challenge object are also called like this and it produced the following error: ```{r, eval=F, echo=T} Error in table(object[[task]][[algorithm]], object[[task]][[case]]) : all arguments must have the same length ``` ##### Solution: The solution was to rename the columns. #### - Wrong versions of packages ##### Error: While running this command : ```{r, eval=F, echo=T} devtools::install_github("wiesenfa/challengeR", dependencies = TRUE) ``` I had the following errors : - Error : the package 'purrr' has been compiled with version of R 3.6.3 - Error : the package 'ggplot2' has been compiled with version of R 3.6.3 - Error in loadNamespace(j <- i[[L]], c(lib.loc, .libPaths()), versionCheck = vI[[j]]) namespace 'glue' 1.3.1 is already loaded, but >= 1.3.2 is required ##### Solution: To solve the issue I changed the versions of the packages. 
I had the following versions : - purrr 0.3.4 - ggplot2 3.3.2 - glue 1.3.1 I moved to the following ones : - purrr 0.3.3 - ggplot2 3.3.0 - glue 1.4.2 ### Related to MikText #### - Missing packages ##### Error: While generating the PDF with Miktext (2.9), the following error showed up: ```{r, eval=F, echo=T} fatal pdflatex - gui framework cannot be initialized ``` There is an issue with installing missing packages in LaTeX. ##### Solution: Open your MiKTeX Console --> Settings, select "Always install missing packages on-the-fly". Then generate the report. Once the report is generated, you can reset the settings to your preferred ones. #### - Unable to generate report ##### Error: While generating the PDF with Miktext (2.9): ```{r, eval=F, echo=T} ranking_bootstrapped %>% report(title="singleTaskChallengeExample", # used for the title of the report file = "filename", format = "PDF", # format can be "PDF", "HTML" or "Word" latex_engine="pdflatex", #LaTeX engine for producing PDF output. Options are "pdflatex", "lualatex", and "xelatex" clean=TRUE #optional. Using TRUE will clean intermediate files that are created during rendering. ) ``` The following error showed up: ```{r, eval=F, echo=T} output file: filename.knit.md "C:/Program Files/RStudio/bin/pandoc/pandoc" +RTS -K512m -RTS filename.utf8.md --to latex --from markdown+autolink_bare_uris+tex_math_single_backslash --output filename.tex --self-contained --number-sections --highlight-style tango --pdf-engine pdflatex --variable graphics --lua-filter "C:/Users/adm/Documents/R/win-library/3.6/rmarkdown/rmd/lua/pagebreak.lua" --lua-filter "C:/Users/adm/Documents/R/win-library/3.6/rmarkdown/rmd/lua/latex-div.lua" --variable "geometry:margin=1in" Error: LaTeX failed to compile filename.tex. See https://yihui.org/tinytex/r/#debugging for debugging tips. Warning message: In system2(..., stdout = if (use_file_stdout()) f1 else FALSE, stderr = f2) : '"pdflatex"' not found ``` ##### Solution: The solution was to restart RStudio. # Changes #### Version 0.3.3 - Force line break to avoid that authors exceed the page in generated PDF reports #### Version 0.3.2 - Correct names of authors #### Version 0.3.1 - Refactoring #### Version 0.3.0 - Major bug fix release #### Version 0.2.5 - Bug fixes #### Version 0.2.4 - Automatic insertion of missings #### Version 0.2.3 - Bug fixes -- Reports for subsets (top list) of algorithms: Use e.g. `subset(ranking_bootstrapped, top=3) %>% report(...)` (or `subset(ranking, top=3) %>% report(...)` for report without bootstrap results) to only show the top 3 algorithms according to the chosen ranking methods, where `ranking_bootstrapped` and `ranking` objects as defined in the example. Line plot for ranking robustness can be used to check whether algorithms performing well in other ranking methods are excluded. Bootstrapping still takes entire uncertainty into account. Podium plot neglect and ranking heatmap neglect excluded algorithms. Only available for single task challenges (for mutli task challenges not sensible because each task would contain a different sets of algorithms). -- Reports for subsets of tasks: Use e.g. `subset(ranking_bootstrapped, tasks=c("task1", "task2","task3)) %>% report(...)` to restrict report to tasks "task1", "task2","task3. You may want to recompute the consensus ranking before using `meanRanks=subset(ranking, tasks=c("task1", "task2","task3))%>%consensus(method = "euclidean")` +- Reports for subsets (top list) of algorithms: Use e.g. 
`subset(ranking_bootstrapped, top=3) %>% report(...)` (or `subset(ranking, top=3) %>% report(...)` for report without bootstrap results) to only show the top 3 algorithms according to the chosen ranking methods, where `ranking_bootstrapped` and `ranking` objects as defined in the example. Line plot for ranking robustness can be used to check whether algorithms performing well in other ranking methods are excluded. Bootstrapping still takes entire uncertainty into account. Podium plot and ranking heatmap neglect excluded algorithms. Only available for single-task challenges (for multi-task challenges not sensible because each task would contain a different set of algorithms). +- Reports for subsets of tasks: Use e.g. `subset(ranking_bootstrapped, tasks=c("task1", "task2","task3")) %>% report(...)` to restrict report to tasks "task1", "task2","task3. You may want to recompute the consensus ranking before using `meanRanks=subset(ranking, tasks=c("task1", "task2", "task3"))%>%consensus(method = "euclidean")` #### Version 0.2.1 - Introduction in reports now mentions e.g. ranking method, number of test cases,... - Function `subset()` allows selection of tasks after bootstrapping, e.g. `subset(ranking_bootstrapped,1:3)` - `report()` functions gain argument `colors` (default: `default_colors`). Change e.g. to `colors=viridisLite::inferno` which "is designed in such a way that it will analytically be perfectly perceptually-uniform, both in regular form and also when converted to black-and-white. It is also designed to be perceived by readers with the most common form of color blindness." See package `viridis` for further similar functions. #### Version 0.2.0 - Improved layout in case of many algorithms and tasks (while probably still not perfect) - Consistent coloring of algorithms across figures - `report()` function can be applied to ranked object before bootstrapping (and thus excluding figures based on bootstrapping), i.e. in the example `ranking %>% report(...)` - bug fixes # Team The developer team includes members from both Computer Assisted Medical Interventions (CAMI) and Biostatistics Division from the German Cancer Research Center (DKFZ): - Manuel Wiesenfarth - Annette Kopp-Schneider - Annika Reinke - Matthias Eisenmann - Laura Aguilera Saiz - Lena Maier-Hein # Reference Wiesenfarth, M., Reinke, A., Landman, B.A., Cardoso, M.J., Maier-Hein, L. and Kopp-Schneider, A. (2019). Methods and open-source toolkit for analyzing and visualizing challenge results. *arXiv preprint arXiv:1910.05121*