diff --git a/R/Bootstrap.R b/R/Bootstrap.R index f85f99c..42ce7ac 100644 --- a/R/Bootstrap.R +++ b/R/Bootstrap.R @@ -1,160 +1,192 @@ bootstrap <- function(object,...) UseMethod("bootstrap") bootstrap.default <- function(object, ...) stop("not implemented for this class") +#' Performs bootstrapping +#' +#' Performs bootstrapping on a ranked assessment data set and applies the ranking method to each bootstrap sample. One bootstrap sample of +#' a task with \code{n} cases consists of \code{n} cases randomly drawn with replacement from this task. +#' A total of \code{nboot} of these bootstrap samples are drawn. +#' +#' @param object The ranked assessment data set. +#' @param nboot The number of bootstrap samples. +#' @param parallel A boolean specifying whether parallel processing should be enabled. +#' @param progress A string specifying the type of progress indication. +#' @param ... Further arguments passed to or from other functions. +#' +#' @return An S3 object of class "bootstrap.list" to represent a bootstrapped, ranked assessment data set. +#' +#' @examples +#' +#' \dontrun{ +#' # perform bootstrapping with 1000 bootstrap samples using one CPU +#' set.seed(1) +#' ranking_bootstrapped <- bootstrap(ranking, nboot = 1000) +#' } +#' +#' \dontrun{ +#' # perform bootstrapping using multiple CPUs (here: 8 CPUs) +#' library(doParallel) +#' registerDoParallel(cores=8) +#' set.seed(1) +#' ranking_bootstrapped <- bootstrap(ranking, nboot = 1000, parallel = TRUE, progress = "none") +#' stopImplicitCluster() +#' } +#' +#' @export bootstrap.ranked.list=function(object, nboot, parallel=FALSE, progress="text", ...){ algorithm=attr(object$data,"algorithm") by=attr(object$data,"case") # exclude if only 1 data set or less than 3 algorithms tidy.data.id=sapply(object$data, function(data.subset) { ifelse((length(unique(data.subset[[by]]))==1 | length(unique(data.subset[[algorithm]]))<=2 ), yes=FALSE, no=TRUE) }) tidy.data=object$data[tidy.data.id] tidy.matlist=object$matlist[tidy.data.id] res= llply(1:nboot, function(it){ # draw 1 sample for each task bootDatalist = lapply(tidy.data, function(data.subset) { index = unique(data.subset[[by]]) # bootIndex=sample(index,size=length(index),replace=TRUE) # bootData=bind_rows(lapply(bootIndex,function(zz) data.subset[data.subset[[by]]==zz,])) # faster: bootIndex = data.frame(sample(index, size = length(index), replace = TRUE)) colnames(bootIndex) = by bootData = merge(bootIndex, data.subset, by = by) bootData }) attr(bootDatalist, "inverseOrder") = attr(object$data, "inverseOrder") attr(bootDatalist, "algorithm") = attr(object$data, "algorithm") attr(bootDatalist, "case") = attr(object$data, "case") attr(bootDatalist, "check") = FALSE object$FUN(bootDatalist)$mat }, .parallel = parallel, .progress = progress) rankmatlist = lapply(res[[1]], function(z) z[, "rank", drop = F] ) for (j in 2:length(res)) { rankmatlist = quickmerge.list(rankmatlist, lapply(res[[j]], function(z) z[, "rank", drop = F])) } aggmatlist = lapply(res[[1]], function(z) z[, -2, drop = F]) for (j in 2:length(res)) { aggmatlist = quickmerge.list(aggmatlist, lapply(res[[j]], function(z) z[, -2, drop = F])) } final=list(bootsrappedRanks=rankmatlist, bootsrappedAggregate=aggmatlist, data=object$data, matlist=tidy.matlist, FUN=object$FUN, FUN.list=object$FUN.list) class(final)=c("bootstrap.list") final } #################################################################################################### # deprecate following functions? rankFrequencies <- function(object,...) UseMethod("rankFrequencies") rankFrequencies.default <- function(object, ...) stop("not implemented for this class") rankFrequencies.bootstrap=function(object, who,...){ if (is.data.frame(who)) who=rownames(who) if (length(who)==1){ res=table(t(object$bootsrappedRanks[rownames(object$bootsrappedRanks)==who,])) cat("\n",who,"\n") print(res) } else { res=lapply(who, function(w){ rr=table(t(object$bootsrappedRanks[rownames(object$bootsrappedRanks)==w,])) cat(w,"\n") print(rr) cat("\n") rr }) } res=c(list(rankFrequencies=res),object) invisible(res) } rankFrequencies.bootstrap.list=function(object, who,...){ if (is.data.frame(who)) who=rownames(who) res=lapply(object$bootsrappedRanks,function(bootMat){ if (length(who)==1){ res=table(t(bootMat[rownames(bootMat)==who,])) cat("\n",who,"\n") print(res) } else { res=lapply(who, function(w){ rr=table(t(bootMat[rownames(bootMat)==w,])) cat(w,"\n") print(rr) cat("\n") rr }) } res }) res=c(list(rankFrequencies=res),object) invisible(res) } winnerFrequencies <- function(object,...) UseMethod("winnerFrequencies") winnerFrequencies.default <- function(object, ...) stop("not implemented for this class") # Achtung: bester rank muss ==1 sein und nicht z.B. 1.5 winnerFrequencies.bootstrap=function(object,...){ rankings_dicho=ifelse(object$bootsrappedRanks==1,1,0) winnerFrequencies=data.frame(winnerFrequency=rowSums(rankings_dicho),row.names = rownames(object$bootsrappedRanks)) res=merge(object$mat,winnerFrequencies,by="row.names",...) rownames(res)=res[,1] res=res[,-1] # res=c(res=res,object) # class(res)="bootstrapResults" res } winnerFrequencies.bootstrap.list=function(object,...){ res=lapply(1:length(object$bootsrappedRanks),function(id){ rankings_dicho=ifelse(object$bootsrappedRanks[[id]]==1,1,0) winnerFrequencies=data.frame(winnerFrequency=rowSums(rankings_dicho),row.names = rownames(object$bootsrappedRanks[[id]])) res=merge(object$matlist[[id]],winnerFrequencies,by="row.names",...) rownames(res)=res[,1] res=res[,-1] res }) names(res)=names(object$bootsrappedRanks) res } diff --git a/R/challengeR.R b/R/challengeR.R index 6bd949e..69117af 100644 --- a/R/challengeR.R +++ b/R/challengeR.R @@ -1,188 +1,188 @@ #' Constructs a challenge object #' #' Constructs an S3 object to represent the configuration of an assessment data set originating from a benchmarking competition (so-called "challenge"). #' #' @section Assessment data set: #' The toolkit provides visualization approaches for both challenges designed around a single task (single-task challenges) and for challenges comprising multiple tasks (multi-task challenges). #' For a single-task challenge, the assessment data set (argument \code{object}) requires the following columns: #' \itemize{ #' \item test case identifier (string or numeric) #' \item algorithm identifier (string or numeric) #' \item performance value (numeric) #' } #' #' For a multi-task challenge, the assessment data set (argument \code{object}) requires the following columns: #' \itemize{ #' \item task identifier (string or numeric) #' \item test case identifier (string or numeric) #' \item algorithm identifier (string or numeric) #' \item performance value (numeric) #' } #' #' @section Sanity check: #' It is highly recommended that the sanity check is not disabled when the data set is provided initially. #' It checks that: #' \itemize{ #' \item performance values are numeric (if not, raises error) #' \item algorithm performances are observed for all cases (if not, adds them as NA and emits a message) #' \item cases appear only once for the same algorithm (if not, raises error) #' } #' If the argument \code{na.treat} for treatment of NA is specified, NAs will be handled respectively. #' #' It might be reasonable to disable the sanity check for further computations (e.g., for performance reasons #' during bootstrapping (\code{\link{bootstrap.ranked.list}}) where cases are actually allowed to appear more than once for the same algorithm). #' #' @param object A data frame containing the assessment data. #' @param case A string specifying the name of the column that contains the case identifiers. #' @param algorithm A string specifying the name of the column that contains the algorithm identifiers. #' @param value A string specifying the name of the column that contains the performance values. #' @param by A string specifying the name of the column that contains the task identifiers. Required for multi-task data set. #' @param taskName A string specifying the task name for single-task data set that does not contain a task column. #' This argument is optional for a single-task data set and is ignored for a multi-task data set. #' @param annotator Not supported #' @param smallBetter A boolean specifying whether small performance values indicate better algorithm performance. #' @param na.treat Indicates how missing perfomance values are treated if sanity check is enabled. It can be 'na.rm', numeric value or function. #' For a numeric value or function, NAs will be replaced by the specified values. For 'na.rm', rows that contain missing values will be removed. #' @param check A boolean to indicate to perform a sanity check of the specified data set and arguments if set to \code{TRUE}. #' #' @return An S3 object to represent the configuration of an assessment data set. -#' @export #' #' @examples #' # single-task data set #' #' # multi-task data set #' +#' @export as.challenge=function(object, case, algorithm, value, by=NULL, taskName=NULL, annotator=NULL, smallBetter=FALSE, na.treat=NULL, # optional check=TRUE) { object=as.data.frame(object[,c(value, algorithm, case, by, annotator)]) # sanity checks if (check) { if (!is.null(by) && !is.null(taskName)) { warning("Argument 'taskName' is ignored for multi-task data set.") } # Add task column for data set without task column by using the specified task name. if (is.null(by) && !is.null(taskName)) { taskName <- trimws(taskName) if (taskName == "") { stop("Argument 'taskName' is empty.") } object <- cbind(task=taskName, object) by = "task" } # Add task column for data set without task column by using a dummy task name. if (is.null(by) && is.null(taskName)) { object <- cbind(task="dummyTask", object) by = "task" } object=splitby(object,by=by) object=lapply(object,droplevels) missingData = n.missing = list() for (task in names(object)) { if (!all(is.numeric(object[[task]][[value]]))) stop("Performance values must be numeric.") n.missing[[task]] <- sum(is.na(object[[task]][[value]])) # already missing before na.treat; for report if (n.missing[[task]]>0) message("Note: ", n.missing, " missing cases have been found in the data set.") # check for missing cases missingData[[task]]=object[[task]] %>% expand(!!as.symbol(algorithm), !!as.symbol(case))%>% anti_join(object[[task]], by=c( algorithm,case)) if (nrow(missingData[[task]])>0) { if (length(object) == 1 ) { # single task message("Performance of not all algorithms has been observed for all cases. Therefore, missings have been inserted in the following cases:") } else { # multi task message("Performance of not all algorithms has been observed for all cases in task '", task, "'. Therefore, missings have been inserted in the following cases:") } print(as.data.frame(missingData[[task]])) object[[task]]=as.data.frame(object[[task]] %>% complete(task, !!as.symbol(algorithm), !!as.symbol(case))) } # check duplicate cases all1=apply(table(object[[task]][[algorithm]], object[[task]][[case]]), 2, function(x) all(x==1)) if (!all(all1)) { n.duplicated <- sum(all1!=1) if (length(object) == 1 ) { # single task if (n.duplicated/length(all1) >= 1/5) { # at least a quarter of the cases is duplicated stop ("The following case(s) appear(s) more than once for the same algorithm. Please revise. ", "Or are you considering a multi-task challenge and forgot to specify argument 'by'?\n", "Case(s): ", paste(names(which(all1!=1)), collapse=", ") ) } else { stop ("The following case(s) appear(s) more than once for the same algorithm. Please revise.\n", "Case(s): ", paste(names(which(all1!=1)), collapse=", ") ) } } else { # multi task stop ("The following case(s) appear(s) more than once for the same algorithm in task '", task, "'. Please revise.\n", "Case(s): ", paste(names(which(all1!=1)), collapse=", ") ) } } if (!is.null(na.treat)) { if (is.numeric(na.treat)) object[[task]][,value][is.na(object[[task]][,value])]=na.treat else if (is.function(na.treat)) object[[task]][,value][is.na(object[[task]][,value])]=na.treat(object[[task]][,value][is.na(object[[task]][,value])]) else if (is.character(na.treat) && na.treat=="na.rm") object[[task]]=object[[task]][!is.na(object[[task]][,value]),] } } } if (check==TRUE && (any(sapply(missingData, function(x) nrow(x))>0) |any(n.missing>0))) { if (is.null(na.treat)) message("For aggregate-then-rank, na.treat will have to be specified. ", "For rank-then-aggregate, missings will implicitly lead to the algorithm ranked last for the missing test case." ) else if (is.numeric(na.treat)) message("All missings have been replaced by the value ", na.treat,".\n") else if (is.character(na.treat) && na.treat=="na.rm") message("All missings have been removed.") else if (is.function(na.treat)) { message("Missings have been replaced using function ") print(na.treat) } } if (check==TRUE){ attr(object,"n.missing")=n.missing attr(object,"missingData")=missingData } attr(object,"na.treat")=na.treat attr(object,"algorithm")=algorithm attr(object,"value")=value attr(object,"case")=case attr(object,"annotator")=annotator attr(object,"by")=by attr(object,"smallBetter")=smallBetter attr(object,"check")=check class(object)=c("challenge", class(object)) object } diff --git a/R/wrapper.R b/R/wrapper.R index 96480ba..bef8971 100644 --- a/R/wrapper.R +++ b/R/wrapper.R @@ -1,82 +1,84 @@ #' Performs ranking via aggregate-then-rank #' #' Performs ranking by first aggregating performance values across all cases (e.g., with the mean, median or another quantile) for each algorithm. #' This aggregate is then used to compute a rank for each algorithm. #' #' @param object The challenge object. #' @param FUN The aggregation function, e.g. mean, median, min, max, function(x), quantile(x, probs=0.05). #' @param ties.method A string specifying how ties are treated, see \code{\link{base::rank}}. +#' @param ... Further arguments passed to or from other functions. #' #' @return An S3 object of class "ranked.list" to represent a ranked assessment data set. #' #' @examples #' #' \dontrun{ #' aggregateThenRank(challenge, FUN = mean, ties.method = "average", na.treat = 0) #' } #' #' @family ranking functions #' @export aggregateThenRank=function(object,FUN,ties.method = "min",...){ object %>% aggregate(FUN=FUN,...) %>% rank(ties.method = ties.method) } #' Performs ranking via test-then-rank #' #' Computes statistical hypothesis tests based on Wilcoxon signed rank test for each possible #' pair of algorithms to assess differences in metric values between the algorithms. #' Then ranking is performed according to the number of significant one-sided test results. #' If algorithms have the same number of significant test results, then they obtain the same rank. #' #' @param object The challenge object. #' @param ties.method A string specifying how ties are treated, see \code{\link{base::rank}}. +#' @param ... Further arguments passed to or from other functions. #' #' @return An S3 object of class "ranked.list" to represent a ranked assessment data set. #' #' @examples #' \dontrun{ #' testThenRank(challenge, #' alpha=0.05, # significance level #' p.adjust.method="none", # method for adjustment for multiple testing, see ?p.adjust #' na.treat = 0) #' } #' #' @family ranking functions #' @export testThenRank=function(object, ties.method = "min",...){ object %>% aggregate(FUN="significance",...) %>% rank(ties.method = ties.method) } #' Performs ranking via rank-then-aggregate #' #' Performs ranking by first computing a rank for each case for each algorithm (”rank first”). #' The final rank is based on the aggregated ranks for the cases. This ranking method handles missing values implicitly #' by assigning the worst rank to missing algorithm performances. #' #' #' @param object The challenge object. #' @param FUN The aggregation function, e.g., mean, median, min, max, function(x), quantile(x, probs=0.05). #' @param ties.method A string specifying how ties are treated, see \code{\link{base::rank}}. #' #' @return An S3 object of class "ranked.list" to represent a ranked assessment data set. #' #' @examples #' \dontrun{ #' rankThenAggregate(challenge, FUN = mean) #' } #' #' @family ranking functions #' @export rankThenAggregate=function(object, FUN, ties.method = "min" ){ object %>% rank(ties.method = ties.method)%>% aggregate(FUN=FUN) %>% rank(ties.method = ties.method) # small rank is always best, i.e. smallBetter always TRUE }