diff --git a/R/challengeR.R b/R/challengeR.R index fb5ed9b..7f161d1 100644 --- a/R/challengeR.R +++ b/R/challengeR.R @@ -1,102 +1,104 @@ #' Title #' #' @param object #' @param value #' @param algorithm #' @param case #' @param taskName Optional for single-task data set that does not contain a task column. #' @param by The name of the column that contains the task identifiers. Required for multi-task data set. #' @param annotator #' @param smallBetter #' @param na.treat #' @param check #' #' @return #' @export #' #' @examples as.challenge=function(object, value, algorithm , case=NULL, taskName=NULL, by=NULL, annotator=NULL, smallBetter=FALSE, na.treat=NULL, # optional check=TRUE) { object=as.data.frame(object[,c(value, algorithm, case, by, annotator)]) # sanity checks if (check) { if (!is.null(by) && !is.null(taskName)) { warning("Argument 'taskName' is ignored for multi-task data set.") } # Add task column for data set without task column by using the specified task name. if (is.null(by) && !is.null(taskName)) { taskName <- trimws(taskName) if (taskName == "") { stop("Argument 'taskName' is empty.") } object <- cbind(task=taskName, object) by = "task" } # Add task column for data set without task column by using a dummy task name. if (is.null(by) && is.null(taskName)) { object <- cbind(task="dummyTask", object) by = "task" } object=splitby(object,by=by) object=lapply(object,droplevels) for (task in names(object)) { - missingData=object[[task]] %>% - expand(!!as.symbol(algorithm), - !!as.symbol(case))%>% - anti_join(object[[task]], - by=c( algorithm,case)) - if (nrow(missingData)>0) { - message("Performance of not all algorithms is observed for all cases in task '", - task, - "'. Inserted as missings in following cases:") - print(as.data.frame(missingData)) - object[[task]]=as.data.frame(object[[task]] %>% - complete(!!as.symbol(algorithm), - !!as.symbol(case))) - } - else { - all1=apply(table(object[[task]][[algorithm]], - object[[task]][[case]]), - 2, - function(x) all(x==1)) - if (!all(all1)) stop ("Case(s) (", - paste(names(which(all1!=1)), - collapse=", "), - ") appear(s) more than once for the same algorithm in task '", - task, "'.") - } + # check for missing cases + missingData=object[[task]] %>% + expand(!!as.symbol(algorithm), + !!as.symbol(case))%>% + anti_join(object[[task]], + by=c( algorithm,case)) + if (nrow(missingData)>0) { + message("Performance of not all algorithms is observed for all cases in task '", + task, + "'. Inserted as missings in following cases:") + print(as.data.frame(missingData)) + object[[task]]=as.data.frame(object[[task]] %>% + complete(!!as.symbol(algorithm), + !!as.symbol(case))) + } + # check duplicate cases + all1=apply(table(object[[task]][[algorithm]], + object[[task]][[case]]), + 2, + function(x) all(x==1)) + if (!all(all1)) stop ("Case(s) (", + paste(names(which(all1!=1)), + collapse=", "), + ") appear(s) more than once for the same algorithm in task '", + task, + "'. Please revise.") + # } if (!is.null(na.treat)) { if (is.numeric(na.treat)) object[[task]][,value][is.na(object[[task]][,value])]=na.treat else if (is.function(na.treat)) object[[task]][,value][is.na(object[[task]][,value])]=na.treat(object[[task]][,value][is.na(object[[task]][,value])]) else if (na.treat=="na.rm") object[[task]]=object[[task]][!is.na(object[[task]][,value]),] } } } attr(object,"algorithm")=algorithm attr(object,"value")=value attr(object,"case")=case attr(object,"annotator")=annotator attr(object,"by")=by attr(object,"largeBetter")=!smallBetter attr(object,"check")=check class(object)=c("challenge", class(object)) object }