diff --git a/R/Aggregate.R b/R/Aggregate.R index 2a3f680..5211082 100644 --- a/R/Aggregate.R +++ b/R/Aggregate.R @@ -1,166 +1,166 @@ Aggregate <- function(object,...) UseMethod("Aggregate") Aggregate.default <- function(object, ...) aggregate(object,...) #stats::aggregate Aggregate.data.frame <-function(object, x, algorithm, FUN=mean, na.treat="na.rm", #can be na.rm, numeric value or function case, alpha=0.05, p.adjust.method="none", alternative="one.sided", test.fun=function(x,y) wilcox.test(x, y, alternative = alternative, exact=FALSE, paired = TRUE)$p.value, largeBetter=TRUE, # only needed for significance ... ){ call=match.call(expand.dots = T) if (is.numeric(na.treat)) object[,x][is.na(object[,x])]=na.treat else if (is.function(na.treat)) object[,x][is.na(object[,x])]=na.treat(object[,x][is.na(object[,x])]) else if (na.treat=="na.rm") object=object[!is.na(object[,x]),] else stop("Argument 'na.treat' is invalid. It can be 'na.rm', numeric value or function.") if (is.character(FUN) && FUN=="significance"){ if(missing(case)| missing(largeBetter)| missing(alpha)) stop("If FUN='significance' arguments case, largeBetter and alpha need to be given") if (length(unique(object[[algorithm]]))<=1){ warning("only one ", algorithm, " available") agg=data.frame() } else { agg = significance(object, x, algorithm, case, alpha, largeBetter, p.adjust.method = p.adjust.method, alternative = alternative, ... ) } isSignificance=TRUE } else { if (is.function(FUN)) FUNname <-gsub('\")',"",gsub('UseMethod(\"',"",deparse(functionBody(FUN)),fixed = T),fixed=T) else if (is.character(FUN)) FUNname=FUN if (is.character(FUN)) FUN=try(eval(parse(text=FUN)),silent = T) if (!is.function(FUN)) stop("FUN has to be a function (possibly as character) or 'significance'") agg <- aggregate(object[, x], by = list(object[, algorithm]), FUN = function(z) do.call(FUN, args = list(x = z)) ) names(agg)=c(algorithm, paste0(x,"_", FUNname)) rownames(agg)=agg[,1] agg=agg[,-1,drop=F] isSignificance=FALSE } res=list(FUN = . %>% (call), FUN.list=list(FUN), call=list(call), data=object, mat=agg, isSignificance= isSignificance) class(res)=c("aggregated",class(res)) res } Aggregate.list <-function(object, x, algorithm, FUN = mean, na.treat = "na.rm", parallel = FALSE, progress = "none", case, alpha = 0.05, p.adjust.method = "none", alternative = "one.sided", test.fun = function(x, y) wilcox.test(x, y, alternative = alternative, exact = FALSE, paired = TRUE)$p.value, largeBetter = TRUE, # only needed for significance ... ) { call=match.call(expand.dots = T) if (is.character(FUN) && FUN=="significance"){ if(missing(case)| missing(largeBetter)| missing(alpha)) stop("If FUN='significance' arguments case, largeBetter and alpha need to be given") matlist=llply(1:length(object), function(id){ piece=object[[id]] if (length(unique(piece[[algorithm]]))<=1){ - warning("only one ", algorithm, " available in element ", names(object)[id]) + warning("Only one algorithm available in task '", names(object)[id], "'.") return(data.frame("prop_significance"=rep(NA,length(unique(piece[[algorithm]]))), row.names = unique(piece[[algorithm]]))) } if (is.numeric(na.treat)) piece[,x][is.na(piece[,x])]=na.treat else if (is.function(na.treat)) piece[,x][is.na(piece[,x])]=na.treat(piece[,x][is.na(piece[,x])]) else if (na.treat=="na.rm") piece=piece[!is.na(piece[,x]),] else stop("Argument 'na.treat' is invalid. It can be 'na.rm', numeric value or function.") xmean <- significance(piece, x, algorithm, case, alpha, p.adjust.method=p.adjust.method, largeBetter, alternative=alternative, ...) class(xmean)=c("aggregated", class(xmean)) xmean }, .parallel=parallel, .progress=progress ) isSignificance=TRUE } else { if (is.function(FUN)) FUNname <-gsub('\")',"",gsub('UseMethod(\"',"",deparse(functionBody(FUN)),fixed = T),fixed=T) else if (is.character(FUN)) FUNname=FUN - + if (is.character(FUN)) FUN=try(eval(parse(text=FUN)), silent = T) if (!is.function(FUN)) stop("FUN has to be a function (possibly as character) or 'significance'") matlist=llply(object, function(piece){ if (is.numeric(na.treat)) piece[,x][is.na(piece[,x])]=na.treat else if (is.function(na.treat)) piece[,x][is.na(piece[,x])]=na.treat(piece[,x][is.na(piece[,x])]) else if (na.treat=="na.rm") piece=piece[!is.na(piece[,x]),] else stop("Argument 'na.treat' is invalid. It can be 'na.rm', numeric value or function.") xmean <- aggregate(piece[,x], by=list(piece[,algorithm]), FUN=function(z) do.call(FUN,args=list(x=z))) names(xmean)=c(algorithm, paste0(x,"_",FUNname)) rownames(xmean)=xmean[,1] xmean=xmean[,-1,drop=F] xmean }, .parallel=parallel, .progress=progress ) isSignificance=FALSE } names(matlist)=names(object) res=list(FUN = . %>% (call), FUN.list=list(FUN), call=list(call), data=object, matlist=matlist, isSignificance=isSignificance ) class(res)=c("aggregated.list",class(res)) res } diff --git a/tests/testthat/test-testThenRank.R b/tests/testthat/test-testThenRank.R index e0434b2..060cef5 100644 --- a/tests/testthat/test-testThenRank.R +++ b/tests/testthat/test-testThenRank.R @@ -1,336 +1,346 @@ test_that("test-then-rank raises warning for one case", { data <- rbind( data.frame(algo="A1", value=0.6, case="C1"), data.frame(algo="A2", value=0.8, case="C1")) challenge <- as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter = TRUE) expect_warning(ranking <- challenge%>%testThenRank(), "Only one case in task.", fixed = TRUE) expectedRanking <- rbind( "A1" = data.frame(prop_significance = 0, rank = 1), "A2" = data.frame(prop_significance = 0, rank = 1)) expect_equal(ranking$matlist$T1, expectedRanking) }) +test_that("test-then-rank raises warning for one algorithm", { + data <- rbind( + data.frame(algo="A1", value=0.6, case="C1")) + + challenge <- as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter = TRUE) + + expect_warning(ranking <- challenge%>%testThenRank(), + "Only one algorithm available in task 'T1'.", fixed = TRUE) +}) + test_that("test-then-rank works with two algorithms, small values are better", { data <- rbind( data.frame(algo="A1", value=0.2, case="C1"), data.frame(algo="A1", value=0.2, case="C2"), data.frame(algo="A1", value=0.2, case="C3"), data.frame(algo="A1", value=0.2, case="C4"), data.frame(algo="A2", value=1.0, case="C1"), data.frame(algo="A2", value=1.0, case="C2"), data.frame(algo="A2", value=1.0, case="C3"), data.frame(algo="A2", value=1.0, case="C4")) challenge <- as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter = TRUE) ranking <- challenge%>%testThenRank() expectedRanking <- rbind( "A1" = data.frame(prop_significance = 1, rank = 1), "A2" = data.frame(prop_significance = 0, rank = 2)) expect_equal(ranking$matlist$T1, expectedRanking) }) test_that("test-then-rank works with two algorithms, large values are better", { data <- rbind( data.frame(algo="A1", value=0.2, case="C1"), data.frame(algo="A1", value=0.2, case="C2"), data.frame(algo="A1", value=0.2, case="C3"), data.frame(algo="A1", value=0.2, case="C4"), data.frame(algo="A2", value=1.0, case="C1"), data.frame(algo="A2", value=1.0, case="C2"), data.frame(algo="A2", value=1.0, case="C3"), data.frame(algo="A2", value=1.0, case="C4")) challenge <- as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter = FALSE) ranking <- challenge%>%testThenRank() expectedRanking <- rbind( "A1" = data.frame(prop_significance = 0, rank = 2), "A2" = data.frame(prop_significance = 1, rank = 1)) expect_equal(ranking$matlist$T1, expectedRanking) }) test_that("test-then-rank works for ties method 'max'", { data <- rbind( data.frame(algo="A1", value=0.6, case="C1"), data.frame(algo="A1", value=0.6, case="C2"), data.frame(algo="A2", value=0.8, case="C1"), data.frame(algo="A2", value=0.8, case="C2")) challenge <- as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter = TRUE) ranking <- challenge%>%testThenRank(ties.method = "max") expectedRanking <- rbind( "A1" = data.frame(prop_significance = 0, rank = 2), "A2" = data.frame(prop_significance = 0, rank = 2)) expect_equal(ranking$matlist$T1, expectedRanking) }) test_that("test-then-rank raises error for invalid ties method", { data <- rbind( data.frame(algo="A1", value=0.6, case="C1"), data.frame(algo="A1", value=0.6, case="C2"), data.frame(algo="A2", value=0.8, case="C1"), data.frame(algo="A2", value=0.8, case="C2")) challenge <- as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter = TRUE) expect_error(challenge%>%testThenRank(ties.method = "maxx"), "'arg' should be one of \"average\", \"first\", \"last\", \"random\", \"max\", \"min\"", fixed = TRUE) }) test_that("test-then-rank raises error for invalid ties method even when no ties present", { data <- rbind( data.frame(algo="A1", value=0.2, case="C1"), data.frame(algo="A1", value=0.2, case="C2"), data.frame(algo="A1", value=0.2, case="C3"), data.frame(algo="A1", value=0.2, case="C4"), data.frame(algo="A2", value=1.0, case="C1"), data.frame(algo="A2", value=1.0, case="C2"), data.frame(algo="A2", value=1.0, case="C3"), data.frame(algo="A2", value=1.0, case="C4")) challenge <- as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter = TRUE) expect_error(challenge%>%testThenRank(ties.method = "maxx"), "'arg' should be one of \"average\", \"first\", \"last\", \"random\", \"max\", \"min\"", fixed = TRUE) }) test_that("test-then-rank raises error when no NA treatment specified but NAs are contained", { data <- rbind( data.frame(algo="A1", value=NA, case="C1"), data.frame(algo="A1", value=0.2, case="C2"), data.frame(algo="A1", value=0.2, case="C3"), data.frame(algo="A1", value=0.2, case="C4"), data.frame(algo="A2", value=1.0, case="C1"), data.frame(algo="A2", value=1.0, case="C2"), data.frame(algo="A2", value=1.0, case="C3"), data.frame(algo="A2", value=1.0, case="C4")) challenge <- as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter = TRUE) expect_error(challenge%>%testThenRank(), "argument \"na.treat\" is missing, with no default", fixed = TRUE) }) test_that("test-then-rank raises error when invalid NA treatment specified and NAs are contained", { data <- rbind( data.frame(algo="A1", value=NA, case="C1"), data.frame(algo="A1", value=0.2, case="C2"), data.frame(algo="A1", value=0.2, case="C3"), data.frame(algo="A1", value=0.2, case="C4"), data.frame(algo="A2", value=1.0, case="C1"), data.frame(algo="A2", value=1.0, case="C2"), data.frame(algo="A2", value=1.0, case="C3"), data.frame(algo="A2", value=1.0, case="C4")) challenge <- as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter = TRUE) expect_error(challenge%>%testThenRank(na.treat = "na.rmx"), "Argument 'na.treat' is invalid. It can be 'na.rm', numeric value or function.", fixed = TRUE) }) test_that("specified NA treatment does not influence ranking when no NAs are contained", { data <- rbind( data.frame(algo="A1", value=0.2, case="C1"), data.frame(algo="A1", value=0.2, case="C2"), data.frame(algo="A1", value=0.2, case="C3"), data.frame(algo="A1", value=0.2, case="C4"), data.frame(algo="A2", value=1.0, case="C1"), data.frame(algo="A2", value=1.0, case="C2"), data.frame(algo="A2", value=1.0, case="C3"), data.frame(algo="A2", value=1.0, case="C4")) challenge <- as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter = TRUE) ranking <- challenge%>%testThenRank(na.treat = 0) expectedRanking <- rbind( "A1" = data.frame(prop_significance = 1, rank = 1), "A2" = data.frame(prop_significance = 0, rank = 2)) expect_equal(ranking$matlist$T1, expectedRanking) }) test_that("NAs are replaced by numeric value", { data <- rbind( data.frame(algo="A1", value=NA, case="C1"), data.frame(algo="A1", value=0.2, case="C2"), data.frame(algo="A1", value=0.2, case="C3"), data.frame(algo="A1", value=0.2, case="C4"), data.frame(algo="A2", value=1.0, case="C1"), data.frame(algo="A2", value=1.0, case="C2"), data.frame(algo="A2", value=1.0, case="C3"), data.frame(algo="A2", value=1.0, case="C4")) challenge <- as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter = TRUE) ranking <- challenge%>%testThenRank(na.treat = 100.0) expectedRanking <- rbind( "A1" = data.frame(prop_significance = 0, rank = 1), "A2" = data.frame(prop_significance = 0, rank = 1)) expect_equal(ranking$matlist$T1, expectedRanking) }) test_that("NAs are replaced by function value", { data <- rbind( data.frame(algo="A1", value=NA, case="C1"), data.frame(algo="A1", value=0.2, case="C2"), data.frame(algo="A1", value=0.2, case="C3"), data.frame(algo="A1", value=0.2, case="C4"), data.frame(algo="A2", value=1.0, case="C1"), data.frame(algo="A2", value=1.0, case="C2"), data.frame(algo="A2", value=1.0, case="C3"), data.frame(algo="A2", value=1.0, case="C4")) replacementFunction <- function(x) { 0.0 } challenge <- as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter = TRUE) ranking <- challenge%>%testThenRank(na.treat = replacementFunction) expectedRanking <- rbind( "A1" = data.frame(prop_significance = 1, rank = 1), "A2" = data.frame(prop_significance = 0, rank = 2)) expect_equal(ranking$matlist$T1, expectedRanking) }) test_that("NAs are removed", { data <- rbind( data.frame(algo="A1", value=NA, case="C1"), data.frame(algo="A1", value=0.2, case="C2"), data.frame(algo="A1", value=0.2, case="C3"), data.frame(algo="A1", value=0.2, case="C4"), data.frame(algo="A2", value=1.0, case="C1"), data.frame(algo="A2", value=1.0, case="C2"), data.frame(algo="A2", value=1.0, case="C3"), data.frame(algo="A2", value=1.0, case="C4")) challenge <- as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter = TRUE) ranking <- challenge%>%testThenRank(na.treat = "na.rm") expectedRanking <- rbind( "A1" = data.frame(prop_significance = 0, rank = 1), "A2" = data.frame(prop_significance = 0, rank = 1)) expect_equal(ranking$matlist$T1, expectedRanking) }) test_that("test-then-rank works for multi-task data set with no missing data", { dataTask1 <- cbind(task="T1", rbind( data.frame(algo="A1", value=0.2, case="C1"), data.frame(algo="A1", value=0.2, case="C2"), data.frame(algo="A1", value=0.2, case="C3"), data.frame(algo="A1", value=0.2, case="C4"), data.frame(algo="A2", value=1.0, case="C1"), data.frame(algo="A2", value=1.0, case="C2"), data.frame(algo="A2", value=1.0, case="C3"), data.frame(algo="A2", value=1.0, case="C4") )) dataTask2 <- cbind(task="T2", rbind( data.frame(algo="A1", value=0.6, case="C1"), data.frame(algo="A1", value=0.6, case="C2"), data.frame(algo="A2", value=0.8, case="C1"), data.frame(algo="A2", value=0.8, case="C2") )) data <- rbind(dataTask1, dataTask2) challenge <- as.challenge(data, by="task", algorithm="algo", case="case", value="value", smallBetter = TRUE) ranking <- challenge%>%testThenRank() expectedRankingTask1 <- rbind( "A1" = data.frame(prop_significance = 1, rank = 1), "A2" = data.frame(prop_significance = 0, rank = 2)) expectedRankingTask2 <- rbind( "A1" = data.frame(prop_significance = 0, rank = 1), "A2" = data.frame(prop_significance = 0, rank = 1)) expect_equal(ranking$matlist$T1, expectedRankingTask1) expect_equal(ranking$matlist$T2, expectedRankingTask2) }) test_that("NAs are replaced by numeric value in multi-task data set", { dataTask1 <- cbind(task="T1", rbind( data.frame(algo="A1", value=NA, case="C1"), data.frame(algo="A1", value=0.2, case="C2"), data.frame(algo="A1", value=0.2, case="C3"), data.frame(algo="A1", value=0.2, case="C4"), data.frame(algo="A2", value=1.0, case="C1"), data.frame(algo="A2", value=1.0, case="C2"), data.frame(algo="A2", value=1.0, case="C3"), data.frame(algo="A2", value=1.0, case="C4") )) dataTask2 <- cbind(task="T2", rbind( data.frame(algo="A1", value=0.6, case="C1"), data.frame(algo="A1", value=0.6, case="C2"), data.frame(algo="A2", value=0.8, case="C1"), data.frame(algo="A2", value=0.8, case="C2") )) data <- rbind(dataTask1, dataTask2) challenge <- as.challenge(data, by="task", algorithm="algo", case="case", value="value", smallBetter = TRUE) ranking <- challenge%>%testThenRank(na.treat = 0) expectedRankingTask1 <- rbind( "A1" = data.frame(prop_significance = 1, rank = 1), "A2" = data.frame(prop_significance = 0, rank = 2)) expectedRankingTask2 <- rbind( "A1" = data.frame(prop_significance = 0, rank = 1), "A2" = data.frame(prop_significance = 0, rank = 1)) expect_equal(ranking$matlist$T1, expectedRankingTask1) expect_equal(ranking$matlist$T2, expectedRankingTask2) }) test_that("test-then-rank raises error when no NA treatment specified but NAs are contained in multi-task data set", { dataTask1 <- cbind(task="T1", rbind( data.frame(algo="A1", value=0.2, case="C1"), data.frame(algo="A1", value=0.2, case="C2"), data.frame(algo="A1", value=0.2, case="C3"), data.frame(algo="A1", value=0.2, case="C4"), data.frame(algo="A2", value=1.0, case="C1"), data.frame(algo="A2", value=1.0, case="C2"), data.frame(algo="A2", value=1.0, case="C3"), data.frame(algo="A2", value=1.0, case="C4") )) dataTask2 <- cbind(task="T2", rbind( data.frame(algo="A1", value=0.6, case="C1"), data.frame(algo="A1", value=0.6, case="C2"), data.frame(algo="A2", value=NA, case="C1"), data.frame(algo="A2", value=0.8, case="C2") )) data <- rbind(dataTask1, dataTask2) challenge <- as.challenge(data, by="task", algorithm="algo", case="case", value="value", smallBetter = TRUE) expect_error(challenge%>%testThenRank(), "argument \"na.treat\" is missing, with no default", fixed = TRUE) })