diff --git a/R/Aggregate.R b/R/Aggregate.R index ed50eca..a93f190 100644 --- a/R/Aggregate.R +++ b/R/Aggregate.R @@ -1,167 +1,163 @@ Aggregate <- function(object,...) UseMethod("Aggregate") Aggregate.default <- function(object, ...) aggregate(object,...) #stats::aggregate Aggregate.data.frame <-function(object, x, - algorithm, + algorithm, FUN=mean, na.treat="na.rm", #can be na.rm, numeric value or function - case, - alpha=0.05, p.adjust.method="none", + case, + alpha=0.05, p.adjust.method="none", alternative="one.sided", test.fun=function(x,y) wilcox.test(x, y, alternative = alternative, - exact=FALSE, + exact=FALSE, paired = TRUE)$p.value, - largeBetter=TRUE, # only needed for significance + largeBetter=TRUE, # only needed for significance ... ){ - call=match.call(expand.dots = T) + call=match.call(expand.dots = T) if (is.numeric(na.treat)) object[,x][is.na(object[,x])]=na.treat else if (is.function(na.treat)) object[,x][is.na(object[,x])]=na.treat(object[,x][is.na(object[,x])]) else if (na.treat=="na.rm") object=object[!is.na(object[,x]),] - else stop("Argument \"na.treat\" is invalid. It can be \"na.rm\", numeric value or function.") + else stop("Argument 'na.treat' is invalid. It can be 'na.rm', numeric value or function.") if (is.character(FUN) && FUN=="significance"){ if(missing(case)| missing(largeBetter)| missing(alpha)) stop("If FUN='significance' arguments case, largeBetter and alpha need to be given") if (length(unique(object[[algorithm]]))<=1){ warning("only one ", algorithm, " available") agg=data.frame() } else { agg = significance(object, x, algorithm, case, alpha, largeBetter, p.adjust.method = p.adjust.method, alternative = alternative, ... ) } isSignificance=TRUE } else { FUNname=as.character(call$FUN) if (is.character(FUN)) FUN=try(eval(parse(text=FUN)),silent = T) - + if (!is.function(FUN)) stop("FUN has to be a function (possibly as character) or 'significance'") agg <- aggregate(object[, x], by = list(object[, algorithm]), FUN = function(z) do.call(FUN, args = list(x = z)) ) names(agg)=c(algorithm, paste0(x,"_", FUNname)) rownames(agg)=agg[,1] agg=agg[,-1,drop=F] isSignificance=FALSE } res=list(FUN = . %>% (call), FUN.list=list(FUN), call=list(call), data=object, mat=agg, isSignificance= isSignificance) - + class(res)=c("aggregated",class(res)) res - + } Aggregate.list <-function(object, x, algorithm, FUN = mean, na.treat = "na.rm", parallel = FALSE, progress = "none", case, alpha = 0.05, p.adjust.method = "none", alternative = "one.sided", test.fun = function(x, y) wilcox.test(x, y, alternative = alternative, exact = FALSE, paired = TRUE)$p.value, largeBetter = TRUE, # only needed for significance ... ) { - call=match.call(expand.dots = T) + call=match.call(expand.dots = T) if (is.character(FUN) && FUN=="significance"){ if(missing(case)| missing(largeBetter)| missing(alpha)) stop("If FUN='significance' arguments case, largeBetter and alpha need to be given") - matlist=llply(1:length(object), - function(id){ + matlist=llply(1:length(object), + function(id){ piece=object[[id]] if (length(unique(piece[[algorithm]]))<=1){ warning("only one ", algorithm, " available in element ", names(object)[id]) return(data.frame("prop_significance"=rep(NA,length(unique(piece[[algorithm]]))), - row.names = unique(piece[[algorithm]]))) - } + row.names = unique(piece[[algorithm]]))) + } if (is.numeric(na.treat)) piece[,x][is.na(piece[,x])]=na.treat else if (is.function(na.treat)) piece[,x][is.na(piece[,x])]=na.treat(piece[,x][is.na(piece[,x])]) else if (na.treat=="na.rm") piece=piece[!is.na(piece[,x]),] - xmean <- significance(piece, - x, - algorithm, - case, - alpha, + else stop("Argument 'na.treat' is invalid. It can be 'na.rm', numeric value or function.") + + xmean <- significance(piece, + x, + algorithm, + case, + alpha, p.adjust.method=p.adjust.method, largeBetter, alternative=alternative, ...) class(xmean)=c("aggregated", class(xmean)) xmean - }, + }, .parallel=parallel, .progress=progress ) isSignificance=TRUE - + } else { if (is.character(FUN)) FUN=try(eval(parse(text=FUN)), silent = T) FUNname=as.character(call$FUN) if (!is.function(FUN)) stop("FUN has to be a function (possibly as character) or 'significance'") - - matlist=llply(object, - function(piece){ + + matlist=llply(object, + function(piece){ if (is.numeric(na.treat)) piece[,x][is.na(piece[,x])]=na.treat else if (is.function(na.treat)) piece[,x][is.na(piece[,x])]=na.treat(piece[,x][is.na(piece[,x])]) else if (na.treat=="na.rm") piece=piece[!is.na(piece[,x]),] - - xmean <- aggregate(piece[,x], - by=list(piece[,algorithm]), + else stop("Argument 'na.treat' is invalid. It can be 'na.rm', numeric value or function.") + + xmean <- aggregate(piece[,x], + by=list(piece[,algorithm]), FUN=function(z) do.call(FUN,args=list(x=z))) names(xmean)=c(algorithm, paste0(x,"_",FUNname)) rownames(xmean)=xmean[,1] xmean=xmean[,-1,drop=F] xmean - }, + }, .parallel=parallel, .progress=progress ) isSignificance=FALSE } names(matlist)=names(object) res=list(FUN = . %>% (call), FUN.list=list(FUN), call=list(call), data=object, - matlist=matlist, + matlist=matlist, isSignificance=isSignificance ) - + class(res)=c("aggregated.list",class(res)) res } - - - - - - - diff --git a/tests/testthat/test-aggregateThenRank.R b/tests/testthat/test-aggregateThenRank.R index a4f55e2..df03703 100644 --- a/tests/testthat/test-aggregateThenRank.R +++ b/tests/testthat/test-aggregateThenRank.R @@ -1,346 +1,346 @@ test_that("aggregate-than-rank by mean works with two algorithms for one case, small values are better", { data <- rbind( data.frame(algo="A1", value=0.6, case="C1"), data.frame(algo="A2", value=0.8, case="C1")) challenge <- as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter = TRUE) ranking <- challenge%>%aggregateThenRank(FUN = mean) expectedRanking <- rbind( "A1" = data.frame(value_FUN = 0.6, rank = 1), "A2" = data.frame(value_FUN = 0.8, rank = 2)) expect_equal(ranking$matlist$T1, expectedRanking) }) test_that("aggregate-than-rank by mean works with two algorithms (reverse order) for one case, small values are better", { data <- rbind( data.frame(algo = "A2", value = 0.8, case = "C1"), data.frame(algo = "A1", value = 0.6, case = "C1")) challenge <- as.challenge(data, taskName="T1", algorithm = "algo", case = "case", value = "value", smallBetter = TRUE) ranking <- challenge%>%aggregateThenRank(FUN = mean) expectedRanking <- rbind("A2" = data.frame(value_FUN = 0.8, rank = 2), "A1" = data.frame(value_FUN = 0.6, rank = 1)) expect_equal(ranking$matlist$T1, expectedRanking) }) test_that("aggregate-than-rank by mean works with two algorithms for one case, large values are better", { data <- rbind( data.frame(algo="A1", value=0.6, case="C1"), data.frame(algo="A2", value=0.8, case="C1")) challenge <- as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter = FALSE) ranking <- challenge%>%aggregateThenRank(FUN = mean) expectedRanking <- rbind( "A1" = data.frame(value_FUN = 0.6, rank = 2), "A2" = data.frame(value_FUN = 0.8, rank = 1)) expect_equal(ranking$matlist$T1, expectedRanking) }) test_that("aggregate-than-rank by mean works with two algorithms (reverse order) for one case, large values are better", { data <- rbind( data.frame(algo = "A2", value = 0.8, case = "C1"), data.frame(algo = "A1", value = 0.6, case = "C1")) challenge <- as.challenge(data, taskName="T1", algorithm = "algo", case = "case", value = "value", smallBetter = FALSE) ranking <- challenge%>%aggregateThenRank(FUN = mean) expectedRanking <- rbind("A2" = data.frame(value_FUN = 0.8, rank = 1), "A1" = data.frame(value_FUN = 0.6, rank = 2)) expect_equal(ranking$matlist$T1, expectedRanking) }) test_that("aggregate-than-rank raises error for invalid aggregation function", { data <- rbind( data.frame(algo="A1", value=0.6, case="C1"), data.frame(algo="A2", value=0.6, case="C1")) challenge <- as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter = TRUE) expect_error(challenge%>%aggregateThenRank(FUN = meanx), "object 'meanx' not found", fixed = TRUE) }) test_that("aggregate-than-rank by mean works with two algorithms for one case and 'min' as ties method", { data <- rbind( data.frame(algo="A1", value=0.6, case="C1"), data.frame(algo="A2", value=0.6, case="C1")) challenge <- as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter = TRUE) ranking <- challenge%>%aggregateThenRank(FUN = mean, ties.method = "min") expectedRanking <- rbind( "A1" = data.frame(value_FUN = 0.6, rank = 1), "A2" = data.frame(value_FUN = 0.6, rank = 1)) expect_equal(ranking$matlist$T1, expectedRanking) }) test_that("aggregate-than-rank by mean works with two algorithms for one case and 'max' as ties method", { data <- rbind( data.frame(algo="A1", value=0.6, case="C1"), data.frame(algo="A2", value=0.6, case="C1")) challenge <- as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter = TRUE) ranking <- challenge%>%aggregateThenRank(FUN = mean, ties.method = "max") expectedRanking <- rbind( "A1" = data.frame(value_FUN = 0.6, rank = 2), "A2" = data.frame(value_FUN = 0.6, rank = 2)) expect_equal(ranking$matlist$T1, expectedRanking) }) test_that("aggregate-than-rank raises error for invalid ties method", { data <- rbind( data.frame(algo="A1", value=0.6, case="C1"), data.frame(algo="A2", value=0.6, case="C1")) challenge <- as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter = TRUE) expect_error(challenge%>%aggregateThenRank(FUN = mean, ties.method = "maxx"), "'arg' should be one of \"average\", \"first\", \"last\", \"random\", \"max\", \"min\"", fixed = TRUE) }) test_that("aggregate-than-rank raises error for invalid ties method even when no ties present", { data <- rbind( data.frame(algo="A1", value=0.6, case="C1"), data.frame(algo="A2", value=0.8, case="C1")) challenge <- as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter = TRUE) expect_error(challenge%>%aggregateThenRank(FUN = mean, ties.method = "maxx"), "'arg' should be one of \"average\", \"first\", \"last\", \"random\", \"max\", \"min\"", fixed = TRUE) }) test_that("aggregate-than-rank by mean works with two algorithms for two cases", { data <- rbind( data.frame(algo="A1", value=0.6, case="C1"), data.frame(algo="A1", value=0.4, case="C2"), data.frame(algo="A2", value=0.8, case="C1"), data.frame(algo="A2", value=1.0, case="C2")) challenge <- as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter = TRUE) ranking <- challenge%>%aggregateThenRank(FUN = mean) expectedRanking <- rbind( "A1" = data.frame(value_FUN = 0.5, rank = 1), "A2" = data.frame(value_FUN = 0.9, rank = 2)) expect_equal(ranking$matlist$T1, expectedRanking) }) test_that("aggregate-than-rank by median works with two algorithms for two cases", { data <- rbind( data.frame(algo="A1", value=0.6, case="C1"), data.frame(algo="A1", value=0.4, case="C2"), data.frame(algo="A2", value=0.8, case="C1"), data.frame(algo="A2", value=1.0, case="C2")) challenge <- as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter = TRUE) ranking <- challenge%>%aggregateThenRank(FUN = median) expectedRanking <- rbind( "A1" = data.frame(value_FUN = 0.5, rank = 1), "A2" = data.frame(value_FUN = 0.9, rank = 2)) expect_equal(ranking$matlist$T1, expectedRanking) }) test_that("aggregate-than-rank by mean works with one algorithm for one case", { data <- rbind( data.frame(algo="A1", value=0.6, case="C1")) challenge <- as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter = TRUE) ranking <- challenge%>%aggregateThenRank(FUN = mean) expectedRanking <- rbind( "A1" = data.frame(value_FUN = 0.6, rank = 1)) expect_equal(ranking$matlist$T1, expectedRanking) }) test_that("aggregate-than-rank raises error when no NA treatment specified but NAs are contained", { data <- rbind( data.frame(algo="A1", value=NA, case="C1"), data.frame(algo="A2", value=0.8, case="C1")) challenge <- as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter = FALSE) expect_error(challenge%>%aggregateThenRank(FUN = mean), "argument \"na.treat\" is missing, with no default", fixed = TRUE) }) test_that("aggregate-than-rank raises error when invalid NA treatment specified and NAs are contained", { data <- rbind( data.frame(algo="A1", value=NA, case="C1"), data.frame(algo="A2", value=0.8, case="C1")) challenge <- as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter = FALSE) expect_error(challenge%>%aggregateThenRank(FUN = mean, na.treat = "na.rmx"), - "Argument \"na.treat\" is invalid. It can be \"na.rm\", numeric value or function.", fixed = TRUE) + "Argument 'na.treat' is invalid. It can be 'na.rm', numeric value or function.", fixed = TRUE) }) test_that("specified NA treatment does not influence ranking when no NAs are contained", { data <- rbind( data.frame(algo="A1", value=0.6, case="C1"), data.frame(algo="A2", value=0.8, case="C1")) challenge <- as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter = FALSE) ranking <- challenge%>%aggregateThenRank(FUN = mean, na.treat = 0) expectedRanking <- rbind( "A1" = data.frame(value_FUN = 0.6, rank = 2), "A2" = data.frame(value_FUN = 0.8, rank = 1)) expect_equal(ranking$matlist$T1, expectedRanking) }) test_that("NAs are replaced by numeric value", { data <- rbind( data.frame(algo="A1", value=NA, case="C1"), data.frame(algo="A2", value=0.8, case="C1")) challenge <- as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter = FALSE) ranking <- challenge%>%aggregateThenRank(FUN = mean, na.treat = 0) expectedRanking <- rbind( "A1" = data.frame(value_FUN = 0.0, rank = 2), "A2" = data.frame(value_FUN = 0.8, rank = 1)) expect_equal(ranking$matlist$T1, expectedRanking) }) test_that("NAs are replaced by function value", { data <- rbind( data.frame(algo="A1", value=NA, case="C1"), data.frame(algo="A2", value=0.8, case="C1")) replacementFunction <- function(x) { -1 } challenge <- as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter = FALSE) ranking <- challenge%>%aggregateThenRank(FUN = mean, na.treat = replacementFunction) expectedRanking <- rbind( "A1" = data.frame(value_FUN = -1.0, rank = 2), "A2" = data.frame(value_FUN = 0.8, rank = 1)) expect_equal(ranking$matlist$T1, expectedRanking) }) test_that("NAs are removed", { data <- rbind( data.frame(algo="A1", value=NA, case="C1"), data.frame(algo="A2", value=0.8, case="C1")) challenge <- as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter = FALSE) ranking <- challenge%>%aggregateThenRank(FUN = mean, na.treat = "na.rm") expectedRanking <- rbind( "A2" = data.frame(value_FUN = 0.8, rank = 1)) expect_equal(ranking$matlist$T1, expectedRanking) }) test_that("aggregate-than-rank by mean works for multi-task challenge (2 tasks in data set), no missing data", { dataTask1 <- cbind(task="T1", rbind( data.frame(algo="A1", value=0.6, case="C1"), data.frame(algo="A2", value=0.8, case="C1") )) dataTask2 <- cbind(task="T2", rbind( data.frame(algo="A1", value=0.5, case="C1"), data.frame(algo="A2", value=0.4, case="C1") )) data <- rbind(dataTask1, dataTask2) challenge <- as.challenge(data, by="task", algorithm="algo", case="case", value="value", smallBetter = TRUE) ranking <- challenge%>%aggregateThenRank(FUN = mean) expectedRankingTask1 <- rbind( "A1" = data.frame(value_FUN = 0.6, rank = 1), "A2" = data.frame(value_FUN = 0.8, rank = 2)) expectedRankingTask2 <- rbind( "A1" = data.frame(value_FUN = 0.5, rank = 2), "A2" = data.frame(value_FUN = 0.4, rank = 1)) expect_equal(ranking$matlist$T1, expectedRankingTask1) expect_equal(ranking$matlist$T2, expectedRankingTask2) }) test_that("NAs are replaced by numeric value in multi-task challenge (2 tasks in data set)", { dataTask1 <- cbind(task="T1", rbind( data.frame(algo="A1", value=0.6, case="C1"), data.frame(algo="A2", value=0.8, case="C1") )) dataTask2 <- cbind(task="T2", rbind( data.frame(algo="A1", value=NA, case="C1"), data.frame(algo="A2", value=0.4, case="C1") )) data <- rbind(dataTask1, dataTask2) challenge <- as.challenge(data, by="task", algorithm="algo", case="case", value="value", smallBetter = TRUE) ranking <- challenge%>%aggregateThenRank(FUN = mean, na.treat = 100) expectedRankingTask1 <- rbind( "A1" = data.frame(value_FUN = 0.6, rank = 1), "A2" = data.frame(value_FUN = 0.8, rank = 2)) expectedRankingTask2 <- rbind( "A1" = data.frame(value_FUN = 100.0, rank = 2), "A2" = data.frame(value_FUN = 0.4, rank = 1)) expect_equal(ranking$matlist$T1, expectedRankingTask1) expect_equal(ranking$matlist$T2, expectedRankingTask2) }) test_that("aggregate-than-rank raises error when no NA treatment specified but NAs are contained in multi-task challenge (2 tasks in data set)", { dataTask1 <- cbind(task="T1", rbind( data.frame(algo="A1", value=0.6, case="C1"), data.frame(algo="A2", value=0.8, case="C1") )) dataTask2 <- cbind(task="T2", rbind( data.frame(algo="A1", value=NA, case="C1"), data.frame(algo="A2", value=0.4, case="C1") )) data <- rbind(dataTask1, dataTask2) challenge <- as.challenge(data, by="task", algorithm="algo", case="case", value="value", smallBetter = TRUE) expect_error(challenge%>%aggregateThenRank(FUN = mean), "argument \"na.treat\" is missing, with no default", fixed = TRUE) })