diff --git a/R/subset.R b/R/subset.R index 67f5762..9f8285f 100644 --- a/R/subset.R +++ b/R/subset.R @@ -1,111 +1,119 @@ subset <- function(x,...) UseMethod("subset") subset.default <- function(x, ...) base::subset(x, ...) subset.comparedRanks.list=function(x, tasks,...){ res=x[tasks] class(res)="comparedRanks.list" res } subset.list=function(x, tasks,...){ x[tasks] } subset.aggregated.list=function(x, tasks,...){ call=match.call(expand.dots = T) if (!is.null(as.list(call$top))) stop("Subset of algorithms only sensible for single task challenges.") matlist=x$matlist[tasks] res=list(matlist=matlist, call=list(x$call,call), data=x$data, FUN = . %>% (x$FUN) %>% (call) ) class(res)=class(x) res } which.top=function(object, top){ mat=object$mat[object$mat$rank<=top,] rownames(mat)#[order(mat$rank)] } subset.ranked.list <- function(x, top, tasks,...) { # if (!missing(tasks) & length(x$matlist) == 1) stop("Subset of tasks only sensible for multi task challenges.") if (!missing(top) & length(x$matlist) != 1) stop("Subset of algorithms only sensible for single-task challenges. Otherwise no consensus ranking is possible.") if (!missing(top)){ taskMat <- x$matlist[[1]] taskData <- x$data[[1]] objectTop=x objectTop$matlist[[1]]=taskMat[taskMat$rank<=top,] taskMatRowNames <- rownames(objectTop$matlist[[1]]) attribute <- attr(objectTop$data,"algorithm") selectedRowNames <- taskData[[attribute]] %in% taskMatRowNames objectTop$data[[1]] <- taskData[selectedRowNames,] if (is.factor(objectTop$data[[1]][[attribute]])) objectTop$data[[1]][[attribute]] <- droplevels(objectTop$data[[1]][[attribute]]) objectTop$fulldata=x$data return(objectTop) } else if (!missing(tasks)){ + + if (is.character(tasks) && any(!tasks%in%names(x$matlist))) { + stop("There is/are no task(s) called ",paste(tasks[!tasks%in%names(x$matlist)],collapse = " and "),".") + } res=list(matlist=x$matlist[tasks], data=x$data[tasks], call=x$call, FUN=x$FUN, FUN.list=x$FUN.list ) attrib=attributes(x$data) attrib$names=attr(res$data,"names") attributes(res$data)=attrib class(res)=c("ranked.list","list") return(res) } } subset.bootstrap.list=function(x, top, tasks, ...) { # if (!missing(tasks) & length(x$matlist) == 1) stop("Subset of tasks only sensible for multi task challenges.") if (!missing(top) & length(x$matlist) != 1) stop("Subset of algorithms only sensible for single-task challenges. Otherwise no consensus ranking is possible.") if (!missing(top)){ objectTop <- subset.ranked.list(x, top = top) objectTop$bootsrappedRanks[[1]] <- objectTop$bootsrappedRanks[[1]][rownames(objectTop$matlist[[1]]),] objectTop$bootsrappedAggregate[[1]] <- objectTop$bootsrappedAggregate[[1]][rownames(objectTop$matlist[[1]]),] return(objectTop) } else if (!missing(tasks)){ + if (is.character(tasks) && any(!tasks%in%names(x$matlist))) { + stop("There is/are no task(s) called ",paste(tasks[!tasks%in%names(x$matlist)],collapse = " and "),".") + } + res=list(bootsrappedRanks=x$bootsrappedRanks[tasks], bootsrappedAggregate=x$bootsrappedAggregate[tasks], matlist=x$matlist[tasks], data=x$data[tasks], FUN=x$FUN ) attrib=attributes(x$data) attrib$names=attr(res$data,"names") attributes(res$data)=attrib class(res)="bootstrap.list" return(res) } } diff --git a/tests/testthat/test-taskSubset.R b/tests/testthat/test-taskSubset.R index 2efd8ab..8ccaebd 100644 --- a/tests/testthat/test-taskSubset.R +++ b/tests/testthat/test-taskSubset.R @@ -1,185 +1,170 @@ test_that("extraction of task subset works for multi-task data set", { dataTask1 <- cbind(task="T1", rbind( data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A2", value=0.6, case="C1"), data.frame(algo="A3", value=0.4, case="C1"), data.frame(algo="A1", value=0.2, case="C2"), data.frame(algo="A2", value=0.1, case="C2"), data.frame(algo="A3", value=0.0, case="C2") )) dataTask2 <- cbind(task="T2", rbind( data.frame(algo="A1", value=0.2, case="C1"), data.frame(algo="A2", value=0.3, case="C1"), data.frame(algo="A3", value=0.4, case="C1"), data.frame(algo="A1", value=0.7, case="C2"), data.frame(algo="A2", value=0.8, case="C2"), data.frame(algo="A3", value=0.9, case="C2") )) data <- rbind(dataTask1, dataTask2) challenge <- as.challenge(data, by="task", algorithm="algo", case="case", value="value", smallBetter=FALSE) ranking <- challenge%>%aggregateThenRank(FUN=mean, ties.method="min") rankingSubset <- subset(ranking, tasks=c("T2")) expect_equal(length(rankingSubset$matlist), 1) expect_is(rankingSubset$matlist$T2, "data.frame") expect_equal(length(rankingSubset$data), 1) expect_is(rankingSubset$data$T2, "data.frame") }) test_that("extraction of task subset works for single-task data set", { data <- rbind( data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A2", value=0.6, case="C1"), data.frame(algo="A3", value=0.4, case="C1"), data.frame(algo="A1", value=0.2, case="C2"), data.frame(algo="A2", value=0.1, case="C2"), data.frame(algo="A3", value=0.0, case="C2")) challenge <- as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter=FALSE) ranking <- challenge%>%aggregateThenRank(FUN=mean, ties.method="min") rankingSubset <- subset(ranking, tasks=c("T1")) expect_equal(length(rankingSubset$matlist), 1) expect_is(rankingSubset$matlist$T1, "data.frame") expect_equal(length(rankingSubset$data), 1) expect_is(rankingSubset$data$T1, "data.frame") }) -test_that("extraction of task subset does not raise an error for invalid task name", { +test_that("extraction of task subset raises an error for invalid task name", { data <- rbind( data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A2", value=0.6, case="C1"), data.frame(algo="A3", value=0.4, case="C1"), data.frame(algo="A1", value=0.2, case="C2"), data.frame(algo="A2", value=0.1, case="C2"), data.frame(algo="A3", value=0.0, case="C2")) challenge <- as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter=FALSE) ranking <- challenge%>%aggregateThenRank(FUN=mean, ties.method="min") - rankingSubset <- subset(ranking, tasks=c("T1x")) - - expect_equal(length(rankingSubset$matlist), 1) - expect_equal(rankingSubset$matlist$T1, NULL) - - expect_equal(length(rankingSubset$data), 1) - expect_equal(rankingSubset$data$T1, NULL) + expect_error(subset(ranking, tasks=c("T1x")), + "There is/are no task(s) called T1x.", fixed=TRUE) }) test_that("extraction of task subset from bootstrap ranking works for multi-task data set", { dataTask1 <- cbind(task="T1", rbind( data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A2", value=0.6, case="C1"), data.frame(algo="A3", value=0.4, case="C1"), data.frame(algo="A1", value=0.2, case="C2"), data.frame(algo="A2", value=0.1, case="C2"), data.frame(algo="A3", value=0.0, case="C2") )) dataTask2 <- cbind(task="T2", rbind( data.frame(algo="A1", value=0.2, case="C1"), data.frame(algo="A2", value=0.3, case="C1"), data.frame(algo="A3", value=0.4, case="C1"), data.frame(algo="A1", value=0.7, case="C2"), data.frame(algo="A2", value=0.8, case="C2"), data.frame(algo="A3", value=0.9, case="C2") )) data <- rbind(dataTask1, dataTask2) challenge <- as.challenge(data, by="task", algorithm="algo", case="case", value="value", smallBetter=FALSE) ranking <- challenge%>%aggregateThenRank(FUN=mean, ties.method="min") set.seed(1) rankingBootstrapped <- ranking%>%bootstrap(nboot=10) rankingBootstrappedSubset <- subset(rankingBootstrapped, tasks=c("T2")) expect_equal(length(rankingBootstrappedSubset$matlist), 1) expect_is(rankingBootstrappedSubset$matlist$T2, "data.frame") expect_equal(length(rankingBootstrappedSubset$data), 1) expect_is(rankingBootstrappedSubset$data$T2, "data.frame") expect_equal(length(rankingBootstrappedSubset$bootsrappedRanks), 1) expect_is(rankingBootstrappedSubset$bootsrappedRanks$T2, "data.frame") expect_equal(length(rankingBootstrappedSubset$bootsrappedAggregate), 1) expect_is(rankingBootstrappedSubset$bootsrappedAggregate$T2, "data.frame") }) test_that("extraction of task subset from bootstrap ranking works for single-task data set", { data <- rbind( data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A2", value=0.6, case="C1"), data.frame(algo="A3", value=0.4, case="C1"), data.frame(algo="A1", value=0.2, case="C2"), data.frame(algo="A2", value=0.1, case="C2"), data.frame(algo="A3", value=0.0, case="C2")) challenge <- as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter=FALSE) ranking <- challenge%>%aggregateThenRank(FUN=mean, ties.method="min") set.seed(1) rankingBootstrapped <- ranking%>%bootstrap(nboot=10) rankingBootstrappedSubset <- subset(rankingBootstrapped, tasks=c("T1")) expect_equal(length(rankingBootstrappedSubset$matlist), 1) expect_is(rankingBootstrappedSubset$matlist$T1, "data.frame") expect_equal(length(rankingBootstrappedSubset$data), 1) expect_is(rankingBootstrappedSubset$data$T1, "data.frame") expect_equal(length(rankingBootstrappedSubset$bootsrappedRanks), 1) expect_is(rankingBootstrappedSubset$bootsrappedRanks$T1, "data.frame") expect_equal(length(rankingBootstrappedSubset$bootsrappedAggregate), 1) expect_is(rankingBootstrappedSubset$bootsrappedAggregate$T1, "data.frame") }) -test_that("extraction of task subset from bootstrap ranking does not raise an error for invalid task name", { +test_that("extraction of task subset from bootstrap ranking raises an error for invalid task name", { data <- rbind( data.frame(algo="A1", value=0.8, case="C1"), data.frame(algo="A2", value=0.6, case="C1"), data.frame(algo="A3", value=0.4, case="C1"), data.frame(algo="A1", value=0.2, case="C2"), data.frame(algo="A2", value=0.1, case="C2"), data.frame(algo="A3", value=0.0, case="C2")) challenge <- as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter=FALSE) ranking <- challenge%>%aggregateThenRank(FUN=mean, ties.method="min") set.seed(1) rankingBootstrapped <- ranking%>%bootstrap(nboot=10) - rankingBootstrappedSubset <- subset(rankingBootstrapped, tasks=c("T1x")) - - expect_equal(length(rankingBootstrappedSubset$matlist), 1) - expect_equal(rankingBootstrappedSubset$matlist$T1, NULL) + expect_error(subset(rankingBootstrapped, tasks=c("T1x")), + "There is/are no task(s) called T1x.", fixed=TRUE) - expect_equal(length(rankingBootstrappedSubset$data), 1) - expect_equal(rankingBootstrappedSubset$data$T1, NULL) - - expect_equal(length(rankingBootstrappedSubset$bootsrappedRanks), 1) - expect_equal(rankingBootstrappedSubset$bootsrappedRanks$T1, NULL) - - expect_equal(length(rankingBootstrappedSubset$bootsrappedAggregate), 1) - expect_equal(rankingBootstrappedSubset$bootsrappedAggregate$T1, NULL) })