diff --git a/tutorial/MultiTask_rank-then-aggregate.R b/tutorial/MultiTask_rank-then-aggregate.R
new file mode 100644
index 0000000..7d97424
--- /dev/null
+++ b/tutorial/MultiTask_rank-then-aggregate.R
@@ -0,0 +1,79 @@
+## Multi-task, rank-then-aggregate ranking
+
+## 1\. Load package
+
+library(challengeR)
+
+## 2\. Load data
+
+if (!requireNamespace("permute", quietly = TRUE)) install.packages("permute")
+
+n=50
+
+set.seed(4)
+strip=runif(n,.9,1)
+c_ideal=cbind(task="c_ideal",
+              rbind(
+                data.frame(alg_name="A1",value=runif(n,.9,1),case=1:n),
+                data.frame(alg_name="A2",value=runif(n,.8,.89),case=1:n),
+                data.frame(alg_name="A3",value=runif(n,.7,.79),case=1:n),
+                data.frame(alg_name="A4",value=runif(n,.6,.69),case=1:n),
+                data.frame(alg_name="A5",value=runif(n,.5,.59),case=1:n)
+              ))
+
+set.seed(1)
+c_random=data.frame(task="c_random",
+                    alg_name=factor(paste0("A",rep(1:5,each=n))),
+                    value=plogis(rnorm(5*n,1.5,1)),case=rep(1:n,times=5)
+                    )
+
+strip2=seq(.8,1,length.out=5)
+a=permute::allPerms(1:5)
+c_worstcase=data.frame(task="c_worstcase",
+                       alg_name=c(t(a)),
+                       value=rep(strip2,nrow(a)),
+                       case=rep(1:nrow(a),each=5)
+                       )
+c_worstcase=rbind(c_worstcase,
+                  data.frame(task="c_worstcase",alg_name=1:5,value=strip2,case=max(c_worstcase$case)+1)
+                  )
+c_worstcase$alg_name=factor(c_worstcase$alg_name,labels=paste0("A",1:5))
+
+data_matrix=rbind(c_ideal, c_random, c_worstcase)
+
+## 3\. Perform ranking
+
+### 3.1 Define challenge object
+
+challenge=as.challenge(data_matrix,
+                       by="task",
+                       algorithm="alg_name", case="case", value="value",
+                       smallBetter = FALSE)
+
+### 3.2 Perform ranking
+
+ranking=challenge%>%rankThenAggregate(FUN = mean,
+                                      ties.method = "min"
+                                      )
+
+## 4\. Perform bootstrapping
+
+library(doParallel)
+registerDoParallel(cores=8)
+set.seed(1)
+ranking_bootstrapped=ranking%>%bootstrap(nboot=1000, parallel=TRUE, progress = "none")
+stopImplicitCluster()
+
+## 5\. Generate the report
+
+meanRanks=ranking%>%consensus(method = "euclidean")
+meanRanks # note that there may be ties (i.e. some algorithms have identical mean rank)
+
+ranking_bootstrapped %>%
+  report(consensus=meanRanks,
+         title="multiTaskChallengeExample",
+         file = "MultiTask_rank-then-aggregate",
+         format = "PDF", # format can be "PDF", "HTML" or "Word"
+         latex_engine="pdflatex", # LaTeX engine for producing PDF output. Options are "pdflatex", "lualatex", and "xelatex"
+         clean=TRUE # optional. Using TRUE will clean intermediate files that are created during rendering.
+         )
diff --git a/tutorial/MultiTask_test-then-rank.R b/tutorial/MultiTask_test-then-rank.R
new file mode 100644
index 0000000..554823c
--- /dev/null
+++ b/tutorial/MultiTask_test-then-rank.R
@@ -0,0 +1,82 @@
+## Multi-task, test-then-rank ranking based on the Wilcoxon signed rank test
+
+## 1\. Load package
+
+library(challengeR)
+
+## 2\. Load data
+
+if (!requireNamespace("permute", quietly = TRUE)) install.packages("permute")
+
+n=50
+
+set.seed(4)
+strip=runif(n,.9,1)
+c_ideal=cbind(task="c_ideal",
+              rbind(
+                data.frame(alg_name="A1",value=runif(n,.9,1),case=1:n),
+                data.frame(alg_name="A2",value=runif(n,.8,.89),case=1:n),
+                data.frame(alg_name="A3",value=runif(n,.7,.79),case=1:n),
+                data.frame(alg_name="A4",value=runif(n,.6,.69),case=1:n),
+                data.frame(alg_name="A5",value=runif(n,.5,.59),case=1:n)
+              ))
+
+set.seed(1)
+c_random=data.frame(task="c_random",
+                    alg_name=factor(paste0("A",rep(1:5,each=n))),
+                    value=plogis(rnorm(5*n,1.5,1)),case=rep(1:n,times=5)
+                    )
+
+strip2=seq(.8,1,length.out=5)
+a=permute::allPerms(1:5)
+c_worstcase=data.frame(task="c_worstcase",
+                       alg_name=c(t(a)),
+                       value=rep(strip2,nrow(a)),
+                       case=rep(1:nrow(a),each=5)
+                       )
+c_worstcase=rbind(c_worstcase,
+                  data.frame(task="c_worstcase",alg_name=1:5,value=strip2,case=max(c_worstcase$case)+1)
+                  )
+c_worstcase$alg_name=factor(c_worstcase$alg_name,labels=paste0("A",1:5))
+
+data_matrix=rbind(c_ideal, c_random, c_worstcase)
+
+## 3\. Perform ranking
+
+### 3.1 Define challenge object
+
+challenge=as.challenge(data_matrix,
+                       by="task",
+                       algorithm="alg_name", case="case", value="value",
+                       smallBetter = FALSE)
+
+### 3.2 Perform ranking
+
+ranking=challenge%>%testThenRank(alpha=0.05,
+                                 p.adjust.method="none",
+                                 na.treat=0,
+                                 ties.method = "min"
+                                 )
+
+## 4\. Perform bootstrapping
+
+library(doParallel)
+registerDoParallel(cores=8)
+set.seed(1)
+ranking_bootstrapped=ranking%>%bootstrap(nboot=1000, parallel=TRUE, progress = "none")
+stopImplicitCluster()
+
+## 5\. Generate the report
+
+meanRanks=ranking%>%consensus(method = "euclidean")
+meanRanks # note that there may be ties (i.e. some algorithms have identical mean rank)
+
+ranking_bootstrapped %>%
+  report(consensus=meanRanks,
+         title="multiTaskChallengeExample",
+         file = "MultiTask_test-then-rank",
+         format = "PDF", # format can be "PDF", "HTML" or "Word"
+         latex_engine="pdflatex", # LaTeX engine for producing PDF output. Options are "pdflatex", "lualatex", and "xelatex"
+         clean=TRUE # optional. Using TRUE will clean intermediate files that are created during rendering.
+         )
diff --git a/tutorial/SingleTask_aggregate-then-rank.R b/tutorial/SingleTask_aggregate-then-rank.R
new file mode 100644
index 0000000..c1004cd
--- /dev/null
+++ b/tutorial/SingleTask_aggregate-then-rank.R
@@ -0,0 +1,71 @@
+## Single-task, aggregate-then-rank ranking
+
+## 1\. Load package
+
+library(challengeR)
+
+## 2\. Load data
+
+if (!requireNamespace("permute", quietly = TRUE)) install.packages("permute")
+
+n=50
+
+set.seed(4)
+strip=runif(n,.9,1)
+c_ideal=cbind(task="c_ideal",
+              rbind(
+                data.frame(alg_name="A1",value=runif(n,.9,1),case=1:n),
+                data.frame(alg_name="A2",value=runif(n,.8,.89),case=1:n),
+                data.frame(alg_name="A3",value=runif(n,.7,.79),case=1:n),
+                data.frame(alg_name="A4",value=runif(n,.6,.69),case=1:n),
+                data.frame(alg_name="A5",value=runif(n,.5,.59),case=1:n)
+              ))
+
+set.seed(1)
+c_random=data.frame(task="c_random",
+                    alg_name=factor(paste0("A",rep(1:5,each=n))),
+                    value=plogis(rnorm(5*n,1.5,1)),case=rep(1:n,times=5)
+                    )
+
+strip2=seq(.8,1,length.out=5)
+a=permute::allPerms(1:5)
+c_worstcase=data.frame(task="c_worstcase",
+                       alg_name=c(t(a)),
+                       value=rep(strip2,nrow(a)),
+                       case=rep(1:nrow(a),each=5)
+                       )
+c_worstcase=rbind(c_worstcase,
+                  data.frame(task="c_worstcase",alg_name=1:5,value=strip2,case=max(c_worstcase$case)+1)
+                  )
+c_worstcase$alg_name=factor(c_worstcase$alg_name,labels=paste0("A",1:5))
+
+data_matrix=rbind(c_ideal, c_random, c_worstcase)
+
+## 3\. Perform ranking
+
+### 3.1 Define challenge object
+
+dataSubset=subset(data_matrix, task=="c_random")
+
+challenge=as.challenge(dataSubset, algorithm="alg_name", case="case", value="value", smallBetter = FALSE)
+
+### 3.2 Perform ranking
+
+ranking=challenge%>%aggregateThenRank(FUN = mean, na.treat=0, ties.method = "min")
+
+## 4\. Perform bootstrapping
+
+library(doParallel)
+registerDoParallel(cores=8)
+set.seed(1)
+ranking_bootstrapped=ranking%>%bootstrap(nboot=1000, parallel=TRUE, progress = "none")
+stopImplicitCluster()
+
+## 5\. Generate the report
+ranking_bootstrapped %>%
+  report(title="singleTaskChallengeExample", # used for the title of the report
+         file = "SingleTask_aggregate-then-rank",
+         format = "PDF", # format can be "PDF", "HTML" or "Word"
+         latex_engine="pdflatex", # LaTeX engine for producing PDF output. Options are "pdflatex", "lualatex", and "xelatex"
+         clean=TRUE # optional. Using TRUE will clean intermediate files that are created during rendering.
+         )
diff --git a/tutorial/tutorial.Rmd b/tutorial/tutorial.Rmd
new file mode 100644
index 0000000..2cb9863
--- /dev/null
+++ b/tutorial/tutorial.Rmd
@@ -0,0 +1,87 @@
+---
+title: Tutorial to get started with "Methods and open-source toolkit for analyzing and visualizing challenge results"
+output:
+  github_document:
+    toc: yes
+    toc_depth: 1
+  pdf_document:
+    toc: yes
+    toc_depth: '3'
+editor_options:
+  chunk_output_type: console
+---
+
+```{r, echo = FALSE}
+knitr::opts_chunk$set(
+  collapse = TRUE,
+  comment = "#>",
+  # fig.path = "README-",
+  fig.width = 9,
+  fig.height = 5,
+  width=160
+)
+```
+
+# Introduction
+
+This tutorial provides customized scripts to generate reports quickly, without going through all the installation and usage steps.
+
+The tutorial contains the following scripts, which are included in the "tutorial" folder:
+
+- SingleTask_aggregate-then-rank.R
+- MultiTask_rank-then-aggregate.R
+- MultiTask_test-then-rank.R
+
+How to use the tutorial scripts in RStudio:
+
+1. Specify where the report should be generated.
+```{r, eval=F}
+setwd("myWorkingDirectoryFilePath")
+```
+
+2. Open the script.
+
+3. Select all the code in the script file (CTRL+A) and run it (CTRL+Enter).
+
+4. The report will be generated in the previously specified working directory ("myWorkingDirectoryFilePath").
+
+5. Check out the report, and modify the script to adapt the desired parameters.
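+
+The next section describes what each script does. As a quick reference, the following condensed sketch summarizes the multi-task workflow that all three scripts share. It is only an overview: it assumes `data_matrix` has already been built as in the scripts and leaves out the optional parallelization of the bootstrap step, so please refer to the scripts themselves for the full code.
+
+```{r, eval=F}
+# Condensed workflow sketch (not a replacement for the full scripts)
+library(challengeR)
+
+challenge <- as.challenge(data_matrix, by = "task",
+                          algorithm = "alg_name", case = "case", value = "value",
+                          smallBetter = FALSE)
+
+# Choose one ranking method (each tutorial script uses exactly one of these):
+ranking <- challenge %>% rankThenAggregate(FUN = mean, ties.method = "min")
+# ranking <- challenge %>% aggregateThenRank(FUN = mean, na.treat = 0, ties.method = "min")
+# ranking <- challenge %>% testThenRank(alpha = 0.05, p.adjust.method = "none",
+#                                       na.treat = 0, ties.method = "min")
+
+# Uncertainty analysis via bootstrapping (the scripts additionally use parallel=TRUE)
+set.seed(1)
+ranking_bootstrapped <- ranking %>% bootstrap(nboot = 1000)
+
+# Consensus ranking across tasks, then report generation
+meanRanks <- ranking %>% consensus(method = "euclidean")
+ranking_bootstrapped %>% report(consensus = meanRanks,
+                                title = "multiTaskChallengeExample",
+                                file = "MultiTask_rank-then-aggregate",
+                                format = "PDF")
+```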
+
+
+# Usage
+
+Each script contains the following steps, as described in the README:
+
+1. Load package
+
+2. Load data (randomly generated example data)
+
+3. Perform ranking
+- Define challenge object
+- Perform ranking
+
+4. Uncertainty analysis (bootstrapping)
+
+5. Generate report
+
+The scripts are now explained in more detail:
+
+#### SingleTask_aggregate-then-rank.R
+
+As the name indicates, this script performs a single-task evaluation. The applied ranking method is "metric-based aggregation" (aggregate-then-rank). It is the most commonly applied method: the metric values are first aggregated across all test cases for each algorithm, and this aggregate is then used to compute a rank for each algorithm.
+
+#### MultiTask_rank-then-aggregate.R
+
+As the name indicates, this script performs a multi-task evaluation. The applied ranking method is "case-based aggregation" (rank-then-aggregate). It is the second most commonly applied method: a rank is first computed for each test case and each algorithm ("rank first"), and the final rank is based on the aggregated test-case ranks. Distance-based approaches for rank aggregation can also be used.
+
+#### MultiTask_test-then-rank.R
+
+As the name indicates, this script performs a multi-task evaluation. The applied ranking method is "significance ranking" (test-then-rank). In this complementary approach, statistical hypothesis tests are computed for each possible pair of algorithms to assess differences in metric values between them. Ranking is then performed according to the resulting relations or according to the number of significant one-sided test results. In the latter case, algorithms with the same number of significant test results obtain the same rank. Various test statistics can be used.
+
+
+For more hints, see the README.
+
+# Terms of use
+Licensed under GPL-3. If you use this software for a publication, please cite:
+
+Wiesenfarth, M., Reinke, A., Landman, B.A., Cardoso, M.J., Maier-Hein, L. and Kopp-Schneider, A. (2019). Methods and open-source toolkit for analyzing and visualizing challenge results. *arXiv preprint arXiv:1910.05121*