diff --git a/R/violin.R b/R/violin.R index a62c6e0..e8c2968 100644 --- a/R/violin.R +++ b/R/violin.R @@ -1,81 +1,87 @@ violin <- function(x,...) UseMethod("violin") violin.default <- function(x, ...) stop("not implemented for this class") violin.bootstrap=function(x,...){ a=list(bootsrappedRanks=list(x$bootsrappedRanks), matlist=list(x$mat)) names(a$bootsrappedRanks)=names(a$matlist)="" violin.bootstrap.list(a,...) - + } violin.bootstrap.list=function(x,...){ ken=melt(kendall.bootstrap.list(x)) colnames(ken)[2]="Task" cat("\n\nSummary Kendall's tau\n") ss=ken%>%group_by(Task)%>% summarise(mean=mean(value,na.rm=T), median=median(value,na.rm=T), q25=quantile(value,probs = .25,na.rm=T), - q75=quantile(value,probs = .75,na.rm=T))%>% + q75=quantile(value,probs = .75,na.rm=T))%>% arrange(desc(median)) - + print(as.data.frame(ss)) - - ken%>%mutate(Task=factor(.data$Task, + + xAxisText <- element_blank() + + # Show task names as tick mark labels only for multi-task data set + if (length(x$data) > 1) { + xAxisText <- element_text(angle = 90, vjust = 0.5, hjust = 1) + } + + ken%>%mutate(Task=factor(.data$Task, levels=ss$Task))%>% ggplot(aes(Task,value))+ geom_violin(alpha=.3, color=NA, fill="blue")+ - geom_boxplot(width=0.1, + geom_boxplot(width=0.1, fill="white")+ - theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1), + theme(axis.text.x = xAxisText, legend.position = "none")+ ylab("Kendall's tau")+ scale_y_continuous(limits=c(min(min(ken$value),0), max(max(ken$value),1))) } kendall.bootstrap.list=function(x){ ken=lapply(1:length(x$bootsrappedRanks),function(Task){ id=match(rownames( x$bootsrappedRanks[[Task]]), rownames(x$matlist[[Task]]) ) - sapply(x$bootsrappedRanks[[Task]], + sapply(x$bootsrappedRanks[[Task]], function(bootSample) suppressWarnings(kendall(bootSample, x$matlist[[Task]]$rank[id]))) } ) names(ken)=names((x$bootsrappedRanks)) - + if (sum(is.na(x))>0){ cat("Bootstrap samples without variability in rankings (all algorithms ranked 1) excluded.\n Frequency of such samples by task:\n",fill = T) sapply(ken,function(x) sum(is.na(x))) } - - + + return(ken) - + } density.bootstrap.list=function(x,...){ ken=melt(kendall.bootstrap.list(x)) colnames(ken)[2]="Task" - + cat("\n\nSummary Kendall's tau\n") ss=ken%>%group_by(Task)%>% summarise(mean=mean(value,na.rm=T), median=median(value,na.rm=T), q25=quantile(value,probs = .25,na.rm=T), - q75=quantile(value,probs = .75,na.rm=T))%>% + q75=quantile(value,probs = .75,na.rm=T))%>% arrange(desc(median)) - + print(as.data.frame(ss)) - + ggplot(ken)+ geom_density(aes(value,fill=Task),alpha=.3,color=NA) } - diff --git a/tests/testthat/test-violinPlot.R b/tests/testthat/test-violinPlot.R new file mode 100644 index 0000000..eee36f9 --- /dev/null +++ b/tests/testthat/test-violinPlot.R @@ -0,0 +1,52 @@ +test_that("violin plot for visualizing ranking stability returns one plot for single-task data set", { + data <- rbind( + data.frame(algo="A1", value=0.8, case="C1"), + data.frame(algo="A2", value=0.6, case="C1"), + data.frame(algo="A3", value=0.4, case="C1"), + data.frame(algo="A1", value=0.2, case="C2"), + data.frame(algo="A2", value=0.1, case="C2"), + data.frame(algo="A3", value=0.0, case="C2")) + + challenge <- as.challenge(data, taskName="T1", algorithm="algo", case="case", value="value", smallBetter=FALSE) + + ranking <- challenge%>%aggregateThenRank(FUN=median, ties.method="min") + + set.seed(1) + rankingBootstrapped <- ranking%>%bootstrap(nboot=10) + + actualPlot <- violin(rankingBootstrapped) + expect_is(actualPlot, "ggplot") +}) + +test_that("violin plot for visualizing ranking stability returns one plot for multi-task data set", { + dataTask1 <- cbind(task="T1", + rbind( + data.frame(algo="A1", value=0.8, case="C1"), + data.frame(algo="A2", value=0.6, case="C1"), + data.frame(algo="A3", value=0.4, case="C1"), + data.frame(algo="A1", value=0.2, case="C2"), + data.frame(algo="A2", value=0.1, case="C2"), + data.frame(algo="A3", value=0.0, case="C2") + )) + dataTask2 <- cbind(task="T2", + rbind( + data.frame(algo="A1", value=0.2, case="C1"), + data.frame(algo="A2", value=0.3, case="C1"), + data.frame(algo="A3", value=0.4, case="C1"), + data.frame(algo="A1", value=0.7, case="C2"), + data.frame(algo="A2", value=0.8, case="C2"), + data.frame(algo="A3", value=0.9, case="C2") + )) + + data <- rbind(dataTask1, dataTask2) + + challenge <- as.challenge(data, by="task", algorithm="algo", case="case", value="value", smallBetter=FALSE) + + ranking <- challenge%>%aggregateThenRank(FUN=median, ties.method="min") + + set.seed(1) + rankingBootstrapped <- ranking%>%bootstrap(nboot=10) + + actualPlot <- violin(rankingBootstrapped) + expect_is(actualPlot, "ggplot") +})