Page MenuHomePhabricator

test_bootstrap_replication.R

Authored By
eisenman
Oct 13 2022, 2:37 PM
Size
3 KB
Referenced Files
None
Subscribers
None

test_bootstrap_replication.R

# Setup: read the example data matrix shipped with challengeR and build the
# ranking object that the bootstrap comparisons in this script operate on.
library(challengeR)
data <- read.csv(
  system.file(
    "extdata", "data_matrix.csv",
    package = "challengeR", mustWork = TRUE
  )
)
challenge <- as.challenge(
  data,
  by = "task",
  algorithm = "alg_name",
  case = "case",
  value = "value",
  smallBetter = FALSE
)
# Ranking via rankThenAggregate(), with mean as FUN and "min" tie handling.
ranking <- challenge %>%
  rankThenAggregate(FUN = mean, ties.method = "min")
# Sequential bootstrap (no parallelization): re-seeding with the same value
# before each run gives equal results.
set.seed(1)
ranking_bootstrapped1 <- ranking %>%
  bootstrap(nboot = 100)
set.seed(1)
ranking_bootstrapped2 <- ranking %>%
  bootstrap(nboot = 100)
testthat::expect_equal(
  ranking_bootstrapped1,
  ranking_bootstrapped2
)
# Parallelized bootstrap with a plain set.seed(1): NOT reproducible.
# Re-seeding before each run is not enough here, so the two results are
# expected to differ.
library(doParallel)
registerDoParallel(cores = 8)
set.seed(1)
ranking_bootstrapped1 <- ranking %>%
  bootstrap(nboot = 100, parallel = TRUE, progress = "none")
set.seed(1)
ranking_bootstrapped2 <- ranking %>%
  bootstrap(nboot = 100, parallel = TRUE, progress = "none")
# Release the workers before asserting, so they are cleaned up whatever the
# outcome of the comparison is.
stopImplicitCluster()
# The mismatch is the documented behaviour: assert that the equality check
# fails instead of letting it raise and abort the rest of the script.
testthat::expect_failure(
  testthat::expect_equal(ranking_bootstrapped1, ranking_bootstrapped2)
)
# Selecting the "L'Ecuyer-CMRG" random number generator when seeding does the
# trick: the two parallel runs now agree.
registerDoParallel(cores = 8)
set.seed(1, kind = "L'Ecuyer-CMRG")
ranking_bootstrapped1 <- ranking %>%
  bootstrap(nboot = 100, parallel = TRUE, progress = "none")
set.seed(1, kind = "L'Ecuyer-CMRG")
ranking_bootstrapped2 <- ranking %>%
  bootstrap(nboot = 100, parallel = TRUE, progress = "none")
testthat::expect_equal(
  ranking_bootstrapped1,
  ranking_bootstrapped2
)
stopImplicitCluster()
# Caution: the session's random number generator is now "L'Ecuyer-CMRG", so
# until R is restarted a plain set.seed(1) is implicitly equivalent to
# set.seed(1, kind = "L'Ecuyer-CMRG").
# Help from ?set.seed:
# "
# The use of kind = NULL, normal.kind = NULL or sample.kind = NULL in RNGkind or set.seed
# selects the currently-used generator (including that used in the previous session if the
# workspace has been restored): if no generator has been used it selects "default".
# "
# Thus, the following now works without error:
registerDoParallel(cores = 8)
set.seed(1)
ranking_bootstrapped1 <- ranking %>%
  bootstrap(nboot = 100, parallel = TRUE, progress = "none")
set.seed(1)
ranking_bootstrapped2 <- ranking %>%
  bootstrap(nboot = 100, parallel = TRUE, progress = "none")
testthat::expect_equal(
  ranking_bootstrapped1,
  ranking_bootstrapped2
)
stopImplicitCluster()
# Switching the random number generator back with kind = "default" loses
# reproducibility again: the two parallel runs are expected to differ.
registerDoParallel(cores = 8)
set.seed(1, kind = "default")
ranking_bootstrapped1 <- ranking %>%
  bootstrap(nboot = 100, parallel = TRUE, progress = "none")
set.seed(1, kind = "default")
ranking_bootstrapped2 <- ranking %>%
  bootstrap(nboot = 100, parallel = TRUE, progress = "none")
# Release the workers before asserting, so they are cleaned up whatever the
# outcome of the comparison is.
stopImplicitCluster()
# The mismatch is the documented behaviour: assert that the equality check
# fails instead of letting it raise an error.
testthat::expect_failure(
  testthat::expect_equal(ranking_bootstrapped1, ranking_bootstrapped2)
)
# # Note, however, that the parallelized run won't yield the same results as the sequential one:
# registerDoParallel(cores = 8)
# set.seed(1, kind = "L'Ecuyer-CMRG")
# ranking_bootstrapped1 <- ranking%>%bootstrap(nboot = 100, parallel = TRUE, progress = "none")
# stopImplicitCluster()
# set.seed(1, kind = "L'Ecuyer-CMRG")
# ranking_bootstrapped2 <- ranking%>%bootstrap(nboot = 100, parallel = FALSE)
# testthat::expect_equal(ranking_bootstrapped1, ranking_bootstrapped2)

File Metadata

Mime Type
text/plain
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1549075
Default Alt Text
test_bootstrap_replication.R (3 KB)

Event Timeline