The goal of this vignette is to explain how to use ResamplingSameOtherSizesCV
for various kinds of cross-validation.
# Simulate N inputs drawn uniformly from [-abs.x, abs.x].
N <- 2100
abs.x <- 70
set.seed(2)  # fixed seed so the recorded output below is reproducible
x.vec <- runif(N, -abs.x, abs.x)
str(x.vec)
#> num [1:2100] -44.1 28.3 10.3 -46.5 62.1 ...
library(data.table)
# Build the data table: outputs are a noisy sine of the inputs,
# y = sin(x) + Gaussian noise with sd = 0.5.
(task.dt <- data.table(
x=x.vec,
y = sin(x.vec)+rnorm(N,sd=0.5)))
#> x y
#> <num> <num>
#> 1: -44.11648 -0.40781530
#> 2: 28.33237 -0.08520601
#> 3: 10.26569 -1.23266284
#> 4: -46.47273 -1.36225125
#> 5: 62.13751 -1.33779346
#> ---
#> 2096: 60.83765 -0.10678010
#> 2097: 55.71469 -0.92403513
#> 2098: 14.31045 1.04519820
#> 2099: 27.18008 1.67815828
#> 2100: 23.67202 -0.26881102
library(animint2)
# Scatterplot of the simulated pattern (noisy sine wave).
ggplot()+
geom_point(aes(
x, y),
shape=1,
data=task.dt)+
coord_equal()
Above we see a scatterplot of the simulated data. The goal of the learning algorithm will be to predict y from x.
The code below assigns three test groups to the randomly simulated data.
# Assign consecutive pairs of rows to the same atomic group, so both
# rows of a pair always end up together in either train or test.
atomic.group.size <- 2
task.dt[, agroup := rep(seq(1, N/atomic.group.size), each=atomic.group.size)][]
#> x y agroup
#> <num> <num> <int>
#> 1: -44.11648 -0.40781530 1
#> 2: 28.33237 -0.08520601 1
#> 3: 10.26569 -1.23266284 2
#> 4: -46.47273 -1.36225125 2
#> 5: 62.13751 -1.33779346 3
#> ---
#> 2096: 60.83765 -0.10678010 1048
#> 2097: 55.71469 -0.92403513 1049
#> 2098: 14.31045 1.04519820 1049
#> 2099: 27.18008 1.67815828 1050
#> 2100: 23.67202 -0.26881102 1050
# Assign test subsets A/B/C in a 1:2:4 ratio, repeating the pattern over
# all rows; each atomic group (pair of rows) gets a single subset label.
# The argument name is spelled out as `length.out=` — the original used
# the partially matched abbreviation `l=`, which is an R anti-pattern.
task.dt[, random_group := rep(
  rep(c("A","B","B","C","C","C","C"), each=atomic.group.size),
  length.out=.N
)][]
#> x y agroup random_group
#> <num> <num> <int> <char>
#> 1: -44.11648 -0.40781530 1 A
#> 2: 28.33237 -0.08520601 1 A
#> 3: 10.26569 -1.23266284 2 B
#> 4: -46.47273 -1.36225125 2 B
#> 5: 62.13751 -1.33779346 3 B
#> ---
#> 2096: 60.83765 -0.10678010 1048 C
#> 2097: 55.71469 -0.92403513 1049 C
#> 2098: 14.31045 1.04519820 1049 C
#> 2099: 27.18008 1.67815828 1050 C
#> 2100: 23.67202 -0.26881102 1050 C
table(group.tab <- task.dt$random_group)
#>
#> A B C
#> 300 600 1200
The output above shows the number of rows in each random group. Below we define a task using that group,
# Define the regression task. random_group plays two roles: subset
# (defines same/other/all CV splits) and stratum (keeps subset
# proportions equal across folds); agroup keeps paired rows together.
reg.task <- mlr3::TaskRegr$new(
"sin", task.dt, target="y")
reg.task$col_roles$subset <- "random_group"
reg.task$col_roles$group <- "agroup"
reg.task$col_roles$stratum <- "random_group"
reg.task$col_roles$feature <- "x"
str(reg.task$col_roles)
#> List of 8
#> $ feature: chr "x"
#> $ target : chr "y"
#> $ name : chr(0)
#> $ order : chr(0)
#> $ stratum: chr "random_group"
#> $ group : chr "agroup"
#> $ weight : chr(0)
#> $ subset : chr "random_group"
Below we define cross-validation.
# Create the resampler and instantiate it on the task; iteration.dt
# lists one train/test split per (test subset, train subsets, fold).
same_other_sizes_cv <- mlr3resampling::ResamplingSameOtherSizesCV$new()
same_other_sizes_cv$instantiate(reg.task)
same_other_sizes_cv$instance$iteration.dt
#> test.subset train.subsets groups test.fold test
#> <char> <char> <int> <int> <list>
#> 1: A all 700 1 43,44,57,58,71,72,...
#> 2: B all 700 1 3, 4, 5, 6,17,18,...
#> 3: C all 700 1 23,24,25,26,37,38,...
#> 4: A all 700 2 1, 2,15,16,29,30,...
#> 5: B all 700 2 33,34,47,48,61,62,...
#> 6: C all 700 2 13,14,21,22,35,36,...
#> 7: A all 700 3 99,100,155,156,169,170,...
#> 8: B all 700 3 19,20,45,46,75,76,...
#> 9: C all 700 3 7, 8, 9,10,11,12,...
#> 10: A other 600 1 43,44,57,58,71,72,...
#> 11: B other 500 1 3, 4, 5, 6,17,18,...
#> 12: C other 300 1 23,24,25,26,37,38,...
#> 13: A other 600 2 1, 2,15,16,29,30,...
#> 14: B other 500 2 33,34,47,48,61,62,...
#> 15: C other 300 2 13,14,21,22,35,36,...
#> 16: A other 600 3 99,100,155,156,169,170,...
#> 17: B other 500 3 19,20,45,46,75,76,...
#> 18: C other 300 3 7, 8, 9,10,11,12,...
#> 19: A same 100 1 43,44,57,58,71,72,...
#> 20: B same 200 1 3, 4, 5, 6,17,18,...
#> 21: C same 400 1 23,24,25,26,37,38,...
#> 22: A same 100 2 1, 2,15,16,29,30,...
#> 23: B same 200 2 33,34,47,48,61,62,...
#> 24: C same 400 2 13,14,21,22,35,36,...
#> 25: A same 100 3 99,100,155,156,169,170,...
#> 26: B same 200 3 19,20,45,46,75,76,...
#> 27: C same 400 3 7, 8, 9,10,11,12,...
#> test.subset train.subsets groups test.fold test
#> train seed n.train.groups iteration
#> <list> <int> <int> <int>
#> 1: 1, 2, 7, 8, 9,10,... 1 700 1
#> 2: 1, 2, 7, 8, 9,10,... 1 700 2
#> 3: 1, 2, 7, 8, 9,10,... 1 700 3
#> 4: 3,4,5,6,7,8,... 1 700 4
#> 5: 3,4,5,6,7,8,... 1 700 5
#> 6: 3,4,5,6,7,8,... 1 700 6
#> 7: 1,2,3,4,5,6,... 1 700 7
#> 8: 1,2,3,4,5,6,... 1 700 8
#> 9: 1,2,3,4,5,6,... 1 700 9
#> 10: 7, 8, 9,10,11,12,... 1 600 10
#> 11: 1, 2, 7, 8, 9,10,... 1 500 11
#> 12: 1, 2,15,16,19,20,... 1 300 12
#> 13: 3,4,5,6,7,8,... 1 600 13
#> 14: 7, 8, 9,10,11,12,... 1 500 14
#> 15: 3, 4, 5, 6,17,18,... 1 300 15
#> 16: 3, 4, 5, 6,13,14,... 1 600 16
#> 17: 1, 2,13,14,15,16,... 1 500 17
#> 18: 1,2,3,4,5,6,... 1 300 18
#> 19: 1, 2,15,16,29,30,... 1 100 19
#> 20: 19,20,33,34,45,46,... 1 200 20
#> 21: 7, 8, 9,10,11,12,... 1 400 21
#> 22: 43,44,57,58,71,72,... 1 100 22
#> 23: 3, 4, 5, 6,17,18,... 1 200 23
#> 24: 7, 8, 9,10,11,12,... 1 400 24
#> 25: 1, 2,15,16,29,30,... 1 100 25
#> 26: 3, 4, 5, 6,17,18,... 1 200 26
#> 27: 13,14,21,22,23,24,... 1 400 27
#> train seed n.train.groups iteration
So using the K-fold cross-validation, we will do one train/test split for each row of the table above. There is one row for each combination of test subset (A/B/C), train subset (same/other/all), and test fold (1/2/3).
We compute and plot the results using the code below,
# Baseline learner: featureless predicts the train-set mean of y.
(reg.learner.list <- list(
mlr3::LearnerRegrFeatureless$new()))
#> [[1]]
#> <LearnerRegrFeatureless:regr.featureless>: Featureless Regression Learner
#> * Model: -
#> * Parameters: robust=FALSE
#> * Packages: mlr3, stats
#> * Predict Types: [response], se
#> * Feature Types: logical, integer, numeric, character, factor, ordered,
#> POSIXct
#> * Properties: featureless, importance, missings, selected_features
# Add a decision tree learner when rpart is installed (optional dep;
# requireNamespace is the right check for optional packages).
if(requireNamespace("rpart")){
reg.learner.list$rpart <- mlr3::LearnerRegrRpart$new()
}
# One benchmark experiment per (task, learner, resampling) combination.
(same.other.grid <- mlr3::benchmark_grid(
reg.task,
reg.learner.list,
same_other_sizes_cv))
#> task learner resampling
#> <char> <char> <char>
#> 1: sin regr.featureless same_other_sizes_cv
#> 2: sin regr.rpart same_other_sizes_cv
# Uncomment the next line to parallelize over the future framework.
##if(require(future))plan("multisession")
# Silence mlr3 progress messages; keep warnings.
lgr::get_logger("mlr3")$set_threshold("warn")
(same.other.result <- mlr3::benchmark(
same.other.grid, store_models = TRUE))
#> <BenchmarkResult> of 54 rows with 2 resampling runs
#> nr task_id learner_id resampling_id iters warnings errors
#> 1 sin regr.featureless same_other_sizes_cv 27 0 0
#> 2 sin regr.rpart same_other_sizes_cv 27 0 0
# Compute one test-error row per split, then record train set size.
same.other.score <- mlr3resampling::score(same.other.result)
# lengths() is the vectorized, type-stable replacement for
# sapply(x, length), which can change return type on edge cases.
same.other.score[, n.train := lengths(train)]
same.other.score[1]
#> test.subset train.subsets groups test.fold test
#> <char> <char> <int> <int> <list>
#> 1: A all 700 1 43,44,57,58,71,72,...
#> train seed n.train.groups iteration
#> <list> <int> <int> <int>
#> 1: 1, 2, 7, 8, 9,10,... 1 700 1
#> uhash nr task task_id
#> <char> <int> <list> <char>
#> 1: 6bf9fd54-7d6a-4bd0-85fc-9c1437d80f1e 1 <TaskRegr:sin> sin
#> learner learner_id
#> <list> <char>
#> 1: <LearnerRegrFeatureless:regr.featureless> regr.featureless
#> resampling resampling_id prediction regr.mse
#> <list> <char> <list> <num>
#> 1: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.819815
#> algorithm n.train
#> <char> <int>
#> 1: featureless 1400
# One panel per test subset; points show per-fold test MSE by train
# subset type, text labels show the number of train rows.
ggplot()+
geom_point(aes(
regr.mse, train.subsets, color=algorithm),
shape=1,
data=same.other.score)+
geom_text(aes(
Inf, train.subsets,
label=sprintf("n.train=%d ", n.train)),
hjust=1,
vjust=1.5,
# shape=1 removed: geom_text has no shape aesthetic, so the
# parameter was unused.
data=same.other.score[algorithm=="featureless" & test.fold==1])+
facet_grid(. ~ test.subset, labeller=label_both, scales="free")+
scale_x_log10(
"Mean squared prediction error (test set)")
# Aggregate over folds: mean and sd of MSE for each combination of
# algorithm, test subset, and train subset type.
same.other.wide <- dcast(
same.other.score,
algorithm + test.subset + train.subsets ~ .,
list(mean, sd),
value.var="regr.mse")
# Mean +/- sd of test MSE per train subset type; segments span the
# mean plus/minus one standard deviation over the three folds.
ggplot()+
geom_segment(aes(
regr.mse_mean+regr.mse_sd, train.subsets,
xend=regr.mse_mean-regr.mse_sd, yend=train.subsets,
color=algorithm),
# shape=1 removed: geom_segment has no shape aesthetic, so the
# parameter was unused.
data=same.other.wide)+
geom_point(aes(
regr.mse_mean, train.subsets, color=algorithm),
shape=1,
data=same.other.wide)+
geom_text(aes(
Inf, train.subsets,
label=sprintf("n.train=%d ", n.train)),
hjust=1,
vjust=1.5,
# shape=1 removed here too: geom_text does not use it.
data=same.other.score[algorithm=="featureless" & test.fold==1])+
facet_grid(. ~ test.subset, labeller=label_both, scales="free")+
scale_x_log10(
"Mean squared prediction error (test set)")
The figures above show a test subset in each panel, the train subsets on the y axis, and the test error on the x axis; the two different algorithms are shown in two different colors. We can clearly see that:
- For train.subsets=same, test error is largest, sometimes almost as large as featureless, which is the error rate when no relationship has been learned between inputs and outputs (not enough data).
- For train.subsets=other, rpart test error is significantly smaller than featureless, indicating that some non-trivial relationship between inputs and outputs has been learned. Sometimes other has larger error than same, sometimes smaller (depending on sample size).
- For train.subsets=all, rpart test error tends to be minimal, which indicates that combining all of the subsets is beneficial in this case (when the pattern is exactly the same in the different subsets).
Overall in the plot above, all tends to have less prediction error than same, which suggests that the subsets are similar (and indeed they are iid in this simulation).
Below we visualize test error as a function of train size.
# Test error as a function of the number of train rows.
# NOTE(review): `subset=` is not a standard ggplot2 aesthetic; it looks
# intended to connect points by algorithm and fold -- confirm against
# animint2; `group=paste(algorithm, test.fold)` may be what was meant.
ggplot()+
geom_line(aes(
n.train, regr.mse,
color=algorithm,
subset=paste(algorithm, test.fold)),
data=same.other.score)+
geom_label(aes(
n.train, regr.mse,
color=algorithm,
label=train.subsets),
data=same.other.score)+
facet_grid(. ~ test.subset, labeller=label_both, scales="free")+
scale_y_log10(
"Mean squared prediction error (test set)")
In the previous section we defined a task using the subset
role, which means that the different values in that column will be used to define different subsets for training/testing using same/other/all CV. In contrast, below we define a task without the subset
role, which means that we will not have separate CV iterations for same/other/all (full data is treated as one subset / train subset is same).
# Same task as before but WITHOUT the subset role, so the full data is
# treated as a single subset (no same/other/all distinction).
task.no.subset <- mlr3::TaskRegr$new(
"sin", task.dt, target="y")
task.no.subset$col_roles$group <- "agroup"
task.no.subset$col_roles$stratum <- "random_group"
task.no.subset$col_roles$feature <- "x"
str(task.no.subset$col_roles)
#> List of 7
#> $ feature: chr "x"
#> $ target : chr "y"
#> $ name : chr(0)
#> $ order : chr(0)
#> $ stratum: chr "random_group"
#> $ group : chr "agroup"
#> $ weight : chr(0)
Below we define cross-validation, and we set the sizes
to 5 so we can see what happens when we have train sets that are 5 sizes smaller than the full train set size.
same_other_sizes_cv <- mlr3resampling::ResamplingSameOtherSizesCV$new()
# sizes=5 requests 5 down-sampled train set sizes in addition to the
# full train set size, for each test fold.
same_other_sizes_cv$param_set$values$sizes <- 5
same_other_sizes_cv$instantiate(task.no.subset)
same_other_sizes_cv$instance$iteration.dt
#> test.subset train.subsets groups test.fold test
#> <char> <char> <int> <int> <list>
#> 1: full same 700 1 3, 4,13,14,15,16,...
#> 2: full same 700 1 3, 4,13,14,15,16,...
#> 3: full same 700 1 3, 4,13,14,15,16,...
#> 4: full same 700 1 3, 4,13,14,15,16,...
#> 5: full same 700 1 3, 4,13,14,15,16,...
#> 6: full same 700 1 3, 4,13,14,15,16,...
#> 7: full same 700 2 1, 2,17,18,21,22,...
#> 8: full same 700 2 1, 2,17,18,21,22,...
#> 9: full same 700 2 1, 2,17,18,21,22,...
#> 10: full same 700 2 1, 2,17,18,21,22,...
#> 11: full same 700 2 1, 2,17,18,21,22,...
#> 12: full same 700 2 1, 2,17,18,21,22,...
#> 13: full same 700 3 5, 6, 7, 8, 9,10,...
#> 14: full same 700 3 5, 6, 7, 8, 9,10,...
#> 15: full same 700 3 5, 6, 7, 8, 9,10,...
#> 16: full same 700 3 5, 6, 7, 8, 9,10,...
#> 17: full same 700 3 5, 6, 7, 8, 9,10,...
#> 18: full same 700 3 5, 6, 7, 8, 9,10,...
#> train seed n.train.groups iteration
#> <list> <int> <int> <int>
#> 1: 565,566,583,584,743,744,... 1 21 1
#> 2: 133,134,171,172,305,306,... 1 43 2
#> 3: 77,78,93,94,95,96,... 1 87 3
#> 4: 7, 8,25,26,29,30,... 1 175 4
#> 5: 1, 2, 7, 8,17,18,... 1 350 5
#> 6: 1,2,5,6,7,8,... 1 700 6
#> 7: 39, 40,109,110,285,286,... 1 21 7
#> 8: 29,30,37,38,39,40,... 1 43 8
#> 9: 29,30,37,38,39,40,... 1 87 9
#> 10: 3, 4,13,14,23,24,... 1 175 10
#> 11: 3, 4,13,14,19,20,... 1 350 11
#> 12: 3,4,5,6,7,8,... 1 700 12
#> 13: 209,210,397,398,519,520,... 1 21 13
#> 14: 139,140,209,210,343,344,... 1 43 14
#> 15: 1, 2,17,18,43,44,... 1 87 15
#> 16: 1, 2,17,18,37,38,... 1 175 16
#> 17: 1, 2,13,14,17,18,... 1 350 17
#> 18: 1, 2, 3, 4,13,14,... 1 700 18
So using the K-fold cross-validation, we will do one train/test split for each row of the table above. There is one row for each combination of n.train.groups
(full train set size + 5 smaller sizes), and test fold (1/2/3).
We compute and plot the results using the code below,
# Baseline learner again: featureless predicts the train-set mean.
(reg.learner.list <- list(
mlr3::LearnerRegrFeatureless$new()))
#> [[1]]
#> <LearnerRegrFeatureless:regr.featureless>: Featureless Regression Learner
#> * Model: -
#> * Parameters: robust=FALSE
#> * Packages: mlr3, stats
#> * Predict Types: [response], se
#> * Feature Types: logical, integer, numeric, character, factor, ordered,
#> POSIXct
#> * Properties: featureless, importance, missings, selected_features
# Add the decision tree learner when rpart is installed (optional dep).
if(requireNamespace("rpart")){
reg.learner.list$rpart <- mlr3::LearnerRegrRpart$new()
}
# Benchmark grid for the no-subset task with down-sampled train sizes.
(same.other.grid <- mlr3::benchmark_grid(
task.no.subset,
reg.learner.list,
same_other_sizes_cv))
#> task learner resampling
#> <char> <char> <char>
#> 1: sin regr.featureless same_other_sizes_cv
#> 2: sin regr.rpart same_other_sizes_cv
# Uncomment the next line to parallelize over the future framework.
##if(require(future))plan("multisession")
# Silence mlr3 progress messages; keep warnings.
lgr::get_logger("mlr3")$set_threshold("warn")
(same.other.result <- mlr3::benchmark(
same.other.grid, store_models = TRUE))
#> <BenchmarkResult> of 36 rows with 2 resampling runs
#> nr task_id learner_id resampling_id iters warnings errors
#> 1 sin regr.featureless same_other_sizes_cv 18 0 0
#> 2 sin regr.rpart same_other_sizes_cv 18 0 0
# Score each split and record the train set size.
same.other.score <- mlr3resampling::score(same.other.result)
# lengths() is the vectorized, type-stable replacement for
# sapply(x, length).
same.other.score[, n.train := lengths(train)]
same.other.score[1]
#> test.subset train.subsets groups test.fold test
#> <char> <char> <int> <int> <list>
#> 1: full same 700 1 3, 4,13,14,15,16,...
#> train seed n.train.groups iteration
#> <list> <int> <int> <int>
#> 1: 565,566,583,584,743,744,... 1 21 1
#> uhash nr task task_id
#> <char> <int> <list> <char>
#> 1: 9610f56c-73f2-4e34-b0f9-d397db3f1726 1 <TaskRegr:sin> sin
#> learner learner_id
#> <list> <char>
#> 1: <LearnerRegrFeatureless:regr.featureless> regr.featureless
#> resampling resampling_id prediction regr.mse
#> <list> <char> <list> <num>
#> 1: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.7650132
#> algorithm n.train
#> <char> <int>
#> 1: featureless 42
# Learning curve: test error versus number of train rows.
# NOTE(review): `subset=` is not a standard ggplot2 aesthetic; it looks
# intended to connect points by algorithm and fold -- confirm against
# animint2; `group=paste(algorithm, test.fold)` may be what was meant.
ggplot()+
geom_line(aes(
n.train, regr.mse,
color=algorithm,
subset=paste(algorithm, test.fold)),
data=same.other.score)+
geom_point(aes(
n.train, regr.mse,
color=algorithm),
data=same.other.score)+
facet_grid(. ~ test.subset, labeller=label_both, scales="free")+
scale_x_log10(
"Number of train rows",
breaks=unique(same.other.score$n.train))+
scale_y_log10(
"Mean squared prediction error (test set)")
From the plot above, it looks like about 700 rows is enough to get minimal test error, using the rpart learner.
# Second simulation: fewer rows, and x sorted so the scatterplot reads
# left to right along the sine pattern.
N <- 600
abs.x <- 20
set.seed(1)  # fixed seed so the recorded output below is reproducible
x.vec <- sort(runif(N, -abs.x, abs.x))
str(x.vec)
#> num [1:600] -19.9 -19.9 -19.7 -19.6 -19.6 ...
library(data.table)
# Build the data table: y = sin(x) + Gaussian noise with sd = 0.5.
(task.dt <- data.table(
x=x.vec,
y = sin(x.vec)+rnorm(N,sd=0.5)))
#> x y
#> <num> <num>
#> 1: -19.92653 -0.4336887
#> 2: -19.92269 -1.4023484
#> 3: -19.67486 0.2509134
#> 4: -19.55856 -0.8428921
#> 5: -19.55402 0.1794473
#> ---
#> 596: 19.70736 0.7497818
#> 597: 19.74997 0.3178435
#> 598: 19.75656 1.3950030
#> 599: 19.83862 -0.2086586
#> 600: 19.84309 0.5748863
library(animint2)
# Scatterplot of the second simulated data set.
ggplot()+
geom_point(aes(
x, y),
shape=1,
data=task.dt)+
coord_equal()
# Pairs of consecutive rows share an atomic group, as before.
atomic.subset.size <- 2
task.dt[, agroup := rep(seq(1, N/atomic.subset.size), each=atomic.subset.size)][]
#> x y agroup
#> <num> <num> <int>
#> 1: -19.92653 -0.4336887 1
#> 2: -19.92269 -1.4023484 1
#> 3: -19.67486 0.2509134 2
#> 4: -19.55856 -0.8428921 2
#> 5: -19.55402 0.1794473 3
#> ---
#> 596: 19.70736 0.7497818 298
#> 597: 19.74997 0.3178435 299
#> 598: 19.75656 1.3950030 299
#> 599: 19.83862 -0.2086586 300
#> 600: 19.84309 0.5748863 300
# Assign test subsets A/B in a 1:3 ratio by atomic group.
# The argument name is spelled out as `length.out=` — the original used
# the partially matched abbreviation `l=`, which is an R anti-pattern.
task.dt[, random_subset := rep(
  rep(c("A","B","B","B"), each=atomic.subset.size),
  length.out=.N
)][]
#> x y agroup random_subset
#> <num> <num> <int> <char>
#> 1: -19.92653 -0.4336887 1 A
#> 2: -19.92269 -1.4023484 1 A
#> 3: -19.67486 0.2509134 2 B
#> 4: -19.55856 -0.8428921 2 B
#> 5: -19.55402 0.1794473 3 B
#> ---
#> 596: 19.70736 0.7497818 298 B
#> 597: 19.74997 0.3178435 299 B
#> 598: 19.75656 1.3950030 299 B
#> 599: 19.83862 -0.2086586 300 B
#> 600: 19.84309 0.5748863 300 B
table(subset.tab <- task.dt$random_subset)
#>
#> A B
#> 150 450
# Define the task: random_subset is both the subset role (same/other/all
# CV) and the stratum role; agroup keeps paired rows together.
reg.task <- mlr3::TaskRegr$new(
"sin", task.dt, target="y")
reg.task$col_roles$subset <- "random_subset"
reg.task$col_roles$group <- "agroup"
reg.task$col_roles$stratum <- "random_subset"
reg.task$col_roles$feature <- "x"
same_other_sizes_cv <- mlr3resampling::ResamplingSameOtherSizesCV$new()
In the previous section we analyzed prediction accuracy of same/other/all, which corresponds to keeping sizes
parameter at default of -1. The main difference in this section is that we change sizes
to 0, which means to down-sample same/other/all, so we can see if there is an effect for sample size (there should be for iid problems with intermediate difficulty). We set sizes to 0 in the next line:
# sizes=0 keeps same/other/all, and additionally down-samples each to
# the smallest train subset size, so sample-size effects are visible.
same_other_sizes_cv$param_set$values$sizes <- 0
same_other_sizes_cv$instantiate(reg.task)
# Full element name spelled out: the original `$it` relied on `$`
# partial matching of list names to silently resolve to iteration.dt.
same_other_sizes_cv$instance$iteration.dt
#> test.subset train.subsets groups test.fold test
#> <char> <char> <int> <int> <list>
#> 1: A all 200 1 1, 2,49,50,57,58,...
#> 2: A all 200 1 1, 2,49,50,57,58,...
#> 3: A all 200 1 1, 2,49,50,57,58,...
#> 4: B all 200 1 19,20,31,32,37,38,...
#> 5: B all 200 1 19,20,31,32,37,38,...
#> 6: B all 200 1 19,20,31,32,37,38,...
#> 7: A all 200 2 17,18,41,42,89,90,...
#> 8: A all 200 2 17,18,41,42,89,90,...
#> 9: A all 200 2 17,18,41,42,89,90,...
#> 10: B all 200 2 3,4,5,6,7,8,...
#> 11: B all 200 2 3,4,5,6,7,8,...
#> 12: B all 200 2 3,4,5,6,7,8,...
#> 13: A all 200 3 9,10,25,26,33,34,...
#> 14: A all 200 3 9,10,25,26,33,34,...
#> 15: A all 200 3 9,10,25,26,33,34,...
#> 16: B all 200 3 15,16,21,22,23,24,...
#> 17: B all 200 3 15,16,21,22,23,24,...
#> 18: B all 200 3 15,16,21,22,23,24,...
#> 19: A other 150 1 1, 2,49,50,57,58,...
#> 20: A other 150 1 1, 2,49,50,57,58,...
#> 21: B other 50 1 19,20,31,32,37,38,...
#> 22: A other 150 2 17,18,41,42,89,90,...
#> 23: A other 150 2 17,18,41,42,89,90,...
#> 24: B other 50 2 3,4,5,6,7,8,...
#> 25: A other 150 3 9,10,25,26,33,34,...
#> 26: A other 150 3 9,10,25,26,33,34,...
#> 27: B other 50 3 15,16,21,22,23,24,...
#> 28: A same 50 1 1, 2,49,50,57,58,...
#> 29: B same 150 1 19,20,31,32,37,38,...
#> 30: B same 150 1 19,20,31,32,37,38,...
#> 31: A same 50 2 17,18,41,42,89,90,...
#> 32: B same 150 2 3,4,5,6,7,8,...
#> 33: B same 150 2 3,4,5,6,7,8,...
#> 34: A same 50 3 9,10,25,26,33,34,...
#> 35: B same 150 3 15,16,21,22,23,24,...
#> 36: B same 150 3 15,16,21,22,23,24,...
#> test.subset train.subsets groups test.fold test
#> train seed n.train.groups iteration
#> <list> <int> <int> <int>
#> 1: 5, 6, 9,10,15,16,... 1 50 1
#> 2: 3,4,5,6,7,8,... 1 150 2
#> 3: 3,4,5,6,7,8,... 1 200 3
#> 4: 3, 4, 7, 8,15,16,... 1 50 4
#> 5: 3,4,5,6,7,8,... 1 150 5
#> 6: 3,4,5,6,7,8,... 1 200 6
#> 7: 1, 2,35,36,39,40,... 1 50 7
#> 8: 1, 2, 9,10,19,20,... 1 150 8
#> 9: 1, 2, 9,10,15,16,... 1 200 9
#> 10: 19,20,63,64,73,74,... 1 50 10
#> 11: 1, 2, 9,10,15,16,... 1 150 11
#> 12: 1, 2, 9,10,15,16,... 1 200 12
#> 13: 29,30,37,38,49,50,... 1 50 13
#> 14: 5, 6,11,12,13,14,... 1 150 14
#> 15: 1,2,3,4,5,6,... 1 200 15
#> 16: 13,14,29,30,49,50,... 1 50 16
#> 17: 1,2,3,4,5,6,... 1 150 17
#> 18: 1,2,3,4,5,6,... 1 200 18
#> 19: 15,16,21,22,55,56,... 1 50 19
#> 20: 3,4,5,6,7,8,... 1 150 20
#> 21: 9,10,17,18,25,26,... 1 50 21
#> 22: 15,16,19,20,23,24,... 1 50 22
#> 23: 15,16,19,20,21,22,... 1 150 23
#> 24: 1, 2, 9,10,25,26,... 1 50 24
#> 25: 11,12,19,20,27,28,... 1 50 25
#> 26: 3,4,5,6,7,8,... 1 150 26
#> 27: 1, 2,17,18,41,42,... 1 50 27
#> 28: 9,10,17,18,25,26,... 1 50 28
#> 29: 59,60,63,64,75,76,... 1 50 29
#> 30: 3,4,5,6,7,8,... 1 150 30
#> 31: 1, 2, 9,10,25,26,... 1 50 31
#> 32: 23,24,37,38,51,52,... 1 50 32
#> 33: 15,16,19,20,21,22,... 1 150 33
#> 34: 1, 2,17,18,41,42,... 1 50 34
#> 35: 11,12,19,20,45,46,... 1 50 35
#> 36: 3,4,5,6,7,8,... 1 150 36
#> train seed n.train.groups iteration
# Baseline learner again: featureless predicts the train-set mean.
(reg.learner.list <- list(
mlr3::LearnerRegrFeatureless$new()))
#> [[1]]
#> <LearnerRegrFeatureless:regr.featureless>: Featureless Regression Learner
#> * Model: -
#> * Parameters: robust=FALSE
#> * Packages: mlr3, stats
#> * Predict Types: [response], se
#> * Feature Types: logical, integer, numeric, character, factor, ordered,
#> POSIXct
#> * Properties: featureless, importance, missings, selected_features
# Add the decision tree learner when rpart is installed (optional dep).
if(requireNamespace("rpart")){
reg.learner.list$rpart <- mlr3::LearnerRegrRpart$new()
}
# Benchmark grid for the sizes=0 (down-sampled same/other/all) CV.
(same.other.grid <- mlr3::benchmark_grid(
reg.task,
reg.learner.list,
same_other_sizes_cv))
#> task learner resampling
#> <char> <char> <char>
#> 1: sin regr.featureless same_other_sizes_cv
#> 2: sin regr.rpart same_other_sizes_cv
# Uncomment the next line to parallelize over the future framework.
##if(require(future))plan("multisession")
# Silence mlr3 progress messages; keep warnings.
lgr::get_logger("mlr3")$set_threshold("warn")
(same.other.result <- mlr3::benchmark(
same.other.grid, store_models = TRUE))
#> <BenchmarkResult> of 72 rows with 2 resampling runs
#> nr task_id learner_id resampling_id iters warnings errors
#> 1 sin regr.featureless same_other_sizes_cv 36 0 0
#> 2 sin regr.rpart same_other_sizes_cv 36 0 0
# Score each train/test split; first row shown below for reference.
same.other.score <- mlr3resampling::score(same.other.result)
same.other.score[1]
#> test.subset train.subsets groups test.fold test
#> <char> <char> <int> <int> <list>
#> 1: A all 200 1 1, 2,49,50,57,58,...
#> train seed n.train.groups iteration
#> <list> <int> <int> <int>
#> 1: 5, 6, 9,10,15,16,... 1 50 1
#> uhash nr task task_id
#> <char> <int> <list> <char>
#> 1: b956f97e-1f5b-43c4-b232-ddafed3a8347 1 <TaskRegr:sin> sin
#> learner learner_id
#> <list> <char>
#> 1: <LearnerRegrFeatureless:regr.featureless> regr.featureless
#> resampling resampling_id prediction regr.mse
#> <list> <char> <list> <num>
#> 1: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.5814085
#> algorithm
#> <char>
#> 1: featureless
The plot below shows the same results (no down-sampling) as if we did sizes=-1
(as in the previous section).
# Keep only the non-down-sampled iterations (train groups == available
# groups), reproducing the same/other/all comparison of the previous
# section.
ggplot()+
geom_point(aes(
regr.mse, train.subsets, color=algorithm),
shape=1,
data=same.other.score[groups==n.train.groups])+
facet_grid(. ~ test.subset, labeller=label_both)
The plots below compare all six train subsets (including three down-sampled), and it is clear there is an effect for sample size.
same.other.score[, subset.N := paste(train.subsets, n.train.groups)][]
#> test.subset train.subsets groups test.fold test
#> <char> <char> <int> <int> <list>
#> 1: A all 200 1 1, 2,49,50,57,58,...
#> 2: A all 200 1 1, 2,49,50,57,58,...
#> 3: A all 200 1 1, 2,49,50,57,58,...
#> 4: B all 200 1 19,20,31,32,37,38,...
#> 5: B all 200 1 19,20,31,32,37,38,...
#> 6: B all 200 1 19,20,31,32,37,38,...
#> 7: A all 200 2 17,18,41,42,89,90,...
#> 8: A all 200 2 17,18,41,42,89,90,...
#> 9: A all 200 2 17,18,41,42,89,90,...
#> 10: B all 200 2 3,4,5,6,7,8,...
#> 11: B all 200 2 3,4,5,6,7,8,...
#> 12: B all 200 2 3,4,5,6,7,8,...
#> 13: A all 200 3 9,10,25,26,33,34,...
#> 14: A all 200 3 9,10,25,26,33,34,...
#> 15: A all 200 3 9,10,25,26,33,34,...
#> 16: B all 200 3 15,16,21,22,23,24,...
#> 17: B all 200 3 15,16,21,22,23,24,...
#> 18: B all 200 3 15,16,21,22,23,24,...
#> 19: A other 150 1 1, 2,49,50,57,58,...
#> 20: A other 150 1 1, 2,49,50,57,58,...
#> 21: B other 50 1 19,20,31,32,37,38,...
#> 22: A other 150 2 17,18,41,42,89,90,...
#> 23: A other 150 2 17,18,41,42,89,90,...
#> 24: B other 50 2 3,4,5,6,7,8,...
#> 25: A other 150 3 9,10,25,26,33,34,...
#> 26: A other 150 3 9,10,25,26,33,34,...
#> 27: B other 50 3 15,16,21,22,23,24,...
#> 28: A same 50 1 1, 2,49,50,57,58,...
#> 29: B same 150 1 19,20,31,32,37,38,...
#> 30: B same 150 1 19,20,31,32,37,38,...
#> 31: A same 50 2 17,18,41,42,89,90,...
#> 32: B same 150 2 3,4,5,6,7,8,...
#> 33: B same 150 2 3,4,5,6,7,8,...
#> 34: A same 50 3 9,10,25,26,33,34,...
#> 35: B same 150 3 15,16,21,22,23,24,...
#> 36: B same 150 3 15,16,21,22,23,24,...
#> 37: A all 200 1 1, 2,49,50,57,58,...
#> 38: A all 200 1 1, 2,49,50,57,58,...
#> 39: A all 200 1 1, 2,49,50,57,58,...
#> 40: B all 200 1 19,20,31,32,37,38,...
#> 41: B all 200 1 19,20,31,32,37,38,...
#> 42: B all 200 1 19,20,31,32,37,38,...
#> 43: A all 200 2 17,18,41,42,89,90,...
#> 44: A all 200 2 17,18,41,42,89,90,...
#> 45: A all 200 2 17,18,41,42,89,90,...
#> 46: B all 200 2 3,4,5,6,7,8,...
#> 47: B all 200 2 3,4,5,6,7,8,...
#> 48: B all 200 2 3,4,5,6,7,8,...
#> 49: A all 200 3 9,10,25,26,33,34,...
#> 50: A all 200 3 9,10,25,26,33,34,...
#> 51: A all 200 3 9,10,25,26,33,34,...
#> 52: B all 200 3 15,16,21,22,23,24,...
#> 53: B all 200 3 15,16,21,22,23,24,...
#> 54: B all 200 3 15,16,21,22,23,24,...
#> 55: A other 150 1 1, 2,49,50,57,58,...
#> 56: A other 150 1 1, 2,49,50,57,58,...
#> 57: B other 50 1 19,20,31,32,37,38,...
#> 58: A other 150 2 17,18,41,42,89,90,...
#> 59: A other 150 2 17,18,41,42,89,90,...
#> 60: B other 50 2 3,4,5,6,7,8,...
#> 61: A other 150 3 9,10,25,26,33,34,...
#> 62: A other 150 3 9,10,25,26,33,34,...
#> 63: B other 50 3 15,16,21,22,23,24,...
#> 64: A same 50 1 1, 2,49,50,57,58,...
#> 65: B same 150 1 19,20,31,32,37,38,...
#> 66: B same 150 1 19,20,31,32,37,38,...
#> 67: A same 50 2 17,18,41,42,89,90,...
#> 68: B same 150 2 3,4,5,6,7,8,...
#> 69: B same 150 2 3,4,5,6,7,8,...
#> 70: A same 50 3 9,10,25,26,33,34,...
#> 71: B same 150 3 15,16,21,22,23,24,...
#> 72: B same 150 3 15,16,21,22,23,24,...
#> test.subset train.subsets groups test.fold test
#> train seed n.train.groups iteration
#> <list> <int> <int> <int>
#> 1: 5, 6, 9,10,15,16,... 1 50 1
#> 2: 3,4,5,6,7,8,... 1 150 2
#> 3: 3,4,5,6,7,8,... 1 200 3
#> 4: 3, 4, 7, 8,15,16,... 1 50 4
#> 5: 3,4,5,6,7,8,... 1 150 5
#> 6: 3,4,5,6,7,8,... 1 200 6
#> 7: 1, 2,35,36,39,40,... 1 50 7
#> 8: 1, 2, 9,10,19,20,... 1 150 8
#> 9: 1, 2, 9,10,15,16,... 1 200 9
#> 10: 19,20,63,64,73,74,... 1 50 10
#> 11: 1, 2, 9,10,15,16,... 1 150 11
#> 12: 1, 2, 9,10,15,16,... 1 200 12
#> 13: 29,30,37,38,49,50,... 1 50 13
#> 14: 5, 6,11,12,13,14,... 1 150 14
#> 15: 1,2,3,4,5,6,... 1 200 15
#> 16: 13,14,29,30,49,50,... 1 50 16
#> 17: 1,2,3,4,5,6,... 1 150 17
#> 18: 1,2,3,4,5,6,... 1 200 18
#> 19: 15,16,21,22,55,56,... 1 50 19
#> 20: 3,4,5,6,7,8,... 1 150 20
#> 21: 9,10,17,18,25,26,... 1 50 21
#> 22: 15,16,19,20,23,24,... 1 50 22
#> 23: 15,16,19,20,21,22,... 1 150 23
#> 24: 1, 2, 9,10,25,26,... 1 50 24
#> 25: 11,12,19,20,27,28,... 1 50 25
#> 26: 3,4,5,6,7,8,... 1 150 26
#> 27: 1, 2,17,18,41,42,... 1 50 27
#> 28: 9,10,17,18,25,26,... 1 50 28
#> 29: 59,60,63,64,75,76,... 1 50 29
#> 30: 3,4,5,6,7,8,... 1 150 30
#> 31: 1, 2, 9,10,25,26,... 1 50 31
#> 32: 23,24,37,38,51,52,... 1 50 32
#> 33: 15,16,19,20,21,22,... 1 150 33
#> 34: 1, 2,17,18,41,42,... 1 50 34
#> 35: 11,12,19,20,45,46,... 1 50 35
#> 36: 3,4,5,6,7,8,... 1 150 36
#> 37: 5, 6, 9,10,15,16,... 1 50 1
#> 38: 3,4,5,6,7,8,... 1 150 2
#> 39: 3,4,5,6,7,8,... 1 200 3
#> 40: 3, 4, 7, 8,15,16,... 1 50 4
#> 41: 3,4,5,6,7,8,... 1 150 5
#> 42: 3,4,5,6,7,8,... 1 200 6
#> 43: 1, 2,35,36,39,40,... 1 50 7
#> 44: 1, 2, 9,10,19,20,... 1 150 8
#> 45: 1, 2, 9,10,15,16,... 1 200 9
#> 46: 19,20,63,64,73,74,... 1 50 10
#> 47: 1, 2, 9,10,15,16,... 1 150 11
#> 48: 1, 2, 9,10,15,16,... 1 200 12
#> 49: 29,30,37,38,49,50,... 1 50 13
#> 50: 5, 6,11,12,13,14,... 1 150 14
#> 51: 1,2,3,4,5,6,... 1 200 15
#> 52: 13,14,29,30,49,50,... 1 50 16
#> 53: 1,2,3,4,5,6,... 1 150 17
#> 54: 1,2,3,4,5,6,... 1 200 18
#> 55: 15,16,21,22,55,56,... 1 50 19
#> 56: 3,4,5,6,7,8,... 1 150 20
#> 57: 9,10,17,18,25,26,... 1 50 21
#> 58: 15,16,19,20,23,24,... 1 50 22
#> 59: 15,16,19,20,21,22,... 1 150 23
#> 60: 1, 2, 9,10,25,26,... 1 50 24
#> 61: 11,12,19,20,27,28,... 1 50 25
#> 62: 3,4,5,6,7,8,... 1 150 26
#> 63: 1, 2,17,18,41,42,... 1 50 27
#> 64: 9,10,17,18,25,26,... 1 50 28
#> 65: 59,60,63,64,75,76,... 1 50 29
#> 66: 3,4,5,6,7,8,... 1 150 30
#> 67: 1, 2, 9,10,25,26,... 1 50 31
#> 68: 23,24,37,38,51,52,... 1 50 32
#> 69: 15,16,19,20,21,22,... 1 150 33
#> 70: 1, 2,17,18,41,42,... 1 50 34
#> 71: 11,12,19,20,45,46,... 1 50 35
#> 72: 3,4,5,6,7,8,... 1 150 36
#> train seed n.train.groups iteration
#> uhash nr task task_id
#> <char> <int> <list> <char>
#> 1: b956f97e-1f5b-43c4-b232-ddafed3a8347 1 <TaskRegr:sin> sin
#> 2: b956f97e-1f5b-43c4-b232-ddafed3a8347 1 <TaskRegr:sin> sin
#> 3: b956f97e-1f5b-43c4-b232-ddafed3a8347 1 <TaskRegr:sin> sin
#> 4: b956f97e-1f5b-43c4-b232-ddafed3a8347 1 <TaskRegr:sin> sin
#> 5: b956f97e-1f5b-43c4-b232-ddafed3a8347 1 <TaskRegr:sin> sin
#> 6: b956f97e-1f5b-43c4-b232-ddafed3a8347 1 <TaskRegr:sin> sin
#> 7: b956f97e-1f5b-43c4-b232-ddafed3a8347 1 <TaskRegr:sin> sin
#> 8: b956f97e-1f5b-43c4-b232-ddafed3a8347 1 <TaskRegr:sin> sin
#> 9: b956f97e-1f5b-43c4-b232-ddafed3a8347 1 <TaskRegr:sin> sin
#> 10: b956f97e-1f5b-43c4-b232-ddafed3a8347 1 <TaskRegr:sin> sin
#> 11: b956f97e-1f5b-43c4-b232-ddafed3a8347 1 <TaskRegr:sin> sin
#> 12: b956f97e-1f5b-43c4-b232-ddafed3a8347 1 <TaskRegr:sin> sin
#> 13: b956f97e-1f5b-43c4-b232-ddafed3a8347 1 <TaskRegr:sin> sin
#> 14: b956f97e-1f5b-43c4-b232-ddafed3a8347 1 <TaskRegr:sin> sin
#> 15: b956f97e-1f5b-43c4-b232-ddafed3a8347 1 <TaskRegr:sin> sin
#> 16: b956f97e-1f5b-43c4-b232-ddafed3a8347 1 <TaskRegr:sin> sin
#> 17: b956f97e-1f5b-43c4-b232-ddafed3a8347 1 <TaskRegr:sin> sin
#> 18: b956f97e-1f5b-43c4-b232-ddafed3a8347 1 <TaskRegr:sin> sin
#> 19: b956f97e-1f5b-43c4-b232-ddafed3a8347 1 <TaskRegr:sin> sin
#> 20: b956f97e-1f5b-43c4-b232-ddafed3a8347 1 <TaskRegr:sin> sin
#> 21: b956f97e-1f5b-43c4-b232-ddafed3a8347 1 <TaskRegr:sin> sin
#> 22: b956f97e-1f5b-43c4-b232-ddafed3a8347 1 <TaskRegr:sin> sin
#> 23: b956f97e-1f5b-43c4-b232-ddafed3a8347 1 <TaskRegr:sin> sin
#> 24: b956f97e-1f5b-43c4-b232-ddafed3a8347 1 <TaskRegr:sin> sin
#> 25: b956f97e-1f5b-43c4-b232-ddafed3a8347 1 <TaskRegr:sin> sin
#> 26: b956f97e-1f5b-43c4-b232-ddafed3a8347 1 <TaskRegr:sin> sin
#> 27: b956f97e-1f5b-43c4-b232-ddafed3a8347 1 <TaskRegr:sin> sin
#> 28: b956f97e-1f5b-43c4-b232-ddafed3a8347 1 <TaskRegr:sin> sin
#> 29: b956f97e-1f5b-43c4-b232-ddafed3a8347 1 <TaskRegr:sin> sin
#> 30: b956f97e-1f5b-43c4-b232-ddafed3a8347 1 <TaskRegr:sin> sin
#> 31: b956f97e-1f5b-43c4-b232-ddafed3a8347 1 <TaskRegr:sin> sin
#> 32: b956f97e-1f5b-43c4-b232-ddafed3a8347 1 <TaskRegr:sin> sin
#> 33: b956f97e-1f5b-43c4-b232-ddafed3a8347 1 <TaskRegr:sin> sin
#> 34: b956f97e-1f5b-43c4-b232-ddafed3a8347 1 <TaskRegr:sin> sin
#> 35: b956f97e-1f5b-43c4-b232-ddafed3a8347 1 <TaskRegr:sin> sin
#> 36: b956f97e-1f5b-43c4-b232-ddafed3a8347 1 <TaskRegr:sin> sin
#> 37: dc2721cf-a2b3-4505-9fd7-9c7e0c4b546b 2 <TaskRegr:sin> sin
#> 38: dc2721cf-a2b3-4505-9fd7-9c7e0c4b546b 2 <TaskRegr:sin> sin
#> 39: dc2721cf-a2b3-4505-9fd7-9c7e0c4b546b 2 <TaskRegr:sin> sin
#> 40: dc2721cf-a2b3-4505-9fd7-9c7e0c4b546b 2 <TaskRegr:sin> sin
#> 41: dc2721cf-a2b3-4505-9fd7-9c7e0c4b546b 2 <TaskRegr:sin> sin
#> 42: dc2721cf-a2b3-4505-9fd7-9c7e0c4b546b 2 <TaskRegr:sin> sin
#> 43: dc2721cf-a2b3-4505-9fd7-9c7e0c4b546b 2 <TaskRegr:sin> sin
#> 44: dc2721cf-a2b3-4505-9fd7-9c7e0c4b546b 2 <TaskRegr:sin> sin
#> 45: dc2721cf-a2b3-4505-9fd7-9c7e0c4b546b 2 <TaskRegr:sin> sin
#> 46: dc2721cf-a2b3-4505-9fd7-9c7e0c4b546b 2 <TaskRegr:sin> sin
#> 47: dc2721cf-a2b3-4505-9fd7-9c7e0c4b546b 2 <TaskRegr:sin> sin
#> 48: dc2721cf-a2b3-4505-9fd7-9c7e0c4b546b 2 <TaskRegr:sin> sin
#> 49: dc2721cf-a2b3-4505-9fd7-9c7e0c4b546b 2 <TaskRegr:sin> sin
#> 50: dc2721cf-a2b3-4505-9fd7-9c7e0c4b546b 2 <TaskRegr:sin> sin
#> 51: dc2721cf-a2b3-4505-9fd7-9c7e0c4b546b 2 <TaskRegr:sin> sin
#> 52: dc2721cf-a2b3-4505-9fd7-9c7e0c4b546b 2 <TaskRegr:sin> sin
#> 53: dc2721cf-a2b3-4505-9fd7-9c7e0c4b546b 2 <TaskRegr:sin> sin
#> 54: dc2721cf-a2b3-4505-9fd7-9c7e0c4b546b 2 <TaskRegr:sin> sin
#> 55: dc2721cf-a2b3-4505-9fd7-9c7e0c4b546b 2 <TaskRegr:sin> sin
#> 56: dc2721cf-a2b3-4505-9fd7-9c7e0c4b546b 2 <TaskRegr:sin> sin
#> 57: dc2721cf-a2b3-4505-9fd7-9c7e0c4b546b 2 <TaskRegr:sin> sin
#> 58: dc2721cf-a2b3-4505-9fd7-9c7e0c4b546b 2 <TaskRegr:sin> sin
#> 59: dc2721cf-a2b3-4505-9fd7-9c7e0c4b546b 2 <TaskRegr:sin> sin
#> 60: dc2721cf-a2b3-4505-9fd7-9c7e0c4b546b 2 <TaskRegr:sin> sin
#> 61: dc2721cf-a2b3-4505-9fd7-9c7e0c4b546b 2 <TaskRegr:sin> sin
#> 62: dc2721cf-a2b3-4505-9fd7-9c7e0c4b546b 2 <TaskRegr:sin> sin
#> 63: dc2721cf-a2b3-4505-9fd7-9c7e0c4b546b 2 <TaskRegr:sin> sin
#> 64: dc2721cf-a2b3-4505-9fd7-9c7e0c4b546b 2 <TaskRegr:sin> sin
#> 65: dc2721cf-a2b3-4505-9fd7-9c7e0c4b546b 2 <TaskRegr:sin> sin
#> 66: dc2721cf-a2b3-4505-9fd7-9c7e0c4b546b 2 <TaskRegr:sin> sin
#> 67: dc2721cf-a2b3-4505-9fd7-9c7e0c4b546b 2 <TaskRegr:sin> sin
#> 68: dc2721cf-a2b3-4505-9fd7-9c7e0c4b546b 2 <TaskRegr:sin> sin
#> 69: dc2721cf-a2b3-4505-9fd7-9c7e0c4b546b 2 <TaskRegr:sin> sin
#> 70: dc2721cf-a2b3-4505-9fd7-9c7e0c4b546b 2 <TaskRegr:sin> sin
#> 71: dc2721cf-a2b3-4505-9fd7-9c7e0c4b546b 2 <TaskRegr:sin> sin
#> 72: dc2721cf-a2b3-4505-9fd7-9c7e0c4b546b 2 <TaskRegr:sin> sin
#> uhash nr task task_id
#> learner learner_id
#> <list> <char>
#> 1: <LearnerRegrFeatureless:regr.featureless> regr.featureless
#> 2: <LearnerRegrFeatureless:regr.featureless> regr.featureless
#> 3: <LearnerRegrFeatureless:regr.featureless> regr.featureless
#> 4: <LearnerRegrFeatureless:regr.featureless> regr.featureless
#> 5: <LearnerRegrFeatureless:regr.featureless> regr.featureless
#> 6: <LearnerRegrFeatureless:regr.featureless> regr.featureless
#> 7: <LearnerRegrFeatureless:regr.featureless> regr.featureless
#> 8: <LearnerRegrFeatureless:regr.featureless> regr.featureless
#> 9: <LearnerRegrFeatureless:regr.featureless> regr.featureless
#> 10: <LearnerRegrFeatureless:regr.featureless> regr.featureless
#> 11: <LearnerRegrFeatureless:regr.featureless> regr.featureless
#> 12: <LearnerRegrFeatureless:regr.featureless> regr.featureless
#> 13: <LearnerRegrFeatureless:regr.featureless> regr.featureless
#> 14: <LearnerRegrFeatureless:regr.featureless> regr.featureless
#> 15: <LearnerRegrFeatureless:regr.featureless> regr.featureless
#> 16: <LearnerRegrFeatureless:regr.featureless> regr.featureless
#> 17: <LearnerRegrFeatureless:regr.featureless> regr.featureless
#> 18: <LearnerRegrFeatureless:regr.featureless> regr.featureless
#> 19: <LearnerRegrFeatureless:regr.featureless> regr.featureless
#> 20: <LearnerRegrFeatureless:regr.featureless> regr.featureless
#> 21: <LearnerRegrFeatureless:regr.featureless> regr.featureless
#> 22: <LearnerRegrFeatureless:regr.featureless> regr.featureless
#> 23: <LearnerRegrFeatureless:regr.featureless> regr.featureless
#> 24: <LearnerRegrFeatureless:regr.featureless> regr.featureless
#> 25: <LearnerRegrFeatureless:regr.featureless> regr.featureless
#> 26: <LearnerRegrFeatureless:regr.featureless> regr.featureless
#> 27: <LearnerRegrFeatureless:regr.featureless> regr.featureless
#> 28: <LearnerRegrFeatureless:regr.featureless> regr.featureless
#> 29: <LearnerRegrFeatureless:regr.featureless> regr.featureless
#> 30: <LearnerRegrFeatureless:regr.featureless> regr.featureless
#> 31: <LearnerRegrFeatureless:regr.featureless> regr.featureless
#> 32: <LearnerRegrFeatureless:regr.featureless> regr.featureless
#> 33: <LearnerRegrFeatureless:regr.featureless> regr.featureless
#> 34: <LearnerRegrFeatureless:regr.featureless> regr.featureless
#> 35: <LearnerRegrFeatureless:regr.featureless> regr.featureless
#> 36: <LearnerRegrFeatureless:regr.featureless> regr.featureless
#> 37: <LearnerRegrRpart:regr.rpart> regr.rpart
#> 38: <LearnerRegrRpart:regr.rpart> regr.rpart
#> 39: <LearnerRegrRpart:regr.rpart> regr.rpart
#> 40: <LearnerRegrRpart:regr.rpart> regr.rpart
#> 41: <LearnerRegrRpart:regr.rpart> regr.rpart
#> 42: <LearnerRegrRpart:regr.rpart> regr.rpart
#> 43: <LearnerRegrRpart:regr.rpart> regr.rpart
#> 44: <LearnerRegrRpart:regr.rpart> regr.rpart
#> 45: <LearnerRegrRpart:regr.rpart> regr.rpart
#> 46: <LearnerRegrRpart:regr.rpart> regr.rpart
#> 47: <LearnerRegrRpart:regr.rpart> regr.rpart
#> 48: <LearnerRegrRpart:regr.rpart> regr.rpart
#> 49: <LearnerRegrRpart:regr.rpart> regr.rpart
#> 50: <LearnerRegrRpart:regr.rpart> regr.rpart
#> 51: <LearnerRegrRpart:regr.rpart> regr.rpart
#> 52: <LearnerRegrRpart:regr.rpart> regr.rpart
#> 53: <LearnerRegrRpart:regr.rpart> regr.rpart
#> 54: <LearnerRegrRpart:regr.rpart> regr.rpart
#> 55: <LearnerRegrRpart:regr.rpart> regr.rpart
#> 56: <LearnerRegrRpart:regr.rpart> regr.rpart
#> 57: <LearnerRegrRpart:regr.rpart> regr.rpart
#> 58: <LearnerRegrRpart:regr.rpart> regr.rpart
#> 59: <LearnerRegrRpart:regr.rpart> regr.rpart
#> 60: <LearnerRegrRpart:regr.rpart> regr.rpart
#> 61: <LearnerRegrRpart:regr.rpart> regr.rpart
#> 62: <LearnerRegrRpart:regr.rpart> regr.rpart
#> 63: <LearnerRegrRpart:regr.rpart> regr.rpart
#> 64: <LearnerRegrRpart:regr.rpart> regr.rpart
#> 65: <LearnerRegrRpart:regr.rpart> regr.rpart
#> 66: <LearnerRegrRpart:regr.rpart> regr.rpart
#> 67: <LearnerRegrRpart:regr.rpart> regr.rpart
#> 68: <LearnerRegrRpart:regr.rpart> regr.rpart
#> 69: <LearnerRegrRpart:regr.rpart> regr.rpart
#> 70: <LearnerRegrRpart:regr.rpart> regr.rpart
#> 71: <LearnerRegrRpart:regr.rpart> regr.rpart
#> 72: <LearnerRegrRpart:regr.rpart> regr.rpart
#> learner learner_id
#> resampling resampling_id prediction regr.mse
#> <list> <char> <list> <num>
#> 1: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.5814085
#> 2: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.5888123
#> 3: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.6004137
#> 4: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.7418338
#> 5: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.7448785
#> 6: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.7426736
#> 7: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.7032864
#> 8: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.6915582
#> 9: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.6935124
#> 10: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.6876009
#> 11: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.6938962
#> 12: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.6927970
#> 13: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.8679940
#> 14: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.8679637
#> 15: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.8698560
#> 16: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.8100344
#> 17: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.8080412
#> 18: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.8079158
#> 19: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.6008614
#> 20: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.5986958
#> 21: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.7438359
#> 22: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.7575599
#> 23: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.6927668
#> 24: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.6974565
#> 25: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.8758799
#> 26: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.8682078
#> 27: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.8095930
#> 28: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.6058833
#> 29: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.7644929
#> 30: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.7423914
#> 31: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.6967243
#> 32: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.7065626
#> 33: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.6915689
#> 34: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.8800099
#> 35: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.8944441
#> 36: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.8090933
#> 37: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.9579886
#> 38: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.5572793
#> 39: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.5762640
#> 40: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.6754320
#> 41: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.4605665
#> 42: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.4170070
#> 43: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.7317229
#> 44: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.5239765
#> 45: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.3890640
#> 46: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.7954012
#> 47: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.3448899
#> 48: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.3294351
#> 49: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.5894307
#> 50: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.5876353
#> 51: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.5989192
#> 52: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.6718522
#> 53: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.4275034
#> 54: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.5398954
#> 55: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.7813902
#> 56: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.6348613
#> 57: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.5887354
#> 58: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.6004308
#> 59: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.4871282
#> 60: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.5600961
#> 61: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.7225103
#> 62: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.4658216
#> 63: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.7620797
#> 64: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.8782663
#> 65: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.7809353
#> 66: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.4280411
#> 67: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.6343650
#> 68: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.6115182
#> 69: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.4005002
#> 70: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.7486917
#> 71: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.9276521
#> 72: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.4816441
#> resampling resampling_id prediction regr.mse
#> algorithm subset.N
#> <char> <char>
#> 1: featureless all 50
#> 2: featureless all 150
#> 3: featureless all 200
#> 4: featureless all 50
#> 5: featureless all 150
#> 6: featureless all 200
#> 7: featureless all 50
#> 8: featureless all 150
#> 9: featureless all 200
#> 10: featureless all 50
#> 11: featureless all 150
#> 12: featureless all 200
#> 13: featureless all 50
#> 14: featureless all 150
#> 15: featureless all 200
#> 16: featureless all 50
#> 17: featureless all 150
#> 18: featureless all 200
#> 19: featureless other 50
#> 20: featureless other 150
#> 21: featureless other 50
#> 22: featureless other 50
#> 23: featureless other 150
#> 24: featureless other 50
#> 25: featureless other 50
#> 26: featureless other 150
#> 27: featureless other 50
#> 28: featureless same 50
#> 29: featureless same 50
#> 30: featureless same 150
#> 31: featureless same 50
#> 32: featureless same 50
#> 33: featureless same 150
#> 34: featureless same 50
#> 35: featureless same 50
#> 36: featureless same 150
#> 37: rpart all 50
#> 38: rpart all 150
#> 39: rpart all 200
#> 40: rpart all 50
#> 41: rpart all 150
#> 42: rpart all 200
#> 43: rpart all 50
#> 44: rpart all 150
#> 45: rpart all 200
#> 46: rpart all 50
#> 47: rpart all 150
#> 48: rpart all 200
#> 49: rpart all 50
#> 50: rpart all 150
#> 51: rpart all 200
#> 52: rpart all 50
#> 53: rpart all 150
#> 54: rpart all 200
#> 55: rpart other 50
#> 56: rpart other 150
#> 57: rpart other 50
#> 58: rpart other 50
#> 59: rpart other 150
#> 60: rpart other 50
#> 61: rpart other 50
#> 62: rpart other 150
#> 63: rpart other 50
#> 64: rpart same 50
#> 65: rpart same 50
#> 66: rpart same 150
#> 67: rpart same 50
#> 68: rpart same 50
#> 69: rpart same 150
#> 70: rpart same 50
#> 71: rpart same 50
#> 72: rpart same 150
#> algorithm subset.N
# Order subset.N factor levels by train subset first, then by number of train groups.
(levs <- same.other.score[order(train.subsets, n.train.groups), unique(subset.N)])
#> [1] "all 50" "all 150" "all 200" "other 50" "other 150" "same 50"
#> [7] "same 150"
# Convert subset.N to a factor so plot rows follow the ordering above.
same.other.score[, subset.N.fac := factor(subset.N, levs)]
# Plot test MSE for each train subset / size combination, one panel per test subset.
ggplot()+
geom_point(aes(
regr.mse, subset.N.fac, color=algorithm),
shape=1,
data=same.other.score)+
facet_wrap("test.subset", labeller=label_both, scales="free", nrow=1)
# Alternative ordering: number of train groups first, then train subset.
(levs <- same.other.score[order(n.train.groups, train.subsets), unique(subset.N)])
#> [1] "all 50" "other 50" "same 50" "all 150" "other 150" "same 150"
#> [7] "all 200"
same.other.score[, N.subset.fac := factor(subset.N, levs)]
# Same plot as above, but with rows grouped by train set size instead of subset.
ggplot()+
geom_point(aes(
regr.mse, N.subset.fac, color=algorithm),
shape=1,
data=same.other.score)+
facet_wrap("test.subset", labeller=label_both, scales="free", nrow=1)
Another way to view the effect of sample size is to plot the test/prediction error, as a function of number of train data, as in the plots below.
# Test MSE as a function of number of train groups (log-scaled x axis),
# one line per combination of train subset, random seed, and algorithm;
# panels indexed by test fold (rows) and test subset (columns).
ggplot()+
geom_point(aes(
n.train.groups, regr.mse,
color=train.subsets),
shape=1,
data=same.other.score)+
geom_line(aes(
n.train.groups, regr.mse,
# NOTE(review): aes(subset=...) is not a standard ggplot2 aesthetic;
# presumably supported by animint2's ggplot fork — confirm.
subset=paste(train.subsets, seed, algorithm),
linetype=algorithm,
color=train.subsets),
data=same.other.score)+
facet_grid(test.fold ~ test.subset, labeller=label_both)+
scale_x_log10()
# Focus on the rpart learner, excluding "other" train subsets, to compare
# "same" versus "all" training data.
rpart.score <- same.other.score[algorithm=="rpart" & train.subsets != "other"]
# Same plot as the previous one, restricted to rpart.
ggplot()+
geom_point(aes(
n.train.groups, regr.mse,
color=train.subsets),
shape=1,
data=rpart.score)+
geom_line(aes(
n.train.groups, regr.mse,
# NOTE(review): aes(subset=...) — animint2-specific aesthetic, presumably.
subset=paste(train.subsets, seed, algorithm),
color=train.subsets),
data=rpart.score)+
facet_grid(test.fold ~ test.subset, labeller=label_both)+
scale_x_log10()
In this section we show how ResamplingSameOtherSizesCV
can be used on a task with stratification and grouping, for hyper-parameter learning. First we recall the previously defined task and evaluation CV.
# Inspect the column roles of the previously defined task.
str(reg.task$col_roles)
#> List of 8
#> $ feature: chr "x"
#> $ target : chr "y"
#> $ name : chr(0)
#> $ order : chr(0)
#> $ stratum: chr "random_subset"
#> $ group : chr "agroup"
#> $ weight : chr(0)
#> $ subset : chr "random_subset"
We see in the output above that the task has column roles for both stratum
and group, which normally errors when used with ResamplingCV:
# Standard CV errors when the task has both stratum and group column roles.
mlr3::ResamplingCV$new()$instantiate(reg.task)
#> Error: Cannot combine stratification with grouping
Below we show how ResamplingSameOtherSizesCV
can be used instead:
# ResamplingSameOtherSizesCV with ignore_subset=TRUE ignores the subset role,
# so it can be used like standard CV while still supporting the combination
# of stratum and group roles.
ignore.cv <- mlr3resampling::ResamplingSameOtherSizesCV$new()
ignore.cv$param_set$values$ignore_subset <- TRUE
ignore.cv$instantiate(reg.task)
# Inspect the instantiated iterations: one row per test fold.
ignore.cv$instance$iteration.dt
#> test.subset train.subsets groups test.fold test
#> <char> <char> <int> <int> <list>
#> 1: full same 200 1 5, 6, 7, 8, 9,10,...
#> 2: full same 200 2 3, 4,11,12,13,14,...
#> 3: full same 200 3 1, 2,25,26,31,32,...
#> train seed n.train.groups iteration
#> <list> <int> <int> <int>
#> 1: 1, 2, 3, 4,11,12,... 1 200 1
#> 2: 1,2,5,6,7,8,... 1 200 2
#> 3: 3,4,5,6,7,8,... 1 200 3
To use the above CV object with a learning algorithm in a benchmark experiment, we need to use it as the resampling
argument to auto_tuner
, as in the code below,
# Run a benchmark comparing featureless, rpart, and tuned-rpart learners.
# subtrain.valid.cv: resampling used for the inner (subtrain/validation)
#   split inside auto_tuner; the outer evaluation split comes from the
#   global same_other_sizes_cv, and the task from the global reg.task.
# Returns the BenchmarkResult (value of the final assignment).
do_benchmark <- function(subtrain.valid.cv){
reg.learner.list <- list(
mlr3::LearnerRegrFeatureless$new())
# rpart and mlr3tuning are optional: the corresponding learners are
# only added when each package is installed.
if(requireNamespace("rpart")){
reg.learner.list$rpart <- mlr3::LearnerRegrRpart$new()
if(requireNamespace("mlr3tuning")){
rpart.learner <- mlr3::LearnerRegrRpart$new()
##mlr3tuningspaces::lts(rpart.learner)$param_set$values
# Tune the complexity parameter cp on a log scale in [1e-4, 0.1].
rpart.learner$param_set$values$cp <- paradox::to_tune(1e-4, 0.1, log=TRUE)
reg.learner.list$rpart.tuned <- mlr3tuning::auto_tuner(
tuner = mlr3tuning::TunerGridSearch$new(),
learner = rpart.learner,
resampling = subtrain.valid.cv,
measure = mlr3::msr("regr.mse"))
}
}
same.other.grid <- mlr3::benchmark_grid(
reg.task,
reg.learner.list,
same_other_sizes_cv)
# Silence tuning progress messages below the "warn" level.
lgr::get_logger("bbotk")$set_threshold("warn")
same.other.result <- mlr3::benchmark(
same.other.grid, store_models = TRUE)
}
# Using standard CV for the inner split fails, because the task combines
# stratification with grouping.
do_benchmark(mlr3::ResamplingCV$new())
#> Loading required namespace: mlr3tuning
#> Error: Cannot combine stratification with grouping
The error above is because ResamplingCV
does not support stratification and grouping. To fix that, we can use the code below:
# Use ResamplingSameOtherSizesCV with ignore_subset=TRUE for the inner
# subtrain/validation split; it supports stratum + group roles together.
ignore.cv <- mlr3resampling::ResamplingSameOtherSizesCV$new()
ignore.cv$param_set$values$ignore_subset <- TRUE
(same.other.result <- do_benchmark(ignore.cv))
#> <BenchmarkResult> of 108 rows with 3 resampling runs
#> nr task_id learner_id resampling_id iters warnings errors
#> 1 sin regr.featureless same_other_sizes_cv 36 0 0
#> 2 sin regr.rpart same_other_sizes_cv 36 0 0
#> 3 sin regr.rpart.tuned same_other_sizes_cv 36 0 0
The output above shows that the benchmark worked. The code below plots the results.
# Compute scores (one row per resampling iteration) with subset metadata.
same.other.score <- mlr3resampling::score(same.other.result)
# Show the first score row.
same.other.score[1]
#> test.subset train.subsets groups test.fold test
#> <char> <char> <int> <int> <list>
#> 1: A all 200 1 1, 2,49,50,57,58,...
#> train seed n.train.groups iteration
#> <list> <int> <int> <int>
#> 1: 5, 6, 9,10,15,16,... 1 50 1
#> uhash nr task task_id
#> <char> <int> <list> <char>
#> 1: b0494f0d-dd0e-418c-916f-b46a9b005401 1 <TaskRegr:sin> sin
#> learner learner_id
#> <list> <char>
#> 1: <LearnerRegrFeatureless:regr.featureless> regr.featureless
#> resampling resampling_id prediction regr.mse
#> <list> <char> <list> <num>
#> 1: <ResamplingSameOtherSizesCV> same_other_sizes_cv <PredictionRegr> 0.5814085
#> algorithm
#> <char>
#> 1: featureless
# Aggregate scores: mean and SD of test MSE for each combination of
# algorithm, test subset, and train subset.
same.other.wide <- dcast(
  same.other.score,
  algorithm + test.subset + train.subsets ~ .,
  list(mean, sd),
  value.var="regr.mse")
# Plot mean +/- SD segments with the mean as an open-circle point;
# one panel per algorithm x test.subset combination.
# (Removed stray shape=1 from geom_segment: segments have no shape
# aesthetic, so the parameter was ignored with a warning.)
ggplot()+
  geom_segment(aes(
    regr.mse_mean+regr.mse_sd, train.subsets,
    xend=regr.mse_mean-regr.mse_sd, yend=train.subsets),
    data=same.other.wide)+
  geom_point(aes(
    regr.mse_mean, train.subsets),
    shape=1,
    data=same.other.wide)+
  facet_grid(algorithm ~ test.subset, labeller=label_both)
The plot above has different panels for rpart (without tuning) and tuned
(rpart with tuning of cp).
mlr3resampling::ResamplingSameOtherSizesCV
can be used for model evaluation (train/test split):

- compare prediction accuracy of models trained on the same, other, or all subsets (use the subset role).
- compare prediction accuracy of models trained on different data sizes (use the sizes parameter).

It can also be used for model training (subtrain/validation split):

- it supports tasks with both stratum and group roles (use it as the resampling argument of auto_tuner).

sessionInfo()
#> R Under development (unstable) (2024-01-23 r85822 ucrt)
#> Platform: x86_64-w64-mingw32/x64
#> Running under: Windows 10 x64 (build 19045)
#>
#> Matrix products: default
#>
#>
#> locale:
#> [1] LC_COLLATE=C
#> [2] LC_CTYPE=English_United States.utf8
#> [3] LC_MONETARY=English_United States.utf8
#> [4] LC_NUMERIC=C
#> [5] LC_TIME=English_United States.utf8
#>
#> time zone: America/Phoenix
#> tzcode source: internal
#>
#> attached base packages:
#> [1] stats graphics grDevices utils datasets methods base
#>
#> other attached packages:
#> [1] mlr3_0.18.0 lgr_0.4.4 animint2_2024.1.24 data.table_1.15.99
#>
#> loaded via a namespace (and not attached):
#> [1] future.apply_1.11.2 gtable_0.3.4 jsonlite_1.8.8
#> [4] highr_0.10 compiler_4.4.0 crayon_1.5.2
#> [7] rpart_4.1.23 Rcpp_1.0.12 stringr_1.5.1
#> [10] parallel_4.4.0 jquerylib_0.1.4 globals_0.16.3
#> [13] scales_1.3.0 uuid_1.2-0 RhpcBLASctl_0.23-42
#> [16] yaml_2.3.8 fastmap_1.1.1 R6_2.5.1
#> [19] plyr_1.8.9 mlr3tuning_0.19.2 labeling_0.4.3
#> [22] knitr_1.46 palmerpenguins_0.1.1 backports_1.4.1
#> [25] checkmate_2.3.1 future_1.33.2 munsell_0.5.1
#> [28] paradox_0.11.1 bslib_0.7.0 mlr3measures_0.5.0
#> [31] rlang_1.1.3 stringi_1.8.3 cachem_1.0.8
#> [34] xfun_0.43 mlr3misc_0.15.0 sass_0.4.9
#> [37] RJSONIO_1.3-1.9 cli_3.6.2 magrittr_2.0.3
#> [40] digest_0.6.34 grid_4.4.0 bbotk_0.7.3
#> [43] nc_2024.2.21 lifecycle_1.0.4 evaluate_0.23
#> [46] glue_1.7.0 farver_2.1.1 listenv_0.9.1
#> [49] codetools_0.2-19 parallelly_1.37.1 colorspace_2.1-0
#> [52] reshape2_1.4.4 rmarkdown_2.26 mlr3resampling_2024.4.14
#> [55] tools_4.4.0 htmltools_0.5.8.1