library(reservoirnet)
library(dplyr)
library(ggplot2)
This vignette shows how to find the best hyperparameters using a random search strategy. We will use the same data as in vignette 01 (basic usage) and learn how to find the best set of hyperparameters.
We first load the data :
# Load the example dataset shipped with the package.
data("dfCovid")
# Forecast horizon, in days.
dist_forecast <- 14
# Date separating the train period from the test period.
traintest_date <- as.Date("2022-01-01")
Then we smooth the data to avoid the large variability of the RT-PCR measurements:
# Build the modelling table: the outcome is `hosp` shifted `dist_forecast`
# days ahead, and `outcomeDate` is the date that outcome refers to.
dfOutcome <- dfCovid %>%
  # outcome at 14 days
  mutate(
    outcome = lead(x = hosp, n = dist_forecast),
    outcomeDate = date + dist_forecast
  ) %>%
  # rolling average over the current day and the 6 previous ones
  # for the Positive and Tested columns
  mutate(
    across(
      .cols = all_of(c("Positive", "Tested")),
      .fns = function(x) {
        slider::slide_dbl(.x = x, .before = 6, .f = mean)
      }
    )
  )
Now that we have our data ready, we can plot those :
# Plot the three input series, one facet per series, and mark the
# train/test split date with a vertical line.
dfOutcome %>%
  tidyr::pivot_longer(cols = c("hosp", "Positive", "Tested")) %>%
  ggplot2::ggplot(mapping = aes(x = date, y = value)) +
  geom_line() +
  facet_grid(name ~ ., scales = "free_y") +
  theme_bw() +
  geom_vline(
    mapping = aes(color = "train-test sets", xintercept = traintest_date)
  ) +
  labs(color = "") +
  theme(legend.position = "bottom")
In order to choose the best set of hyperparameters (leaking rate, input scaling, spectral radius and ridge penalty) on the train set, we are going to use an accumulate-forward procedure. Basically, during the year 2021, we are going to train the model every 6 months and evaluate the forecast on the next 3 months. This procedure will be repeated several times with a different set of hyperparameters each time.
First we set the periods of training and evaluation :
# dates of training: each model is trained on data up to these dates
vec_train_sets <- as.Date(c("2021-01-01", "2021-06-01"))
# dates of evaluation: each evaluation window starts `dist_forecast` days
# after the end of its training window ...
vec_test_sets_start <- vec_train_sets + dist_forecast
# ... and ends where the next window starts; the last one ends the day
# before the train/test split date
vec_test_sets_end <- c(vec_test_sets_start[-1], traintest_date - 1)
# get everything in a table, one row per train/evaluation period
dfaccumulateForward <- data.frame(
  train_date = vec_train_sets,
  test_start = vec_test_sets_start,
  test_end = vec_test_sets_end
)
Then we define the objective function, which computes the MSE for a given set of hyperparameters:
# Objective function of the random search.
#
# Builds a reservoir + ridge readout model from one set of hyperparameters,
# trains and evaluates it on every period listed in `dfaccumulateForward`,
# and returns the mean squared error over all evaluated predictions.
#
# Arguments:
#   ridge               ridge penalty given to the readout node
#   leaking_rate        leaking rate given to the reservoir (`lr`)
#   input_scaling       input scaling given to the reservoir
#   spectral_radius     spectral radius given to the reservoir (`sr`)
#   dfaccumulateForward table with columns train_date, test_start, test_end;
#                       one row per train/evaluation period
#
# Returns: a single numeric value, the MSE (NA if any prediction or outcome
# in the evaluation periods is NA, since no NA removal is done).
#
# NOTE: `dfOutcome` is not an argument; it is read from the calling
# environment and must exist before this function is called.
fct_objective <- function(ridge,
                          leaking_rate,
                          input_scaling,
                          spectral_radius,
                          dfaccumulateForward) {
  ##### reservoir architecture
  # set reservoir, using the hyperparameters under evaluation;
  # as.numeric() drops the names carried by values extracted from a
  # data frame row (e.g. row_hp["ridge"])
  reservoir <- reservoirnet::createNode(
    nodeType = "Reservoir",
    units = 500,
    lr = as.numeric(leaking_rate),
    sr = as.numeric(spectral_radius),
    input_scaling = as.numeric(input_scaling)
  )
  # set readout
  readout <- reservoirnet::createNode(
    nodeType = "Ridge",
    ridge = as.numeric(ridge)
  )
  # connect them
  model <- reservoirnet::link(reservoir, readout)

  ##### evaluate model
  # one table of predictions per train/evaluation period, stacked together
  ls_predictions <- lapply(
    X = seq_len(nrow(dfaccumulateForward)),
    FUN = function(i) {
      fct_performance_period(
        model = model,
        dfOutcome = dfOutcome,
        train_date = as.Date(dfaccumulateForward$train_date[i]),
        test_start = as.Date(dfaccumulateForward$test_start[i]),
        test_end = as.Date(dfaccumulateForward$test_end[i])
      )
    }
  )
  dfPredictions <- bind_rows(ls_predictions)

  ##### get performance
  mse <- mean((dfPredictions$pred - dfPredictions$outcome)^2)
  return(mse)
}
# Train `model` on one training window and predict one evaluation window.
#
# Arguments:
#   model       model node passed to reservoirnet::reservoirR_fit()
#   train_date  rows with outcomeDate <= train_date form the train set
#   test_start  first outcomeDate kept in the returned predictions
#   test_end    rows with outcomeDate <= test_end are fed to the model
#               for prediction
#   dfOutcome   table with columns hosp, Positive, Tested, outcome and
#               outcomeDate
#
# Returns: a table with columns outcomeDate, outcome and pred, restricted
# to rows with test_start <= outcomeDate <= test_end.
fct_performance_period <- function(model,
                                   train_date,
                                   test_start,
                                   test_end,
                                   dfOutcome) {
  ##### train and test set
  # train set
  dfTrain <- dfOutcome %>% filter(outcomeDate <= train_date)
  yTrain <- dfTrain %>% select(outcome)
  xTrain <- dfTrain %>% select(hosp, Positive, Tested)
  # test set: the whole history up to test_end is given to the model,
  # `eval_period` flags the rows that are actually evaluated
  dfTest <- dfOutcome %>% filter(outcomeDate <= test_end)
  xTest <- dfTest %>% select(hosp, Positive, Tested)
  yTest <- dfTest %>%
    select(outcomeDate, outcome) %>%
    mutate(eval_period = outcomeDate >= test_start)

  ##### preprocessing of the data
  # standardise based on training set values: every feature is divided by
  # its maximum over the train set (the same factor is applied to the
  # test set, so no information from the test period is used)
  vec_train_max <- apply(X = xTrain, MARGIN = 2, FUN = max)
  xTrainstand <- sweep(
    x = as.matrix(xTrain), MARGIN = 2, STATS = vec_train_max, FUN = "/"
  )
  xTeststand <- sweep(
    x = as.matrix(xTest), MARGIN = 2, STATS = vec_train_max, FUN = "/"
  )
  # convert to array
  lsdf <- lapply(
    X = list(
      yTrain = yTrain,
      xTrain = xTrainstand,
      xTest = xTeststand
    ),
    FUN = function(x) as.array(as.matrix(x))
  )

  ##### fit reservoir on train set
  # NOTE(review): warmup = 30 presumably discards the first 30 time steps
  # when fitting -- confirm against the reservoirnet documentation
  fit <- reservoirnet::reservoirR_fit(
    node = model,
    X = lsdf$xTrain,
    Y = lsdf$yTrain,
    warmup = 30,
    reset = TRUE
  )

  ##### predict with the reservoir
  vec_pred <- reservoirnet::predict_seq(
    node = fit$fit,
    X = lsdf$xTest,
    reset = TRUE
  )
  # keep only the predictions falling in the evaluation period
  dfPredictions <- yTest %>%
    mutate(pred = vec_pred) %>%
    filter(eval_period) %>%
    select(outcomeDate, outcome, pred)
  return(dfPredictions)
}
To generate the hyperparameters, we sample them from log-uniform distributions. This is easily done using the rloguniform and random_search_hyperparam functions. For instance:
# Example: draw 50 hyperparameter sets. ridge and spectral_radius are
# sampled log-uniformly; input_scaling and leaking_rate are held constant.
random_search_hyperparam(
  n = 50,
  ls_fct = list(
    ridge = function(n) {
      rloguniform(n = n, min = 1e-10, max = 1e-1)
    },
    input_scaling = function(n) {
      1
    },
    spectral_radius = function(n) {
      rloguniform(n = n, min = 1e-10, max = 1e5)
    },
    leaking_rate = function(n) {
      0.9
    }
  )
) %>%
  head()
## # A tibble: 6 × 5
## search_id ridge input_scaling spectral_radius leaking_rate
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 0.0000000245 1 1.46e-3 0.9
## 2 2 0.000000223 1 8.28e+2 0.9
## 3 3 0.0000143 1 3.73e-4 0.9
## 4 4 0.0149 1 4.70e-7 0.9
## 5 5 0.00000000653 1 1.15e-9 0.9
## 6 6 0.0122 1 3.10e-9 0.9
To find the right set of hyperparameters, we will keep the ridge hyperparameter random between 1e-10 and 1e-1 and vary the other hyperparameters 2 by 2. For each set of hyperparameters, we will run 2 experiments and take the mean MSE of the 2. At each step we will run 30 experiments to reduce computation time.
For a real case, it would be better to increase the number of experiments and of repetitions to better explore the hyperparameter space.
# Step 1: explore input scaling and spectral radius (ridge stays random,
# leaking rate is held at 0.7).
dfHyperparam <- random_search_hyperparam(
  n = 30,
  ls_fct = list(
    ridge = function(n) rloguniform(n = n, min = 1e-10, max = 1e-1),
    input_scaling = function(n) rloguniform(n = n, min = 1e-5, max = 1e2),
    spectral_radius = function(n) rloguniform(n = n, min = 1e-5, max = 1e2),
    leaking_rate = function(n) 0.7
  )
) %>%
  # replicate 2 times: every hyperparameter set is evaluated twice
  replicate(n = 2, simplify = FALSE) %>%
  bind_rows() %>%
  # unique id per run (search_id identifies the hyperparameter set)
  tibble::rowid_to_column(var = "search_id_master")

# MSE of every run, in the row order of dfHyperparam
vecMSE <- vapply(
  X = seq_len(nrow(dfHyperparam)),
  FUN = function(i) {
    fct_objective(
      ridge = dfHyperparam[["ridge"]][i],
      leaking_rate = dfHyperparam[["leaking_rate"]][i],
      input_scaling = dfHyperparam[["input_scaling"]][i],
      spectral_radius = dfHyperparam[["spectral_radius"]][i],
      dfaccumulateForward = dfaccumulateForward
    )
  },
  FUN.VALUE = numeric(1)
)

# Average the MSE over the replicates of each hyperparameter set and
# attach the hyperparameter values back.
dfPerf <- dfHyperparam %>%
  select(search_id, search_id_master) %>%
  mutate(mse = vecMSE) %>%
  group_by(search_id) %>%
  summarise(mse = mean(mse)) %>%
  left_join(
    dfHyperparam %>% select(-search_id_master) %>% distinct(),
    by = "search_id"
  )
We can now plot the performance using the adequate functions :
# Performance against the pair (spectral radius, input scaling)
plot_2x2_perf(
  select(dfPerf, perf = mse, spectral_radius, input_scaling),
  perf_lab = "MSE",
  trans = "identity"
)
# Performance against each of the two hyperparameters separately
plot_marginal_perf(
  select(dfPerf, perf = mse, spectral_radius, input_scaling),
  perf_lab = "MSE"
)
There is no clear area of better performance and, for a real-world application, we should probably increase the number of explored hyperparameter sets. Nevertheless, the area where spectral radius and input scaling are equal to 1 seems to have better performance.
We can now move on to tune the leaking rate and the ridge hyperparameters.
# Step 2: explore leaking rate and ridge penalty (input scaling and
# spectral radius are held at 1).
dfHyperparam <- random_search_hyperparam(
  n = 30,
  ls_fct = list(
    ridge = function(n) rloguniform(n = n, min = 1e-10, max = 1e-1),
    input_scaling = function(n) 1,
    spectral_radius = function(n) 1,
    leaking_rate = function(n) rloguniform(n = n, min = 1e-3, max = 1)
  )
) %>%
  # replicate 2 times: every hyperparameter set is evaluated twice
  replicate(n = 2, simplify = FALSE) %>%
  bind_rows() %>%
  # unique id per run (search_id identifies the hyperparameter set)
  tibble::rowid_to_column(var = "search_id_master")

# MSE of every run, in the row order of dfHyperparam
vecMSE <- vapply(
  X = seq_len(nrow(dfHyperparam)),
  FUN = function(i) {
    fct_objective(
      ridge = dfHyperparam[["ridge"]][i],
      leaking_rate = dfHyperparam[["leaking_rate"]][i],
      input_scaling = dfHyperparam[["input_scaling"]][i],
      spectral_radius = dfHyperparam[["spectral_radius"]][i],
      dfaccumulateForward = dfaccumulateForward
    )
  },
  FUN.VALUE = numeric(1)
)

# Average the MSE over the replicates of each hyperparameter set and
# attach the hyperparameter values back.
dfPerf <- dfHyperparam %>%
  select(search_id, search_id_master) %>%
  mutate(mse = vecMSE) %>%
  group_by(search_id) %>%
  summarise(mse = mean(mse)) %>%
  left_join(
    dfHyperparam %>% select(-search_id_master) %>% distinct(),
    by = "search_id"
  )
We can now plot the performance using the adequate functions :
# Performance against the pair (leaking rate, ridge penalty)
plot_2x2_perf(
  select(dfPerf, perf = mse, leaking_rate, ridge),
  perf_lab = "MSE",
  trans = "identity"
)
# Performance against each of the two hyperparameters separately
plot_marginal_perf(
  select(dfPerf, perf = mse, leaking_rate, ridge),
  perf_lab = "MSE"
)
Again, for a real-world application, we should probably explore a larger set of hyperparameters. Here, we can consider that a leaking rate of 0.5 and a ridge penalty of 1e-3 seem to provide the best overall performance. A finer tuning could be done, but we stop here for this vignette on using random search to find the best set of hyperparameters.