Forecast uncertainty based on model averaging.
The R package fuma
provides an implementation of uncertainty estimation for feature-based time series forecasts; see our paper for details.
You can install the fuma package
from the GitHub repository with:
devtools::install_github("xqnwang/fuma")
This part explains how to reproduce the results in our paper. The feature-based framework for uncertainty estimation is applied here to obtain forecasts and 95% prediction intervals for the M4 dataset.
To forecast the M4 dataset, a reference dataset that originates from the same population as the M4 dataset in feature space is generated with GRATIS. Kang, Hyndman & Li (2020) demonstrated the superior diversity and coverage of the generated time series compared with the M4 dataset in two-dimensional feature spaces.
# packages we need
library(fuma)
library(M4comp2018)
library(M4metalearning)
library(forecast)
library(magrittr)
library(parallel)
library(doParallel)
library(foreach)
ncores <- detectCores()
# summarize the distribution of sample size on the M4 dataset
yearly_M4 <- Filter(function(l) toupper(l$period) == "YEARLY", M4)
quarterly_M4 <- Filter(function(l) toupper(l$period) == "QUARTERLY", M4)
monthly_M4 <- Filter(function(l) toupper(l$period) == "MONTHLY", M4)
l_y <- sapply(yearly_M4, function(lentry) lentry$n)
l_q <- sapply(quarterly_M4, function(lentry) lentry$n)
l_m <- sapply(monthly_M4, function(lentry) lentry$n)
# reference dataset (generated by GRATIS)
set.seed(2020-09-1)
train_y <- ts_generate(n.ts = 20000, freq = 1, length = l_y, h = 6,
parallel = TRUE, num.cores = ncores)
set.seed(2020-09-2)
train_q <- ts_generate(n.ts = 20000, freq = 4, length = l_q, h = 8,
parallel = TRUE, num.cores = ncores)
set.seed(2020-09-3)
train_m <- ts_generate(n.ts = 40000, freq = 12, length = l_m, h = 18,
parallel = TRUE, num.cores = ncores)
train <- append(train_y, train_q) %>% append(train_m)
# test dataset (the yearly, quarterly and monthly subsets of the M4 dataset)
test <- Filter(function(l){
l$period == "Yearly" | l$period == "Quarterly" | l$period == "Monthly"
}, M4)
The benchmark models considered in this document are auto_arima, ets, tbats, stlm_ar, rw_drift, thetaf, naive and snaive.
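Each benchmark name corresponds to an interval-forecasting routine in the forecast package (for example, auto_arima presumably wraps auto.arima() and thetaf wraps thetaf()). For reference only, here is a minimal sketch of one such forecast for a single yearly series; ts_forec() below runs all benchmarks on every series.
# for reference only: a single benchmark forecast for one yearly series
y <- train[[1]]$x
f_arima <- forecast(auto.arima(y), h = 6, level = c(80, 95))
f_theta <- thetaf(y, h = 6, level = c(80, 95))
f_naive <- naive(y, h = 6, level = c(80, 95))
f_arima$lower; f_arima$upper  # interval bounds that feed the MSIS evaluation below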
# train benchmark models and obtain forecasts and prediction intervals
train_forec <- ts_forec(dataset = train, methods_list(), level = c(80, 95),
parallel = TRUE, num.cores = ncores)
# evaluate forecasting performance
train_scores <- calc_scores(train_forec, parallel = TRUE, num.cores = ncores)
# extract the MSIS scores
level <- 95
train_msis <- extract_msis(train_scores, level = level)
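MSIS (mean scaled interval score) is the interval accuracy measure used throughout, collected per series and per method by extract_msis(). For reference, a minimal sketch of its definition for a single series, where the scaling is the in-sample seasonal naive MAE; the helper msis_sketch is ours, not part of fuma.
# for reference only: the definition of MSIS for one series (not the fuma internals)
# x: historical data, actual: future values, lower/upper: interval bounds,
# m: seasonal period (1 for yearly), alpha = 1 - level/100
msis_sketch <- function(x, actual, lower, upper, m = 1, level = 95) {
  alpha <- 1 - level / 100
  scaling <- mean(abs(diff(as.numeric(x), lag = m)))  # in-sample seasonal naive MAE
  penalty <- (upper - lower) +
    (2 / alpha) * (lower - actual) * (actual < lower) +
    (2 / alpha) * (actual - upper) * (actual > upper)
  mean(penalty) / scaling
}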
# scale the time series
train_scaledx <- lapply(train, function (lentry){
mu <- mean(lentry$x)
sigma <- sd(lentry$x)
x <- (lentry$x - mu)/sigma
lentry[names(lentry) == "x"] <- list(x)
lentry
})
# calculate features
train_feat0 <- M4metalearning::THA_features(train_scaledx)
train_feat <- t(sapply(train_feat0, function (lentry) {
seriesdata <- c(as.numeric(lentry$features))
names(seriesdata) <- c(names(lentry$features))
seriesdata
}))
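A quick sanity check of the resulting feature matrix: one row per series and one column per feature, which serve as the predictors in the next step.
# one row per series, one column per feature
dim(train_feat)
head(colnames(train_feat))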
The relationship between features and interval forecasting accuracy is captured by generalized additive models (GAMs) in this paper because of their interpretability, flexibility, automation and regularization.
# combine features and MSIS values
train_fm <- list(feat = train_feat, msis = train_msis)
# experimental setup
X <- as.data.frame(train_fm$feat)
Y <- as.data.frame(train_fm$msis)
X$seasonal_period_q <- ifelse(X$seasonal_period == 4, 1, 0)
X$seasonal_period_m <- ifelse(X$seasonal_period == 12, 1, 0)
X <- subset(X, select = -seasonal_period)
# GAM model training
gam_fit <- gam.fun(X, Y, LogY = TRUE, k = 5, parallel = TRUE, num.cores = ncores)
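For intuition, gam.fun() fits one GAM per benchmark method, regressing the (log) MSIS values on the features (LogY = TRUE, k = 5 basis functions per smooth). The sketch below shows the kind of model this amounts to using mgcv; the exact model form (smooth terms for the numeric features, linear terms for the seasonal dummies) is an assumption rather than the fuma implementation, and fit_one_gam is our own helper name.
# illustrative only: the kind of model gam.fun() is assumed to fit for each method
library(mgcv)
fit_one_gam <- function(X, y, k = 5) {
  dummies <- c("seasonal_period_q", "seasonal_period_m")
  smooths <- paste0("s(", setdiff(names(X), dummies), ", k = ", k, ")")
  fml <- reformulate(c(smooths, dummies), response = "log_msis")
  gam(fml, data = cbind(X, log_msis = log(y)))
}
# e.g. a GAM for the first benchmark method: fit_one_gam(X, Y[[1]])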
This part searches for the optimal threshold ratios for the yearly, quarterly and monthly series of the reference dataset.
# get fitted value
gam_fitvalue <- matrix(NA, nrow = nrow(X), ncol = ncol(Y)) %>% data.frame()
rownames(gam_fitvalue) <- rownames(X)
colnames(gam_fitvalue) <- colnames(Y)
for (i in 1:length(gam_fit)){
gam_fitvalue[, i] <- gam_fit[[i]]$fitted.values
}
# adjusted softmax transformation
gam_prob <- softmax.fun(gam_fitvalue)
# search optimal threshold
init_ratio <- seq(0.1, 1, 0.1)
gam.mean_threshold <- choose.threshold(dataset = train_forec, fitProb = gam_prob,
ratio = init_ratio, combine = "mean",
level = level, parallel = TRUE, num.cores = ncores)
gam.weighted_threshold <- choose.threshold(dataset = train_forec, fitProb = gam_prob,
ratio = init_ratio, combine = "weighted",
level = level, parallel = TRUE, num.cores = ncores)
# optimal threshold
gam.mean_ratio <- c(gam.mean_threshold$threshold_y,
gam.mean_threshold$threshold_q,
gam.mean_threshold$threshold_m)
gam.weighted_ratio <- c(gam.weighted_threshold$threshold_y,
gam.weighted_threshold$threshold_q,
gam.weighted_threshold$threshold_m)
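For intuition, softmax.fun() turns each series' fitted scores into non-negative weights that sum to one, with a smaller predicted MSIS translating into a larger weight, and choose.threshold() then evaluates the candidate ratios in init_ratio on the reference dataset to pick one ratio per frequency. The sketch below shows one plausible score-to-weight transformation; the adjusted softmax used by softmax.fun() may differ in its details, and softmax_sketch is our own helper name.
# illustrative only: one plausible score-to-weight transformation
# (smaller predicted score -> larger weight; each row sums to one)
softmax_sketch <- function(scores) {
  s <- as.matrix(scores)
  s <- s - apply(s, 1, min)  # stabilise before exponentiating
  w <- exp(-s)
  sweep(w, 1, rowSums(w), "/")
}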
# extract features from test dataset
test_scaledx <- lapply(test, function (lentry){
mu <- mean(lentry$x)
sigma <- sd(lentry$x)
x <- (lentry$x - mu)/sigma
lentry[names(lentry) == "x"] <- list(x)
lentry
})
test_feat0 <- THA_features(test_scaledx)
test_feat <- t(sapply(test_feat0, function (lentry) {
seriesdata <- c(as.numeric(lentry$features))
names(seriesdata) <- c(names(lentry$features))
seriesdata
}))
# dummy out categorical features
Xtest <- as.data.frame(test_feat)
Xtest$seasonal_period_q <- ifelse(Xtest$seasonal_period == 4, 1, 0)
Xtest$seasonal_period_m <- ifelse(Xtest$seasonal_period == 12, 1, 0)
Xtest <- subset(Xtest, select = -seasonal_period)
# predicted values of GAMs and weight assignment
gam_pre <- matrix(NA, nrow = nrow(Xtest), ncol = ncol(Y)) %>% data.frame()
rownames(gam_pre) <- rownames(Xtest)
colnames(gam_pre) <- colnames(Y)
for (i in 1:length(gam_fit)){
gam_pre[, i] <- predict(gam_fit[[i]], newdata = Xtest)
}
gam_preprob <- softmax.fun(gam_pre)
# forecasts of benchmark methods
test_forec <- ts_forec(dataset = test, methods_list(),
level = 95, parallel = TRUE, num.cores = ncores)
# OurMethod(mean)
fuma_forec <- comb_forec(test_forec, weightprob = gam_preprob,
Threshold = gam.mean_ratio, level = level,
combine = "mean", methodname = "fuma(mean)",
show.methods = FALSE, parallel = TRUE, num.cores = ncores)
# OurMethod(weighted)
fuma_forec <- comb_forec(fuma_forec, weightprob = gam_preprob,
Threshold = gam.weighted_ratio, level = level,
combine = "weighted", methodname = "fuma(weighted)",
show.methods = FALSE, parallel = TRUE, num.cores = ncores)
# OurMethod(allweighted)
all_ratio <- c(-1, -1, -1)
fuma_forec <- comb_forec(fuma_forec, weightprob = gam_preprob,
Threshold = all_ratio, level = level,
combine = "weighted", methodname = "fuma(allweighted)",
show.methods = FALSE, parallel = TRUE, num.cores = ncores)
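For intuition about the three variants: the threshold ratio controls how many benchmark methods enter the combination for each series, with a ratio of -1 (as in fuma(allweighted)) keeping all of them; combine = "mean" averages the interval bounds of the retained methods, while combine = "weighted" uses the GAM-based weights. The sketch below illustrates this reading for a single series; the trimming rule shown (keep methods whose weight is at least ratio times the equal weight 1/k) is an assumption, and comb_forec() is the actual implementation.
# illustrative only: our reading of the combination step for one series
# lower_mat/upper_mat: methods x horizon interval bounds, w: the series' weights
combine_sketch <- function(lower_mat, upper_mat, w, ratio, combine = c("mean", "weighted")) {
  combine <- match.arg(combine)
  keep <- if (ratio < 0) rep(TRUE, length(w)) else w >= ratio / length(w)  # assumed trimming rule
  wk <- if (combine == "mean") rep(1, sum(keep)) else w[keep]
  wk <- wk / sum(wk)
  list(lower = as.numeric(t(lower_mat[keep, , drop = FALSE]) %*% wk),
       upper = as.numeric(t(upper_mat[keep, , drop = FALSE]) %*% wk))
}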
- Xiaoqian Wang, Yanfei Kang, Fotios Petropoulos & Feng Li (2021). The uncertainty estimation of feature-based forecast combinations. Journal of the Operational Research Society. DOI: 10.1080/01605682.2021.1880297.