fuma

Forecast uncertainty based on model averaging.

The R package fuma implements uncertainty estimation for feature-based time series forecasts; see our paper for details.

Installation

You can install the fuma package from its GitHub repository with:

devtools::install_github("xqnwang/fuma")

Usage

This section explains how to reproduce the results in our paper. The feature-based framework for uncertainty estimation is applied to obtain forecasts and 95% prediction intervals for the M4 dataset.

The reference dataset and test dataset

To forecast the M4 dataset, a reference dataset that originates from the same population as the M4 dataset in feature spaces is generated by GRATIS. Kang, Hyndman & Li (2020) demonstrated the superior diversity and coverage of the generated time series compared with the M4 dataset in two-dimensional feature spaces.

# packages we need
library(fuma)
library(M4comp2018)
library(M4metalearning)
library(forecast)
library(magrittr)
library(parallel)
library(doParallel)
library(foreach)
ncores <- detectCores()

# summarize the distribution of sample size on the M4 dataset
yearly_M4 <- Filter(function(l) toupper(l$period) == "YEARLY", M4)
quarterly_M4 <- Filter(function(l) toupper(l$period) == "QUARTERLY", M4)
monthly_M4 <- Filter(function(l) toupper(l$period) == "MONTHLY", M4)
l_y <- sapply(yearly_M4, function(lentry) lentry$n)
l_q <- sapply(quarterly_M4, function(lentry) lentry$n)
l_m <- sapply(monthly_M4, function(lentry) lentry$n)

# reference dataset (generated by GRATIS)
set.seed(2020-09-1)
train_y <- ts_generate(n.ts = 20000, freq = 1, length = l_y, h = 6, 
                       parallel = TRUE, num.cores = ncores)
set.seed(2020-09-2)
train_q <- ts_generate(n.ts = 20000, freq = 4, length = l_q, h = 8,
                       parallel = TRUE, num.cores = ncores)
set.seed(2020-09-3)
train_m <- ts_generate(n.ts = 40000, freq = 12, length = l_m, h = 18,
                       parallel = TRUE, num.cores = ncores)
train <- append(train_y, train_q) %>% append(train_m)

# test dataset (the yearly, quarterly and monthly subsets of M4 dataset)
test <- Filter(function(l){
  l$period == "Yearly" | l$period == "Quarterly" | l$period =="Monthly"
}, M4)
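
As a quick sanity check, the sizes of the two datasets can be inspected; this assumes the entries returned by ts_generate mirror the M4comp2018 list structure, with the series stored in x and the forecast horizon in h:

# quick look at the reference and test datasets
length(train)                    # 80000 generated series (20000 yearly + 20000 quarterly + 40000 monthly)
length(test)                     # 95000 M4 series (yearly, quarterly and monthly)
str(train[[1]], max.level = 1)   # one generated series entry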

Train benchmark models on reference dataset

The benchmark models considered in this document are auto_arima, ets, tbats, stlm_ar, rw_drift, thetaf, naive and snaive.

# train benchmark models and obtain forecasts and prediction intervals
train_forec <- ts_forec(dataset = train, methods_list(), level = c(80, 95), 
                        parallel = TRUE, num.cores = ncores)
# evaluate forecasting performance
train_scores <- calc_scores(train_forec, parallel = TRUE, num.cores = ncores) 

# extract the MSIS scores
level <- 95
train_msis <- extract_msis(train_scores, level = level) 
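
For reference, the MSIS (mean scaled interval score) of a prediction interval penalises its width plus any coverage violations, scaled by the in-sample seasonal naive error. The sketch below only illustrates the metric that extract_msis collects for a single series; it is not the package's internal implementation:

# illustrative MSIS for one series (alpha = 0.05 corresponds to a 95% interval)
msis_one <- function(y, lower, upper, insample, m, alpha = 0.05) {
  penalty <- (upper - lower) +
    (2 / alpha) * (lower - y) * (y < lower) +
    (2 / alpha) * (y - upper) * (y > upper)
  n <- length(insample)
  scale <- mean(abs(insample[(m + 1):n] - insample[1:(n - m)]))  # seasonal naive scaling
  mean(penalty) / scale
}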

Extract time series features from reference dataset

# scale the time series
train_scaledx <- lapply(train, function (lentry){
  mu <- mean(lentry$x)
  sigma <- sd(lentry$x)
  x <- (lentry$x - mu)/sigma
  lentry[names(lentry) == "x"] <- list(x)
  lentry
})

# calculate features
train_feat0 <- M4metalearning::THA_features(train_scaledx)
train_feat <- t(sapply(train_feat0, function (lentry) {
  seriesdata <- c(as.numeric(lentry$features))
  names(seriesdata) <- c(names(lentry$features))
  seriesdata
}))

Linking features with interval forecasting performance

The relationship between features and interval forecasting accuracy is captured by generalized additive models (GAMs) in this paper because of their interpretability, flexibility, automation and regularization.

# combine features and MSIS values
train_fm <- list(feat = train_feat, msis = train_msis)

# experimental setup
X <- as.data.frame(train_fm$feat)
Y <- as.data.frame(train_fm$msis)
X$seasonal_period_q <- ifelse(X$seasonal_period == 4, 1, 0)
X$seasonal_period_m <- ifelse(X$seasonal_period == 12, 1, 0)
X <- subset(X, select = -seasonal_period)

# GAM model training
gam_fit <- gam.fun(X, Y, LogY = TRUE, k = 5, parallel = TRUE, num.cores = ncores)
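
gam.fun fits one model per benchmark method. Assuming the returned objects behave like standard fitted models (the code below accesses their fitted.values and calls predict() on them), each fit can be inspected in the usual way:

# inspect the fitted GAMs (one per benchmark method)
length(gam_fit)
summary(gam_fit[[1]])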

Optimal threshold search

This part searches for the optimal threshold ratio for the yearly, quarterly and monthly series of the reference dataset.

# get fitted value
gam_fitvalue <- matrix(NA, nrow = nrow(X), ncol = ncol(Y)) %>% data.frame()
rownames(gam_fitvalue) <- rownames(X)
colnames(gam_fitvalue) <- colnames(Y)
for (i in 1:length(gam_fit)){
  gam_fitvalue[, i] <- gam_fit[[i]]$fitted.values
}

# adjusted softmax transformation
gam_prob <- softmax.fun(gam_fitvalue)

# search optimal threshold
init_ratio <- seq(0.1, 1, 0.1)
gam.mean_threshold <- choose.threshold(dataset = train_forec, fitProb = gam_prob, 
                                       ratio = init_ratio, combine="mean", 
                                       level = level, parallel = TRUE, num.cores = ncores) 
gam.weighted_threshold <- choose.threshold(dataset = train_forec, fitProb = gam_prob, 
                                           ratio = init_ratio, combine="weighted", 
                                           level = level, parallel = TRUE, num.cores = ncores) 

# optimal threshold
gam.mean_ratio <- c(gam.mean_threshold$threshold_y, 
                    gam.mean_threshold$threshold_q, 
                    gam.mean_threshold$threshold_m)
gam.weighted_ratio <- c(gam.weighted_threshold$threshold_y, 
                        gam.weighted_threshold$threshold_q, 
                        gam.weighted_threshold$threshold_m)
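
For intuition, softmax.fun maps the fitted MSIS values of the benchmark methods to combination weights; since MSIS is a loss, the intended direction is that a smaller predicted MSIS receives a larger weight. A plain softmax over negated scores is sketched below; the adjusted transformation implemented in the package may differ in its exact scaling:

# plain softmax over negated scores: smaller score -> larger weight
# (illustration only; softmax.fun implements the adjusted version)
plain_softmax <- function(scores) {
  z <- exp(-(scores - min(scores)))  # shift for numerical stability
  z / sum(z)
}
plain_softmax(c(2, 4, 8))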

Forecast the test dataset

# extract features from test dataset
test_scaledx <- lapply(test, function (lentry){
  mu <- mean(lentry$x)
  sigma <- sd(lentry$x)
  x <- (lentry$x - mu)/sigma
  lentry[names(lentry) == "x"] <- list(x)
  lentry
})
test_feat0 <- THA_features(test_scaledx)
test_feat <- t(sapply(test_feat0, function (lentry) {
  seriesdata <- c(as.numeric(lentry$features))
  names(seriesdata) <- c(names(lentry$features))
  seriesdata
}))

# dummy out categorical features
Xtest <- as.data.frame(test_feat)
Xtest$seasonal_period_q <- ifelse(Xtest$seasonal_period == 4, 1, 0)
Xtest$seasonal_period_m <- ifelse(Xtest$seasonal_period == 12, 1, 0)
Xtest <- subset(Xtest, select = -seasonal_period)
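
Before predicting with the fitted GAMs, a simple check (not part of the package) that the test features provide the same columns used in training:

# sanity check: test features must contain the training feature columns
stopifnot(all(colnames(X) %in% colnames(Xtest)))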

# predicted values of GAMs and weight assignment
gam_pre <- matrix(NA, nrow = nrow(Xtest), ncol = ncol(Y)) %>% data.frame()
rownames(gam_pre) <- rownames(Xtest)
colnames(gam_pre) <- colnames(Y)
for (i in 1:length(gam_fit)){
  gam_pre[, i] <- predict(gam_fit[[i]], newdata = Xtest)
}
gam_preprob <- softmax.fun(gam_pre)

# forecasts of benchmark methods
test_forec <- ts_forec(dataset = test, methods_list(), 
                       level = 95, parallel = TRUE, num.cores = ncores)

# OurMethod(mean)
fuma_forec <- comb_forec(test_forec, weightprob = gam_preprob,
                         Threshold = gam.mean_ratio, level = level,
                         combine = "mean", methodname = "fuma(mean)",
                         show.methods = FALSE, parallel = TRUE, num.cores = ncores)

# OurMethod(weighted)
fuma_forec <- comb_forec(fuma_forec, weightprob = gam_preprob,
                         Threshold = gam.weighted_ratio, level = level,
                         combine = "weighted", methodname = "fuma(weighted)",
                         show.methods = FALSE, parallel = TRUE, num.cores = ncores)

# OurMethod(allweighted)
all_ratio <- c(-1, -1, -1) 
fuma_forec <- comb_forec(fuma_forec, weightprob = gam_preprob,
                         Threshold = all_ratio, level = level,
                         combine = "weighted", methodname = "fuma(allweighted)",
                         show.methods = FALSE, parallel = TRUE, num.cores = ncores)
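
Finally, the performance of the combined methods on the test set can be evaluated with the same functions used for the reference dataset above; the column-wise average below assumes extract_msis returns one column per method, as in the training step:

# evaluate the combined forecasts on the M4 test set
fuma_scores <- calc_scores(fuma_forec, parallel = TRUE, num.cores = ncores)
fuma_msis <- extract_msis(fuma_scores, level = level)
colMeans(fuma_msis)   # average MSIS per method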

References

  • Xiaoqian Wang, Yanfei Kang, Fotios Petropoulos & Feng Li (2021). The uncertainty estimation of feature-based forecast combinations. Journal of the Operational Research Society. DOI: 10.1080/01605682.2021.1880297.
