aefdz / localFDA

Localization processes for functional data analysis. Software companion for the paper “Localization processes for functional data analysis” by Elías, A., Jiménez, R., and Yukich, J. (2020)

Geek Repo:Geek Repo

Github PK Tool:Github PK Tool

localFDA

License Travis build status

Overview

Software companion for the paper “Localization processes for functional data analysis” by Antonio Elías, Raúl Jiménez, and Joe Yukich (2020) <arXiv:2007.16059>. It provides code for computing localization processes and localization distances, and for applying them to classification and outlier detection problems.

Installation

# Install the package from the GitHub repository aefdz/localFDA (needs devtools)
devtools::install_github("aefdz/localFDA")
##       v  checking for file 'C:\Users\anton\AppData\Local\Temp\Rtmp4617Sq\remotes2e00503a197c\aefdz-localFDA-25b0d40/DESCRIPTION' (425ms)
##       -  preparing 'localFDA':
##    checking DESCRIPTION meta-information ...     checking DESCRIPTION meta-information ...   v  checking DESCRIPTION meta-information
##       -  checking for LF line-endings in source and make files and shell scripts
##   -  checking for empty or unneeded directories
##       -  looking to see if a 'data/datalist' file should be added
##       -  building 'localFDA_0.0.0.9000.tar.gz'
##      
## 
# Attach the package so its functions and example data sets are available
library(localFDA)

Test usage

Load the example data and plot it.

# ggplot2 supplies ggplot(), geom_line(), aes(), xlab() and theme() used in
# this and the following plotting snippets.
library(ggplot2)

# Example data: one curve per column, one evaluation point per row
# (assumes X is a matrix, since c(X) is used below to flatten it column-wise).
X <- exampleData
n <- ncol(X) # number of curves
p <- nrow(X) # number of evaluation points per curve
t <- as.numeric(rownames(X)) # evaluation grid, read from the row names

# Plot the data set: long format with one row per (curve, evaluation point).
df_functions <- data.frame(
  ids = rep(colnames(X), each = p),
  y = c(X),
  x = rep(t, n)
)

# All curves share the same translucent black, so colour is set as a constant
# rather than mapped to `ids` (a constant colour overrides such a mapping).
functions_plot <- ggplot(df_functions) +
  geom_line(aes(x = x, y = y, group = ids), color = "black", alpha = 0.25) +
  xlab("t") +
  theme(legend.position = "none")

functions_plot

Compute kth empirical localization processes

Empirical version of Equation (1) of the paper, computed for a single focal curve:

# Focal curve, referred to by its column name in X
focal <- "1"

# Keep only the `lc` component of the result: the localization processes of
# the focal curve (presumably one column per order k -- the plotting code
# below selects columns by labels such as "k=1").
localizarionProcesses_focal <- localizationProcesses(X, focal)$lc

Plot localization processes of order 1, 50, 100 and 200:

# Long-format version of the localization processes: one row per
# (order k, evaluation point). `k` holds the column labels of the matrix.
df_lc <- data.frame(
  k = rep(colnames(localizarionProcesses_focal), each = p),
  y = c(localizarionProcesses_focal),
  x = rep(t, n - 1)
)

# Orders to display; the loop follows this vector, so adding or removing an
# order needs no other change.
ks <- c(1, 50, 100, 200)
lc_plots <- vector("list", length(ks))

# The focal curve is the same in every panel, so extract it once.
# Base subsetting is used instead of a bare filter(), which would resolve to
# stats::filter() unless dplyr happened to be attached.
focal_curve <- df_functions[df_functions$ids == focal, ]

for (i in seq_along(ks)) {
  k_label <- paste0("k=", ks[i])
  lc_plots[[i]] <- functions_plot +
    # localization process of order ks[i]
    geom_line(
      data = df_lc[df_lc$k == k_label, ],
      aes(x = x, y = y, group = k),
      color = "blue", size = 1
    ) +
    # focal curve, drawn on top for reference
    geom_line(
      data = focal_curve,
      aes(x = x, y = y, group = ids),
      color = "red", linetype = "dashed", size = 1
    ) +
    ggtitle(paste("k =", ks[i]))
}

# Arrange the panels in a grid; wrap_plots() comes from the patchwork package.
patchwork::wrap_plots(lc_plots)

Compute kth empirical localization distances

Equation (18) of the paper, computed for a single focal curve:

# Localization distances of the focal curve; the printed names below show one
# value per order k ("k=1", "k=2", ...).
localizationDistances_focal <- localizationDistances(X, focal)

# Show the first six distances
head(localizationDistances_focal)
##          k=1          k=2          k=3          k=4          k=5          k=6 
## 0.0005082926 0.0011346495 0.0017636690 0.0023955745 0.0030095117 0.0035089220

Plot the localization distances:

# One row per order k. The x index is derived from the distances themselves
# rather than from the global `n`, so the snippet stays correct whatever data
# set the distances were computed from.
df_ld <- data.frame(
  k = names(localizationDistances_focal),
  y = localizationDistances_focal,
  x = seq_along(localizationDistances_focal)
)

# Scatter plot of the distance against the order k
ldistances_plot <- ggplot(df_ld, aes(x = x, y = y)) +
  geom_point() +
  ggtitle("Localization distances for one focal") +
  xlab("kth") +
  ylab("L")

ldistances_plot

Sample μ and σ

# Localization statistics for the whole sample, with the robust option enabled
localizationStatistics_full <- localizationStatistics(X, robustify = TRUE)

# Orders k at which the estimates are inspected
ks_shown <- c(1, 100, 200, 400, 600)

# Mean estimates (trim_mean component) at the selected orders
localizationStatistics_full$trim_mean[ks_shown]
##         k=1       k=100       k=200       k=400       k=600 
## 0.001083517 0.098465426 0.184940365 0.350528860 0.526580274

# Standard deviation estimates (trim_sd component) at the selected orders
localizationStatistics_full$trim_sd[ks_shown]
##          k=1        k=100        k=200        k=400        k=600 
## 0.0005326429 0.0329170846 0.0490732397 0.0686018224 0.0806314699

Classification

X <- classificationData

# Random split: 90 curves for training, the remaining ones for testing.
# NOTE: the split is drawn with sample(), so the test curves listed in the
# output below change from run to run; call set.seed() first to fix the split.
ids_training <- sample(colnames(X), 90)
ids_testing <- setdiff(colnames(X), ids_training)

trainingSample <- X[, ids_training]
testSample <- X[, ids_testing]
# Blind the test set: its column names carry the class label (e.g. "12_G1").
colnames(testSample) <- NULL
classNames <- c("G1", "G2")

classification_results <- localizationClassifier(
  trainingSample, testSample, classNames,
  k_opt = 3
)

# Compare the original test identifiers (which include the true class) with
# the predicted classes.
checking <- data.frame(
  real_class = ids_testing,
  predicted_class = classification_results$test$predicted_class
)

checking
##    real_class predicted_class
## 1       12_G1              G1
## 2       14_G1              G1
## 3       21_G1              G1
## 4       44_G1              G1
## 5       54_G2              G2
## 6       56_G2              G2
## 7       72_G2              G2
## 8       81_G2              G2
## 9       94_G2              G2
## 10     100_G2              G2

Outlier detection

# Example data set for outlier detection shipped with the package
X <- outlierData

# Run the localization-distance outlier detector.
# NOTE(review): localrule and whiskerrule look like a quantile level and a
# boxplot-style whisker multiplier -- confirm against the package help page.
outliers <- outlierLocalizationDistance(X, localrule = 0.95, whiskerrule = 1.5)

# Names of the curves flagged as outliers
outliers$outliers_ld_rule
## [1] "1_magnitude" "1_shape"     "2_magnitude" "2_shape"

Plot the results:

# Long-format copy of the curves, placed on an equally spaced grid over [0, 1]
grid_points <- seq(from = 0, to = 1, length.out = nrow(X))
df_functions <- data.frame(
  ids = rep(colnames(X), each = nrow(X)),
  y = c(X),
  x = rep(grid_points, ncol(X))
)

# Rows belonging to the curves flagged by the detector
df_flagged <- df_functions[df_functions$ids %in% outliers$outliers_ld_rule, ]

# All curves in black, with the detected outliers overlaid in colour
functions_plot <- ggplot(df_functions) +
  geom_line(aes(x = x, y = y, group = ids), color = "black") +
  geom_line(
    data = df_flagged,
    aes(x = x, y = y, group = ids, color = ids),
    size = 1
  ) +
  xlab("t") +
  theme(legend.position = "bottom") +
  guides(color = guide_legend(title = "Detected outliers"))

functions_plot

References

Elías, Antonio, Jiménez, Raúl and Yukich, Joe (2020). Localization processes for functional data analysis. arXiv:2007.16059, <https://arxiv.org/abs/2007.16059>.

About

Localization processes for functional data analysis. Software companion for the paper “Localization processes for functional data analysis” by Elías, A., Jiménez, R., and Yukich, J. (2020)

License:GNU General Public License v3.0


Languages

Language:R 100.0%