The goal of svdcov
is to calculate the covariance matrix of rectangular data
using SVD or shrinkage methods.
You can install the released version of svdcov from github with:
devtools::intall_github("github.io/jtannen/svdcov")
library(svdcov)
# Consider a block-diagonal example.
cov <- matrix(0, 10, 10)
cov[row(cov) <=5 & col(cov) <= 5] <- 0.5
cov[row(cov) > 5 & col(cov) > 5] <- 0.5
diag(cov) <- 1
data <- MASS::mvrnorm(
n=100,
mu=1:10,
Sigma=cov
)
data <- t(data)
res <- get_svd(data, n_svd=2)
res@row_cov
get_fitted(res)
sample_from_posterior(
res,
obs=c(1.42, 7.34),
obs_id=c(1, 6)
)
## Example with Philadelphia elections data
library(dplyr)
library(sf)
library(ggplot2)
data("phila_elections")
data("phila_divs")
phila_elections <- phila_elections %>%
filter(
!grepl("WRITE(\\s|-)IN", candidate, ignore.case=T),
substr(warddiv, 1, 2) != "99", # overseas
is.na(district) # city-wide races
)
competitive_races <- phila_elections %>%
group_by(office, election_type, year, candidate) %>%
summarise(votes=sum(votes), .groups="drop") %>%
group_by(office, election_type, year) %>%
summarise(is_competitive = sum(votes > 100) > 2) %>%
filter(is_competitive)
# mat is a matrix with division rows, and candidate columns
mat <- phila_elections %>%
inner_join(competitive_races) %>%
mutate(column_name = paste(candidate, office, party, election_type, year, sep="::")) %>%
group_by(office, year, election_type, warddiv) %>%
mutate(log_pvote = log((votes + 1) / sum(votes+1))) %>%
ungroup() %>%
pivot_wider(
id_cols = "warddiv",
names_from="column_name",
values_from="log_pvote",
values_fill=list(log_pvote=log(0.001))
)
mat <- mat %>% tibble::column_to_rownames("warddiv")
mat <- as.matrix(mat)
row_ward <- substr(row.names(mat), 1, 2)
svd_par <- get_svd(
mat,
n_svd=3,
row_groups=row_ward, ## cluster extra variance by ward
is_logged=TRUE
)
ggplot(
divs %>% left_join(svd_par@row_scores, by=c("warddiv"="row"))
) +
geom_sf(aes(fill=score.1), color=NA) +
scale_fill_gradient2() +
theme_minimal()
svd_par@col_scores %>% arrange(desc(score.1)) %>% head()
## Suppose we have a weak prior that a candidate will receive 25% of the vote,
## and then see they received 50% of the vote in "27-14".
## How will the do in "01-01"?
sample <- sample_from_posterior(
svd_par,
obs=c(log(0.5), log(0.5)),
obs_id=c("01-02", "01-03"),
col_mean_prior=log(0.25),
col_mean_prior_var=1
)
# Note: the model doesn't know these are percents, so some are > 1.
sample %>%
filter(row=="01-01") %>%
with(hist(exp(value)))