Miscellaneous functions and documentation.
After making changes, use rmarkdown::render_site(), then commit and push.
- push all function changes to github
- change the version number in DESCRIPTION
- update readme.md, news.md
- push these changes
- devtools::check(cran = TRUE)
- devtools::check_win_release()
- update cran-comments.md with the output of cran checks
- push this change and any remaining changes to github
- devtools::submit_cran()
- now check email and confirm the submission via the maintainer's stable email
- once accepted by CRAN, tag a release with the same version number as the updated DESCRIPTION
- tag release instructions: https://docs.github.com/en/repositories/releasing-projects-on-github/managing-releases-in-a-repository
- reference: https://r-pkgs.org/release.html
- push all function changes to github
- change the version number in DESCRIPTION
- update readme.md, news.md
- push these changes
- pkgdown::build_site()
- push to github
- tag release instructions: https://docs.github.com/en/repositories/releasing-projects-on-github/managing-releases-in-a-repository
Extract per-variable t statistics from a matrix, with a grouping factor as the first column. Useful for a sanity check on the direction of variable importance.
Assuming data structure of dataframe d
looks like:
group Var1 Var2
"low" 5 3
"low" 5 5
"high" 3 7
"high" 3 8
code
# get t stats of individual features
# Welch two-sample t test of numeric vector x across the two-level factor y.
# Returns a named numeric vector c(tstat, pval).
# unname() strips the "t" name carried by ttest$statistic; without it the
# first element is named "tstat.t" instead of "tstat".
do.ttest = function(x, y) {
  ttest = t.test(x ~ y)
  c(tstat = unname(ttest$statistic), pval = ttest$p.value)
}
# Run do.ttest over every feature column (all but grouping column 1).
# apply() returns a 2 x n_features matrix (tstat / pval rows), hence the
# transpose before converting to a data frame.
# NOTE(review): assumes every non-group column of d is numeric — apply()
# coerces the subset to a matrix. rownames_to_column() comes from tibble.
tval = apply(d[ , -c(1)], MARGIN = 2, FUN = do.ttest, y = d$group) %>%
t() %>%
as.data.frame() %>%
rownames_to_column('feature')
Quick visualization of group differences for select vars in the matrix.
# Reshape wide features into long format for faceted boxplots.
# pivot_longer() replaces the superseded gather(); Var1:VarN is the
# range of feature columns to stack.
d.long = d %>%
  pivot_longer(cols = Var1:VarN, names_to = 'variable', values_to = 'value')
# one boxplot row per variable; free scales since features differ in range
p = ggplot(data = d.long, aes(x = value, y = group, fill = group, color = group)) +
  geom_boxplot(size = 0.2, outlier.size = 0.1) +
  facet_grid(rows = vars(variable), scales = 'free', space = 'free') +
  theme(strip.text.y = element_text(angle = 0))
# save long fig size
Given some data of variables (cols) with observations (rows), use Hmisc::rcorr() to get a symmetric correlation matrix of each pairwise comparison between columns. The object returned from rcorr contains a correlation matrix of rho values, p values and n values. The function below will calculate the adjusted p value.
# given some correlation matrix object returned by Hmisc::rcorr(d)
# d contains columns; all pairwise correlations computed
# Multiple-testing-adjust the p-value matrix from Hmisc::rcorr().
#
# hmisc.cor: list with a symmetric p-value matrix in $P (diagonal may be NA).
# method:    any method accepted by stats::p.adjust (default 'fdr' / BH).
# Returns a symmetric matrix of adjusted p values with 1 on the diagonal.
#
# Each unique pairwise test is adjusted exactly once (lower triangle) and
# mirrored to the upper triangle — the original version ran p.adjust twice
# on the same set of values, once per triangle, which was redundant.
p.adjust.cormat = function(hmisc.cor, method = 'fdr'){
  P <- hmisc.cor$P
  stopifnot(isTRUE(isSymmetric(P)))
  n <- ncol(P)
  # start from all 1s so the diagonal (self-correlation) is already set
  p.adj.mx <- matrix(1, nrow = n, ncol = n,
                     dimnames = list(colnames(P), colnames(P)))
  p.adj.mx[lower.tri(p.adj.mx)] <- p.adjust(P[lower.tri(P)], method = method)
  # mirror the lower triangle so the result is symmetric by construction
  p.adj.mx[upper.tri(p.adj.mx)] <- t(p.adj.mx)[upper.tri(p.adj.mx)]
  stopifnot(isTRUE(isSymmetric(p.adj.mx)))
  return(p.adj.mx)
}
# simple function for scaling because scale return format is annoying
# Z-score a numeric vector, returning a plain vector (base::scale() returns
# a 1-column matrix with attributes, which is annoying downstream).
# na.rm is forwarded to mean() and sd(); default FALSE preserves the
# original behavior (any NA makes the whole result NA).
scale.simple = function(x, na.rm = FALSE) {
  (x - base::mean(x, na.rm = na.rm)) / stats::sd(x, na.rm = na.rm)
}
# Map gene symbols to Entrez IDs via org.Hs.eg.db.
# The fallback returns NULL instead of NA: with the old NA fallback the
# next line (ent$SYMBOL) errored, because `$` is invalid on an atomic NA.
ent =
  tryCatch(
    AnnotationDbi::select(org.Hs.eg.db::org.Hs.eg.db,
      keys = res$CC10$gene, columns = c("ENTREZID", "SYMBOL"),
      keytype = "SYMBOL"),
    error = function(e) return(NULL)
  )
if (is.null(ent)) {
  # lookup failed entirely — hand downstream code an empty ID vector
  ent = character(0)
} else {
  # keep the first mapping per symbol, then drop unmapped (NA) Entrez IDs
  ent = ent[!duplicated(ent$SYMBOL), ]$ENTREZID
  ent = ent[!is.na(ent)]
}
# R 4.2 Seurat v 3 or 4
suppressMessages(library(Seurat))
# your path datapath = my_path
# dir.create(datapath)
# read main object Seurat 2.4
# s = readRDS(file = 'my_path/'))
# save cell IDs
cells = rownames(s@meta.data)
# extract data structure from Seurat V2 object
md = s@meta.data[cells, ]
rna.raw = s@raw.data[ ,cells]
rna.norm = s@data[ ,cells]
adt.raw = s@assay$CITE@raw.data[ ,cells]
adt.norm = s@assay$CITE@data[ ,cells]
# make Seurat v4 object
ss = CreateSeuratObject(counts = rna.raw, meta.data = md)
ss@assays$RNA@data = rna.norm
adt.assay = CreateAssayObject(counts = adt.raw)
ss[['CITE']] <- adt.assay
ss@assays$CITE@data <- adt.norm
saveRDS(ss, file = paste0(datapath, 'new.rds'))
After group by summarize, just separate the new vars being added with commas within the summarize call.
# e.g. df is a long dataframe with celltype, timepoint, sampleid, gene, norm_count
# Per celltype / timepoint / sample / gene spread and location of norm_count.
# .groups = "drop" returns an ungrouped data frame, avoiding the leftover
# group_by() footgun in later verbs; it also replaces the stray trailing
# comma the original carried inside summarize().
summary_df = df %>%
  group_by(celltype, timepoint, sampleid, gene) %>%
  summarize(
    gene_sd = sd(norm_count),
    gene_mad = mad(norm_count),
    gene_mean = mean(norm_count),
    .groups = "drop"
  )
Use matches() to select columns whose names match a regex:
# keep only the columns whose names match the regex
x = x %>%
dplyr::select(matches('somestring'))
grepl inside the filter argument
# keep rows where `group` matches either pattern
x = x %>%
dplyr::filter(grepl("string1|string2", x = group))
assuming log transformed data so subtracting to calculate a fold change.
Calculate the log fold change of the variable "average" over 2 timepoints.
The filter removes the unused rows because they are now summarized by the fold change.
# fold change = next row's average minus this row's (log-scale data, so
# subtraction is a log fold change); keep only the later timepoint rows.
# NOTE(review): lead() ignores grouping unless group_by() is set — with
# several categories the last row of one category pairs with the first row
# of the next. Confirm a group_by(category, sampleid) is not needed here.
new_df =
old_df %>%
arrange(category, sampleid) %>% # optional
mutate(fold_change = lead(average) - average) %>%
filter(timepoint == "second_timepoint")
When programming with dplyr, embrace the grouping argument in double braces {{ }} (or use .data[[ ]] for a string column name).
# Collapse a data frame to one row per level of `grouping_var` (a column
# name supplied as a string), keeping the unique value of every other
# column. Each non-grouping column must be constant within a group, or
# unique() returns more than one value and summarise() errors.
#
# Changes vs. original: deprecated summarise_each() -> across(); the
# rlang::sym() + {{ }} double indirection -> the direct .data[[ ]] pronoun
# for string column names.
collapsedown = function(dat, grouping_var){
  dat %>%
    dplyr::group_by(.data[[grouping_var]]) %>%
    dplyr::summarise(dplyr::across(dplyr::everything(), unique),
                     .groups = "drop")
}
yy = collapsedown(dat = cell_metadata[ ,vars_all], grouping_var = 'sample')
A diverging palette should always be used to display standardized vars or data above and below 0. The mid point of the heatmap should ALWAYS be the mid point in the diverging palette or else you can confuse the reader.
# got this simple solution from the post below
# https://stackoverflow.com/questions/31677923/set-0-point-for-pheatmap-in-r
# any diverging palette
# Diverging palette centered at 0 for pheatmap.
# NOTE: 'PRGn' has at most 11 colors — brewer.pal(n = 12) only warns and
# returns 11 — so interpolate to a smooth ramp with colorRampPalette().
cu = colorRampPalette(RColorBrewer::brewer.pal(name = 'PRGn', n = 11))(50)
# symmetric limits so 0 maps exactly to the palette midpoint
# (renamed from `range`, which shadowed base::range)
lim <- max(abs(mtx), na.rm = TRUE)
# pheatmap expects one more break than colors
pheatmap(mtx, color = cu, breaks = seq(-lim, lim, length.out = length(cu) + 1))
# to find a diverging palette:
RColorBrewer::display.brewer.all()
grDevices::adjustcolor( "red", alpha.f = 0.2)
# enlarge legend points independent of the plotted point size
guides(color = guide_legend(override.aes = list(size=2)))
# compact legend: shrink keys and fonts
theme(legend.key.size = unit(0.3, 'cm'), #change legend key size
legend.key.height = unit(0.3, 'cm'), #change legend key height
legend.key.width = unit(0.3, 'cm'), #change legend key width
legend.title = element_text(size=8), #change legend title font size
legend.text = element_text(size=8)) #change legend text font size
# reusable minimal theme: a list of theme objects is applied in order,
# so add it to any plot with `p + boxbox`
boxbox = list(
theme_bw(),
theme(panel.grid.minor = element_blank(),
panel.grid.major = element_blank(),
axis.ticks.y = element_blank(),
axis.ticks.x = element_blank(),
axis.text.x = element_blank(),
axis.text.y = element_blank())
)
# rotate x axis labels 90 degrees (append to an existing plot)
+ theme(axis.text.x=element_text(angle = -90, hjust = 0))
For doing annoying things with strings and regex
e.g. btm names...
# take the first whitespace-delimited token of each name
new_names = vapply(strsplit(oldnames," "), `[`, 1, FUN.VALUE=character(1))
# flag genes whose names match clone-style patterns like "RP11-"
# (str_detect is from stringr)
gene.rm = str_detect(string = gene.vector, pattern = "RP[0-9]-" )
# Quote each comma-separated token by replacing every word boundary with '"'
# — handy for pasting a plain gene list into code.
# perl = TRUE spelled out: T is a reassignable variable, TRUE is not.
vector = c('BUB1B, CCL22, CD58, CD59, DBI')
cat(gsub("\\b", '"', vector, perl = TRUE))
generic
tryCatch(function(object = x, args = args), error = function(e) return(NA))
Example with additional options
# tryCatch with an explicit error handler and a finally block that always
# runs (here: caret::train fitting a PLS model tuned on ROC)
tryCatch(
expr = {
# function here
m1.pls = train(
group ~ .,
data = dtrain,
method = 'pls',
tuneGrid = pls.grid,
trControl = ctrl,
metric = "ROC"
)
# end function
},
error = function(e){
# on failure: announce and return NA instead of stopping the script
message('error')
return(NA)
},
finally = {
# runs whether or not an error occurred
message('done')
}
)
mylist = mylist[order(match(names(mylist), vector_with_desired_order))]
# initialize a local repo and sync it with an existing GitHub repo
cd _path_to_repo_
git init
git pull https://github.com/MattPM/repo
git add .
git commit -m "add funcion"
# if error related to user auth etc. try this
git config --global user.email my_email@__.com
git commit --amend --reset-author
# wire up the remote, then sync both directions
git remote add origin https://github.com/MattPM/repo
git pull origin master
git push origin master
# confirm the working tree is clean
git status
# On branch master
# nothing to commit, working tree clean
# cd to the root dir of the project
git reset HEAD ~git reset HEAD~
If you already committed the file it is a bit more challenging, but this tool has worked.
# download this tool: https://rtyley.github.io/bfg-repo-cleaner/#usage
# BFG rewrites history to purge the named file from all commits
java -jar Downloads/bfg-1.14.0.jar --delete-files id_{NAME_OF_HUGE_HTML_FILE.html} PATH/TO/LOCAL_GIT_REPO/myrepo
path/I/Want/To/Ignore*
To ignore directories with the same name anywhere in the repo (e.g. all generated data and figures), add one pattern per line to .gitignore:
generated_data/
data/
figures/
# https://stackoverflow.com/questions/7694887/is-there-a-command-line-utility-for-rendering-github-flavored-markdown
# install grip
# cd to dir where markdown is
# grip serves the rendered markdown on a local port
grip README.md
# go to localhost/whatever and print -> save as pdf
# print an ASCII directory tree
# without the files, just directories
ls -R | grep ":$" | sed -e 's/:$//' -e 's/[^-][^\/]*\//--/g' -e 's/^/ /' -e 's/-/|/'
# with files included
find . | sed -e "s/[^-][^\/]*\// |/g" -e "s/|\([^ ]\)/|-\1/"
# one-time setup of a python env for UMAP (functions from reticulate)
virtualenv_create("r-reticulate")
virtualenv_install("r-reticulate", "umap-learn")
# point this R session at the env, then load the umap package
use_virtualenv("r-reticulate")
library(umap)
# disable macOS low-priority disk/network throttling
sudo sysctl debug.lowpri_throttle_enabled=1
# keep the machine awake: -t is the duration in seconds
# (1000000 s is roughly 11.5 days; use 86400 for 24 hours)
caffeinate -d -i -m -s -t 1000000