The SCP package provides a comprehensive set of tools for single cell data processing and downstream analysis.
The package includes facilities for:
- Integrated single cell quality control methods.
- Pipelines embedded with multiple methods for normalization, feature reduction, and cell population identification (standard Seurat workflow).
- Pipelines embedded with multiple data integration methods, including Uncorrected, Seurat, scVI, MNN, fastMNN, Harmony, Scanorama, BBKNN, CSS, LIGER, Conos.
- Multiple single cell downstream analyses such as identification of differential features, enrichment analysis, GSEA analysis, identification of dynamic features, PAGA, RNA velocity, Palantir, Monocle2, Monocle3, etc.
- Multiple methods for automatic annotation of single-cell data and methods for projection between single-cell datasets.
- High quality data visualization methods.
- Fast deployment of single-cell data into SCExplorer, a shiny app that provides an interactive visualization interface.
The functions in the SCP package are all developed around the Seurat object and are compatible with other Seurat functions.
- R >= 4.1.0
You can install the latest version of SCP from GitHub with:
# Install devtools only when it is missing. requireNamespace() checks that the
# package is available without attaching it to the search path, which is all
# that is needed because the call below is namespace-qualified.
if (!requireNamespace("devtools", quietly = TRUE)) {
  install.packages("devtools")
}
# Install the latest version of SCP from GitHub.
devtools::install_github("zhanghao-njmu/SCP")
To run functions such as RunSCVELO or RunPAGA, SCP requires conda to create a separate python environment.
You can use PrepareEnv() to create the SCP python environment. If the conda binary is not found, it will automatically download and install miniconda.
# Create the SCP python environment with the default settings.
SCP::PrepareEnv()
Or run PrepareEnv(conda_binary = "/path/to/conda") to use a particular conda binary.
# Create the SCP python environment with a specific conda binary
# (replace the placeholder path with the real location of conda).
SCP::PrepareEnv(conda_binary = "/path/to/conda")
If the download of miniconda or packages is slow, you can specify the miniconda repo and PyPI mirror according to your network region.
# Create the SCP python environment using a miniconda repository and a PyPI
# index chosen for the local network region. The call is namespace-qualified
# so that it works before library(SCP) has been run.
SCP::PrepareEnv(
  miniconda_repo = "https://mirrors.bfsu.edu.cn/anaconda/miniconda",
  pip_options = "-i https://pypi.tuna.tsinghua.edu.cn/simple"
)
If there are some R package version conflicts, or you do not want to change your current R environment, you can use the renv package to install SCP into an isolated R environment.
# Directory that will hold the isolated renv project for SCP.
env_dir <- "~/SCP_env/" # It cannot be the home directory "~" !
dir.create(env_dir, recursive = TRUE)
# The remaining steps are run from inside the project directory.
setwd(env_dir)
install.packages("renv")
# Initialise an renv project rooted at env_dir, then activate it.
renv::init(project = env_dir, bare = TRUE, force = TRUE, restart = TRUE)
renv::activate(project = env_dir)
# With the project active, install devtools and SCP, then create the SCP
# python environment.
install.packages("devtools")
devtools::install_github("zhanghao-njmu/SCP", upgrade = "always")
SCP::PrepareEnv()
When installing packages, network issues may cause the download to fail. Sometimes you need to provide a GitHub personal access token and restart the R session before downloading.
# Activate the SCP renv project before using the package.
env_dir <- "~/SCP_env/"
renv::activate(project = env_dir)
# Example usage inside the activated environment: run PAGA on the bundled
# pancreas_sub data and plot the "draw_graph_fr" reduction by SubCellType.
library(SCP)
data("pancreas_sub")
pancreas_sub <- RunPAGA(srt = pancreas_sub, group_by = "SubCellType", linear_reduction = "PCA", nonlinear_reduction = "UMAP")
ClassDimPlot(pancreas_sub, group.by = "SubCellType", reduction = "draw_graph_fr")
# Save the state of the environment (snapshot) or bring it back to the saved
# state (restore).
# NOTE(review): these two calls look like alternatives rather than steps to
# run back to back - confirm against the renv documentation.
renv::snapshot(project = env_dir)
renv::restore(project = env_dir)
The analysis is based on a subsetted version of mouse pancreas data.
# Data exploration on the bundled pancreas_sub example dataset.
library(SCP)
data("pancreas_sub")
# UMAP plots of the cells, one per grouping column (CellType, SubCellType).
ClassDimPlot(
srt = pancreas_sub, group.by = c("CellType", "SubCellType"),
reduction = "UMAP", theme_use = "theme_blank"
)
# Same UMAP grouped by SubCellType, with statistics of the Phase column
# added through stat.by.
ClassDimPlot(
srt = pancreas_sub, group.by = "SubCellType", stat.by = "Phase",
reduction = "UMAP", theme_use = "theme_blank"
)
# UMAP plots for four selected genes.
ExpDimPlot(
srt = pancreas_sub, features = c("Sox9", "Neurog3", "Fev", "Rbp4"),
reduction = "UMAP", theme_use = "theme_blank"
)
# Four genes plotted with compare_features = TRUE and in-situ labels.
ExpDimPlot(
srt = pancreas_sub, features = c("Ins1", "Gcg", "Sst", "Ghrl"),
compare_features = TRUE, label = TRUE, label_insitu = TRUE,
reduction = "UMAP", theme_use = "theme_blank"
)
# Heatmap of the genes listed below (trailing comments name the associated
# cell types), grouped by CellType and SubCellType and annotated with the
# Phase, G2M_score and Neurod2 values of the cells.
ht <- GroupHeatmap(
srt = pancreas_sub,
features = c(
"Sox9", "Anxa2", # Ductal
"Neurog3", "Hes6", # EPs
"Fev", "Neurod1", # Pre-endocrine
"Rbp4", "Pyy", # Endocrine
"Ins1", "Gcg", "Sst", "Ghrl" # Beta, Alpha, Delta, Epsilon
),
group.by = c("CellType", "SubCellType"),
cell_annotation = c("Phase", "G2M_score", "Neurod2"),
cell_palette = c("Dark2", "Paired", "Paired"),
show_row_names = TRUE,
add_dot = TRUE, add_reticle = TRUE
)
# The returned list carries the drawable figure in its plot element.
print(ht$plot)
# Cell quality control: run the QC step, then inspect its result.
pancreas_sub <- RunCellQC(srt = pancreas_sub)
# UMAP grouped by the "CellQC" column.
ClassDimPlot(srt = pancreas_sub, group.by = "CellQC", reduction = "UMAP")
# Statistics of the "CellQC" column within each CellType, with labels.
ClassStatPlot(srt = pancreas_sub, stat.by = "CellQC", group.by = "CellType", label = TRUE)
# Upset plot across the individual QC columns, restricted to the "Fail" level.
ClassStatPlot(
srt = pancreas_sub,
stat.by = c(
"db_qc", "outlier_qc", "umi_qc", "gene_qc",
"mito_qc", "ribo_qc", "ribo_mito_ratio_qc", "species_qc"
),
plot_type = "upset", stat_level = "Fail"
)
# Standard pipeline: process the object with Standard_SCP().
pancreas_sub <- Standard_SCP(srt = pancreas_sub)
# Plot the "StandardUMAP2D" reduction (presumably created by the step above),
# one panel per grouping column.
ClassDimPlot(
srt = pancreas_sub, group.by = c("CellType", "SubCellType"),
reduction = "StandardUMAP2D", theme_use = "theme_blank"
)
# 3D counterparts of the group plot and the feature plot.
ClassDimPlot3D(srt = pancreas_sub, group.by = "SubCellType")
ExpDimPlot3D(srt = pancreas_sub, features = c("Sox9", "Neurog3", "Fev", "Rbp4"))
Example data for integration is a subsetted version of panc8 (eight human pancreas datasets).
# Integration pipeline on the bundled panc8_sub dataset, using the "tech"
# column as the batch variable.
data("panc8_sub")
# Run two integration methods on the same object: Seurat, then Harmony.
panc8_sub <- Integration_SCP(srtMerge = panc8_sub, batch = "tech", integration_method = "Seurat")
panc8_sub <- Integration_SCP(srtMerge = panc8_sub, batch = "tech", integration_method = "Harmony")
# Plot the reduction of each method, grouped by celltype and by tech.
ClassDimPlot(
srt = panc8_sub, group.by = c("celltype", "tech"), reduction = "SeuratUMAP2D",
title = "Seurat", theme_use = "theme_blank"
)
ClassDimPlot(
srt = panc8_sub, group.by = c("celltype", "tech"), reduction = "HarmonyUMAP2D",
title = "Harmony", theme_use = "theme_blank"
)
# Projection between datasets.
# Rename the RNA features of panc8_sub to title case (made unique),
# presumably so they match the gene naming used in pancreas_sub.
panc8_rename <- RenameFeatures(srt = panc8_sub, newnames = make.unique(stringr::str_to_title(rownames(panc8_sub))), assays = "RNA")
# Map the query cells onto the reference's "SeuratUMAP2D" reduction.
pancreas_sub <- RunKNNMap(srt_query = pancreas_sub, srt_ref = panc8_rename, ref_umap = "SeuratUMAP2D")
# Plot query and reference together, coloured by their respective groups.
ProjectionPlot(
srt_query = pancreas_sub, srt_ref = panc8_rename,
query_group = "SubCellType", ref_group = "celltype"
)
# Automatic annotation, first against a bulk reference (ref_scMCA).
data("ref_scMCA")
pancreas_sub <- RunKNNPredict(srt_query = pancreas_sub, bulk_ref = ref_scMCA, filter_lowfreq = 20)
ClassDimPlot(srt = pancreas_sub, group.by = "knnpredict_classification", reduction = "UMAP", label = TRUE)
# Then against a single-cell reference, using its "celltype" column.
# Both plots read the same "knnpredict_classification" column.
pancreas_sub <- RunKNNPredict(
srt_query = pancreas_sub, srt_ref = panc8_rename,
ref_group = "celltype", filter_lowfreq = 20
)
ClassDimPlot(srt = pancreas_sub, group.by = "knnpredict_classification", reduction = "UMAP", label = TRUE)
# PAGA analysis grouped by SubCellType; return_seurat = TRUE so the result is
# assigned back to pancreas_sub.
pancreas_sub <- RunPAGA(
srt = pancreas_sub, group_by = "SubCellType",
linear_reduction = "PCA", nonlinear_reduction = "UMAP", return_seurat = TRUE
)
# Draw the PAGA result on the UMAP reduction with repelled in-situ labels.
PAGAPlot(srt = pancreas_sub, reduction = "UMAP", label = TRUE, label_insitu = TRUE, label_repel = TRUE)
# RNA velocity analysis (RunSCVELO) grouped by SubCellType; the result is
# assigned back to pancreas_sub.
pancreas_sub <- RunSCVELO(
srt = pancreas_sub, group_by = "SubCellType",
linear_reduction = "PCA", nonlinear_reduction = "UMAP", return_seurat = TRUE
)
# Velocity on the UMAP reduction: grouped by SubCellType, then as a stream plot.
VelocityPlot(srt = pancreas_sub, reduction = "UMAP", group_by = "SubCellType")
VelocityPlot(srt = pancreas_sub, reduction = "UMAP", plot_type = "stream")
# Differential expression test between CellType groups.
pancreas_sub <- RunDEtest(srt = pancreas_sub, group_by = "CellType", fc.threshold = 1, only.pos = FALSE)
VolcanoPlot(srt = pancreas_sub, group_by = "CellType")
# Pull the result table from the object's tools slot and keep rows with
# avg_log2FC > 1 and adjusted p-value < 0.05.
DEGs <- pancreas_sub@tools$DEtest_CellType$AllMarkers_wilcox
DEGs <- DEGs[with(DEGs, avg_log2FC > 1 & p_val_adj < 0.05), ]
# Add the "TF" and "SP" feature annotations used by the heatmap below.
pancreas_sub <- AnnotateFeatures(pancreas_sub, species = "Mus_musculus", db = c("TF", "SP"))
# Heatmap of the selected genes, split by the group1 column of the DE table,
# with GO_BP annotation and the TF/SP feature annotations.
ht <- ExpHeatmap(
srt = pancreas_sub, group.by = "CellType", features = DEGs$gene, feature_split = DEGs$group1,
species = "Mus_musculus", db = "GO_BP", anno_terms = TRUE, anno_keys = TRUE, anno_features = TRUE,
feature_annotation = c("TF", "SP"), feature_palcolor = list(c("gold", "steelblue"), c("forestgreen")),
height = 6, width = 5
)
print(ht$plot)
# Enrichment analysis per CellType against GO_BP, using the features that
# pass the DE_threshold expression.
pancreas_sub <- RunEnrichment(
srt = pancreas_sub, group_by = "CellType", db = "GO_BP", species = "Mus_musculus",
DE_threshold = "avg_log2FC > 1 & p_val_adj < 0.05"
)
# Show the result for the Ductal and Endocrine groups as a bar plot.
EnrichmentPlot(
srt = pancreas_sub, group_by = "CellType", group_use = c("Ductal", "Endocrine"),
plot_type = "bar"
)
# Same groups as a word cloud with the default word_type.
EnrichmentPlot(
srt = pancreas_sub, group_by = "CellType", group_use = c("Ductal", "Endocrine"),
plot_type = "wordcloud"
)
# Same groups as a word cloud with word_type = "feature".
EnrichmentPlot(
srt = pancreas_sub, group_by = "CellType", group_use = c("Ductal", "Endocrine"),
plot_type = "wordcloud", word_type = "feature"
)
# GSEA per CellType against GO_BP; only the adjusted p-value filter is
# applied through DE_threshold here.
pancreas_sub <- RunGSEA(
srt = pancreas_sub, group_by = "CellType", db = "GO_BP", species = "Mus_musculus",
DE_threshold = "p_val_adj < 0.05"
)
# Plot for the Endocrine group, then for one specific gene set ID.
GSEAPlot(srt = pancreas_sub, group_by = "CellType", group_use = "Endocrine")
GSEAPlot(srt = pancreas_sub, group_by = "CellType", group_use = "Endocrine", geneSetID = "GO:0007186")
# Trajectory inference with Slingshot on the UMAP reduction, grouped by
# SubCellType.
pancreas_sub <- RunSlingshot(srt = pancreas_sub, group.by = "SubCellType", reduction = "UMAP")
# Plot Lineage1..Lineage3 as features on the UMAP.
ExpDimPlot(pancreas_sub, features = paste0("Lineage", 1:3), reduction = "UMAP", theme_use = "theme_blank")
# Overlay the same three lineages on the SubCellType UMAP.
ClassDimPlot(pancreas_sub, group.by = "SubCellType", reduction = "UMAP", lineages = paste0("Lineage", 1:3), lineages_span = 0.1)
# Dynamic features along Lineage1 and Lineage2.
pancreas_sub <- RunDynamicFeatures(srt = pancreas_sub, lineages = c("Lineage1", "Lineage2"), n_candidates = 200)
# Heatmap over both lineages, with Lineage1 passed to reverse_ht, features
# split into 6 groups, GO_BP annotation and TF/SP feature annotations.
ht <- DynamicHeatmap(
srt = pancreas_sub, lineages = c("Lineage1", "Lineage2"),
use_fitted = TRUE, n_split = 6, reverse_ht = "Lineage1",
species = "Mus_musculus", db = "GO_BP", anno_terms = TRUE, anno_keys = TRUE, anno_features = TRUE,
heatmap_palette = "viridis", cell_annotation = "SubCellType",
separate_annotation = list("SubCellType", c("Nnat", "Irx1")), separate_palette = c("Paired", "Set1"),
feature_annotation = c("TF", "SP"), feature_palcolor = list(c("gold", "steelblue"), c("forestgreen")),
pseudotime_label = 25, pseudotime_label_color = "red",
height = 6, width = 5
)
print(ht$plot)
# Per-feature plots along the two lineages, with lineages compared
# (compare_lineages = TRUE) and features kept separate.
DynamicPlot(
srt = pancreas_sub, lineages = c("Lineage1", "Lineage2"), group.by = "SubCellType",
features = c("Plk1", "Hes1", "Neurod2", "Ghrl", "Gcg", "Ins2"),
compare_lineages = TRUE, compare_features = FALSE
)
# Statistical plot of four genes per SubCellType, with CellType as the
# background grouping and the listed pairwise comparisons.
ExpStatPlot(
srt = pancreas_sub, group.by = "SubCellType", bg.by = "CellType",
features = c("Sox9", "Neurod2", "Isl1", "Rbp4"),
comparisons = list(
c("Ductal", "Ngn3 low EP"),
c("Ngn3 high EP", "Pre-endocrine"),
c("Alpha", "Beta")
),
multiplegroup_comparisons = TRUE
)
# Interactive data visualization with SCExplorer.
# Prepare both datasets under ./SCExplorer, keyed by the names given in the list.
PrepareSCExplorer(list(mouse_pancreas = pancreas_sub, human_pancreas = panc8_sub), base_dir = "./SCExplorer")
# Build the app object from the prepared directory.
app <- RunSCExplorer(base_dir = "./SCExplorer")
list.files("./SCExplorer") # This directory can be used as site directory for Shiny Server.
# Launch the app only when running in an interactive R session.
if (interactive()) {
shiny::runApp(app)
}
More examples of SCP can be found in the documentation of the functions, such as Integration_SCP, RunKNNMap, RunMonocle3, ClassDimPlot, GroupHeatmap, RunSCExplorer, etc.