Increment version number to 0.1.7

liangdp1984 · Sep 8, 2022 · d28c5ae · d28c5ae
1 parent 39ba5ea
commit d28c5ae
Show file tree

Hide file tree

Showing 406 changed files with 53,952 additions and 315 deletions.
diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml
diff --git a/.gitignore b/.gitignore
@@ -2,5 +2,4 @@
 .Rhistory
 .RData
 .Ruserdata
-docs
 SCExplorer
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: SCP
 Type: Package
 Title: Single Cell Pipeline
-Version: 0.1.5
+Version: 0.1.7
 Author: Hao Zhang
 Maintainer: Hao Zhang <[email protected]>
 Description: The SCP package provides a comprehensive set of tools for single cell data processing and downstream analysis.
@@ -11,6 +11,7 @@ LazyData: false
 LazyDataCompression: xz
 Depends:
     R (>= 3.5.0)
+biocViews: SingleCell, Visualization    
 Imports:
     AnnotationDbi,
     BiocParallel,
@@ -25,7 +26,6 @@ Imports:
     ggpubr,
     ggrepel,
     ggupset,
-    ggVennDiagram,
     ggwordcloud,
     GO.db,
     GOSemSim,
@@ -72,11 +72,13 @@ Remotes:
     zhanghao-njmu/seurat-wrappers,
     zhanghao-njmu/seurat-data,
     quadbiolab/simspec,
-    carmonalab/UCell
+    carmonalab/UCell,
+    jokergoo/ComplexHeatmap
 Suggests:
     AUCell,
     covr,
     devtools,
+    ggVennDiagram,
     hexbin,
     htmlwidgets,
     httr,

diff --git a/NAMESPACE b/NAMESPACE
@@ -53,6 +53,7 @@ export(PAGAPlot)
 export(PrepareEnrichmentDB)
 export(PrepareSCExplorer)
 export(ProjectionPlot)
+export(RecoverCounts)
 export(RenameFeatures)
 export(RunCSSMap)
 export(RunCellQC)
@@ -110,6 +111,7 @@ export(db_scds)
 export(exist_pkg)
 export(fastMNN_integrate)
 export(isOutlier)
+export(list_palette)
 export(palette_scp)
 export(panel_fix)
 export(panel_fix_single)
@@ -339,6 +341,7 @@ importFrom(grid,linesGrob)
 importFrom(grid,rasterGrob)
 importFrom(grid,textGrob)
 importFrom(grid,unit)
+importFrom(grid,unitType)
 importFrom(grid,viewport)
 importFrom(gtable,gtable_add_cols)
 importFrom(gtable,gtable_add_grob)
@@ -360,8 +363,6 @@ importFrom(reshape2,dcast)
 importFrom(reshape2,melt)
 importFrom(reticulate,import)
 importFrom(reticulate,iterate)
-importFrom(reticulate,py_module_available)
-importFrom(reticulate,py_set_seed)
 importFrom(reticulate,py_to_r)
 importFrom(rhdf5,h5createDataset)
 importFrom(rhdf5,h5createFile)

diff --git a/R/SCP-analysis.R b/R/SCP-analysis.R
@@ -799,26 +799,29 @@ PrepareEnrichmentDB <- function(species = c("Homo_sapiens", "Mus_musculus"),
   ))) {
     stop("'enrichment' is invalid. Can be 'GO_BP', 'GO_CC', 'GO_MF', 'KEGG', 'WikiPathway', 'Reactome','ProteinComplex', 'PFAM', 'Chromosome'")
   }
+
   db_list <- list()
   for (sps in species) {
     message("Species: ", sps)
     if (!isTRUE(db_update)) {
       for (term in enrichment) {
         # Try to load cached database, if already generated.
         dbinfo <- ListEnrichmentDB(species = sps, enrichment = term)
-        if (db_version == "latest") {
-          pathname <- dbinfo[order(dbinfo[["timestamp"]], decreasing = TRUE)[1], "file"]
-        } else {
-          pathname <- dbinfo[grep(db_version, dbinfo[["db_version"]], fixed = TRUE)[1], "file"]
-          if (is.na(pathname)) {
-            warning("There is no ", db_version, " version of the database. Use the latest version.", immediate. = TRUE)
+        if (nrow(dbinfo) > 0 && !is.null(dbinfo)) {
+          if (db_version == "latest") {
             pathname <- dbinfo[order(dbinfo[["timestamp"]], decreasing = TRUE)[1], "file"]
+          } else {
+            pathname <- dbinfo[grep(db_version, dbinfo[["db_version"]], fixed = TRUE)[1], "file"]
+            if (is.na(pathname)) {
+              warning("There is no ", db_version, " version of the database. Use the latest version.", immediate. = TRUE)
+              pathname <- dbinfo[order(dbinfo[["timestamp"]], decreasing = TRUE)[1], "file"]
+            }
+          }
+          if (!is.na(pathname)) {
+            header <- readCacheHeader(pathname)
+            message("Loaded cached db: ", term, " version:", strsplit(header[["comment"]], "\\|")[[1]][1], " created:", header[["timestamp"]])
+            db_list[[sps]][[term]] <- loadCache(pathname = pathname)
           }
-        }
-        if (!is.na(pathname)) {
-          header <- readCacheHeader(pathname)
-          message("Loaded cached db: ", term, " version:", strsplit(header[["comment"]], "\\|")[[1]][1], " created:", header[["timestamp"]])
-          db_list[[sps]][[term]] <- loadCache(pathname = pathname)
         }
       }
     }
@@ -892,6 +895,8 @@ PrepareEnrichmentDB <- function(species = c("Homo_sapiens", "Mus_musculus"),
 
       ## KEGG ---------------------------------------------------------------------------
       if (any(enrichment == "KEGG") && (!"KEGG" %in% names(db_list[[sps]]))) {
+        options(timeout = 120)
+        check_R("httr")
         message("Preparing database: KEGG")
         kegg_db <- "pathway"
         kegg_pathwaygene_url <- paste0("http://rest.kegg.jp/link/", kegg_sp, "/", kegg_db, collapse = "")
@@ -904,7 +909,8 @@ PrepareEnrichmentDB <- function(species = c("Homo_sapiens", "Mus_musculus"),
         TERM2NAME <- TERM2NAME[TERM2NAME[, 1] %in% TERM2GENE[, 1], ]
         colnames(TERM2GENE) <- c("Term", "entrez_id")
         colnames(TERM2NAME) <- c("Term", "Name")
-        kegg_info <- readLines("http://rest.kegg.jp/info/hsa")
+        # kegg_info <- readLines("http://rest.kegg.jp/info/hsa")
+        kegg_info <- strsplit(httr::content(httr::GET("http://rest.kegg.jp/info/hsa")), split = "\n")[[1]]
         version <- gsub(".*(?=Release)", replacement = "", x = kegg_info[grepl("Release", x = kegg_info)], perl = TRUE)
         db_list[[sps]][["KEGG"]][["TERM2GENE"]] <- unique(TERM2GENE)
         db_list[[sps]][["KEGG"]][["TERM2NAME"]] <- unique(TERM2NAME)
@@ -918,20 +924,22 @@ PrepareEnrichmentDB <- function(species = c("Homo_sapiens", "Mus_musculus"),
       ## WikiPathway ---------------------------------------------------------------------------
       if (any(enrichment == "WikiPathway") && (!"WikiPathway" %in% names(db_list[[sps]]))) {
         message("Preparing database: WikiPathway")
+        options(timeout = 120)
         tempdir <- tempdir()
         gmt_files <- list.files(tempdir)[grep(".gmt", x = list.files(tempdir))]
         if (length(gmt_files) > 0) {
           file.remove(paste0(tempdir, "/", gmt_files))
         }
         temp <- tempfile()
-        download.file("https://wikipathways-data.wmcloud.org/current/gmt", temp)
+        use_wget <- suppressWarnings(system2("wget", stdout = FALSE, stderr = FALSE))
+        download.file("https://wikipathways-data.wmcloud.org/current/gmt", destfile = temp, method = ifelse(use_wget == 1, "wget", "auto"))
 
         lines <- readLines(temp)
         lines <- lines[grep("File</a></td>", lines, fixed = TRUE)]
         lines <- gsub("(.*<td><a href='./)|('> File</a></td>)", "", lines)
         gmtfile <- lines[grep(sps, lines, fixed = TRUE)]
         version <- strsplit(gmtfile, split = "-")[[1]][[2]]
-        download.file(paste0("https://wikipathways-data.wmcloud.org/current/gmt/", gmtfile), temp)
+        download.file(paste0("https://wikipathways-data.wmcloud.org/current/gmt/", gmtfile), destfile = temp, method = ifelse(use_wget == 1, "wget", "auto"))
         wiki_gmt <- clusterProfiler::read.gmt(temp)
         unlink(temp)
         wiki_gmt <- apply(wiki_gmt, 1, function(x) {
@@ -981,14 +989,16 @@ PrepareEnrichmentDB <- function(species = c("Homo_sapiens", "Mus_musculus"),
       ## Protein complex ---------------------------------------------------------------------------
       if (any(enrichment == "ProteinComplex") && (!"ProteinComplex" %in% names(db_list[[sps]]))) {
         message("Preparing database: ProteinComplex")
+        options(timeout = 120)
         check_R("taxize")
         uid <- suppressMessages(taxize::get_uid(complex_sp, messages = FALSE))
         common_name <- unlist(strsplit(unlist(suppressMessages(taxize::sci2comm(uid, db = "ncbi"))), " "))
         common_name <- common_name[length(common_name)]
         common_name <- paste(toupper(substr(common_name, 1, 1)), substr(common_name, 2, nchar(common_name)), sep = "")
 
         temp <- tempfile()
-        download.file("http://mips.helmholtz-muenchen.de/corum/download/coreComplexes.txt.zip", temp)
+        use_wget <- suppressWarnings(system2("wget", stdout = FALSE, stderr = FALSE))
+        download.file("http://mips.helmholtz-muenchen.de/corum/download/coreComplexes.txt.zip", temp, method = ifelse(use_wget == 1, "wget", "auto"))
         df <- read.table(unz(temp, "coreComplexes.txt"), header = TRUE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, quote = "")
         df <- df[which(df$Organism == common_name), ]
         s <- strsplit(df$subunits.Entrez.IDs., split = ";")
@@ -1000,7 +1010,7 @@ PrepareEnrichmentDB <- function(species = c("Homo_sapiens", "Mus_musculus"),
         TERM2NAME <- complex[, c(3, 1)]
         colnames(TERM2GENE) <- c("Term", "entrez_id")
         colnames(TERM2NAME) <- c("Term", "Name")
-        download.file("https://mips.helmholtz-muenchen.de/corum/download.html", temp)
+        download.file("https://mips.helmholtz-muenchen.de/corum/download.html", temp, method = ifelse(use_wget == 1, "wget", "auto"))
         lines <- readLines(temp)
         lines <- lines[grep("current release", lines)]
         version <- gsub("(.*\"setLightFont\">)|(current release.*)", "", lines)
@@ -1274,6 +1284,7 @@ RunEnrichment <- function(srt = NULL, group_by = NULL, test.use = "wilcox", DE_t
   input <- input[!is.na(input[[db_IDtype]]), ]
 
   message("Permform enrichment...")
+  suppressPackageStartupMessages(library("DOSE"))
   comb <- expand.grid(group = levels(geneID_groups), term = enrichment, stringsAsFactors = FALSE)
   res_list <- bplapply(seq_len(nrow(comb)),
     FUN = function(i, id) {
@@ -1526,6 +1537,7 @@ RunGSEA <- function(srt = NULL, group_by = NULL, test.use = "wilcox", DE_thresho
   input <- input[!is.na(input[[db_IDtype]]), ]
 
   message("Permform GSEA...")
+  suppressPackageStartupMessages(library("DOSE"))
   comb <- expand.grid(group = levels(geneID_groups), term = enrichment, stringsAsFactors = FALSE)
   res_list <- BiocParallel::bplapply(seq_len(nrow(comb)),
     FUN = function(i, id) {
@@ -2412,6 +2424,13 @@ srt_to_adata <- function(srt,
 #' adata <- RunPAGA(adata = adata, group_by = "SubCellType", liner_reduction = "PCA", nonliner_reduction = "UMAP")
 #' srt <- adata_to_srt(adata)
 #' srt
+#'
+#' ### Or convert a h5ad file to Seurat object
+#' # library(reticulate)
+#' # sc <- import("scanpy")
+#' # adata <- sc$read_h5ad("pancreas.h5ad")
+#' # srt <- adata_to_srt(adata)
+#' # srt
 #' @importFrom Seurat CreateSeuratObject CreateAssayObject CreateDimReducObject AddMetaData
 #' @importFrom SeuratObject as.Graph
 #' @importFrom reticulate iterate
@@ -2424,19 +2443,25 @@ adata_to_srt <- function(adata) {
   x <- t(adata$X)
   rownames(x) <- adata$var_names$values
   colnames(x) <- adata$obs_names$values
+  if (!"dgCMatrix" %in% class(x)) {
+    x <- as(x[1:nrow(x), ], "dgCMatrix")
+  }
 
   metadata <- NULL
   if (length(adata$obs_keys()) > 0) {
     metadata <- as.data.frame(adata$obs)
   }
 
-  srt <- CreateSeuratObject(counts = x, meta.data = metadata)
+  srt <- CreateSeuratObject(counts = as(x[1:nrow(x), ], "dgCMatrix"), meta.data = metadata)
 
   if (length(adata$layers$keys()) > 0) {
     for (k in iterate(adata$layers$keys())) {
       layer <- t(adata$layers[[k]])
       rownames(layer) <- adata$var_names$values
       colnames(layer) <- adata$obs_names$values
+      if (!"dgCMatrix" %in% class(layer)) {
+        layer <- as(layer[1:nrow(layer), ], "dgCMatrix")
+      }
       srt[[k]] <- CreateAssayObject(counts = layer)
     }
   }

diff --git a/R/SCP-app.R b/R/SCP-app.R
@@ -469,7 +469,6 @@ RunSCExplorer <- function(base_dir = "SCExplorer",
                           style_script = require("styler", quietly = TRUE),
                           overwrite = FALSE,
                           return_app = TRUE) {
-  base_dir <- normalizePath(base_dir)
   DataFile_full <- paste0(base_dir, "/", DataFile)
   MetaFile_full <- paste0(base_dir, "/", MetaFile)
   if (!file.exists(DataFile_full) || !file.exists(MetaFile_full)) {

diff --git a/R/SCP-cellqc.R b/R/SCP-cellqc.R
@@ -292,7 +292,7 @@ RunCellQC <- function(srt, assay = "RNA",
                       UMI_threshold = 3000, gene_threshold = 1000,
                       mito_threshold = 20, mito_pattern = c("MT-", "Mt-", "mt-"), mito_gene = NULL,
                       ribo_threshold = 50, ribo_pattern = c("RP[SL]\\d+\\w{0,1}\\d*$", "Rp[sl]\\d+\\w{0,1}\\d*$", "rp[sl]\\d+\\w{0,1}\\d*$"), ribo_gene = NULL,
-                      ribo_mito_ratio_threshold = 0,
+                      ribo_mito_ratio_range = c(1, 10),
                       species = NULL, species_gene_prefix = NULL, species_percent = 95,
                       seed = 11) {
   set.seed(seed)
@@ -344,7 +344,7 @@ RunCellQC <- function(srt, assay = "RNA",
     nFeature <- srt[[paste0(c(paste0("nFeature_", assay), sp), collapse = ".")]] <- colSums(srt[[assay]]@counts[sp_genes, ] > 0)
     percent.mito <- srt[[paste0(c("percent.mito", sp), collapse = ".")]] <- PercentageFeatureSet(object = srt, assay = assay, pattern = paste0("(", paste0("^", prefix, "-*", mito_pattern), ")", collapse = "|"), features = mito_gene)[[1]]
     percent.ribo <- srt[[paste0(c("percent.ribo", sp), collapse = ".")]] <- PercentageFeatureSet(object = srt, assay = assay, pattern = paste0("(", paste0("^", prefix, "-*", ribo_pattern), ")", collapse = "|"), features = ribo_gene)[[1]]
-    percent.ribo <- srt[[paste0(c("percent.ribo.mito.ratio", sp), collapse = ".")]] <- srt[[paste0(c("percent.ribo", sp), collapse = "."), drop = TRUE]] / srt[[paste0(c("percent.mito", sp), collapse = "."), drop = TRUE]]
+    percent.ribo <- srt[[paste0(c("ribo.mito.ratio", sp), collapse = ".")]] <- srt[[paste0(c("percent.ribo", sp), collapse = "."), drop = TRUE]] / srt[[paste0(c("percent.mito", sp), collapse = "."), drop = TRUE]]
     percent.genome <- srt[[paste0(c("percent.genome", sp), collapse = ".")]] <- PercentageFeatureSet(object = srt, assay = assay, pattern = paste0("^", prefix))[[1]]
 
     if (n == 1) {
@@ -412,7 +412,7 @@ RunCellQC <- function(srt, assay = "RNA",
     ribo_qc <- colnames(srt)[srt[[paste0(c("percent.ribo", species[1]), collapse = "."), drop = TRUE]] > ribo_threshold]
   }
   if ("ribo_mito_ratio" %in% qc_metrics) {
-    ribo_mito_ratio_qc <- colnames(srt)[srt[[paste0(c("percent.ribo.mito.ratio", species[1]), collapse = "."), drop = TRUE]] < ribo_mito_ratio_threshold]
+    ribo_mito_ratio_qc <- colnames(srt)[srt[[paste0(c("ribo.mito.ratio", species[1]), collapse = "."), drop = TRUE]] < ribo_mito_ratio_range[1] | srt[[paste0(c("ribo.mito.ratio", species[1]), collapse = "."), drop = TRUE]] > ribo_mito_ratio_range[2]]
   }
   if ("species" %in% qc_metrics) {
     species_qc <- colnames(srt)[srt[[paste0(c("percent.genome", species[1]), collapse = "."), drop = TRUE]] < species_percent]
@@ -427,7 +427,7 @@ RunCellQC <- function(srt, assay = "RNA",
   cat("...", length(gene_qc), "low-gene cells", "\n")
   cat("...", length(mito_qc), "high-mito cells", "\n")
   cat("...", length(ribo_qc), "high-ribo cells", "\n")
-  cat("...", length(ribo_mito_ratio_qc), "low-ribo_mito_ratio cells", "\n")
+  cat("...", length(ribo_mito_ratio_qc), " ribo_mito_ratio outlier cells", "\n")
   cat("...", length(species_qc), "species-contaminated cells", "\n")
   cat(">>>", "Remained cells after filtering:", ntotal - length(CellQC), "\n")