Merge pull request satijalab#617 from satijalab/fix/sct2_vignette

Updates for sct v2 vignette
mianmianyin · Jan 10, 2022 · 0b48323 · 0b48323
2 parents 5383cb5 + 7e0bb96
commit 0b48323
Show file tree

Hide file tree

Showing 9 changed files with 53 additions and 37 deletions.
diff --git a/R/differential_expression.R b/R/differential_expression.R
@@ -2004,9 +2004,9 @@ PerformDE <- function(
 #' pbmc_small2 <- SCTransform(object = pbmc_small, variable.features.n = 20)
 #' pbmc_merged <- merge(x = pbmc_small1, y = pbmc_small2)
 #' pbmc_merged <- PrepSCTFindMarkers(object = pbmc_merged)
-#' markers <- FindMarkers(object = pbmc_merged, ident.1="0", ident.2="1", assay="SCT")
-#' pbmc_subset <- subset(pbmc_merged, idents =Run c("0", "1"))
-#' markers_subset <- FindMarkers(object = pbmcx, ident.1="0", ident.2="1", assay="SCT", recorrect_umi = FALSE)
+#' markers <- FindMarkers(object = pbmc_merged, ident.1 = "0", ident.2 = "1", assay = "SCT")
+#' pbmc_subset <- subset(pbmc_merged, idents = c("0", "1"))
+#' markers_subset <- FindMarkers(object = pbmc_subset, ident.1 = "0", ident.2 = "1", assay = "SCT", recorrect_umi = FALSE)
 
 PrepSCTFindMarkers <- function(object, assay = "SCT", verbose = TRUE) {
   if (length(x = levels(x = object[[assay]])) == 1) {

diff --git a/R/integration.R b/R/integration.R
@@ -1912,7 +1912,7 @@ LocalStruct <- function(
 #' }
 #'
 #' @importFrom rlang invoke
-#' 
+#'
 #' @export
 #' @concept integration
 #'

diff --git a/R/objects.R b/R/objects.R
@@ -2253,7 +2253,10 @@ setAs(
           X = 1:length(x = vst.res),
           FUN = function(i) {
             vst.res[[i]]$umi.assay <- umi.assay[[i]]
-            return(PrepVSTResults(vst.res = vst.res[[i]], cell.names = colnames(x = from)))
+            return(PrepVSTResults(
+              vst.res = vst.res[[i]],
+              cell.names = colnames(x = from)
+            ))
           }
         )
         names(x = vst.res) <- paste0("model", 1:length(x = vst.res))
@@ -2570,8 +2573,13 @@ PrepVSTResults <- function(vst.res, cell.names) {
   if ("scale_factor" %in% names(vst.res$arguments)){
     median_umi <- vst.res$arguments$scale_factor
   }
-  if (is.na(median_umi)) median_umi <- median(cell.attrs$umi)
-
+  if (is.na(median_umi)) {
+    if ("umi" %in% colnames(x = cell.attrs)) {
+      median_umi <- median(cell.attrs$umi)
+    } else if ("log_umi" %in% colnames(x = cell.attrs)) {
+      median_umi <- median(10 ^ cell.attrs$log_umi)
+    }
+  }
   vst.res.SCTModel  <- SCTModel(
     feature.attributes = feature.attrs,
     cell.attributes = cell.attrs,

diff --git a/man/PrepSCTFindMarkers.Rd b/man/PrepSCTFindMarkers.Rd
diff --git a/tests/testthat/test_preprocessing.R b/tests/testthat/test_preprocessing.R
@@ -304,26 +304,26 @@ test_that("SCTransform wrapper works as expected", {
   expect_equal(fa["MS4A1", "detection_rate"], 0.15)
   expect_equal(fa["MS4A1", "gmean"], 0.2027364, tolerance = 1e-6)
   expect_equal(fa["MS4A1", "variance"], 1.025158, tolerance = 1e-6)
-  expect_equal(fa["MS4A1", "residual_mean"], 0.2362887026, tolerance = 1e-6)
-  expect_equal(fa["MS4A1", "residual_variance"], 2.875760656, tolerance = 1e-6)
+  expect_equal(fa["MS4A1", "residual_mean"], 0.2362887, tolerance = 1e-6)
+  expect_equal(fa["MS4A1", "residual_variance"], 2.875761, tolerance = 1e-6)
 })
 
 object <- suppressWarnings(SCTransform(object = object, ncells = 40, verbose = FALSE))
 test_that("SCTransform ncells param works", {
   expect_true("SCT" %in% names(object))
-  expect_equal(as.numeric(colSums(GetAssayData(object = object[["SCT"]], slot = "scale.data"))[1]), 11.47923, tolerance = 1e6)
+  expect_equal(as.numeric(colSums(GetAssayData(object = object[["SCT"]], slot = "scale.data"))[1]), 11.8332, tolerance = 1e-6)
   expect_equal(as.numeric(rowSums(GetAssayData(object = object[["SCT"]], slot = "scale.data"))[5]), 0)
-  expect_equal(as.numeric(colSums(GetAssayData(object = object[["SCT"]], slot = "data"))[1]), 55.42253476, tolerance = 1e-6)
-  expect_equal(as.numeric(rowSums(GetAssayData(object = object[["SCT"]], slot = "data"))[5]), 11.36674295, tolerance = 1e-6)
-  expect_equal(as.numeric(colSums(GetAssayData(object = object[["SCT"]], slot = "counts"))[1]), 119)
-  expect_equal(as.numeric(rowSums(GetAssayData(object = object[["SCT"]], slot = "counts"))[5]), 26)
+  expect_equal(as.numeric(colSums(GetAssayData(object = object[["SCT"]], slot = "data"))[1]), 54.59918, tolerance = 1e-6)
+  expect_equal(as.numeric(rowSums(GetAssayData(object = object[["SCT"]], slot = "data"))[5]), 11.74404, tolerance = 1e-6)
+  expect_equal(as.numeric(colSums(GetAssayData(object = object[["SCT"]], slot = "counts"))[1]), 117)
+  expect_equal(as.numeric(rowSums(GetAssayData(object = object[["SCT"]], slot = "counts"))[5]), 28)
   expect_equal(length(VariableFeatures(object[["SCT"]])), 220)
   fa <- SCTResults(object = object, assay = "SCT", slot = "feature.attributes")
   expect_equal(fa["MS4A1", "detection_rate"], 0.15)
   expect_equal(fa["MS4A1", "gmean"], 0.2027364, tolerance = 1e-6)
   expect_equal(fa["MS4A1", "variance"], 1.025158, tolerance = 1e-6)
-  expect_equal(fa["MS4A1", "residual_mean"], 0.3084931639, tolerance = 1e-6)
-  expect_equal(fa["MS4A1", "residual_variance"], 3.721067314, tolerance = 1e-6)
+  expect_equal(fa["MS4A1", "residual_mean"], 0.3110813, tolerance = 1e-3)
+  expect_equal(fa["MS4A1", "residual_variance"], 3.871326, tolerance = 1e-3)
 })
 
 suppressWarnings(object[["SCT_SAVE"]] <- object[["SCT"]])

diff --git a/vignettes/assets/sctransform_v2.png b/vignettes/assets/sctransform_v2.png
diff --git a/vignettes/get_started.Rmd b/vignettes/get_started.Rmd
@@ -125,7 +125,7 @@ Seurat also offers additional novel statistical methods for analyzing single-cel
 
 * Weighted-nearest neighbor (WNN) analysis: to define cell state based on multiple modalities [[paper](https://doi.org/10.1016/j.cell.2021.04.048)]
 * Mixscape: to analyze data from pooled single-cell CRISPR screens [[paper](https://doi.org/10.1038/s41588-021-00778-2)]
-* SCTransform: Improved normalization for single-cell RNA-seq data [paper](https://genomebiology.biomedcentral.com/articles/10.1186/s13059-019-1874-1)]
+* SCTransform: Improved normalization for single-cell RNA-seq data [[paper](https://genomebiology.biomedcentral.com/articles/10.1186/s13059-019-1874-1)]
 
 ```{r results='asis', echo=FALSE, warning=FALSE, message = FALSE}
 make_vignette_card_section(vdat = vdat, cat = 3)

diff --git a/vignettes/sctransform_v2_vignette.Rmd b/vignettes/sctransform_v2_vignette.Rmd
@@ -38,10 +38,10 @@ We recently introduced [sctransform](https://genomebiology.biomedcentral.com/art
 Users can install sctransform v2 from Github (CRAN update coming soon), and invoke the use of the updated method via the `vst.flavor` argument.
 
 ```{r tldr, eval=FALSE}
-# install sctransform
-devtools::install_github("satijalab/sctransform", ref="develop") 
+# install Seurat from Github (automatically updates sctransform)
+devtools::install_github("satijalab/seurat", ref="develop") 
 # invoke sctransform
-object <- SCTransform(object, vst.flavor = 'v2')
+object <- SCTransform(object, vst.flavor = "v2")
 ```
 
 ## Introduction
@@ -58,12 +58,12 @@ In this vignette, we use [sctransform v2](https://github.com/satijalab/sctransfo
 
 We will install sctransform v2 from Github. We will also install the [glmGamPoi](https://bioconductor.org/packages/release/bioc/html/glmGamPoi.html) package which substantially improves the speed of the learning procedure. 
 
-```{r}
+```{r results='hide', message=FALSE, warning=FALSE}
 # install glmGamPoi
 if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager")
 BiocManager::install("glmGamPoi")
-# install sctransform
-##devtools::install("satijalab/sctransform", ref="develop") 
+# install sctransform from Github
+devtools::install_github("satijalab/sctransform", ref="develop") 
 ```
 
 ## Setup the Seurat objects
@@ -132,7 +132,8 @@ ifnb.list <- PrepSCTIntegration(object.list = ifnb.list, anchor.features = featu
 To integrate the two datasets, we use the `FindIntegrationAnchors()` function, which takes a list of Seurat objects as input, and use these anchors to integrate the two datasets together with `IntegrateData()`.
 
 ```{r ifnb.cca.sct.anchors}
-immune.anchors <- FindIntegrationAnchors(object.list = ifnb.list, normalization.method = "SCT", anchor.features = features)
+immune.anchors <- FindIntegrationAnchors(object.list = ifnb.list, 
+                                         normalization.method = "SCT", anchor.features = features)
 immune.combined.sct <- IntegrateData(anchorset = immune.anchors, normalization.method = "SCT")
 ```
 
@@ -155,7 +156,7 @@ DimPlot(immune.combined.sct, reduction = "umap", split.by = "stim")
 
 We can also visualize the distribution of annotated celltypes across control and stimulated datasets:
 
-```{r immunesca.cca.sct.split.dims, fig.width=12, fig.height=4}
+```{r immunesca.cca.sct.split.dims, fig.width=13, fig.height=4}
 p1 <- DimPlot(immune.combined.sct, reduction = "umap", group.by = "stim")
 p2 <- DimPlot(immune.combined.sct, reduction = "umap", group.by = "seurat_clusters", label = TRUE, repel = TRUE)
 p3 <- DimPlot(immune.combined.sct, reduction = "umap", group.by = "seurat_annotations", label = TRUE, repel = TRUE)
@@ -197,11 +198,13 @@ We can also use the corrected counts for visualization:
 ```{r feature.heatmaps, fig.height = 14}
 Idents(immune.combined.sct) <- "seurat_annotations"
 DefaultAssay(immune.combined.sct) <- "SCT"
-FeaturePlot(immune.combined.sct, features = c("CD3D", "GNLY", "IFI6"), split.by = "stim", max.cutoff = 3, cols = c("grey", "red"))
+FeaturePlot(immune.combined.sct, features = c("CD3D", "GNLY", "IFI6"), 
+            split.by = "stim", max.cutoff = 3, cols = c("grey", "red"))
 ```
 
 ```{r splitvln, fig.height = 12}
-plots <- VlnPlot(immune.combined.sct, features = c("LYZ", "ISG15", "CXCL10"), split.by = "stim", group.by = "seurat_annotations", pt.size = 0, combine = FALSE)
+plots <- VlnPlot(immune.combined.sct, features = c("LYZ", "ISG15", "CXCL10"), 
+                 split.by = "stim", group.by = "seurat_annotations", pt.size = 0, combine = FALSE)
 wrap_plots(plots = plots, ncol = 1)
 ```
 

diff --git a/vignettes/vignettes.yaml b/vignettes/vignettes.yaml
@@ -23,31 +23,31 @@
     - title: Introduction to scRNA-seq integration
       name: integration_introduction
       summary: |
-        An introduction to integrating scRNA-seq datasets in order to identify and compare shared cell types across experiments
+        An introduction to integrating scRNA-seq datasets in order to identify and compare shared cell types across experiments.
       image: pbmc_alignment.jpg
 
     - title: Mapping and annotating query datasets
       name: integration_mapping
       summary: |
-        Learn how to map a query scRNA-seq dataset onto a reference in order to automate the annotation and visualization of query cells
+        Learn how to map a query scRNA-seq dataset onto a reference in order to automate the annotation and visualization of query cells.
       image: assets/anchorsb_2018.png
 
     - title: Fast integration using reciprocal PCA (RPCA)
       name: integration_rpca
       summary: |
-        Identify anchors using the reciprocal PCA (rPCA) workflow, which performs a faster and more conservative integration
+        Identify anchors using the reciprocal PCA (rPCA) workflow, which performs a faster and more conservative integration.
       image: rpca_integration.jpg
 
     - title: Tips for integrating large datasets
       name: integration_large_datasets
       summary: |
-        Tips and examples for integrating very large scRNA-seq datasets (including >200,000 cells)
+        Tips and examples for integrating very large scRNA-seq datasets (including >200,000 cells).
       image: bm280k_integrated.jpg
 
     - title: Integrating scRNA-seq and scATAC-seq data
       name: atacseq_integration_vignette
       summary: |
-        Annotate, visualize, and interpret an scATAC-seq experiment using scRNA-seq data from the same biological system
+        Annotate, visualize, and interpret an scATAC-seq experiment using scRNA-seq data from the same biological system.
       image: atacseq_integration_vignette.jpg
 
     - title: Multimodal Reference Mapping
@@ -61,7 +61,7 @@
     - title: Weighted Nearest Neighbor Analysis
       name: weighted_nearest_neighbor_analysis
       summary: |
-        Analyze multimodal single-cell data with weighted nearest neighbor analysis in Seurat v4
+        Analyze multimodal single-cell data with weighted nearest neighbor analysis in Seurat v4.
       image: weighted_nearest_neighbor_analysis.jpg
 
     - title: Mixscape
@@ -76,6 +76,11 @@
         Examples of how to use the SCTransform wrapper in Seurat.
       image: assets/sctransform.png
 
+    - title: sctransform, v2 regularization
+      name: sctransform_v2_vignette
+      summary: |
+        Examples of how to perform normalization, feature selection, integration, and differential expression with an updated version of sctransform.
+      image: assets/sctransform_v2.png
 - category: Other
   vignettes:
     - title: Visualization
@@ -87,7 +92,7 @@
     - title: Cell Cycle Regression
       name: cell_cycle_vignette
       summary: |
-        Mitigate the effects of cell cycle heterogeneity by computing cell cycle phase scores based on marker genes
+        Mitigate the effects of cell cycle heterogeneity by computing cell cycle phase scores based on marker genes.
       image: cell_cycle_vignette.jpg
 
     - title: Differential Expression Testing
-Original file line number
+Diff line change
@@ Expand Up / @@ -1912,7 +1912,7 @@ LocalStruct <- function( @@
     #' }
     #'
     #' @importFrom rlang invoke
-    #'
+    #'
     #' @export
     #' @concept integration
     #'
@@ Expand Down @@