Merge pull request satijalab#6440 from 10XGenomics/feat/xenium

Xenium support
wicky-dot · Nov 17, 2022 · 14115e2 · 14115e2
2 parents b23ae2e + 2b3584c
commit 14115e2
Show file tree

Hide file tree

Showing 5 changed files with 280 additions and 7 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -122,4 +122,5 @@ Suggests:
     enrichR,
     mixtools,
     ggrastr,
-    data.table
+    data.table,
+    R.utils
diff --git a/NAMESPACE b/NAMESPACE
@@ -239,6 +239,7 @@ export(LoadHuBMAPCODEX)
 export(LoadNanostring)
 export(LoadSTARmap)
 export(LoadVizgen)
+export(LoadXenium)
 export(Loadings)
 export(LocalStruct)
 export(LogNormalize)
@@ -289,6 +290,7 @@ export(ReadSTARsolo)
 export(ReadSlideSeq)
 export(ReadVitessce)
 export(ReadVizgen)
+export(ReadXenium)
 export(Reductions)
 export(RegroupIdents)
 export(RelativeCounts)

diff --git a/R/convenience.R b/R/convenience.R
@@ -170,6 +170,41 @@ LoadVizgen <- function(data.dir, fov, assay = 'Vizgen', z = 3L) {
   return(obj)
 }
 
+#' @param fov Name to store the field of view (FOV) under in the object
+#' @param assay Name of the assay that holds the gene expression counts
+#'
+#' @return \code{LoadXenium}: A \code{\link[SeuratObject]{Seurat}} object
+#'
+#' @importFrom SeuratObject Cells CreateCentroids CreateFOV
+#' CreateSegmentation CreateSeuratObject
+#'
+#' @export
+#'
+#' @rdname ReadXenium
+#'
+LoadXenium <- function(data.dir, fov = 'fov', assay = 'Xenium') {
+  # Default `outs` also brings in the counts matrix and molecule coordinates
+  data <- ReadXenium(data.dir = data.dir, type = c("centroids", "segmentations"))
+  # Spatial layers for the FOV: both cell boundaries and centroids
+  segmentations.data <- list(
+    "centroids" = CreateCentroids(data$centroids),
+    "segmentation" = CreateSegmentation(data$segmentations)
+  )
+  coords <- CreateFOV(
+    coords = segmentations.data,
+    type = c("segmentation", "centroids"),
+    molecules = data$microns,
+    assay = assay
+  )
+  # Gene expression is the main assay; blank/control features get their own
+  xenium.obj <- CreateSeuratObject(counts = data$matrix[["Gene Expression"]], assay = assay)
+  xenium.obj[["BlankCodeword"]] <- CreateAssayObject(counts = data$matrix[["Blank Codeword"]])
+  xenium.obj[["ControlCodeword"]] <- CreateAssayObject(counts = data$matrix[["Negative Control Codeword"]])
+  xenium.obj[["ControlProbe"]] <- CreateAssayObject(counts = data$matrix[["Negative Control Probe"]])
+  xenium.obj[[fov]] <- coords
+  return(xenium.obj)
+}
+
 #' @param ... Extra parameters passed to \code{DimHeatmap}
 #'
 #' @rdname DimHeatmap

diff --git a/R/preprocessing.R b/R/preprocessing.R
@@ -792,6 +792,7 @@ Read10X <- function(
   strip.suffix = FALSE
 ) {
   full.data <- list()
+  has_dt <- requireNamespace("data.table", quietly = TRUE) && requireNamespace("R.utils", quietly = TRUE)
   for (i in seq_along(along.with = data.dir)) {
     run <- data.dir[i]
     if (!dir.exists(paths = run)) {
@@ -820,7 +821,12 @@ Read10X <- function(
       stop("Expression matrix file missing. Expecting ", basename(path = matrix.loc))
     }
     data <- readMM(file = matrix.loc)
-    cell.barcodes <- read.table(file = barcode.loc, header = FALSE, sep = '\t', row.names = NULL)
+    if (has_dt) {
+      cell.barcodes <- as.data.frame(data.table::fread(barcode.loc, header = FALSE))
+    } else {
+      cell.barcodes <- read.table(file = barcode.loc, header = FALSE, sep = '\t', row.names = NULL)
+    }
+
     if (ncol(x = cell.barcodes) > 1) {
       cell.names <- cell.barcodes[, cell.column]
     } else {
@@ -843,11 +849,17 @@ Read10X <- function(
     } else {
       colnames(x = data) <- paste0(names(x = data.dir)[i], "_", cell.names)
     }
-    feature.names <- read.delim(
-      file = ifelse(test = pre_ver_3, yes = gene.loc, no = features.loc),
-      header = FALSE,
-      stringsAsFactors = FALSE
-    )
+
+    if (has_dt) {
+      feature.names <- as.data.frame(data.table::fread(ifelse(test = pre_ver_3, yes = gene.loc, no = features.loc), header = FALSE))
+    } else {
+      feature.names <- read.delim(
+        file = ifelse(test = pre_ver_3, yes = gene.loc, no = features.loc),
+        header = FALSE,
+        stringsAsFactors = FALSE
+      )
+    }
+
     if (any(is.na(x = feature.names[, gene.column]))) {
       warning(
         'Some features names are NA. Replacing NA names with ID from the opposite column requested',
@@ -1964,6 +1976,158 @@ ReadNanostring <- function(
   return(outs)
 }
 
+#' Read and Load 10x Genomics Xenium in-situ data
+#'
+#' @param data.dir Directory containing all Xenium output files with
+#' default filenames
+#' @param outs Types of molecular outputs to read; choose one or more of:
+#' \itemize{
+#'  \item \dQuote{matrix}: the counts matrix
+#'  \item \dQuote{microns}: molecule coordinates
+#' }
+#' @param type Type of cell spatial coordinate matrices to read; choose one
+#' or more of:
+#' \itemize{
+#'  \item \dQuote{centroids}: cell centroids in pixel coordinate space
+#'  \item \dQuote{segmentations}: cell segmentations in pixel coordinate space
+#' }
+#' @param mols.qv.threshold Remove transcript molecules with
+#' a QV less than this threshold. QV >= 20 is the standard threshold
+#' used to construct the cell x gene count matrix.
+#'
+#' @details Depending on \code{outs} and \code{type}, the following are read
+#' from \code{data.dir}: the \dQuote{cell_feature_matrix} directory,
+#' \dQuote{cells.csv.gz}, \dQuote{cell_boundaries.csv.gz}, and
+#' \dQuote{transcripts.csv.gz}. The CSV files are read with
+#' \code{data.table::fread} when both \pkg{data.table} and \pkg{R.utils} are
+#' installed, and with \code{read.csv} otherwise.
+#'
+#' @return \code{ReadXenium}: A named list with one element per requested
+#' value of \code{outs} and \code{type}:
+#' \itemize{
+#'  \item \dQuote{\code{matrix}}: expression data as returned by
+#'   \code{\link{Read10X}}; cells are columns and features are rows
+#'  \item \dQuote{\code{microns}}: a data frame with molecule coordinates
+#'   in three columns: \dQuote{x}, \dQuote{y}, and \dQuote{gene}
+#'  \item \dQuote{\code{centroids}}: a data frame with cell centroid
+#'   coordinates in three columns: \dQuote{x}, \dQuote{y}, and \dQuote{cell}
+#'  \item \dQuote{\code{segmentations}}: a data frame with cell boundary
+#'   vertices in three columns: \dQuote{cell}, \dQuote{x}, and \dQuote{y}
+#' }
+#'
+#' @export
+#' @concept preprocessing
+#'
+ReadXenium <- function(
+  data.dir,
+  outs = c("matrix", "microns"),
+  type = "centroids",
+  mols.qv.threshold = 20
+) {
+  # Argument checking
+  type <- match.arg(
+    arg = type,
+    choices = c("centroids", "segmentations"),
+    several.ok = TRUE
+  )
+
+  outs <- match.arg(
+    arg = outs,
+    choices = c("matrix", "microns"),
+    several.ok = TRUE
+  )
+
+  # Molecular outputs first, then the spatial coordinate types
+  outs <- c(outs, type)
+
+  # fread() is only used when R.utils is present too (needed for .gz input)
+  has_dt <- requireNamespace("data.table", quietly = TRUE) && requireNamespace("R.utils", quietly = TRUE)
+
+  # Read one gzipped CSV from data.dir into a plain data frame
+  read_csv_gz <- function(filename) {
+    path <- file.path(data.dir, filename)
+    if (has_dt) {
+      return(as.data.frame(data.table::fread(path)))
+    }
+    read.csv(path, stringsAsFactors = FALSE)
+  }
+
+  # simplify = FALSE: always return a named list; without it sapply() folds
+  # equal-length results (e.g. two 3-column data frames) into a list-matrix
+  data <- sapply(outs, function(otype) {
+    switch(
+      EXPR = otype,
+      'matrix' = {
+        # 10x-format matrix directory inside the Xenium output
+        pmtx <- progressor()
+        pmtx(message = 'Reading counts matrix', class = 'sticky', amount = 0)
+        counts <- suppressWarnings(Read10X(data.dir = file.path(data.dir, "cell_feature_matrix/")))
+        pmtx(type = "finish")
+        counts
+      },
+      'centroids' = {
+        pcents <- progressor()
+        pcents(
+          message = 'Loading cell centroids',
+          class = 'sticky',
+          amount = 0
+        )
+        cell_info <- read_csv_gz("cells.csv.gz")
+        # keep only the centroid coordinates and the cell ID
+        cell_centroid_df <- data.frame(
+          x = cell_info$x_centroid,
+          y = cell_info$y_centroid,
+          cell = cell_info$cell_id,
+          stringsAsFactors = FALSE
+        )
+        pcents(type = 'finish')
+        cell_centroid_df
+      },
+      'segmentations' = {
+        psegs <- progressor()
+        psegs(
+          message = 'Loading cell segmentations',
+          class = 'sticky',
+          amount = 0
+        )
+
+        # load cell boundaries
+        cell_boundaries_df <- read_csv_gz("cell_boundaries.csv.gz")
+        # columns are renamed by position, not matched by name
+        names(cell_boundaries_df) <- c("cell", "x", "y")
+        psegs(type = "finish")
+        cell_boundaries_df
+      },
+      'microns' = {
+        pmicrons <- progressor()
+        pmicrons(
+          message = "Loading molecule coordinates",
+          class = 'sticky',
+          amount = 0
+        )
+
+        # molecules; drop those below the QV threshold
+        transcripts <- subset(
+          read_csv_gz("transcripts.csv.gz"),
+          qv >= mols.qv.threshold
+        )
+
+        df <-
+          data.frame(
+            x = transcripts$x_location,
+            y = transcripts$y_location,
+            gene = transcripts$feature_name,
+            stringsAsFactors = FALSE
+          )
+        pmicrons(type = 'finish')
+        df
+      },
+      stop("Unknown Xenium input type: ", otype)
+    )
+  }, simplify = FALSE, USE.NAMES = TRUE)
+  return(data)
+}
+
 #' Load Slide-seq spatial data
 #'
 #' @param coord.file Path to csv file containing bead coordinate positions

diff --git a/man/ReadXenium.Rd b/man/ReadXenium.Rd