Skip to content

Commit

Permalink
Merge pull request satijalab#6440 from 10XGenomics/feat/xenium
Browse files Browse the repository at this point in the history
Xenium support
  • Loading branch information
AustinHartman authored Nov 17, 2022
2 parents b23ae2e + 2b3584c commit 14115e2
Show file tree
Hide file tree
Showing 5 changed files with 280 additions and 7 deletions.
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -122,4 +122,5 @@ Suggests:
enrichR,
mixtools,
ggrastr,
data.table
data.table,
R.utils
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,7 @@ export(LoadHuBMAPCODEX)
export(LoadNanostring)
export(LoadSTARmap)
export(LoadVizgen)
export(LoadXenium)
export(Loadings)
export(LocalStruct)
export(LogNormalize)
Expand Down Expand Up @@ -289,6 +290,7 @@ export(ReadSTARsolo)
export(ReadSlideSeq)
export(ReadVitessce)
export(ReadVizgen)
export(ReadXenium)
export(Reductions)
export(RegroupIdents)
export(RelativeCounts)
Expand Down
35 changes: 35 additions & 0 deletions R/convenience.R
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,41 @@ LoadVizgen <- function(data.dir, fov, assay = 'Vizgen', z = 3L) {
return(obj)
}

#' @param fov Name under which the field of view (FOV) is stored in the
#' returned object
#' @param assay Name of the assay that holds the gene expression counts; the
#' FOV is associated with this assay
#'
#' @return \code{LoadXenium}: A \code{\link[SeuratObject]{Seurat}} object
#'
#' @importFrom SeuratObject Cells CreateAssayObject CreateCentroids CreateFOV
#' CreateSegmentation CreateSeuratObject
#'
#' @export
#'
#' @rdname ReadXenium
#'
LoadXenium <- function(data.dir, fov = 'fov', assay = 'Xenium') {
  # `outs` is left at the ReadXenium default (counts matrix + molecules);
  # both cell geometry types are requested so the FOV gets centroids and
  # segmentation boundaries.
  data <- ReadXenium(
    data.dir = data.dir,
    type = c("centroids", "segmentations")
  )

  segmentations.data <- list(
    "centroids" = CreateCentroids(data$centroids),
    "segmentation" = CreateSegmentation(data$segmentations)
  )
  coords <- CreateFOV(
    coords = segmentations.data,
    type = c("segmentation", "centroids"),
    molecules = data$microns,
    assay = assay
  )

  xenium.obj <- CreateSeuratObject(
    counts = data$matrix[["Gene Expression"]],
    assay = assay
  )

  # Control/blank feature types are stored as extra assays. Map the assay name
  # used in the object to the feature type name used in the counts list.
  control.assays <- c(
    "BlankCodeword" = "Blank Codeword",
    "ControlCodeword" = "Negative Control Codeword",
    "ControlProbe" = "Negative Control Probe"
  )
  for (assay.name in names(control.assays)) {
    control.counts <- data$matrix[[control.assays[[assay.name]]]]
    # `[[` on a list returns NULL for an absent name; skip that assay instead
    # of failing inside CreateAssayObject.
    if (!is.null(control.counts)) {
      xenium.obj[[assay.name]] <- CreateAssayObject(counts = control.counts)
    }
  }

  xenium.obj[[fov]] <- coords
  return(xenium.obj)
}

#' @param ... Extra parameters passed to \code{DimHeatmap}
#'
#' @rdname DimHeatmap
Expand Down
176 changes: 170 additions & 6 deletions R/preprocessing.R
Original file line number Diff line number Diff line change
Expand Up @@ -792,6 +792,7 @@ Read10X <- function(
strip.suffix = FALSE
) {
full.data <- list()
has_dt <- requireNamespace("data.table", quietly = TRUE) && requireNamespace("R.utils", quietly = TRUE)
for (i in seq_along(along.with = data.dir)) {
run <- data.dir[i]
if (!dir.exists(paths = run)) {
Expand Down Expand Up @@ -820,7 +821,12 @@ Read10X <- function(
stop("Expression matrix file missing. Expecting ", basename(path = matrix.loc))
}
data <- readMM(file = matrix.loc)
cell.barcodes <- read.table(file = barcode.loc, header = FALSE, sep = '\t', row.names = NULL)
if (has_dt) {
cell.barcodes <- as.data.frame(data.table::fread(barcode.loc, header = FALSE))
} else {
cell.barcodes <- read.table(file = barcode.loc, header = FALSE, sep = '\t', row.names = NULL)
}

if (ncol(x = cell.barcodes) > 1) {
cell.names <- cell.barcodes[, cell.column]
} else {
Expand All @@ -843,11 +849,17 @@ Read10X <- function(
} else {
colnames(x = data) <- paste0(names(x = data.dir)[i], "_", cell.names)
}
feature.names <- read.delim(
file = ifelse(test = pre_ver_3, yes = gene.loc, no = features.loc),
header = FALSE,
stringsAsFactors = FALSE
)

if (has_dt) {
feature.names <- as.data.frame(data.table::fread(ifelse(test = pre_ver_3, yes = gene.loc, no = features.loc), header = FALSE))
} else {
feature.names <- read.delim(
file = ifelse(test = pre_ver_3, yes = gene.loc, no = features.loc),
header = FALSE,
stringsAsFactors = FALSE
)
}

if (any(is.na(x = feature.names[, gene.column]))) {
warning(
'Some features names are NA. Replacing NA names with ID from the opposite column requested',
Expand Down Expand Up @@ -1964,6 +1976,158 @@ ReadNanostring <- function(
return(outs)
}

#' Read and Load 10x Genomics Xenium in-situ data
#'
#' @param data.dir Directory containing all Xenium output files with
#' default filenames
#' @param outs Types of molecular outputs to read; choose one or more of:
#' \itemize{
#'  \item \dQuote{matrix}: the counts matrix
#'  \item \dQuote{microns}: molecule coordinates
#' }
#' @param type Type of cell spatial coordinate matrices to read; choose one
#' or more of:
#' \itemize{
#'  \item \dQuote{centroids}: cell centroids in pixel coordinate space
#'  \item \dQuote{segmentations}: cell segmentations in pixel coordinate space
#' }
#' @param mols.qv.threshold Remove transcript molecules with
#' a QV less than this threshold. QV >= 20 is the standard threshold
#' used to construct the cell x gene count matrix.
#'
#' @return \code{ReadXenium}: A named list with one element for each requested
#' value of \code{outs} and \code{type}:
#' \itemize{
#'  \item \dQuote{\code{matrix}}: the expression data as returned by
#'   \code{\link{Read10X}} for the \file{cell_feature_matrix} subdirectory
#'  \item \dQuote{\code{microns}}: a data frame with molecule coordinates
#'   in three columns: \dQuote{x}, \dQuote{y}, and \dQuote{gene}
#'  \item \dQuote{\code{centroids}}: a data frame with cell centroid
#'   coordinates in three columns: \dQuote{x}, \dQuote{y}, and \dQuote{cell}
#'  \item \dQuote{\code{segmentations}}: a data frame with cell boundary
#'   vertices in three columns: \dQuote{cell}, \dQuote{x}, and \dQuote{y}
#' }
#'
#' @export
#' @concept preprocessing
#'
ReadXenium <- function(
  data.dir,
  outs = c("matrix", "microns"),
  type = "centroids",
  mols.qv.threshold = 20
) {
  # Argument checking
  type <- match.arg(
    arg = type,
    choices = c("centroids", "segmentations"),
    several.ok = TRUE
  )

  outs <- match.arg(
    arg = outs,
    choices = c("matrix", "microns"),
    several.ok = TRUE
  )

  # Molecular outputs and cell geometry outputs are read by the same loop
  outs <- c(outs, type)

  # Use data.table::fread when it is available together with R.utils;
  # otherwise fall back to read.csv
  has_dt <- requireNamespace("data.table", quietly = TRUE) &&
    requireNamespace("R.utils", quietly = TRUE)

  # Read one of the gzipped CSV outputs in `data.dir` as a plain data frame.
  # Strings are kept as character in the fallback so both readers agree.
  read_output_csv <- function(filename) {
    path <- file.path(data.dir, filename)
    if (has_dt) {
      return(as.data.frame(data.table::fread(path)))
    }
    read.csv(path, stringsAsFactors = FALSE)
  }

  # simplify = FALSE is required: with the default, sapply() collapses the
  # results into a list-matrix whenever every requested output has the same
  # length (e.g. only 3-column data frames), which breaks `data$centroids`.
  data <- sapply(
    X = outs,
    FUN = function(otype) {
      switch(
        EXPR = otype,
        'matrix' = {
          pmtx <- progressor()
          pmtx(message = 'Reading counts matrix', class = 'sticky', amount = 0)
          matrix <- suppressWarnings(
            Read10X(data.dir = file.path(data.dir, "cell_feature_matrix/"))
          )
          pmtx(type = "finish")
          matrix
        },
        'centroids' = {
          pcents <- progressor()
          pcents(
            message = 'Loading cell centroids',
            class = 'sticky',
            amount = 0
          )
          cell_info <- read_output_csv("cells.csv.gz")
          cell_centroid_df <- data.frame(
            x = cell_info$x_centroid,
            y = cell_info$y_centroid,
            cell = cell_info$cell_id,
            stringsAsFactors = FALSE
          )
          pcents(type = 'finish')
          cell_centroid_df
        },
        'segmentations' = {
          psegs <- progressor()
          psegs(
            message = 'Loading cell segmentations',
            class = 'sticky',
            amount = 0
          )

          # load cell boundaries; columns are renamed positionally
          cell_boundaries_df <- read_output_csv("cell_boundaries.csv.gz")
          names(cell_boundaries_df) <- c("cell", "x", "y")
          psegs(type = "finish")
          cell_boundaries_df
        },
        'microns' = {
          pmicrons <- progressor()
          pmicrons(
            message = "Loading molecule coordinates",
            class = 'sticky',
            amount = 0
          )

          # molecules: keep only transcripts at or above the QV threshold
          transcripts <- read_output_csv("transcripts.csv.gz")
          transcripts <- subset(transcripts, qv >= mols.qv.threshold)

          df <- data.frame(
            x = transcripts$x_location,
            y = transcripts$y_location,
            gene = transcripts$feature_name,
            stringsAsFactors = FALSE
          )
          pmicrons(type = 'finish')
          df
        },
        stop("Unknown Xenium input type: ", otype)
      )
    },
    simplify = FALSE,
    USE.NAMES = TRUE
  )
  return(data)
}

#' Load Slide-seq spatial data
#'
#' @param coord.file Path to csv file containing bead coordinate positions
Expand Down
71 changes: 71 additions & 0 deletions man/ReadXenium.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 14115e2

Please sign in to comment.