Skip to content

Commit

Permalink
Merge pull request satijalab#2 from jsicherman/jordan.sicherman/xenium
Browse files Browse the repository at this point in the history
Optional loading, and data.table
  • Loading branch information
pmarks authored Nov 9, 2022
2 parents 7400d58 + ec44c18 commit 425a0ab
Show file tree
Hide file tree
Showing 4 changed files with 136 additions and 29 deletions.
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -122,4 +122,5 @@ Suggests:
enrichR,
mixtools,
ggrastr,
data.table
data.table,
R.utils
6 changes: 5 additions & 1 deletion R/convenience.R
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ LoadVizgen <- function(data.dir, fov, assay = 'Vizgen', z = 3L) {
#'
#' @rdname ReadXenium
#'
LoadXenium <- function(data.dir, fov, assay = 'Xenium') {
LoadXenium <- function(data.dir, fov = 'fov', assay = 'Xenium') {
data <- ReadXenium(
data.dir = data.dir,
type = c("centroids", "segmentations"),
Expand All @@ -197,6 +197,10 @@ LoadXenium <- function(data.dir, fov, assay = 'Xenium') {
)

xenium.obj <- CreateSeuratObject(counts = data$matrix[["Gene Expression"]], assay = assay)
xenium.obj[["BLANK"]] <- CreateAssayObject(counts = data$matrix[["Blank Codeword"]])
xenium.obj[["DUMMY"]] <- CreateAssayObject(counts = data$matrix[["Negative Control Codeword"]])
xenium.obj[["ERCC"]] <- CreateAssayObject(counts = data$matrix[["Negative Control Probe"]])

xenium.obj[[fov]] <- coords
return(xenium.obj)
}
Expand Down
85 changes: 58 additions & 27 deletions R/preprocessing.R
Original file line number Diff line number Diff line change
Expand Up @@ -787,6 +787,7 @@ Read10X <- function(
strip.suffix = FALSE
) {
full.data <- list()
has_dt <- requireNamespace("data.table", quietly = TRUE) && requireNamespace("R.utils", quietly = TRUE)
for (i in seq_along(along.with = data.dir)) {
run <- data.dir[i]
if (!dir.exists(paths = run)) {
Expand Down Expand Up @@ -815,7 +816,12 @@ Read10X <- function(
stop("Expression matrix file missing. Expecting ", basename(path = matrix.loc))
}
data <- readMM(file = matrix.loc)
cell.barcodes <- read.table(file = barcode.loc, header = FALSE, sep = '\t', row.names = NULL)
if (has_dt) {
cell.barcodes <- as.data.frame(data.table::fread(barcode.loc, header = FALSE))
} else {
cell.barcodes <- read.table(file = barcode.loc, header = FALSE, sep = '\t', row.names = NULL)
}

if (ncol(x = cell.barcodes) > 1) {
cell.names <- cell.barcodes[, cell.column]
} else {
Expand All @@ -838,11 +844,17 @@ Read10X <- function(
} else {
colnames(x = data) <- paste0(names(x = data.dir)[i], "_", cell.names)
}
feature.names <- read.delim(
file = ifelse(test = pre_ver_3, yes = gene.loc, no = features.loc),
header = FALSE,
stringsAsFactors = FALSE
)

if (has_dt) {
feature.names <- as.data.frame(data.table::fread(ifelse(test = pre_ver_3, yes = gene.loc, no = features.loc), header = FALSE))
} else {
feature.names <- read.delim(
file = ifelse(test = pre_ver_3, yes = gene.loc, no = features.loc),
header = FALSE,
stringsAsFactors = FALSE
)
}

if (any(is.na(x = feature.names[, gene.column]))) {
warning(
'Some features names are NA. Replacing NA names with ID from the opposite column requested',
Expand Down Expand Up @@ -1953,14 +1965,17 @@ ReadNanostring <- function(
#'
#' @param data.dir Directory containing all Xenium output files with
#' default filenames
#' @param outs Types of molecular outputs to read; choose one or more of:
#' \itemize{
#' \item \dQuote{matrix}: the counts matrix
#' \item \dQuote{microns}: molecule coordinates
#' }
#' @param type Type of cell spatial coordinate matrices to read; choose one
#' or more of:
#' \itemize{
#' \item \dQuote{centroids}: cell centroids in pixel coordinate space
#' \item \dQuote{segmentations}: cell segmentations in pixel coordinate space
#' }
#' @param read.mols Whether or not to read per-transcript data. Can be very
#' slow and consume a lot of memory for large datasets.
#' @param mols.qv.threshold Remove transcript molecules with
#' a QV less than this threshold. QV >= 20 is the standard threshold
#' used to construct the cell x gene count matrix.
Expand Down Expand Up @@ -1995,25 +2010,29 @@ ReadNanostring <- function(
#'
ReadXenium <- function(
data.dir,
type = 'centroids',
read.mols = TRUE,
outs = c("matrix", "microns"),
type = "centroids",
mols.qv.threshold = 20
) {

# Argument checking
type <- match.arg(
arg = type,
choices = c('centroids', 'segmentations'),
choices = c("centroids", "segmentations"),
several.ok = TRUE
)

outs <- list("matrix"=NULL, "microns"=NULL, "centroids"=NULL)
if ("segmentations" %in% type) {
outs <- append(outs, list("segmentations" = NULL))
}
outs <- match.arg(
arg = outs,
choices = c("matrix", "microns"),
several.ok = TRUE
)

for (otype in names(x = outs)) {
outs[[otype]] <- switch(
outs <- c(outs, type)

has_dt <- requireNamespace("data.table", quietly = TRUE) && requireNamespace("R.utils", quietly = TRUE)

data <- sapply(outs, function(otype) {
switch(
EXPR = otype,
'matrix' = {
pmtx <- progressor()
Expand All @@ -2029,7 +2048,11 @@ ReadXenium <- function(
class = 'sticky',
amount = 0
)
cell_info <- read.csv(file.path(data.dir, "cells.csv.gz"))
if (has_dt) {
cell_info <- as.data.frame(data.table::fread(file.path(data.dir, "cells.csv.gz")))
} else {
cell_info <- read.csv(file.path(data.dir, "cells.csv.gz"))
}
cell_centroid_df <- data.frame(
x = cell_info$x_centroid,
y = cell_info$y_centroid,
Expand All @@ -2048,7 +2071,11 @@ ReadXenium <- function(
)

# load cell boundaries
cell_boundaries_df <- read.csv(file.path(data.dir, "cell_boundaries.csv.gz"), stringsAsFactors = FALSE)
if (has_dt) {
cell_boundaries_df <- as.data.frame(data.table::fread(file.path(data.dir, "cell_boundaries.csv.gz")))
} else {
cell_boundaries_df <- read.csv(file.path(data.dir, "cell_boundaries.csv.gz"), stringsAsFactors = FALSE)
}
names(cell_boundaries_df) <- c("cell", "x", "y")
psegs(type = "finish")
cell_boundaries_df
Expand All @@ -2061,11 +2088,15 @@ ReadXenium <- function(
amount = 0
)

# molecules
transcripts <- read.csv(file.path(data.dir, "transcripts.csv.gz"))
transcripts <- subset(transcripts, qv >= mols.qv.threshold)
# molecules
if (has_dt) {
transcripts <- as.data.frame(data.table::fread(file.path(data.dir, "transcripts.csv.gz"))[qv >= mols.qv.threshold])
} else {
transcripts <- read.csv(file.path(data.dir, "transcripts.csv.gz"))
transcripts <- subset(transcripts, qv >= mols.qv.threshold)
}

df <-
df <-
data.frame(
x = transcripts$x_location,
y = transcripts$y_location,
Expand All @@ -2075,10 +2106,10 @@ ReadXenium <- function(
pmicrons(type = 'finish')
df
},
stop("Unknown Xenium input type: ", outs[[otype]])
stop("Unknown Xenium input type: ", otype)
)
}
return(outs)
}, USE.NAMES = TRUE)
return(data)
}

#' Load Slide-seq spatial data
Expand Down
71 changes: 71 additions & 0 deletions man/ReadXenium.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 425a0ab

Please sign in to comment.