forked from satijalab/seurat
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
change HTODemux: simplify results added in metadata; switch default c…
…lustering to clara
- Loading branch information
Shiwei Zheng
committed
Jun 27, 2018
1 parent
d206284
commit 4f39dbe
Showing
1 changed file
with
22 additions
and
22 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -223,7 +223,7 @@ CollapseSpeciesExpressionMatrix <- function( | |
#' @param percent_cutoff The quantile of inferred 'negative' distribution for each HTO - over which the cell is considered 'positive'. Default is 0.999 | ||
#' @param init_centers Initial number of clusters for kmeans of the HTO oligos. Default is the # of samples + 1 (to account for negatives) | ||
#' @param cluster_nstarts nstarts value for the initial k-means clustering | ||
#' @param k_function Clustering function for initial HTO grouping. Default is "kmeans", also support "clara" for fast k-medoids clustering on large applications | ||
#' @param k_function Clustering function for initial HTO grouping. Default is "clara" for fast k-medoids clustering on large applications, also support "kmeans" for kmeans clustering | ||
#' @param nsamples Number of samples to be drawn from the dataset used for clustering, for k_function = "clara" | ||
#' @param print.output Prints the output | ||
#' @param assay.type Naming of HTO assay | ||
|
@@ -244,7 +244,7 @@ HTODemux <- function( | |
percent_cutoff = 0.999, | ||
init_centers = NULL, | ||
cluster_nstarts = 100, | ||
k_function = "kmeans", | ||
k_function = "clara", | ||
nsamples = 100, | ||
print.output = TRUE, | ||
assay.type = "HTO", | ||
|
@@ -303,12 +303,7 @@ HTODemux <- function( | |
rownames(x = hto_prob_pos) <- paste(rownames(x = hto_prob_pos), "pos", sep = "_") | ||
hto_prob_neg <- hto_discrete | ||
rownames(x = hto_prob_neg) <- paste(rownames(x = hto_prob_neg), "neg", sep = "_") | ||
hash_data <- GetAssayData(object = object, assay.type = assay.type) | ||
hash_raw_data <- GetAssayData( | ||
object = object, | ||
assay.type = assay.type, | ||
slot = "raw.data" | ||
) | ||
|
||
# for each HTO, we will use the minimum cluster for fitting | ||
# we will also store positive and negative distributions for all cells after determining the cutoff | ||
hto_dist_pos = list() | ||
|
@@ -365,7 +360,6 @@ HTODemux <- function( | |
donor_id = rownames(x = hash_data) | ||
hash_max <- apply(X = hash_data, MARGIN = 2, FUN = max) | ||
hash_maxID <- apply(X = hash_data, MARGIN = 2, FUN = which.max) | ||
# hash_second <- apply(X = hash_data, MARGIN = 2, FUN = function(x) MaxN(x, 2)) | ||
hash_second <- apply(X = hash_data, MARGIN = 2, FUN = MaxN, N = 2) | ||
hash_maxID <- as.character(x = donor_id[sapply( | ||
X = 1:ncol(x = hash_data), | ||
|
@@ -387,42 +381,46 @@ HTODemux <- function( | |
hto_classification[hto_classification_global == "Singlet"] <- hash_maxID[which(hto_classification_global == "Singlet")] | ||
hto_classification[hto_classification_global == "Doublet"] <- doublet_id[which(hto_classification_global == "Doublet")] | ||
classification_metadata <- data.frame( | ||
hash_max, | ||
hash_maxID, | ||
hash_second, | ||
hash_secondID, hash_margin, | ||
hto_classification, | ||
hto_classification_global | ||
) | ||
object <- AddMetaData(object = object, metadata = classification_metadata) | ||
hto_shortID <- paste(object@meta.data$hash_maxID, object@meta.data$hto_classification_global, sep = "_") | ||
names(x = hto_shortID) <- object@cell.names | ||
object <- AddMetaData(object = object, metadata = hto_shortID, col.name = "hto_shortID") | ||
object <- SetAllIdent(object = object, id = "hto_shortID") | ||
# hto_shortID <- paste([email protected]$hash_maxID, [email protected]$hto_classification_global, sep = "_") | ||
# names(x = hto_shortID) <- [email protected] | ||
# object <- AddMetaData(object = object, metadata = hto_shortID, col.name = "hto_shortID") | ||
# object <- SetAllIdent(object = object, id = "hto_shortID") | ||
object_pn <- data.frame(t(x = rbind(hto_prob_neg, hto_prob_pos))) | ||
object <- AddMetaData(object = object, metadata = object_pn) | ||
# object <- AddMetaData(object = object, metadata = object_pn) | ||
confidence_threshold <- log(x = confidence_threshold) | ||
object@meta.data$confidence <- "Confident" | ||
singlet_data <- subset(x = object@meta.data, subset = hto_classification_global == "Singlet") | ||
|
||
singlet_cells <- WhichCells(object = object,subset.name = "hto_classification_global",accept.value = "Singlet") | ||
singlet_data <- cbind(object_pn[singlet_cells,],object@meta.data[singlet_cells,"hash_maxID"]) | ||
colnames(singlet_data)[ncol(singlet_data)] <- "hash_maxID" | ||
singlet_pos <- sapply( | ||
X = 1:nrow(x = singlet_data), | ||
FUN = function(x) { | ||
return(singlet_data[ | ||
x, | ||
paste(as.character(x = singlet_data[x, "hash_maxID"]), "pos", sep = "_") | ||
]) | ||
]) | ||
} | ||
) | ||
names(x = singlet_pos) <- rownames(x = singlet_data) | ||
object@meta.data[names(x = which(x = singlet_pos < confidence_threshold)), "confidence"] <- "Uncertain" | ||
doublet_data <- subset(x = object@meta.data, hto_classification_global == "Doublet") | ||
|
||
doublet_cells <- WhichCells(object = object,subset.name = "hto_classification_global",accept.value = "Doublet") | ||
doublet_data <- cbind(object_pn[doublet_cells,],object@meta.data[doublet_cells,c("hash_maxID","hash_secondID")]) | ||
colnames(doublet_data)[c(ncol(doublet_data)-1,ncol(doublet_data))] <- c("hash_maxID","hash_secondID") | ||
doublet_pos <- sapply( | ||
X = 1:nrow(x = doublet_data), | ||
FUN = function(x) { | ||
return(doublet_data[ | ||
x, | ||
paste(as.character(x = doublet_data[x, "hash_maxID"]), "pos", sep = "_") | ||
]) | ||
]) | ||
} | ||
) | ||
names(x = doublet_pos) <- rownames(x = doublet_data) | ||
|
@@ -433,14 +431,16 @@ HTODemux <- function( | |
return(doublet_data[ | ||
x, | ||
paste(as.character(x = doublet_data[x, "hash_secondID"]), "pos", sep = "_") | ||
]) | ||
]) | ||
} | ||
) | ||
names(x = doublet_pos) <- rownames(x = doublet_data) | ||
object@meta.data[names(x = which(x = doublet_pos < confidence_threshold)), "confidence"] <- "Uncertain" | ||
if (print.output) { | ||
print(x = table(object@meta.data$hto_classification_global, object@meta.data$confidence)) | ||
} | ||
object=SetAllIdent(object = object,id = "hto_classification") | ||
|
||
return(object) | ||
} | ||
|
||
|
@@ -596,7 +596,7 @@ HTOHeatmap <- function( | |
|
||
objmini@ident <- factor(objmini@ident, c(singlet_id,"Multiplet","Negative")) | ||
cells.ordered=as.character(unlist(sapply(heatmap_levels,function(x) sample(FastWhichCells(objmini,group.by = hto.classification,x))))) | ||
objmini <- ScaleData(objmini,assay.type = assay.type) | ||
objmini <- ScaleData(objmini,assay.type = assay.type,display.progress = FALSE) | ||
|
||
if (!is.null(singlet.names)){ | ||
levels(objmini@ident) <- c(singlet.names, "Multiplet", "Negative") | ||
|