change HTODemux: simplify results added in metadata; switch default c…

…lustering to clara
randel · Jun 27, 2018 · 4f39dbe · 4f39dbe
1 parent d206284
commit 4f39dbe
Showing 1 changed file with 22 additions and 22 deletions.
diff --git a/R/multi_modal.R b/R/multi_modal.R
@@ -223,7 +223,7 @@ CollapseSpeciesExpressionMatrix <- function(
 #' @param percent_cutoff The quantile of inferred 'negative' distribution for each HTO - over which the cell is considered 'positive'. Default is 0.999
 #' @param init_centers Initial number of clusters for kmeans of the HTO oligos. Default is the # of samples + 1 (to account for negatives)
 #' @param cluster_nstarts nstarts value for the initial k-means clustering
-#' @param k_function Clustering function for initial HTO grouping. Default is "kmeans", also support "clara" for fast k-medoids clustering on large applications
+#' @param k_function Clustering function for initial HTO grouping. Default is "clara" for fast k-medoids clustering on large applications, also support "kmeans" for kmeans clustering
 #' @param nsamples Number of samples to be drawn from the dataset used for clustering, for k_function = "clara"
 #' @param print.output Prints the output
 #' @param assay.type Naming of HTO assay
@@ -244,7 +244,7 @@ HTODemux <- function(
   percent_cutoff = 0.999,
   init_centers = NULL,
   cluster_nstarts = 100,
-  k_function = "kmeans",
+  k_function = "clara",
   nsamples = 100,
   print.output = TRUE,
   assay.type = "HTO",
@@ -303,12 +303,7 @@ HTODemux <- function(
   rownames(x = hto_prob_pos) <- paste(rownames(x = hto_prob_pos), "pos", sep = "_")
   hto_prob_neg <- hto_discrete
   rownames(x = hto_prob_neg) <- paste(rownames(x = hto_prob_neg), "neg", sep = "_")
-  hash_data <- GetAssayData(object = object, assay.type = assay.type)
-  hash_raw_data <- GetAssayData(
-    object = object,
-    assay.type = assay.type,
-    slot = "raw.data"
-  )
+
   # for each HTO, we will use the minimum cluster for fitting
   # we will also store positive and negative distributions for all cells after determining the cutoff
   hto_dist_pos = list()
@@ -365,7 +360,6 @@ HTODemux <- function(
   donor_id = rownames(x = hash_data)
   hash_max <- apply(X = hash_data, MARGIN = 2, FUN = max)
   hash_maxID <- apply(X = hash_data, MARGIN = 2, FUN = which.max)
-  # hash_second <- apply(X = hash_data, MARGIN = 2, FUN = function(x) MaxN(x, 2))
   hash_second <- apply(X = hash_data, MARGIN = 2, FUN = MaxN, N = 2)
   hash_maxID <- as.character(x = donor_id[sapply(
     X = 1:ncol(x = hash_data),
@@ -387,42 +381,46 @@ HTODemux <- function(
   hto_classification[hto_classification_global == "Singlet"] <- hash_maxID[which(hto_classification_global == "Singlet")]
   hto_classification[hto_classification_global == "Doublet"] <- doublet_id[which(hto_classification_global == "Doublet")]
   classification_metadata <- data.frame(
-    hash_max,
     hash_maxID,
-    hash_second,
     hash_secondID, hash_margin,
     hto_classification,
     hto_classification_global
   )
   object <- AddMetaData(object = object, metadata = classification_metadata)
-  hto_shortID <- paste(object@meta.data$hash_maxID, object@meta.data$hto_classification_global, sep = "_")
-  names(x = hto_shortID) <- object@cell.names
-  object <- AddMetaData(object = object, metadata = hto_shortID, col.name = "hto_shortID")
-  object <- SetAllIdent(object = object, id = "hto_shortID")
+  # hto_shortID <- paste([email protected]$hash_maxID, [email protected]$hto_classification_global, sep = "_")
+  # names(x = hto_shortID) <- [email protected]
+  # object <- AddMetaData(object = object, metadata = hto_shortID, col.name = "hto_shortID")
+  # object <- SetAllIdent(object = object, id = "hto_shortID")
   object_pn <- data.frame(t(x = rbind(hto_prob_neg, hto_prob_pos)))
-  object <- AddMetaData(object = object, metadata = object_pn)
+  # object <- AddMetaData(object = object, metadata = object_pn)
   confidence_threshold <- log(x = confidence_threshold)
   object@meta.data$confidence <- "Confident"
-  singlet_data <- subset(x = object@meta.data, subset = hto_classification_global == "Singlet")
+
+  singlet_cells <- WhichCells(object = object,subset.name = "hto_classification_global",accept.value = "Singlet")
+  singlet_data <- cbind(object_pn[singlet_cells,],object@meta.data[singlet_cells,"hash_maxID"])
+  colnames(singlet_data)[ncol(singlet_data)] <- "hash_maxID"
   singlet_pos <- sapply(
     X = 1:nrow(x = singlet_data),
     FUN = function(x) {
       return(singlet_data[
         x,
         paste(as.character(x = singlet_data[x, "hash_maxID"]), "pos", sep = "_")
-      ])
+        ])
     }
   )
   names(x = singlet_pos) <- rownames(x = singlet_data)
   object@meta.data[names(x = which(x = singlet_pos < confidence_threshold)), "confidence"] <- "Uncertain"
-  doublet_data <- subset(x = object@meta.data, hto_classification_global == "Doublet")
+
+  doublet_cells <- WhichCells(object = object,subset.name = "hto_classification_global",accept.value = "Doublet")
+  doublet_data <- cbind(object_pn[doublet_cells,],object@meta.data[doublet_cells,c("hash_maxID","hash_secondID")])
+  colnames(doublet_data)[c(ncol(doublet_data)-1,ncol(doublet_data))] <- c("hash_maxID","hash_secondID")
   doublet_pos <- sapply(
     X = 1:nrow(x = doublet_data),
     FUN = function(x) {
       return(doublet_data[
         x,
         paste(as.character(x = doublet_data[x, "hash_maxID"]), "pos", sep = "_")
-      ])
+        ])
     }
   )
   names(x = doublet_pos) <- rownames(x = doublet_data)
@@ -433,14 +431,16 @@ HTODemux <- function(
       return(doublet_data[
         x,
         paste(as.character(x = doublet_data[x, "hash_secondID"]), "pos", sep = "_")
-      ])
+        ])
     }
   )
   names(x = doublet_pos) <- rownames(x = doublet_data)
   object@meta.data[names(x = which(x = doublet_pos < confidence_threshold)), "confidence"] <- "Uncertain"
   if (print.output) {
     print(x = table(object@meta.data$hto_classification_global, object@meta.data$confidence))
   }
+  object=SetAllIdent(object = object,id = "hto_classification")
+
   return(object)
 }
 
@@ -596,7 +596,7 @@ HTOHeatmap <- function(
 
   objmini@ident <- factor(objmini@ident, c(singlet_id,"Multiplet","Negative"))
   cells.ordered=as.character(unlist(sapply(heatmap_levels,function(x) sample(FastWhichCells(objmini,group.by = hto.classification,x)))))
-  objmini <- ScaleData(objmini,assay.type = assay.type)
+  objmini <- ScaleData(objmini,assay.type = assay.type,display.progress = FALSE)
 
   if (!is.null(singlet.names)){
     levels(objmini@ident) <- c(singlet.names, "Multiplet", "Negative")