R/snn.R

#' @include seurat.R
NULL
#' SNN Graph Construction
#'
#' Constructs a Shared Nearest Neighbor (SNN) Graph for a given dataset. We
#' first determine the k-nearest neighbors of each cell (defined by k.param *
#' k.scale). We use this knn graph to construct the SNN graph by calculating the
#' neighborhood overlap (Jaccard distance) between every cell and its k.param *
#' k.scale nearest neighbors (defining the neighborhood for each cell as the
#' k.param nearest neighbors).
#'
#' @param object Seurat object
#' @param genes.use A vector of gene names to use in construction of SNN graph
#' if building directly based on expression data rather than a dimensionally
#' reduced representation (i.e. PCs).
#' @param reduction.type Name of dimensional reduction technique to use in
#' construction of SNN graph. (e.g. "pca", "ica")
#' @param dims.use A vector of the dimensions to use in construction of the SNN
#' graph (e.g. To use the first 10 PCs, pass 1:10)
#' @param k.param Defines k for the k-nearest neighbor algorithm
#' @param k.scale Granularity option for k.param
#' @param plot.SNN Plot the SNN graph
#' @param prune.SNN Sets the cutoff for acceptable Jaccard distances when
#' computing the neighborhood overlap for the SNN construction. Any edges with
#' values less than or equal to this will be set to 0 and removed from the SNN
#' graph. Essentially sets the strigency of pruning (0 --- no pruning, 1 ---
#' prune everything).
#' @param print.output Whether or not to print output to the console
#' @param distance.matrix Build SNN from distance matrix (experimental)
#' @param force.recalc Force recalculation of SNN.
#' @importFrom FNN get.knn
#' @importFrom igraph plot.igraph graph.adjlist graph.adjacency E
#' @importFrom Matrix sparseMatrix
#' @return Returns the object with object@@snn filled
#' @export
#'
#' @examples
#'
#' pbmc_small
#' # Compute an SNN on the gene expression level
#' pbmc_small <- BuildSNN(pbmc_small, genes.use = pbmc_small@var.genes)
#'
#' # More commonly, we build the SNN on a dimensionally reduced form of the data
#' # such as the first 10 principle components.
#'
#' pbmc_small <- BuildSNN(pbmc_small, reduction.type = "pca", dims.use = 1:10)
#'
BuildSNN <- function(
  object,
  genes.use = NULL,
  reduction.type = "pca",
  dims.use = NULL,
  k.param = 10,
  k.scale = 10,
  plot.SNN = FALSE,
  prune.SNN = 1/15,
  print.output = TRUE,
  distance.matrix = NULL,
  force.recalc = FALSE
) {
  if (! is.null(x = distance.matrix)) {
    data.use <- distance.matrix
    force.recalc <- TRUE
  } else if (is.null(x = dims.use)) {
    genes.use <- SetIfNull(x = genes.use, default = object@var.genes)
    data.use <- t(x = as.matrix(x = object@data[genes.use, ]))
  } else {
    data.use <- GetCellEmbeddings(object = object,
                                  reduction.type = reduction.type,
                                  dims.use = dims.use)
  }
  parameters.to.store <- as.list(environment(), all = TRUE)[names(formals("BuildSNN"))]
  parameters.to.store$object <- NULL
  parameters.to.store$distance.matrix <- NULL
  parameters.to.store$print.output <- NULL
  if (CalcInfoExists(object, "BuildSNN") && ! force.recalc){
    old.parameters <- GetAllCalcParam(object, "BuildSNN")
    old.parameters$time <- NULL
    old.parameters$print.output <- NULL
    if(all(all.equal(old.parameters, parameters.to.store) == TRUE)){
      warning("Build parameters exactly match those of already computed and stored SNN. To force recalculation, set force.recalc to TRUE.")
      return(object)
    }
  }
  object <- SetCalcParams(object = object,
                          calculation = "BuildSNN",
                          ... = parameters.to.store)
  n.cells <- nrow(x = data.use)
  if (n.cells < k.param) {
    warning("k.param set larger than number of cells. Setting k.param to number of cells - 1.")
    k.param <- n.cells - 1
  }
  # find the k-nearest neighbors for each single cell
  if (is.null(x = distance.matrix)) {
    my.knn <- get.knn(
      data <- as.matrix(x = data.use),
      k = min(k.scale * k.param, n.cells - 1)
    )
    nn.ranked <- cbind(1:n.cells, my.knn$nn.index[, 1:(k.param-1)])
    nn.large <- my.knn$nn.index
  } else {
    cat("Building SNN based on a provided distance matrix\n", file = stderr())
    n <- nrow(x = distance.matrix)
    k.for.nn <- k.param * k.scale
    knn.mat <- matrix(data = 0, ncol = k.for.nn, nrow = n)
    knd.mat <- knn.mat
    for (i in 1:n){
      knn.mat[i, ] <- order(data.use[i, ])[1:k.for.nn]
      knd.mat[i, ] <- data.use[i, knn.mat[i, ]]
    }
    nn.large <- knn.mat[, 2:(min(n, k.for.nn))]
    nn.ranked <- knn.mat[, 1:k.param]
  }
  w <- CalcSNNSparse(
    cell.names = object@cell.names,
    k.param = k.param,
    nn.large = nn.large,
    nn.ranked = nn.ranked,
    prune.SNN = prune.SNN,
    print.output = print.output
  )
  object@snn <- w
  if (plot.SNN) {
    if(!"tsne" %in% names(object@dr)) {
      warning("Please compute a tSNE for SNN visualization. See RunTSNE().")
    } else {
      if (nrow(object@dr$tsne@cell.embeddings) != length(x = object@cell.names)) {
        warning("Please compute a tSNE for SNN visualization. See RunTSNE().")
      } else {
        net <- graph.adjacency(
          adjmatrix = as.matrix(w),
          mode = "undirected",
          weighted = TRUE,
          diag = FALSE
        )
        plot.igraph(
          x = net,
          layout = as.matrix(x = object@dr$tsne@cell.embeddings),
          edge.width = E(graph = net)$weight,
          vertex.label = NA,
          vertex.size = 0
        )
      }
    }
  }
  return(object)
}

# Function to convert the knn graph into the snn graph. Stored in a sparse
# representation.

# @param cell.names    A vector of cell names which will correspond to the row/
#                      column names of the SNN
# @param k.param       Defines nearest neighborhood when computing NN graph
# @param nn.large      Full KNN graph (computed with get.knn with k set to
#                      k.param * k.scale)
# @param nn.ranked     Subset of full KNN graph that only contains the first
#                      k.param nearest neighbors. Used to define Jaccard
#                      distances between any two cells
# @param prune.snn     Sets the cutoff for acceptable Jaccard distances when
#                      computing the neighborhood overlap for the SNN
#                      construction. Any edges with values less than or equal to
#                      this will be set to 0 and removed from the SNN graph.
#                      Essentially sets the strigency of pruning (0 --- no
#                      pruning, 1 --- prune everything).
# @param print.output  Whether or not to print output to the console
# @return              Returns an adjacency matrix representation of the SNN
#                      graph
#
#' @importFrom utils txtProgressBar setTxtProgressBar
#
CalcSNNSparse <- function(
  cell.names,
  k.param,
  nn.large,
  nn.ranked,
  prune.SNN,
  print.output
) {
  n.cells <- length(cell.names)
  counter <- 1
  idx1 <- vector(mode = "integer", length = n.cells ^ 2 / k.param)
  idx2 <- vector(mode = "integer", length = n.cells ^ 2 / k.param)
  edge.weight <- vector(mode = "double", length = n.cells ^ 2 / k.param)
  id <- 1
  # fill out the adjacency matrix w with edge weights only between your target
  # cell and its k.scale*k.param-nearest neighbors
  # speed things up (don't have to calculate all pairwise distances)
  # define the edge weights with Jaccard distance
  if (print.output) {
    print("Constructing SNN")
    pb <- txtProgressBar(min = 0, max = n.cells, style = 3)
  }
  for (i in 1:n.cells) {
    for (j in 1:ncol(x = nn.large)) {
      s <- intersect(x = nn.ranked[i, ], y = nn.ranked[nn.large[i, j], ])
      u <- union(nn.ranked[i, ], nn.ranked[nn.large[i, j], ])
      e <- length(x = s) / length(x = u)
      if (e > prune.SNN) {
        idx1[id] <- i
        idx2[id] <- nn.large[i, j]
        edge.weight[id] <- e
        id <- id + 1
      }
    }
    if (print.output) {
      setTxtProgressBar(pb = pb, value = i)
    }
  }
  if (print.output) {
    close(con = pb)
  }
  idx1 <- idx1[! is.na(x = idx1) & idx1 != 0]
  idx2 <- idx2[! is.na(x = idx2) & idx2 != 0]
  edge.weight <- edge.weight[! is.na(x = edge.weight) & edge.weight != 0]
  w <- sparseMatrix(
    i = idx1,
    j = idx2,
    x = edge.weight,
    dims = c(n.cells, n.cells)
  )
  diag(x = w) <- 1
  rownames(x = w) <- cell.names
  colnames(x = w) <- cell.names
  return(w)
}

# This function calculates the pairwise connectivity of clusters.

# @param object  Seurat object containing the snn graph and cluster assignments
# @return        matrix with all pairwise connectivities calculated

CalcConnectivity <- function(object) {
  SNN <- object@snn
  cluster.names <- unique(x = object@ident)
  num.clusters <- length(x = cluster.names)
  connectivity <- matrix(data = 0, nrow = num.clusters, ncol = num.clusters)
  rownames(x = connectivity) <- cluster.names
  colnames(x = connectivity) <- cluster.names
  n <- 1
  for (i in cluster.names) {
    for (j in cluster.names[-(1:n)]) {
      subSNN <- SNN[
        match(x = WhichCells(object = object, ident = i), colnames(x = SNN)),
        match(x = WhichCells(object = object, ident = j), rownames(x = SNN))
      ]
      if (is.object(x = subSNN)) {
        connectivity[i, j] <- sum(subSNN) / (nrow(x = subSNN) * ncol(x = subSNN))
      } else {
        connectivity[i, j] <- mean(x = subSNN)
      }
    }
    n <- n + 1
  }
  return(connectivity)
}