forked from satijalab/seurat
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsnn.R
256 lines (251 loc) · 9.31 KB
/
snn.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
#' @include seurat.R
NULL
#' SNN Graph Construction
#'
#' Constructs a Shared Nearest Neighbor (SNN) Graph for a given dataset. We
#' first determine the k-nearest neighbors of each cell (defined by k.param *
#' k.scale). We use this knn graph to construct the SNN graph by calculating the
#' neighborhood overlap (Jaccard distance) between every cell and its k.param *
#' k.scale nearest neighbors (defining the neighborhood for each cell as the
#' k.param nearest neighbors).
#'
#' @param object Seurat object
#' @param genes.use A vector of gene names to use in construction of SNN graph
#' if building directly based on expression data rather than a dimensionally
#' reduced representation (i.e. PCs).
#' @param reduction.type Name of dimensional reduction technique to use in
#' construction of SNN graph. (e.g. "pca", "ica")
#' @param dims.use A vector of the dimensions to use in construction of the SNN
#' graph (e.g. To use the first 10 PCs, pass 1:10)
#' @param k.param Defines k for the k-nearest neighbor algorithm
#' @param k.scale Granularity option for k.param
#' @param plot.SNN Plot the SNN graph
#' @param prune.SNN Sets the cutoff for acceptable Jaccard distances when
#' computing the neighborhood overlap for the SNN construction. Any edges with
#' values less than or equal to this will be set to 0 and removed from the SNN
#' graph. Essentially sets the strigency of pruning (0 --- no pruning, 1 ---
#' prune everything).
#' @param print.output Whether or not to print output to the console
#' @param distance.matrix Build SNN from distance matrix (experimental)
#' @param force.recalc Force recalculation of SNN.
#' @importFrom FNN get.knn
#' @importFrom igraph plot.igraph graph.adjlist graph.adjacency E
#' @importFrom Matrix sparseMatrix
#' @return Returns the object with object@@snn filled
#' @export
#'
#' @examples
#'
#' pbmc_small
#' # Compute an SNN on the gene expression level
#' pbmc_small <- BuildSNN(pbmc_small, genes.use = [email protected])
#'
#' # More commonly, we build the SNN on a dimensionally reduced form of the data
#' # such as the first 10 principle components.
#'
#' pbmc_small <- BuildSNN(pbmc_small, reduction.type = "pca", dims.use = 1:10)
#'
BuildSNN <- function(
object,
genes.use = NULL,
reduction.type = "pca",
dims.use = NULL,
k.param = 10,
k.scale = 10,
plot.SNN = FALSE,
prune.SNN = 1/15,
print.output = TRUE,
distance.matrix = NULL,
force.recalc = FALSE
) {
if (! is.null(x = distance.matrix)) {
data.use <- distance.matrix
force.recalc <- TRUE
} else if (is.null(x = dims.use)) {
genes.use <- SetIfNull(x = genes.use, default = [email protected])
data.use <- t(x = as.matrix(x = object@data[genes.use, ]))
} else {
data.use <- GetCellEmbeddings(object = object,
reduction.type = reduction.type,
dims.use = dims.use)
}
parameters.to.store <- as.list(environment(), all = TRUE)[names(formals("BuildSNN"))]
parameters.to.store$object <- NULL
parameters.to.store$distance.matrix <- NULL
parameters.to.store$print.output <- NULL
if (CalcInfoExists(object, "BuildSNN") && ! force.recalc){
old.parameters <- GetAllCalcParam(object, "BuildSNN")
old.parameters$time <- NULL
old.parameters$print.output <- NULL
if(all(all.equal(old.parameters, parameters.to.store) == TRUE)){
warning("Build parameters exactly match those of already computed and stored SNN. To force recalculation, set force.recalc to TRUE.")
return(object)
}
}
object <- SetCalcParams(object = object,
calculation = "BuildSNN",
... = parameters.to.store)
n.cells <- nrow(x = data.use)
if (n.cells < k.param) {
warning("k.param set larger than number of cells. Setting k.param to number of cells - 1.")
k.param <- n.cells - 1
}
# find the k-nearest neighbors for each single cell
if (is.null(x = distance.matrix)) {
my.knn <- get.knn(
data <- as.matrix(x = data.use),
k = min(k.scale * k.param, n.cells - 1)
)
nn.ranked <- cbind(1:n.cells, my.knn$nn.index[, 1:(k.param-1)])
nn.large <- my.knn$nn.index
} else {
cat("Building SNN based on a provided distance matrix\n", file = stderr())
n <- nrow(x = distance.matrix)
k.for.nn <- k.param * k.scale
knn.mat <- matrix(data = 0, ncol = k.for.nn, nrow = n)
knd.mat <- knn.mat
for (i in 1:n){
knn.mat[i, ] <- order(data.use[i, ])[1:k.for.nn]
knd.mat[i, ] <- data.use[i, knn.mat[i, ]]
}
nn.large <- knn.mat[, 2:(min(n, k.for.nn))]
nn.ranked <- knn.mat[, 1:k.param]
}
w <- CalcSNNSparse(
cell.names = [email protected],
k.param = k.param,
nn.large = nn.large,
nn.ranked = nn.ranked,
prune.SNN = prune.SNN,
print.output = print.output
)
object@snn <- w
if (plot.SNN) {
if(!"tsne" %in% names(object@dr)) {
warning("Please compute a tSNE for SNN visualization. See RunTSNE().")
} else {
if (nrow(object@[email protected]) != length(x = [email protected])) {
warning("Please compute a tSNE for SNN visualization. See RunTSNE().")
} else {
net <- graph.adjacency(
adjmatrix = as.matrix(w),
mode = "undirected",
weighted = TRUE,
diag = FALSE
)
plot.igraph(
x = net,
layout = as.matrix(x = object@[email protected]),
edge.width = E(graph = net)$weight,
vertex.label = NA,
vertex.size = 0
)
}
}
}
return(object)
}
# Function to convert the knn graph into the snn graph. Stored in a sparse
# representation.
# @param cell.names A vector of cell names which will correspond to the row/
# column names of the SNN
# @param k.param Defines nearest neighborhood when computing NN graph
# @param nn.large Full KNN graph (computed with get.knn with k set to
# k.param * k.scale)
# @param nn.ranked Subset of full KNN graph that only contains the first
# k.param nearest neighbors. Used to define Jaccard
# distances between any two cells
# @param prune.snn Sets the cutoff for acceptable Jaccard distances when
# computing the neighborhood overlap for the SNN
# construction. Any edges with values less than or equal to
# this will be set to 0 and removed from the SNN graph.
# Essentially sets the strigency of pruning (0 --- no
# pruning, 1 --- prune everything).
# @param print.output Whether or not to print output to the console
# @return Returns an adjacency matrix representation of the SNN
# graph
#
#' @importFrom utils txtProgressBar setTxtProgressBar
#
CalcSNNSparse <- function(
cell.names,
k.param,
nn.large,
nn.ranked,
prune.SNN,
print.output
) {
n.cells <- length(cell.names)
counter <- 1
idx1 <- vector(mode = "integer", length = n.cells ^ 2 / k.param)
idx2 <- vector(mode = "integer", length = n.cells ^ 2 / k.param)
edge.weight <- vector(mode = "double", length = n.cells ^ 2 / k.param)
id <- 1
# fill out the adjacency matrix w with edge weights only between your target
# cell and its k.scale*k.param-nearest neighbors
# speed things up (don't have to calculate all pairwise distances)
# define the edge weights with Jaccard distance
if (print.output) {
print("Constructing SNN")
pb <- txtProgressBar(min = 0, max = n.cells, style = 3)
}
for (i in 1:n.cells) {
for (j in 1:ncol(x = nn.large)) {
s <- intersect(x = nn.ranked[i, ], y = nn.ranked[nn.large[i, j], ])
u <- union(nn.ranked[i, ], nn.ranked[nn.large[i, j], ])
e <- length(x = s) / length(x = u)
if (e > prune.SNN) {
idx1[id] <- i
idx2[id] <- nn.large[i, j]
edge.weight[id] <- e
id <- id + 1
}
}
if (print.output) {
setTxtProgressBar(pb = pb, value = i)
}
}
if (print.output) {
close(con = pb)
}
idx1 <- idx1[! is.na(x = idx1) & idx1 != 0]
idx2 <- idx2[! is.na(x = idx2) & idx2 != 0]
edge.weight <- edge.weight[! is.na(x = edge.weight) & edge.weight != 0]
w <- sparseMatrix(
i = idx1,
j = idx2,
x = edge.weight,
dims = c(n.cells, n.cells)
)
diag(x = w) <- 1
rownames(x = w) <- cell.names
colnames(x = w) <- cell.names
return(w)
}
# This function calculates the pairwise connectivity of clusters.
# @param object Seurat object containing the snn graph and cluster assignments
# @return matrix with all pairwise connectivities calculated
CalcConnectivity <- function(object) {
SNN <- object@snn
cluster.names <- unique(x = object@ident)
num.clusters <- length(x = cluster.names)
connectivity <- matrix(data = 0, nrow = num.clusters, ncol = num.clusters)
rownames(x = connectivity) <- cluster.names
colnames(x = connectivity) <- cluster.names
n <- 1
for (i in cluster.names) {
for (j in cluster.names[-(1:n)]) {
subSNN <- SNN[
match(x = WhichCells(object = object, ident = i), colnames(x = SNN)),
match(x = WhichCells(object = object, ident = j), rownames(x = SNN))
]
if (is.object(x = subSNN)) {
connectivity[i, j] <- sum(subSNN) / (nrow(x = subSNN) * ncol(x = subSNN))
} else {
connectivity[i, j] <- mean(x = subSNN)
}
}
n <- n + 1
}
return(connectivity)
}