forked from satijalab/seurat
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathFindMarkers.Rd
335 lines (293 loc) · 11.9 KB
/
FindMarkers.Rd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/generics.R, R/differential_expression.R
\name{FindMarkers}
\alias{FindMarkers}
\alias{FindMarkersNode}
\alias{FindMarkers.default}
\alias{FindMarkers.Assay}
\alias{FindMarkers.SCTAssay}
\alias{FindMarkers.DimReduc}
\alias{FindMarkers.Seurat}
\title{Gene expression markers of identity classes}
\usage{
FindMarkers(object, ...)
\method{FindMarkers}{default}(
object,
slot = "data",
counts = numeric(),
cells.1 = NULL,
cells.2 = NULL,
features = NULL,
logfc.threshold = 0.1,
test.use = "wilcox",
min.pct = 0.01,
min.diff.pct = -Inf,
verbose = TRUE,
only.pos = FALSE,
max.cells.per.ident = Inf,
random.seed = 1,
latent.vars = NULL,
min.cells.feature = 3,
min.cells.group = 3,
pseudocount.use = 1,
fc.results = NULL,
densify = FALSE,
...
)
\method{FindMarkers}{Assay}(
object,
slot = "data",
cells.1 = NULL,
cells.2 = NULL,
features = NULL,
logfc.threshold = 0.1,
test.use = "wilcox",
min.pct = 0.01,
min.diff.pct = -Inf,
verbose = TRUE,
only.pos = FALSE,
max.cells.per.ident = Inf,
random.seed = 1,
latent.vars = NULL,
min.cells.feature = 3,
min.cells.group = 3,
pseudocount.use = 1,
mean.fxn = NULL,
fc.name = NULL,
base = 2,
densify = FALSE,
norm.method = NULL,
...
)
\method{FindMarkers}{SCTAssay}(
object,
slot = "data",
cells.1 = NULL,
cells.2 = NULL,
features = NULL,
logfc.threshold = 0.1,
test.use = "wilcox",
min.pct = 0.01,
min.diff.pct = -Inf,
verbose = TRUE,
only.pos = FALSE,
max.cells.per.ident = Inf,
random.seed = 1,
latent.vars = NULL,
min.cells.feature = 3,
min.cells.group = 3,
pseudocount.use = 1,
mean.fxn = NULL,
fc.name = NULL,
base = 2,
densify = FALSE,
recorrect_umi = TRUE,
...
)
\method{FindMarkers}{DimReduc}(
object,
cells.1 = NULL,
cells.2 = NULL,
features = NULL,
logfc.threshold = 0.1,
test.use = "wilcox",
min.pct = 0.01,
min.diff.pct = -Inf,
verbose = TRUE,
only.pos = FALSE,
max.cells.per.ident = Inf,
random.seed = 1,
latent.vars = NULL,
min.cells.feature = 3,
min.cells.group = 3,
pseudocount.use = 1,
mean.fxn = rowMeans,
fc.name = NULL,
densify = FALSE,
...
)
\method{FindMarkers}{Seurat}(
object,
ident.1 = NULL,
ident.2 = NULL,
group.by = NULL,
subset.ident = NULL,
assay = NULL,
slot = "data",
reduction = NULL,
features = NULL,
logfc.threshold = 0.1,
pseudocount.use = 1,
test.use = "wilcox",
min.pct = 0.01,
min.diff.pct = -Inf,
verbose = TRUE,
only.pos = FALSE,
max.cells.per.ident = Inf,
random.seed = 1,
latent.vars = NULL,
min.cells.feature = 3,
min.cells.group = 3,
mean.fxn = NULL,
fc.name = NULL,
base = 2,
densify = FALSE,
...
)
}
\arguments{
\item{object}{An object}
\item{...}{Arguments passed to other methods and to specific DE methods}
\item{slot}{Slot to pull data from; note that if \code{test.use} is "negbinom", "poisson", or "DESeq2",
\code{slot} will be set to "counts"}
\item{counts}{Count matrix if using scale.data for DE tests. This is used for
computing pct.1 and pct.2 and for filtering features based on fraction
expressing}
\item{cells.1}{Vector of cell names belonging to group 1}
\item{cells.2}{Vector of cell names belonging to group 2}
\item{features}{Genes to test. Default is to use all genes}
\item{logfc.threshold}{Limit testing to genes which show, on average, at least
X-fold difference (log-scale) between the two groups of cells. Default is 0.1.
Increasing logfc.threshold speeds up the function, but can miss weaker signals.}
\item{test.use}{Denotes which test to use. Available options are:
\itemize{
\item{"wilcox"} : Identifies differentially expressed genes between two
groups of cells using a Wilcoxon Rank Sum test (default); will use a fast
implementation by Presto if installed
\item{"wilcox_limma"} : Identifies differentially expressed genes between two
groups of cells using the limma implementation of the Wilcoxon Rank Sum test;
set this option to reproduce results from Seurat v4
\item{"bimod"} : Likelihood-ratio test for single cell gene expression,
(McDavid et al., Bioinformatics, 2013)
\item{"roc"} : Identifies 'markers' of gene expression using ROC analysis.
For each gene, evaluates (using AUC) a classifier built on that gene alone,
to classify between two groups of cells. An AUC value of 1 means that
expression values for this gene alone can perfectly classify the two
groupings (i.e., each of the cells in cells.1 exhibits a higher level than
each of the cells in cells.2). An AUC value of 0 also means there is perfect
classification, but in the other direction. A value of 0.5 implies that
the gene has no predictive power to classify the two groups. Returns a
'predictive power' (abs(AUC-0.5) * 2) ranked matrix of putative differentially
expressed genes.
\item{"t"} : Identify differentially expressed genes between two groups of
cells using the Student's t-test.
\item{"negbinom"} : Identifies differentially expressed genes between two
groups of cells using a negative binomial generalized linear model.
Use only for UMI-based datasets
\item{"poisson"} : Identifies differentially expressed genes between two
groups of cells using a Poisson generalized linear model.
Use only for UMI-based datasets
\item{"LR"} : Uses a logistic regression framework to determine differentially
expressed genes. Constructs a logistic regression model predicting group
membership based on each feature individually and compares this to a null
model with a likelihood ratio test.
\item{"MAST"} : Identifies differentially expressed genes between two groups
of cells using a hurdle model tailored to scRNA-seq data. Utilizes the MAST
package to run the DE testing.
\item{"DESeq2"} : Identifies differentially expressed genes between two groups
of cells based on a model using DESeq2 which uses a negative binomial
distribution (Love et al, Genome Biology, 2014). This test does not support
pre-filtering of genes based on average difference (or percent detection rate)
between cell groups. However, genes may be pre-filtered based on their
minimum detection rate (min.pct) across both cell groups. To use this method,
please install DESeq2, using the instructions at
https://bioconductor.org/packages/release/bioc/html/DESeq2.html
}}
\item{min.pct}{only test genes that are detected in a minimum fraction of
min.pct cells in either of the two populations. Meant to speed up the function
by not testing genes that are very infrequently expressed. Default is 0.01}
\item{min.diff.pct}{only test genes that show a minimum difference in the
fraction of detection between the two groups. Set to -Inf by default}
\item{verbose}{Print a progress bar once expression testing begins}
\item{only.pos}{Only return positive markers (FALSE by default)}
\item{max.cells.per.ident}{Down sample each identity class to a max number.
Default is no downsampling (set to Inf)}
\item{random.seed}{Random seed for downsampling}
\item{latent.vars}{Variables to test, used only when \code{test.use} is one of
'LR', 'negbinom', 'poisson', or 'MAST'}
\item{min.cells.feature}{Minimum number of cells expressing the feature in at least one
of the two groups, currently only used for poisson and negative binomial tests}
\item{min.cells.group}{Minimum number of cells in one of the groups}
\item{pseudocount.use}{Pseudocount to add to averaged expression values when
calculating logFC. 1 by default.}
\item{fc.results}{data.frame from FoldChange}
\item{densify}{Convert the sparse matrix to a dense form before running the DE test. This can provide speedups but might require higher memory; default is FALSE}
\item{mean.fxn}{Function to use for fold change or average difference calculation.
If NULL, the appropriate function will be chosen according to the slot used}
\item{fc.name}{Name of the fold change, average difference, or custom function column
in the output data.frame. If NULL, the fold change column will be named
according to the logarithm base (eg, "avg_log2FC"), or if using the scale.data
slot "avg_diff".}
\item{base}{The base with respect to which logarithms are computed.}
\item{norm.method}{Normalization method for fold change calculation when
\code{slot} is \dQuote{\code{data}}}
\item{recorrect_umi}{Recalculate corrected UMI counts using minimum of the median UMIs when performing DE using multiple SCT objects; default is TRUE}
\item{ident.1}{Identity class to define markers for; pass an object of class
\code{phylo} or 'clustertree' to find markers for a node in a cluster tree;
passing 'clustertree' requires \code{\link{BuildClusterTree}} to have been run}
\item{ident.2}{A second identity class for comparison; if \code{NULL},
use all other cells for comparison; if an object of class \code{phylo} or
'clustertree' is passed to \code{ident.1}, must pass a node to find markers for}
\item{group.by}{Regroup cells into a different identity class prior to performing differential expression (see example)}
\item{subset.ident}{Subset a particular identity class prior to regrouping. Only relevant if group.by is set (see example)}
\item{assay}{Assay to use in differential expression testing}
\item{reduction}{Reduction to use in differential expression testing - will test for DE on cell embeddings}
}
\value{
data.frame with a ranked list of putative markers as rows, and associated
statistics as columns (p-values, ROC score, etc., depending on the test used (\code{test.use})). The following columns are always present:
\itemize{
\item \code{avg_logFC}: log fold-change of the average expression between the two groups. Positive values indicate that the gene is more highly expressed in the first group
\item \code{pct.1}: The percentage of cells where the gene is detected in the first group
\item \code{pct.2}: The percentage of cells where the gene is detected in the second group
\item \code{p_val_adj}: Adjusted p-value, based on Bonferroni correction using all genes in the dataset
}
}
\description{
Finds markers (differentially expressed genes) for identity classes
}
\details{
p-value adjustment is performed using Bonferroni correction based on
the total number of genes in the dataset. Other correction methods are not
recommended, as Seurat pre-filters genes using the arguments above, reducing
the number of tests performed. Lastly, as Aaron Lun has pointed out, p-values
should be interpreted cautiously, as the genes used for clustering are the
same genes tested for differential expression.
}
\examples{
\dontrun{
data("pbmc_small")
# Find markers for cluster 2
markers <- FindMarkers(object = pbmc_small, ident.1 = 2)
head(x = markers)
# Take all cells in cluster 2, and find markers that separate cells in the 'g1' group (metadata
# variable 'groups')
markers <- FindMarkers(pbmc_small, ident.1 = "g1", group.by = 'groups', subset.ident = "2")
head(x = markers)
# Pass 'clustertree' or an object of class phylo to ident.1 and
# a node to ident.2 as a replacement for FindMarkersNode
if (requireNamespace("ape", quietly = TRUE)) {
pbmc_small <- BuildClusterTree(object = pbmc_small)
markers <- FindMarkers(object = pbmc_small, ident.1 = 'clustertree', ident.2 = 5)
head(x = markers)
}
}
}
\references{
McDavid A, Finak G, Chattopadhyay PK, et al. Data exploration,
quality control and testing in single-cell qPCR-based gene expression experiments.
Bioinformatics. 2013;29(4):461-467. doi:10.1093/bioinformatics/bts714
Trapnell C, et al. The dynamics and regulators of cell fate
decisions are revealed by pseudotemporal ordering of single cells. Nature
Biotechnology volume 32, pages 381-386 (2014)
Andrew McDavid, Greg Finak and Masanao Yajima (2017). MAST: Model-based
Analysis of Single Cell Transcriptomics. R package version 1.2.1.
https://github.com/RGLab/MAST/
Love MI, Huber W and Anders S (2014). "Moderated estimation of
fold change and dispersion for RNA-seq data with DESeq2." Genome Biology.
https://bioconductor.org/packages/release/bioc/html/DESeq2.html
}
\seealso{
\code{FoldChange}
}
\concept{differential_expression}