25
25
from coverage .bytecode import code_objects
26
26
from coverage .debug import short_stack
27
27
from coverage .exceptions import NoSource , NotPython
28
- from coverage .misc import join_regex , nice_pair
28
+ from coverage .misc import nice_pair
29
29
from coverage .phystokens import generate_tokens
30
30
from coverage .types import TArc , TLineNo
31
31
@@ -62,8 +62,8 @@ def __init__(
62
62
63
63
self .exclude = exclude
64
64
65
- # The text lines of the parsed code .
66
- self .lines : list [ str ] = self . text . split ( " \n " )
65
+ # The parsed AST of the text .
66
+ self ._ast_root : ast . AST | None = None
67
67
68
68
# The normalized line numbers of the statements in the code. Exclusions
69
69
# are taken into account, and statements are adjusted to their first
@@ -101,19 +101,16 @@ def __init__(
101
101
self ._all_arcs : set [TArc ] | None = None
102
102
self ._missing_arc_fragments : TArcFragments | None = None
103
103
104
- @functools .lru_cache ()
105
- def lines_matching (self , * regexes : str ) -> set [TLineNo ]:
106
- """Find the lines matching one of a list of regexes.
104
+ def lines_matching (self , regex : str ) -> set [TLineNo ]:
105
+ """Find the lines matching a regex.
107
106
108
- Returns a set of line numbers, the lines that contain a match for one
109
- of the regexes in `regexes`. The entire line needn't match, just a
110
- part of it.
107
+ Returns a set of line numbers, the lines that contain a match for
108
+ `regex`. The entire line needn't match, just a part of it.
111
109
112
110
"""
113
- combined = join_regex (regexes )
114
- regex_c = re .compile (combined )
111
+ regex_c = re .compile (regex )
115
112
matches = set ()
116
- for i , ltext in enumerate (self .lines , start = 1 ):
113
+ for i , ltext in enumerate (self .text . split ( " \n " ) , start = 1 ):
117
114
if regex_c .search (ltext ):
118
115
matches .add (self ._multiline .get (i , i ))
119
116
return matches
@@ -127,26 +124,18 @@ def _raw_parse(self) -> None:
127
124
# Find lines which match an exclusion pattern.
128
125
if self .exclude :
129
126
self .raw_excluded = self .lines_matching (self .exclude )
127
+ self .excluded = set (self .raw_excluded )
130
128
131
- # Tokenize, to find excluded suites, to find docstrings, and to find
132
- # multi-line statements.
133
-
134
- # The last token seen. Start with INDENT to get module docstrings
135
- prev_toktype : int = token .INDENT
136
129
# The current number of indents.
137
130
indent : int = 0
138
131
# An exclusion comment will exclude an entire clause at this indent.
139
132
exclude_indent : int = 0
140
133
# Are we currently excluding lines?
141
134
excluding : bool = False
142
- # Are we excluding decorators now?
143
- excluding_decorators : bool = False
144
135
# The line number of the first line in a multi-line statement.
145
136
first_line : int = 0
146
137
# Is the file empty?
147
138
empty : bool = True
148
- # Is this the first token on a line?
149
- first_on_line : bool = True
150
139
# Parenthesis (and bracket) nesting level.
151
140
nesting : int = 0
152
141
@@ -162,42 +151,22 @@ def _raw_parse(self) -> None:
162
151
indent += 1
163
152
elif toktype == token .DEDENT :
164
153
indent -= 1
165
- elif toktype == token .NAME :
166
- if ttext == "class" :
167
- # Class definitions look like branches in the bytecode, so
168
- # we need to exclude them. The simplest way is to note the
169
- # lines with the "class" keyword.
170
- self .raw_classdefs .add (slineno )
171
154
elif toktype == token .OP :
172
155
if ttext == ":" and nesting == 0 :
173
156
should_exclude = (
174
- self .raw_excluded .intersection (range (first_line , elineno + 1 ))
175
- or excluding_decorators
157
+ self .excluded .intersection (range (first_line , elineno + 1 ))
176
158
)
177
159
if not excluding and should_exclude :
178
160
# Start excluding a suite. We trigger off of the colon
179
161
# token so that the #pragma comment will be recognized on
180
162
# the same line as the colon.
181
- self .raw_excluded .add (elineno )
163
+ self .excluded .add (elineno )
182
164
exclude_indent = indent
183
165
excluding = True
184
- excluding_decorators = False
185
- elif ttext == "@" and first_on_line :
186
- # A decorator.
187
- if elineno in self .raw_excluded :
188
- excluding_decorators = True
189
- if excluding_decorators :
190
- self .raw_excluded .add (elineno )
191
166
elif ttext in "([{" :
192
167
nesting += 1
193
168
elif ttext in ")]}" :
194
169
nesting -= 1
195
- elif toktype == token .STRING :
196
- if prev_toktype == token .INDENT :
197
- # Strings that are first on an indented line are docstrings.
198
- # (a trick from trace.py in the stdlib.) This works for
199
- # 99.9999% of cases.
200
- self .raw_docstrings .update (range (slineno , elineno + 1 ))
201
170
elif toktype == token .NEWLINE :
202
171
if first_line and elineno != first_line :
203
172
# We're at the end of a line, and we've ended on a
@@ -206,7 +175,6 @@ def _raw_parse(self) -> None:
206
175
for l in range (first_line , elineno + 1 ):
207
176
self ._multiline [l ] = first_line
208
177
first_line = 0
209
- first_on_line = True
210
178
211
179
if ttext .strip () and toktype != tokenize .COMMENT :
212
180
# A non-white-space token.
@@ -218,10 +186,7 @@ def _raw_parse(self) -> None:
218
186
if excluding and indent <= exclude_indent :
219
187
excluding = False
220
188
if excluding :
221
- self .raw_excluded .add (elineno )
222
- first_on_line = False
223
-
224
- prev_toktype = toktype
189
+ self .excluded .add (elineno )
225
190
226
191
# Find the starts of the executable statements.
227
192
if not empty :
@@ -234,6 +199,34 @@ def _raw_parse(self) -> None:
234
199
if env .PYBEHAVIOR .module_firstline_1 and self ._multiline :
235
200
self ._multiline [1 ] = min (self .raw_statements )
236
201
202
+ self .excluded = self .first_lines (self .excluded )
203
+
204
+ # AST lets us find classes, docstrings, and decorator-affected
205
+ # functions and classes.
206
+ assert self ._ast_root is not None
207
+ for node in ast .walk (self ._ast_root ):
208
+ # Find class definitions.
209
+ if isinstance (node , ast .ClassDef ):
210
+ self .raw_classdefs .add (node .lineno )
211
+ # Find docstrings.
212
+ if isinstance (node , (ast .ClassDef , ast .FunctionDef , ast .AsyncFunctionDef , ast .Module )):
213
+ if node .body :
214
+ first = node .body [0 ]
215
+ if (
216
+ isinstance (first , ast .Expr )
217
+ and isinstance (first .value , ast .Constant )
218
+ and isinstance (first .value .value , str )
219
+ ):
220
+ self .raw_docstrings .update (
221
+ range (first .lineno , cast (int , first .end_lineno ) + 1 )
222
+ )
223
+ # Exclusions carry from decorators and signatures to the bodies of
224
+ # functions and classes.
225
+ if isinstance (node , (ast .ClassDef , ast .FunctionDef , ast .AsyncFunctionDef )):
226
+ first_line = min ((d .lineno for d in node .decorator_list ), default = node .lineno )
227
+ if self .excluded .intersection (range (first_line , node .lineno + 1 )):
228
+ self .excluded .update (range (first_line , cast (int , node .end_lineno ) + 1 ))
229
+
237
230
@functools .lru_cache (maxsize = 1000 )
238
231
def first_line (self , lineno : TLineNo ) -> TLineNo :
239
232
"""Return the first line number of the statement including `lineno`."""
@@ -268,19 +261,14 @@ def parse_source(self) -> None:
268
261
269
262
"""
270
263
try :
264
+ self ._ast_root = ast .parse (self .text )
271
265
self ._raw_parse ()
272
- except (tokenize .TokenError , IndentationError , SyntaxError ) as err :
273
- if hasattr (err , "lineno" ):
274
- lineno = err .lineno # IndentationError
275
- else :
276
- lineno = err .args [1 ][0 ] # TokenError
266
+ except (IndentationError , SyntaxError ) as err :
277
267
raise NotPython (
278
268
f"Couldn't parse '{ self .filename } ' as Python source: " +
279
- f"{ err .args [0 ]!r} at line { lineno } " ,
269
+ f"{ err .args [0 ]!r} at line { err . lineno } " ,
280
270
) from err
281
271
282
- self .excluded = self .first_lines (self .raw_excluded )
283
-
284
272
ignore = self .excluded | self .raw_docstrings
285
273
starts = self .raw_statements - ignore
286
274
self .statements = self .first_lines (starts ) - ignore
@@ -303,7 +291,8 @@ def _analyze_ast(self) -> None:
303
291
`_all_arcs` is the set of arcs in the code.
304
292
305
293
"""
306
- aaa = AstArcAnalyzer (self .text , self .raw_statements , self ._multiline )
294
+ assert self ._ast_root is not None
295
+ aaa = AstArcAnalyzer (self ._ast_root , self .raw_statements , self ._multiline )
307
296
aaa .analyze ()
308
297
309
298
self ._all_arcs = set ()
@@ -403,14 +392,9 @@ def __init__(
403
392
self .code = code
404
393
else :
405
394
assert filename is not None
406
- try :
407
- self .code = compile (text , filename , "exec" , dont_inherit = True )
408
- except SyntaxError as synerr :
409
- raise NotPython (
410
- "Couldn't parse '%s' as Python source: '%s' at line %d" % (
411
- filename , synerr .msg , synerr .lineno or 0 ,
412
- ),
413
- ) from synerr
395
+ # We only get here if earlier ast parsing succeeded, so no need to
396
+ # catch errors.
397
+ self .code = compile (text , filename , "exec" , dont_inherit = True )
414
398
415
399
def child_parsers (self ) -> Iterable [ByteParser ]:
416
400
"""Iterate over all the code objects nested within this one.
@@ -685,11 +669,11 @@ class AstArcAnalyzer:
685
669
686
670
def __init__ (
687
671
self ,
688
- text : str ,
672
+ root_node : ast . AST ,
689
673
statements : set [TLineNo ],
690
674
multiline : dict [TLineNo , TLineNo ],
691
675
) -> None :
692
- self .root_node = ast . parse ( text )
676
+ self .root_node = root_node
693
677
# TODO: I think this is happening in too many places.
694
678
self .statements = {multiline .get (l , l ) for l in statements }
695
679
self .multiline = multiline
0 commit comments