Skip to content

Commit b3794be

Browse files
committed
fix: rework exclusion parsing to fix #1779
1 parent 75f9d51 commit b3794be

File tree

7 files changed

+102
-130
lines changed

7 files changed

+102
-130
lines changed

Diff for: .git-blame-ignore-revs

-29
This file was deleted.

Diff for: CHANGES.rst

+4
Original file line numberDiff line numberDiff line change
@@ -23,13 +23,17 @@ upgrading your version of coverage.py.
2323
Unreleased
2424
----------
2525

26+
- Fix: nested matches of exclude patterns could exclude too much code, as
27+
reported in `issue 1779`_. This is now fixed.
28+
2629
- In the HTML report, the filter term and "hide covered" checkbox settings are
2730
remembered between viewings, thanks to `Daniel Diniz <pull 1776_>`_.
2831

2932
- Python 3.13.0b1 is supported.
3033

3134

3235
.. _pull 1776: https://github.com/nedbat/coveragepy/pull/1776
36+
.. _issue 1779: https://github.com/nedbat/coveragepy/issues/1779
3337

3438

3539
.. scriv-start-here

Diff for: coverage/parser.py

+51-67
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
from coverage.bytecode import code_objects
2626
from coverage.debug import short_stack
2727
from coverage.exceptions import NoSource, NotPython
28-
from coverage.misc import join_regex, nice_pair
28+
from coverage.misc import nice_pair
2929
from coverage.phystokens import generate_tokens
3030
from coverage.types import TArc, TLineNo
3131

@@ -62,8 +62,8 @@ def __init__(
6262

6363
self.exclude = exclude
6464

65-
# The text lines of the parsed code.
66-
self.lines: list[str] = self.text.split("\n")
65+
# The parsed AST of the text.
66+
self._ast_root: ast.AST | None = None
6767

6868
# The normalized line numbers of the statements in the code. Exclusions
6969
# are taken into account, and statements are adjusted to their first
@@ -101,19 +101,16 @@ def __init__(
101101
self._all_arcs: set[TArc] | None = None
102102
self._missing_arc_fragments: TArcFragments | None = None
103103

104-
@functools.lru_cache()
105-
def lines_matching(self, *regexes: str) -> set[TLineNo]:
106-
"""Find the lines matching one of a list of regexes.
104+
def lines_matching(self, regex: str) -> set[TLineNo]:
105+
"""Find the lines matching a regex.
107106
108-
Returns a set of line numbers, the lines that contain a match for one
109-
of the regexes in `regexes`. The entire line needn't match, just a
110-
part of it.
107+
Returns a set of line numbers, the lines that contain a match for
108+
`regex`. The entire line needn't match, just a part of it.
111109
112110
"""
113-
combined = join_regex(regexes)
114-
regex_c = re.compile(combined)
111+
regex_c = re.compile(regex)
115112
matches = set()
116-
for i, ltext in enumerate(self.lines, start=1):
113+
for i, ltext in enumerate(self.text.split("\n"), start=1):
117114
if regex_c.search(ltext):
118115
matches.add(self._multiline.get(i, i))
119116
return matches
@@ -127,26 +124,18 @@ def _raw_parse(self) -> None:
127124
# Find lines which match an exclusion pattern.
128125
if self.exclude:
129126
self.raw_excluded = self.lines_matching(self.exclude)
127+
self.excluded = set(self.raw_excluded)
130128

131-
# Tokenize, to find excluded suites, to find docstrings, and to find
132-
# multi-line statements.
133-
134-
# The last token seen. Start with INDENT to get module docstrings
135-
prev_toktype: int = token.INDENT
136129
# The current number of indents.
137130
indent: int = 0
138131
# An exclusion comment will exclude an entire clause at this indent.
139132
exclude_indent: int = 0
140133
# Are we currently excluding lines?
141134
excluding: bool = False
142-
# Are we excluding decorators now?
143-
excluding_decorators: bool = False
144135
# The line number of the first line in a multi-line statement.
145136
first_line: int = 0
146137
# Is the file empty?
147138
empty: bool = True
148-
# Is this the first token on a line?
149-
first_on_line: bool = True
150139
# Parenthesis (and bracket) nesting level.
151140
nesting: int = 0
152141

@@ -162,42 +151,22 @@ def _raw_parse(self) -> None:
162151
indent += 1
163152
elif toktype == token.DEDENT:
164153
indent -= 1
165-
elif toktype == token.NAME:
166-
if ttext == "class":
167-
# Class definitions look like branches in the bytecode, so
168-
# we need to exclude them. The simplest way is to note the
169-
# lines with the "class" keyword.
170-
self.raw_classdefs.add(slineno)
171154
elif toktype == token.OP:
172155
if ttext == ":" and nesting == 0:
173156
should_exclude = (
174-
self.raw_excluded.intersection(range(first_line, elineno + 1))
175-
or excluding_decorators
157+
self.excluded.intersection(range(first_line, elineno + 1))
176158
)
177159
if not excluding and should_exclude:
178160
# Start excluding a suite. We trigger off of the colon
179161
# token so that the #pragma comment will be recognized on
180162
# the same line as the colon.
181-
self.raw_excluded.add(elineno)
163+
self.excluded.add(elineno)
182164
exclude_indent = indent
183165
excluding = True
184-
excluding_decorators = False
185-
elif ttext == "@" and first_on_line:
186-
# A decorator.
187-
if elineno in self.raw_excluded:
188-
excluding_decorators = True
189-
if excluding_decorators:
190-
self.raw_excluded.add(elineno)
191166
elif ttext in "([{":
192167
nesting += 1
193168
elif ttext in ")]}":
194169
nesting -= 1
195-
elif toktype == token.STRING:
196-
if prev_toktype == token.INDENT:
197-
# Strings that are first on an indented line are docstrings.
198-
# (a trick from trace.py in the stdlib.) This works for
199-
# 99.9999% of cases.
200-
self.raw_docstrings.update(range(slineno, elineno+1))
201170
elif toktype == token.NEWLINE:
202171
if first_line and elineno != first_line:
203172
# We're at the end of a line, and we've ended on a
@@ -206,7 +175,6 @@ def _raw_parse(self) -> None:
206175
for l in range(first_line, elineno+1):
207176
self._multiline[l] = first_line
208177
first_line = 0
209-
first_on_line = True
210178

211179
if ttext.strip() and toktype != tokenize.COMMENT:
212180
# A non-white-space token.
@@ -218,10 +186,7 @@ def _raw_parse(self) -> None:
218186
if excluding and indent <= exclude_indent:
219187
excluding = False
220188
if excluding:
221-
self.raw_excluded.add(elineno)
222-
first_on_line = False
223-
224-
prev_toktype = toktype
189+
self.excluded.add(elineno)
225190

226191
# Find the starts of the executable statements.
227192
if not empty:
@@ -234,6 +199,34 @@ def _raw_parse(self) -> None:
234199
if env.PYBEHAVIOR.module_firstline_1 and self._multiline:
235200
self._multiline[1] = min(self.raw_statements)
236201

202+
self.excluded = self.first_lines(self.excluded)
203+
204+
# AST lets us find classes, docstrings, and decorator-affected
205+
# functions and classes.
206+
assert self._ast_root is not None
207+
for node in ast.walk(self._ast_root):
208+
# Find class definitions.
209+
if isinstance(node, ast.ClassDef):
210+
self.raw_classdefs.add(node.lineno)
211+
# Find docstrings.
212+
if isinstance(node, (ast.ClassDef, ast.FunctionDef, ast.AsyncFunctionDef, ast.Module)):
213+
if node.body:
214+
first = node.body[0]
215+
if (
216+
isinstance(first, ast.Expr)
217+
and isinstance(first.value, ast.Constant)
218+
and isinstance(first.value.value, str)
219+
):
220+
self.raw_docstrings.update(
221+
range(first.lineno, cast(int, first.end_lineno) + 1)
222+
)
223+
# Exclusions carry from decorators and signatures to the bodies of
224+
# functions and classes.
225+
if isinstance(node, (ast.ClassDef, ast.FunctionDef, ast.AsyncFunctionDef)):
226+
first_line = min((d.lineno for d in node.decorator_list), default=node.lineno)
227+
if self.excluded.intersection(range(first_line, node.lineno + 1)):
228+
self.excluded.update(range(first_line, cast(int, node.end_lineno) + 1))
229+
237230
@functools.lru_cache(maxsize=1000)
238231
def first_line(self, lineno: TLineNo) -> TLineNo:
239232
"""Return the first line number of the statement including `lineno`."""
@@ -268,19 +261,14 @@ def parse_source(self) -> None:
268261
269262
"""
270263
try:
264+
self._ast_root = ast.parse(self.text)
271265
self._raw_parse()
272-
except (tokenize.TokenError, IndentationError, SyntaxError) as err:
273-
if hasattr(err, "lineno"):
274-
lineno = err.lineno # IndentationError
275-
else:
276-
lineno = err.args[1][0] # TokenError
266+
except (IndentationError, SyntaxError) as err:
277267
raise NotPython(
278268
f"Couldn't parse '{self.filename}' as Python source: " +
279-
f"{err.args[0]!r} at line {lineno}",
269+
f"{err.args[0]!r} at line {err.lineno}",
280270
) from err
281271

282-
self.excluded = self.first_lines(self.raw_excluded)
283-
284272
ignore = self.excluded | self.raw_docstrings
285273
starts = self.raw_statements - ignore
286274
self.statements = self.first_lines(starts) - ignore
@@ -303,7 +291,8 @@ def _analyze_ast(self) -> None:
303291
`_all_arcs` is the set of arcs in the code.
304292
305293
"""
306-
aaa = AstArcAnalyzer(self.text, self.raw_statements, self._multiline)
294+
assert self._ast_root is not None
295+
aaa = AstArcAnalyzer(self._ast_root, self.raw_statements, self._multiline)
307296
aaa.analyze()
308297

309298
self._all_arcs = set()
@@ -403,14 +392,9 @@ def __init__(
403392
self.code = code
404393
else:
405394
assert filename is not None
406-
try:
407-
self.code = compile(text, filename, "exec", dont_inherit=True)
408-
except SyntaxError as synerr:
409-
raise NotPython(
410-
"Couldn't parse '%s' as Python source: '%s' at line %d" % (
411-
filename, synerr.msg, synerr.lineno or 0,
412-
),
413-
) from synerr
395+
# We only get here if earlier ast parsing succeeded, so no need to
396+
# catch errors.
397+
self.code = compile(text, filename, "exec", dont_inherit=True)
414398

415399
def child_parsers(self) -> Iterable[ByteParser]:
416400
"""Iterate over all the code objects nested within this one.
@@ -685,11 +669,11 @@ class AstArcAnalyzer:
685669

686670
def __init__(
687671
self,
688-
text: str,
672+
root_node: ast.AST,
689673
statements: set[TLineNo],
690674
multiline: dict[TLineNo, TLineNo],
691675
) -> None:
692-
self.root_node = ast.parse(text)
676+
self.root_node = root_node
693677
# TODO: I think this is happening in too many places.
694678
self.statements = {multiline.get(l, l) for l in statements}
695679
self.multiline = multiline

Diff for: coverage/python.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -206,8 +206,10 @@ def translate_arcs(self, arcs: Iterable[TArc]) -> set[TArc]:
206206
def no_branch_lines(self) -> set[TLineNo]:
207207
assert self.coverage is not None
208208
no_branch = self.parser.lines_matching(
209-
join_regex(self.coverage.config.partial_list),
210-
join_regex(self.coverage.config.partial_always_list),
209+
join_regex(
210+
self.coverage.config.partial_list
211+
+ self.coverage.config.partial_always_list
212+
)
211213
)
212214
return no_branch
213215

Diff for: lab/parser.py

+13-7
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ def one_file(self, options, filename):
8080

8181
if options.dis:
8282
print("Main code:")
83-
disassemble(pyparser)
83+
disassemble(pyparser.text)
8484

8585
arcs = pyparser.arcs()
8686

@@ -96,7 +96,7 @@ def one_file(self, options, filename):
9696
exit_counts = pyparser.exit_counts()
9797

9898
for lineno, ltext in enumerate(pyparser.lines, start=1):
99-
marks = [' ', ' ', ' ', ' ', ' ']
99+
marks = [' '] * 6
100100
a = ' '
101101
if lineno in pyparser.raw_statements:
102102
marks[0] = '-'
@@ -110,7 +110,13 @@ def one_file(self, options, filename):
110110
if lineno in pyparser.raw_classdefs:
111111
marks[3] = 'C'
112112
if lineno in pyparser.raw_excluded:
113-
marks[4] = 'x'
113+
marks[4] = 'X'
114+
elif lineno in pyparser.excluded:
115+
marks[4] = '×'
116+
if lineno in pyparser._multiline.values():
117+
marks[5] = 'o'
118+
elif lineno in pyparser._multiline.keys():
119+
marks[5] = '.'
114120

115121
if arc_chars:
116122
a = arc_chars[lineno].ljust(arc_width)
@@ -173,13 +179,13 @@ def all_code_objects(code):
173179
yield code
174180

175181

176-
def disassemble(pyparser):
182+
def disassemble(text):
177183
"""Disassemble code, for ad-hoc experimenting."""
178184

179-
code = compile(pyparser.text, "", "exec", dont_inherit=True)
185+
code = compile(text, "", "exec", dont_inherit=True)
180186
for code_obj in all_code_objects(code):
181-
if pyparser.text:
182-
srclines = pyparser.text.splitlines()
187+
if text:
188+
srclines = text.splitlines()
183189
else:
184190
srclines = None
185191
print("\n%s: " % code_obj)

Diff for: tests/test_coverage.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -664,12 +664,12 @@ def test_module_docstring(self) -> None:
664664
[2, 3],
665665
)
666666
self.check_coverage("""\
667-
# Start with a comment, because it changes the behavior(!?)
667+
# Start with a comment, even though it doesn't change the behavior.
668668
'''I am a module docstring.'''
669669
a = 3
670670
b = 4
671671
""",
672-
[2, 3, 4],
672+
[3, 4],
673673
)
674674

675675

0 commit comments

Comments
 (0)