Skip to content

Commit 21d3e31

Browse files
authored
fix: lcov report fixes (#1849)
* [lcov] Style improvements to lcovreport.py - Rename LcovReporter.get_lcov to LcovReporter.lcov_file because the old name sounds like it returns something. lcov_file was selected by analogy with XmlReporter.xml_file. - Move the increment of self.total from lcov_file to the loop in LcovReporter.report for better separation of concerns. * [lcov] honor skip_empty; improve consistency with xml2lcov Implement skip_empty mode in LcovReporter. Also, improve consistency with what you get by running ‘coverage xml’ and then ‘xml2lcov’ by not emitting any vacuous TN: lines, not emitting LF:0 LH:0 for empty files, and not emitting BRF:0 BRH:0 for files with no branches. * [lcov] don’t write DA-line checksums by default DA-line checksums bulk up the .lcov file and provide only a weak assurance that the source file being processed for analysis matches the source file that was used to generate coverage data. Current versions of the LCOV suite discourage their use. Add a boolean configuration option, lcov.line_checksums, which controls whether checksums are generated for DA lines. Consistent with the current behavior of the LCOV suite, the default is to not generate any checksums. * [lcov] Re-implement lcov reports using the same algorithm as XML reports. This fixes five serious bugs: - The first field of a BRDA: line may not be zero (#1846). - The first field of a BRDA: line is supposed to be the *source* line of each instrumented branch, not the destination line. - The fourth field of a BRDA: line is supposed to be “-” when the branch was *never reached*, not when it was reached but never/always taken (which is what a branch’s presence in missing_arcs means). As far as I can tell, coverage.py currently doesn’t know of the existence of branches that were never reached. - The decision of whether to emit DA: and BRDA: lines at all is now taken strictly according to what’s in analysis.statements. This is important because some lines may appear in analysis.executed and/or analysis.executed_branch_arcs but *not* in analysis.statements. For example, the beginnings of docstrings are like this, as is the phantom line 1 of an empty __init__.py in Python 3.10 and earlier. (I am pleased to note that the special casing of empty __init__.py in the test suite is no longer required after this patch.) - We no longer attempt to call branch-coverage-related Analysis methods when analysis.has_arcs is false. And two minor annoyances: - DA: and BRDA: lines are now emitted strictly in ascending order by (source) line number. - Source file records are now sorted by *relative* pathname, not absolute pathname from the coverage database. --------- Co-authored-by: Zack Weinberg <[email protected]>
1 parent 595ea22 commit 21d3e31

File tree

6 files changed

+204
-165
lines changed

6 files changed

+204
-165
lines changed

Diff for: coverage/config.py

+2
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,7 @@ def __init__(self) -> None:
250250

251251
# Defaults for [lcov]
252252
self.lcov_output = "coverage.lcov"
253+
self.lcov_line_checksums = False
253254

254255
# Defaults for [paths]
255256
self.paths: dict[str, list[str]] = {}
@@ -428,6 +429,7 @@ def copy(self) -> CoverageConfig:
428429

429430
# [lcov]
430431
("lcov_output", "lcov:output"),
432+
("lcov_line_checksums", "lcov:line_checksums", "boolean")
431433
]
432434

433435
def _set_attr_from_config_option(

Diff for: coverage/env.py

-4
Original file line numberDiff line numberDiff line change
@@ -99,10 +99,6 @@ class PYBEHAVIOR:
9999
# Some words are keywords in some places, identifiers in other places.
100100
soft_keywords = (PYVERSION >= (3, 10))
101101

102-
# Modules start with a line numbered zero. This means empty modules have
103-
# only a 0-number line, which is ignored, giving a truly empty module.
104-
empty_is_empty = (PYVERSION >= (3, 11, 0, "beta", 4))
105-
106102
# PEP669 Low Impact Monitoring: https://peps.python.org/pep-0669/
107103
pep669 = bool(getattr(sys, "monitoring", None))
108104

Diff for: coverage/lcovreport.py

+94-71
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,12 @@
2222

2323
def line_hash(line: str) -> str:
2424
"""Produce a hash of a source line for use in the LCOV file."""
25-
# The LCOV file format requires MD5 as a fingerprint of the file. This is
26-
# not a security use. Some security scanners raise alarms about the use of
27-
# MD5 here, but it is a false positive. This is not a security concern.
25+
# The LCOV file format optionally allows each line to be MD5ed as a
26+
# fingerprint of the file. This is not a security use. Some security
27+
# scanners raise alarms about the use of MD5 here, but it is a false
28+
# positive. This is not a security concern.
29+
# The unusual encoding of the MD5 hash, as a base64 sequence with the
30+
# trailing = signs stripped, is specified by the LCOV file format.
2831
hashed = hashlib.md5(line.encode("utf-8")).digest()
2932
return base64.b64encode(hashed).decode("ascii").rstrip("=")
3033

@@ -36,6 +39,7 @@ class LcovReporter:
3639

3740
def __init__(self, coverage: Coverage) -> None:
3841
self.coverage = coverage
42+
self.config = coverage.config
3943
self.total = Numbers(self.coverage.config.precision)
4044

4145
def report(self, morfs: Iterable[TMorf] | None, outfile: IO[str]) -> float:
@@ -49,85 +53,104 @@ def report(self, morfs: Iterable[TMorf] | None, outfile: IO[str]) -> float:
4953
self.coverage.get_data()
5054
outfile = outfile or sys.stdout
5155

52-
for fr, analysis in get_analysis_to_report(self.coverage, morfs):
53-
self.get_lcov(fr, analysis, outfile)
56+
# ensure file records are sorted by the _relative_ filename, not the full path
57+
to_report = [
58+
(fr.relative_filename(), fr, analysis)
59+
for fr, analysis in get_analysis_to_report(self.coverage, morfs)
60+
]
61+
to_report.sort()
62+
63+
for fname, fr, analysis in to_report:
64+
self.total += analysis.numbers
65+
self.lcov_file(fname, fr, analysis, outfile)
5466

5567
return self.total.n_statements and self.total.pc_covered
5668

57-
def get_lcov(self, fr: FileReporter, analysis: Analysis, outfile: IO[str]) -> None:
69+
def lcov_file(
70+
self,
71+
rel_fname: str,
72+
fr: FileReporter,
73+
analysis: Analysis,
74+
outfile: IO[str],
75+
) -> None:
5876
"""Produces the lcov data for a single file.
5977
6078
This currently supports both line and branch coverage,
6179
however function coverage is not supported.
6280
"""
63-
self.total += analysis.numbers
64-
65-
outfile.write("TN:\n")
66-
outfile.write(f"SF:{fr.relative_filename()}\n")
67-
source_lines = fr.source().splitlines()
68-
for covered in sorted(analysis.executed):
69-
if covered in analysis.excluded:
70-
# Do not report excluded as executed
71-
continue
72-
# Note: Coverage.py currently only supports checking *if* a line
73-
# has been executed, not how many times, so we set this to 1 for
74-
# nice output even if it's technically incorrect.
75-
76-
# The lines below calculate a 64-bit encoded md5 hash of the line
77-
# corresponding to the DA lines in the lcov file, for either case
78-
# of the line being covered or missed in coverage.py. The final two
79-
# characters of the encoding ("==") are removed from the hash to
80-
# allow genhtml to run on the resulting lcov file.
81-
if source_lines:
82-
if covered-1 >= len(source_lines):
83-
break
84-
line = source_lines[covered-1]
85-
else:
86-
line = ""
87-
outfile.write(f"DA:{covered},1,{line_hash(line)}\n")
88-
89-
for missed in sorted(analysis.missing):
90-
# We don't have to skip excluded lines here, because `missing`
91-
# already doesn't have them.
92-
assert source_lines
93-
line = source_lines[missed-1]
94-
outfile.write(f"DA:{missed},0,{line_hash(line)}\n")
95-
96-
outfile.write(f"LF:{analysis.numbers.n_statements}\n")
97-
outfile.write(f"LH:{analysis.numbers.n_executed}\n")
98-
99-
# More information dense branch coverage data.
100-
missing_arcs = analysis.missing_branch_arcs()
101-
executed_arcs = analysis.executed_branch_arcs()
102-
for block_number, block_line_number in enumerate(
103-
sorted(analysis.branch_stats().keys()),
104-
):
105-
for branch_number, line_number in enumerate(
106-
sorted(missing_arcs[block_line_number]),
107-
):
108-
# The exit branches have a negative line number,
109-
# this will not produce valid lcov. Setting
110-
# the line number of the exit branch to 0 will allow
111-
# for valid lcov, while preserving the data.
112-
line_number = max(line_number, 0)
113-
outfile.write(f"BRDA:{line_number},{block_number},{branch_number},-\n")
114-
115-
# The start value below allows for the block number to be
116-
# preserved between these two for loops (stopping the loop from
117-
# resetting the value of the block number to 0).
118-
for branch_number, line_number in enumerate(
119-
sorted(executed_arcs[block_line_number]),
120-
start=len(missing_arcs[block_line_number]),
121-
):
122-
line_number = max(line_number, 0)
123-
outfile.write(f"BRDA:{line_number},{block_number},{branch_number},1\n")
124-
125-
# Summary of the branch coverage.
81+
82+
if analysis.numbers.n_statements == 0:
83+
if self.config.skip_empty:
84+
return
85+
86+
outfile.write(f"SF:{rel_fname}\n")
87+
88+
if self.config.lcov_line_checksums:
89+
source_lines = fr.source().splitlines()
90+
91+
# Emit a DA: record for each line of the file.
92+
lines = sorted(analysis.statements)
93+
hash_suffix = ""
94+
for line in lines:
95+
if self.config.lcov_line_checksums:
96+
hash_suffix = "," + line_hash(source_lines[line-1])
97+
# Q: can we get info about the number of times a statement is
98+
# executed? If so, that should be recorded here.
99+
hit = int(line not in analysis.missing)
100+
outfile.write(f"DA:{line},{hit}{hash_suffix}\n")
101+
102+
if analysis.numbers.n_statements > 0:
103+
outfile.write(f"LF:{analysis.numbers.n_statements}\n")
104+
outfile.write(f"LH:{analysis.numbers.n_executed}\n")
105+
106+
# More information dense branch coverage data, if available.
126107
if analysis.has_arcs:
127108
branch_stats = analysis.branch_stats()
109+
executed_arcs = analysis.executed_branch_arcs()
110+
missing_arcs = analysis.missing_branch_arcs()
111+
112+
for line in lines:
113+
if line in branch_stats:
114+
# The meaning of a BRDA: line is not well explained in the lcov
115+
# documentation. Based on what genhtml does with them, however,
116+
# the interpretation is supposed to be something like this:
117+
# BRDA: <line>, <block>, <branch>, <hit>
118+
# where <line> is the source line number of the *origin* of the
119+
# branch; <block> is an arbitrary number which distinguishes multiple
120+
# control flow operations on a single line; <branch> is an arbitrary
121+
# number which distinguishes the possible destinations of the specific
122+
# control flow operation identified by <line> + <block>; and <hit> is
123+
# either the hit count for <line> + <block> + <branch> or "-" meaning
124+
# that <line> + <block> was never *reached*. <line> must be >= 1,
125+
# and <block>, <branch>, <hit> must be >= 0.
126+
127+
# This is only one possible way to map our sets of executed and
128+
# not-executed arcs to BRDA codes. It seems to produce reasonable
129+
# results when fed through genhtml.
130+
131+
# Q: can we get counts of the number of times each arc was executed?
132+
# branch_stats has "total" and "taken" counts for each branch, but it
133+
# doesn't have "taken" broken down by destination.
134+
destinations = {}
135+
for dst in executed_arcs[line]:
136+
destinations[(int(dst < 0), abs(dst))] = 1
137+
for dst in missing_arcs[line]:
138+
destinations[(int(dst < 0), abs(dst))] = 0
139+
140+
if all(v == 0 for v in destinations.values()):
141+
# When _none_ of the out arcs from 'line' were executed, presume
142+
# 'line' was never reached.
143+
for branch, _ in enumerate(sorted(destinations.keys())):
144+
outfile.write(f"BRDA:{line},0,{branch},-\n")
145+
else:
146+
for branch, (_, hit) in enumerate(sorted(destinations.items())):
147+
outfile.write(f"BRDA:{line},0,{branch},{hit}\n")
148+
149+
# Summary of the branch coverage.
128150
brf = sum(t for t, k in branch_stats.values())
129151
brh = brf - sum(t - k for t, k in branch_stats.values())
130-
outfile.write(f"BRF:{brf}\n")
131-
outfile.write(f"BRH:{brh}\n")
152+
if brf > 0:
153+
outfile.write(f"BRF:{brf}\n")
154+
outfile.write(f"BRH:{brh}\n")
132155

133156
outfile.write("end_of_record\n")

Diff for: doc/config.rst

+11
Original file line numberDiff line numberDiff line change
@@ -890,3 +890,14 @@ Settings particular to LCOV reporting (see :ref:`cmd_lcov`).
890890
.............
891891

892892
(string, default "coverage.lcov") Where to write the LCOV file.
893+
894+
[lcov] line_checksums
895+
.....................
896+
897+
(boolean, default false) Whether to write per-line checksums as part of the
898+
lcov file. Because these checksums cover only lines with actual code on
899+
them, and do not verify the ordering of lines, they provide only a weak
900+
assurance that the source code available to analysis tools (e.g. ``genhtml``)
901+
matches the code that was used to generate the coverage data.
902+
903+
.. versionadded:: 7.6.2

0 commit comments

Comments
 (0)