Skip to content

Commit ac23c99

Browse files
authored
[ruff] Mark fixes for unsorted-dunder-all and unsorted-dunder-slots as unsafe when there are complex comments in the sequence (RUF022, RUF023) (#14560)
1 parent e5c7d87 commit ac23c99

5 files changed

+219
-87
lines changed

crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs

+70
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
use std::borrow::Cow;
77
use std::cmp::Ordering;
88

9+
use itertools::Itertools;
10+
911
use ruff_python_ast as ast;
1012
use ruff_python_codegen::Stylist;
1113
use ruff_python_parser::{TokenKind, Tokens};
@@ -314,6 +316,52 @@ impl<'a> SortClassification<'a> {
314316
}
315317
}
316318

319+
/// The complexity of the comments in a multiline sequence.
320+
///
321+
/// A sequence like this has "simple" comments: it's unambiguous
322+
/// which item each comment refers to, so there's no "risk" in sorting it:
323+
///
324+
/// ```py
325+
/// __all__ = [
326+
/// "foo", # comment1
327+
/// "bar", # comment2
328+
/// ]
329+
/// ```
330+
///
331+
/// This sequence has complex comments: we can't safely autofix the sort here,
332+
/// as the own-line comments might be used to create sections in `__all__`:
333+
///
334+
/// ```py
335+
/// __all__ = [
336+
/// # fooey things
337+
/// "foo1",
338+
/// "foo2",
339+
/// # barey things
340+
/// "bar1",
341+
/// "foobar",
342+
/// ]
343+
/// ```
344+
///
345+
/// This sequence also has complex comments -- it's ambiguous which item
346+
/// each comment should belong to:
347+
///
348+
/// ```py
349+
/// __all__ = [
350+
/// "foo1", "foo", "barfoo", # fooey things
351+
/// "baz", bazz2", "fbaz", # barrey things
352+
/// ]
353+
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
354+
pub(super) enum CommentComplexity {
355+
Simple,
356+
Complex,
357+
}
358+
359+
impl CommentComplexity {
360+
pub(super) const fn is_complex(self) -> bool {
361+
matches!(self, CommentComplexity::Complex)
362+
}
363+
}
364+
317365
// An instance of this struct encapsulates an analysis
318366
/// of a multiline Python tuple/list that represents an
319367
/// `__all__`/`__slots__`/etc. definition or augmentation.
@@ -328,6 +376,24 @@ impl<'a> MultilineStringSequenceValue<'a> {
328376
self.items.len()
329377
}
330378

379+
/// Determine the [`CommentComplexity`] of this multiline string sequence.
380+
pub(super) fn comment_complexity(&self) -> CommentComplexity {
381+
if self.items.iter().tuple_windows().any(|(first, second)| {
382+
first.has_own_line_comments()
383+
|| first
384+
.end_of_line_comments
385+
.is_some_and(|end_line_comment| second.start() < end_line_comment.end())
386+
}) || self
387+
.items
388+
.last()
389+
.is_some_and(StringSequenceItem::has_own_line_comments)
390+
{
391+
CommentComplexity::Complex
392+
} else {
393+
CommentComplexity::Simple
394+
}
395+
}
396+
331397
/// Analyse the source range for a multiline Python tuple/list that
332398
/// represents an `__all__`/`__slots__`/etc. definition or augmentation.
333399
/// Return `None` if the analysis fails for whatever reason.
@@ -792,6 +858,10 @@ impl<'a> StringSequenceItem<'a> {
792858
fn with_no_comments(value: &'a str, element_range: TextRange) -> Self {
793859
Self::new(value, vec![], element_range, None)
794860
}
861+
862+
fn has_own_line_comments(&self) -> bool {
863+
!self.preceding_comment_ranges.is_empty()
864+
}
795865
}
796866

797867
impl Ranged for StringSequenceItem<'_> {

crates/ruff_linter/src/rules/ruff/rules/sort_dunder_all.rs

+65-39
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use ruff_diagnostics::{Diagnostic, Edit, Fix, FixAvailability, Violation};
1+
use ruff_diagnostics::{Applicability, Diagnostic, Edit, Fix, FixAvailability, Violation};
22
use ruff_macros::{derive_message_formats, violation};
33
use ruff_python_ast as ast;
44
use ruff_source_file::LineRanges;
@@ -54,19 +54,40 @@ use crate::rules::ruff::rules::sequence_sorting::{
5454
/// ```
5555
///
5656
/// ## Fix safety
57-
/// This rule's fix is marked as always being safe, in that
57+
/// This rule's fix is marked as unsafe if there are any comments that take up
58+
/// a whole line by themselves inside the `__all__` definition, for example:
59+
/// ```py
60+
/// __all__ = [
61+
/// # eggy things
62+
/// "duck_eggs",
63+
/// "chicken_eggs",
64+
/// # hammy things
65+
/// "country_ham",
66+
/// "parma_ham",
67+
/// ]
68+
/// ```
69+
///
70+
/// This is a common pattern used to delimit categories within a module's API,
71+
/// but it would be out of the scope of this rule to attempt to maintain these
72+
/// categories when alphabetically sorting the items of `__all__`.
73+
///
74+
/// The fix is also marked as unsafe if there are more than two `__all__` items
75+
/// on a single line and that line also has a trailing comment, since here it
76+
/// is impossible to accurately gauge which item the comment should be moved
77+
/// with when sorting `__all__`:
78+
/// ```py
79+
/// __all__ = [
80+
/// "a", "c", "e", # a comment
81+
/// "b", "d", "f", # a second comment
82+
/// ]
83+
/// ```
84+
///
85+
/// Other than this, the rule's fix is marked as always being safe, in that
5886
/// it should very rarely alter the semantics of any Python code.
5987
/// However, note that (although it's rare) the value of `__all__`
6088
/// could be read by code elsewhere that depends on the exact
6189
/// iteration order of the items in `__all__`, in which case this
6290
/// rule's fix could theoretically cause breakage.
63-
///
64-
/// Note also that for multiline `__all__` definitions
65-
/// that include comments on their own line, it can be hard
66-
/// to tell where the comments should be moved to when sorting
67-
/// the contents of `__all__`. While this rule's fix will
68-
/// never delete a comment, it might *sometimes* move a
69-
/// comment to an unexpected location.
7091
#[violation]
7192
pub struct UnsortedDunderAll;
7293

@@ -208,37 +229,42 @@ fn create_fix(
208229
let locator = checker.locator();
209230
let is_multiline = locator.contains_line_break(range);
210231

211-
let sorted_source_code = {
212-
// The machinery in the `MultilineDunderAllValue` is actually
213-
// sophisticated enough that it would work just as well for
214-
// single-line `__all__` definitions, and we could reduce
215-
// the number of lines of code in this file by doing that.
216-
// Unfortunately, however, `MultilineDunderAllValue::from_source_range()`
217-
// must process every token in an `__all__` definition as
218-
// part of its analysis, and this is quite slow. For
219-
// single-line `__all__` definitions, it's also unnecessary,
220-
// as it's impossible to have comments in between the
221-
// `__all__` elements if the `__all__` definition is all on
222-
// a single line. Therefore, as an optimisation, we do the
223-
// bare minimum of token-processing for single-line `__all__`
224-
// definitions:
225-
if is_multiline {
226-
let value = MultilineStringSequenceValue::from_source_range(
227-
range,
228-
kind,
229-
locator,
230-
checker.tokens(),
231-
string_items,
232-
)?;
233-
assert_eq!(value.len(), elts.len());
234-
value.into_sorted_source_code(SORTING_STYLE, locator, checker.stylist())
232+
// The machinery in the `MultilineDunderAllValue` is actually
233+
// sophisticated enough that it would work just as well for
234+
// single-line `__all__` definitions, and we could reduce
235+
// the number of lines of code in this file by doing that.
236+
// Unfortunately, however, `MultilineDunderAllValue::from_source_range()`
237+
// must process every token in an `__all__` definition as
238+
// part of its analysis, and this is quite slow. For
239+
// single-line `__all__` definitions, it's also unnecessary,
240+
// as it's impossible to have comments in between the
241+
// `__all__` elements if the `__all__` definition is all on
242+
// a single line. Therefore, as an optimisation, we do the
243+
// bare minimum of token-processing for single-line `__all__`
244+
// definitions:
245+
let (sorted_source_code, applicability) = if is_multiline {
246+
let value = MultilineStringSequenceValue::from_source_range(
247+
range,
248+
kind,
249+
locator,
250+
checker.tokens(),
251+
string_items,
252+
)?;
253+
assert_eq!(value.len(), elts.len());
254+
let applicability = if value.comment_complexity().is_complex() {
255+
Applicability::Unsafe
235256
} else {
236-
sort_single_line_elements_sequence(kind, elts, string_items, locator, SORTING_STYLE)
237-
}
257+
Applicability::Safe
258+
};
259+
let sorted_source =
260+
value.into_sorted_source_code(SORTING_STYLE, locator, checker.stylist());
261+
(sorted_source, applicability)
262+
} else {
263+
let sorted_source =
264+
sort_single_line_elements_sequence(kind, elts, string_items, locator, SORTING_STYLE);
265+
(sorted_source, Applicability::Safe)
238266
};
239267

240-
Some(Fix::safe_edit(Edit::range_replacement(
241-
sorted_source_code,
242-
range,
243-
)))
268+
let edit = Edit::range_replacement(sorted_source_code, range);
269+
Some(Fix::applicable_edit(edit, applicability))
244270
}

crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs

+65-27
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@ use ruff_text_size::{Ranged, TextRange};
1111

1212
use crate::checkers::ast::Checker;
1313
use crate::rules::ruff::rules::sequence_sorting::{
14-
sort_single_line_elements_sequence, MultilineStringSequenceValue, SequenceKind,
15-
SortClassification, SortingStyle,
14+
sort_single_line_elements_sequence, CommentComplexity, MultilineStringSequenceValue,
15+
SequenceKind, SortClassification, SortingStyle,
1616
};
1717
use crate::Locator;
1818

@@ -37,24 +37,51 @@ use crate::Locator;
3737
/// ```
3838
///
3939
/// ## Fix safety
40-
/// This rule's fix is marked as unsafe whenever Ruff can detect that code
41-
/// elsewhere in the same file reads the `__slots__` variable in some way.
42-
/// This is because the order of the items in `__slots__` may have semantic
43-
/// significance if the `__slots__` of a class is being iterated over, or
44-
/// being assigned to another value.
40+
/// This rule's fix is marked as unsafe in three situations.
41+
///
42+
/// Firstly, the fix is unsafe if there are any comments that take up
43+
/// a whole line by themselves inside the `__slots__` definition, for example:
44+
/// ```py
45+
/// class Foo:
46+
/// __slots__ = [
47+
/// # eggy things
48+
/// "duck_eggs",
49+
/// "chicken_eggs",
50+
/// # hammy things
51+
/// "country_ham",
52+
/// "parma_ham",
53+
/// ]
54+
/// ```
55+
///
56+
/// This is a common pattern used to delimit categories within a class's slots,
57+
/// but it would be out of the scope of this rule to attempt to maintain these
58+
/// categories when applying a natural sort to the items of `__slots__`.
59+
///
60+
/// Secondly, the fix is also marked as unsafe if there are more than two
61+
/// `__slots__` items on a single line and that line also has a trailing
62+
/// comment, since here it is impossible to accurately gauge which item the
63+
/// comment should be moved with when sorting `__slots__`:
64+
/// ```py
65+
/// class Bar:
66+
/// __slots__ = [
67+
/// "a", "c", "e", # a comment
68+
/// "b", "d", "f", # a second comment
69+
/// ]
70+
/// ```
71+
///
72+
/// Lastly, this rule's fix is marked as unsafe whenever Ruff can detect that
73+
/// code elsewhere in the same file reads the `__slots__` variable in some way
74+
/// and the `__slots__` variable is not assigned to a set. This is because the
75+
/// order of the items in `__slots__` may have semantic significance if the
76+
/// `__slots__` of a class is being iterated over, or being assigned to another
77+
/// value.
4578
///
4679
/// In the vast majority of other cases, this rule's fix is unlikely to
4780
/// cause breakage; as such, Ruff will otherwise mark this rule's fix as
4881
/// safe. However, note that (although it's rare) the value of `__slots__`
4982
/// could still be read by code outside of the module in which the
5083
/// `__slots__` definition occurs, in which case this rule's fix could
5184
/// theoretically cause breakage.
52-
///
53-
/// Additionally, note that for multiline `__slots__` definitions that
54-
/// include comments on their own line, it can be hard to tell where the
55-
/// comments should be moved to when sorting the contents of `__slots__`.
56-
/// While this rule's fix will never delete a comment, it might *sometimes*
57-
/// move a comment to an unexpected location.
5885
#[violation]
5986
pub struct UnsortedDunderSlots {
6087
class_name: ast::name::Name,
@@ -122,15 +149,17 @@ pub(crate) fn sort_dunder_slots(checker: &Checker, binding: &Binding) -> Option<
122149
);
123150

124151
if let SortClassification::UnsortedAndMaybeFixable { items } = sort_classification {
125-
if let Some(sorted_source_code) = display.generate_sorted_source_code(&items, checker) {
152+
if let Some((sorted_source_code, comment_complexity)) =
153+
display.generate_sorted_source_code(&items, checker)
154+
{
126155
let edit = Edit::range_replacement(sorted_source_code, display.range());
127-
128-
let applicability = if display.kind.is_set_literal() || binding.is_unused() {
129-
Applicability::Safe
130-
} else {
156+
let applicability = if comment_complexity.is_complex()
157+
|| (binding.is_used() && !display.kind.is_set_literal())
158+
{
131159
Applicability::Unsafe
160+
} else {
161+
Applicability::Safe
132162
};
133-
134163
diagnostic.set_fix(Fix::applicable_edit(edit, applicability));
135164
}
136165
}
@@ -219,7 +248,11 @@ impl<'a> StringLiteralDisplay<'a> {
219248
Some(result)
220249
}
221250

222-
fn generate_sorted_source_code(&self, elements: &[&str], checker: &Checker) -> Option<String> {
251+
fn generate_sorted_source_code(
252+
&self,
253+
elements: &[&str],
254+
checker: &Checker,
255+
) -> Option<(String, CommentComplexity)> {
223256
let locator = checker.locator();
224257

225258
let multiline_classification = if locator.contains_line_break(self.range()) {
@@ -238,26 +271,31 @@ impl<'a> StringLiteralDisplay<'a> {
238271
elements,
239272
)?;
240273
assert_eq!(analyzed_sequence.len(), self.elts.len());
241-
Some(analyzed_sequence.into_sorted_source_code(
274+
let comment_complexity = analyzed_sequence.comment_complexity();
275+
let sorted_code = analyzed_sequence.into_sorted_source_code(
242276
SORTING_STYLE,
243277
locator,
244278
checker.stylist(),
245-
))
279+
);
280+
Some((sorted_code, comment_complexity))
246281
}
247282
// Sorting multiline dicts is unsupported
248283
(DisplayKind::Dict { .. }, MultilineClassification::Multiline) => None,
249284
(DisplayKind::Sequence(sequence_kind), MultilineClassification::SingleLine) => {
250-
Some(sort_single_line_elements_sequence(
285+
let sorted_code = sort_single_line_elements_sequence(
251286
*sequence_kind,
252287
&self.elts,
253288
elements,
254289
locator,
255290
SORTING_STYLE,
256-
))
291+
);
292+
Some((sorted_code, CommentComplexity::Simple))
293+
}
294+
(DisplayKind::Dict { items }, MultilineClassification::SingleLine) => {
295+
let sorted_code =
296+
sort_single_line_elements_dict(&self.elts, elements, items, locator);
297+
Some((sorted_code, CommentComplexity::Simple))
257298
}
258-
(DisplayKind::Dict { items }, MultilineClassification::SingleLine) => Some(
259-
sort_single_line_elements_dict(&self.elts, elements, items, locator),
260-
),
261299
}
262300
}
263301
}

0 commit comments

Comments
 (0)