Skip to content

Commit

Permalink
Foo
Browse files Browse the repository at this point in the history
  • Loading branch information
charliermarsh committed Feb 4, 2024
1 parent 77cd412 commit 03a1844
Show file tree
Hide file tree
Showing 6 changed files with 68 additions and 16 deletions.
20 changes: 9 additions & 11 deletions crates/ruff_linter/src/rules/eradicate/detection.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,10 @@ use aho_corasick::AhoCorasick;
use itertools::Itertools;
use once_cell::sync::Lazy;
use regex::{Regex, RegexSet};
use ruff_python_parser::lexer::lex;

use ruff_python_parser::parse_suite;
use ruff_python_trivia::{SimpleTokenKind, SimpleTokenizer};
use ruff_text_size::{Ranged, TextSize};
use ruff_text_size::TextSize;

static CODE_INDICATORS: Lazy<AhoCorasick> = Lazy::new(|| {
AhoCorasick::new([
Expand Down Expand Up @@ -47,6 +46,14 @@ pub(crate) fn comment_contains_code(line: &str, task_tags: &[String]) -> bool {
return false;
}

// Fast path: if the comment contains consecutive identifiers, we know it won't parse.
let tokenizer = SimpleTokenizer::starts_at(TextSize::default(), line).skip_trivia();
if tokenizer.tuple_windows().any(|(first, second)| {
first.kind == SimpleTokenKind::Name && second.kind == SimpleTokenKind::Name
}) {
return false;
}

// Ignore task tag comments (e.g., "# TODO(tom): Refactor").
if line
.split(&[' ', ':', '('])
Expand All @@ -56,15 +63,6 @@ pub(crate) fn comment_contains_code(line: &str, task_tags: &[String]) -> bool {
return false;
}

// Fast path: if the comment starts with two consecutive identifiers, we know it won't parse,
// unless the first identifier is a keyword.
let mut tokenizer = SimpleTokenizer::starts_at(TextSize::default(), line).skip_trivia();
for (a, b) in tokenizer.tuple_windows() {
if a.kind == SimpleTokenKind::Name && b.kind == SimpleTokenKind::Name {
return false;
}
}

// Ignore whitelisted comments.
if ALLOWLIST_REGEX.is_match(line) {
return false;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ expression: test_case.tokens()
---
[
SimpleToken {
kind: Other,
kind: Name,
range: 0..2,
},
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
---
source: crates/ruff_python_trivia/src/tokenizer.rs
expression: test_case.tokens()
---
[
SimpleToken {
kind: Name,
range: 0..3,
},
SimpleToken {
kind: Whitespace,
range: 3..4,
},
SimpleToken {
kind: Name,
range: 4..7,
},
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
---
source: crates/ruff_python_trivia/src/tokenizer.rs
expression: test_case.tokens()
---
[
SimpleToken {
kind: Other,
range: 0..1,
},
SimpleToken {
kind: Bogus,
range: 1..6,
},
]
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ expression: test_case.tokens()
---
[
SimpleToken {
kind: Other,
kind: Name,
range: 0..6,
},
]
28 changes: 25 additions & 3 deletions crates/ruff_python_trivia/src/tokenizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ fn to_keyword_or_other(source: &str) -> SimpleTokenKind {
"case" => SimpleTokenKind::Case,
"with" => SimpleTokenKind::With,
"yield" => SimpleTokenKind::Yield,
_ => SimpleTokenKind::Name, // Potentially an identifier, but only if it isn't a string prefix. We can ignore this for now https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals
_ => SimpleTokenKind::Name, // Potentially an identifier, but only if it isn't a string prefix. The caller is responsible for enforcing that constraint.
}
}

Expand Down Expand Up @@ -569,10 +569,13 @@ impl<'a> SimpleTokenizer<'a> {
let range = TextRange::at(self.offset, token_len);
let kind = to_keyword_or_other(&self.source[range]);

if kind == SimpleTokenKind::Other {
// If the next character is a quote, we're in a string prefix. For example: `f"foo`.
if kind == SimpleTokenKind::Name && matches!(self.cursor.first(), '"' | '\'') {
self.bogus = true;
SimpleTokenKind::Other
} else {
kind
}
kind
}

// Space, tab, or form feed. We ignore the true semantics of form feed, and treat it as
Expand Down Expand Up @@ -1156,6 +1159,25 @@ mod tests {
test_case.assert_reverse_tokenization();
}

/// Snapshot test for a string literal that carries a prefix character (`f'foo'`).
///
/// The token stream produced for the input is compared against the stored debug snapshot.
#[test]
fn string_with_kind() {
// The `f` is immediately followed by a quote, i.e. it is a string prefix rather than
// the start of an identifier.
let source = "f'foo'";

let test_case = tokenize(source);
assert_debug_snapshot!(test_case.tokens());

// Note: no reverse-tokenization assertion is made for this input, because it is not
// reversible: the two directions disagree on token order ([other, bogus] vs [bogus, other]).
}

/// Snapshot test for plain identifiers whose first letter is also a legal Python
/// string-prefix character (`f` in `foo`, `b` in `bar`).
///
/// The token stream produced for the input is compared against the stored debug snapshot.
#[test]
fn identifier_starting_with_string_kind() {
// Neither word is followed by a quote, so neither should be mistaken for a string prefix.
let source = "foo bar";

let test_case = tokenize(source);
assert_debug_snapshot!(test_case.tokens());
// Presumably verifies that tokenizing the input backwards yields the same tokens as
// tokenizing it forwards; confirm against the helper's definition.
test_case.assert_reverse_tokenization();
}

#[test]
fn ignore_word_with_only_id_continuing_chars() {
let source = "555";
Expand Down

0 comments on commit 03a1844

Please sign in to comment.