Add fast-path for comment detection
charliermarsh committed Feb 3, 2024
1 parent c53aae0 commit 905202d
Showing 2 changed files with 27 additions and 4 deletions.
26 changes: 23 additions & 3 deletions crates/ruff_linter/src/rules/eradicate/detection.rs
@@ -4,11 +4,12 @@
 use once_cell::sync::Lazy;
 use regex::{Regex, RegexSet};
 
 use ruff_python_parser::parse_suite;
+use ruff_python_trivia::{SimpleTokenKind, SimpleTokenizer};
+use ruff_text_size::{Ranged, TextSize};
 
 static CODE_INDICATORS: Lazy<AhoCorasick> = Lazy::new(|| {
     AhoCorasick::new([
-        "(", ")", "[", "]", "{", "}", ":", "=", "%", "print", "return", "break", "continue",
-        "import",
+        "(", ")", "[", "]", "{", "}", ":", "=", "%", "return", "break", "continue", "import",
     ])
     .unwrap()
 });
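Note: `CODE_INDICATORS` is a cheap Aho-Corasick prescan; a comment body containing none of these substrings is dismissed as "not code" without ever invoking the parser. With `"print"` removed from the set, a bare `#print` no longer trips the prescan on its own. A minimal sketch of that prescan behavior, using the `aho-corasick` crate directly (standalone illustration; the variable names are not part of this change):

```rust
use aho_corasick::AhoCorasick;

fn main() {
    // Same pattern set as the updated CODE_INDICATORS, built once up front.
    let indicators = AhoCorasick::new([
        "(", ")", "[", "]", "{", "}", ":", "=", "%", "return", "break", "continue", "import",
    ])
    .unwrap();

    // A bare `print` no longer matches any indicator, so the comment is
    // rejected before the (much slower) parse attempt.
    assert!(!indicators.is_match("print"));

    // A call expression still matches, via the parentheses.
    assert!(indicators.is_match("print(1)"));
}
```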
@@ -53,6 +54,24 @@ pub(crate) fn comment_contains_code(line: &str, task_tags: &[String]) -> bool {
         return false;
     }
 
+    // Fast path: if the comment starts with two consecutive identifiers, we know it won't parse,
+    // unless the first identifier is a keyword.
+    if let Some(token) = SimpleTokenizer::starts_at(TextSize::default(), line)
+        .skip_trivia()
+        .next()
+    {
+        if token.kind == SimpleTokenKind::Name {
+            if let Some(token) = SimpleTokenizer::starts_at(token.end(), line)
+                .skip_trivia()
+                .next()
+            {
+                if token.kind == SimpleTokenKind::Name {
+                    return false;
+                }
+            }
+        }
+    }
+
     // Ignore whitelisted comments.
     if ALLOWLIST_REGEX.is_match(line) {
         return false;
Expand Down Expand Up @@ -123,9 +142,10 @@ mod tests {

#[test]
fn comment_contains_code_with_print() {
assert!(comment_contains_code("#print", &[]));
assert!(comment_contains_code("#print(1)", &[]));

assert!(!comment_contains_code("#print", &[]));
assert!(!comment_contains_code("#print 1", &[]));
assert!(!comment_contains_code("#to print", &[]));
}

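The updated tests pin down the behavior change: bare `#print`, `#print 1`, and `#to print` are no longer reported as commented-out code, while `#print(1)` still is. Below is a minimal sketch of the new fast path pulled out into a standalone helper, assuming the `SimpleTokenizer` API used in the diff (`starts_at`, `skip_trivia`, and `Ranged::end`); the helper name is illustrative and not part of the change:

```rust
use ruff_python_trivia::{SimpleTokenKind, SimpleTokenizer};
use ruff_text_size::{Ranged, TextSize};

/// Returns `true` if `line` opens with two consecutive identifiers
/// (e.g. "to print"), in which case it can never parse as a Python
/// statement and the expensive `parse_suite` call can be skipped.
fn starts_with_two_identifiers(line: &str) -> bool {
    let Some(first) = SimpleTokenizer::starts_at(TextSize::default(), line)
        .skip_trivia()
        .next()
    else {
        return false;
    };
    // Keywords get dedicated token kinds, so they don't short-circuit here.
    if first.kind != SimpleTokenKind::Name {
        return false;
    }
    SimpleTokenizer::starts_at(first.end(), line)
        .skip_trivia()
        .next()
        .is_some_and(|second| second.kind == SimpleTokenKind::Name)
}
```

A comment like `to print` short-circuits here, while `return x` does not (the `return` token has a dedicated keyword kind), so it still falls through to the `parse_suite` check.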
5 changes: 4 additions & 1 deletion crates/ruff_python_trivia/src/tokenizer.rs
@@ -182,7 +182,7 @@ fn to_keyword_or_other(source: &str) -> SimpleTokenKind {
         "case" => SimpleTokenKind::Case,
         "with" => SimpleTokenKind::With,
         "yield" => SimpleTokenKind::Yield,
-        _ => SimpleTokenKind::Other, // Potentially an identifier, but only if it isn't a string prefix. We can ignore this for now https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals
+        _ => SimpleTokenKind::Name, // Potentially an identifier, but only if it isn't a string prefix. We can ignore this for now https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals
     }
}

@@ -467,6 +467,9 @@ pub enum SimpleTokenKind {
     /// `yield`
     Yield,
 
+    /// An identifier or keyword.
+    Name,
+
     /// Any other non trivia token.
     Other,

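The new `Name` variant is what makes the fast path above possible: keywords keep their dedicated kinds, identifiers now come back as `Name`, and `Other` is reserved for anything else. A small sketch of the distinction, again assuming the `ruff_python_trivia` and `ruff_text_size` crates from this repository (the helper is illustrative):

```rust
use ruff_python_trivia::{SimpleTokenKind, SimpleTokenizer};
use ruff_text_size::TextSize;

/// Returns the kind of the first non-trivia token in `source`, if any.
fn first_token_kind(source: &str) -> Option<SimpleTokenKind> {
    SimpleTokenizer::starts_at(TextSize::default(), source)
        .skip_trivia()
        .next()
        .map(|token| token.kind)
}

fn main() {
    // Keywords keep their dedicated kinds...
    assert!(first_token_kind("yield x") == Some(SimpleTokenKind::Yield));
    // ...while plain identifiers now tokenize as `Name` rather than `Other`.
    assert!(first_token_kind("to print") == Some(SimpleTokenKind::Name));
}
```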
