diff --git a/crates/ruff_cli/src/args.rs b/crates/ruff_cli/src/args.rs index f4b7182a2e6f5f..3a4d6b3d41da94 100644 --- a/crates/ruff_cli/src/args.rs +++ b/crates/ruff_cli/src/args.rs @@ -360,6 +360,14 @@ pub struct FormatCommand { /// Path to the `pyproject.toml` or `ruff.toml` file to use for configuration. #[arg(long, conflicts_with = "isolated")] pub config: Option, + + /// Disable cache reads. + #[arg(short, long, help_heading = "Miscellaneous")] + pub no_cache: bool, + /// Path to the cache directory. + #[arg(long, env = "RUFF_CACHE_DIR", help_heading = "Miscellaneous")] + pub cache_dir: Option, + /// Respect file exclusions via `.gitignore` and other standard ignore files. /// Use `--no-respect-gitignore` to disable. #[arg( @@ -530,6 +538,7 @@ impl FormatCommand { config: self.config, files: self.files, isolated: self.isolated, + no_cache: self.no_cache, stdin_filename: self.stdin_filename, }, CliOverrides { @@ -542,6 +551,8 @@ impl FormatCommand { preview: resolve_bool_arg(self.preview, self.no_preview).map(PreviewMode::from), force_exclude: resolve_bool_arg(self.force_exclude, self.no_force_exclude), target_version: self.target_version, + cache_dir: self.cache_dir, + // Unsupported on the formatter CLI, but required on `Overrides`. ..CliOverrides::default() }, @@ -585,6 +596,7 @@ pub struct CheckArguments { #[allow(clippy::struct_excessive_bools)] pub struct FormatArguments { pub check: bool, + pub no_cache: bool, pub diff: bool, pub config: Option, pub files: Vec, diff --git a/crates/ruff_cli/src/cache.rs b/crates/ruff_cli/src/cache.rs index f6595c43370a52..210931694c38d9 100644 --- a/crates/ruff_cli/src/cache.rs +++ b/crates/ruff_cli/src/cache.rs @@ -1,27 +1,33 @@ use std::collections::HashMap; +use std::fmt::Debug; use std::fs::{self, File}; use std::hash::Hasher; use std::io::{self, BufReader, BufWriter, Write}; +use std::os::unix::fs::PermissionsExt; use std::path::{Path, PathBuf}; use std::sync::atomic::{AtomicU64, Ordering}; use std::sync::Mutex; use std::time::{Duration, SystemTime}; use anyhow::{Context, Result}; +use filetime::FileTime; +use itertools::Itertools; +use log::{debug, error}; +use rayon::iter::ParallelIterator; +use rayon::iter::{IntoParallelIterator, ParallelBridge}; use rustc_hash::FxHashMap; use serde::{Deserialize, Serialize}; use ruff_cache::{CacheKey, CacheKeyHasher}; -use ruff_diagnostics::{DiagnosticKind, Fix}; -use ruff_linter::message::Message; use ruff_linter::warn_user; -use ruff_notebook::NotebookIndex; -use ruff_python_ast::imports::ImportMap; -use ruff_source_file::SourceFileBuilder; -use ruff_text_size::{TextRange, TextSize}; +use ruff_macros::CacheKey; +use ruff_workspace::resolver::{PyprojectConfig, PyprojectDiscoveryStrategy, Resolver}; use ruff_workspace::Settings; -use crate::diagnostics::Diagnostics; +use crate::cache; + +pub(crate) mod format; +pub(crate) mod lint; /// Maximum duration for which we keep a file in cache that hasn't been seen. const MAX_LAST_SEEN: Duration = Duration::from_secs(30 * 24 * 60 * 60); // 30 days. @@ -31,6 +37,31 @@ pub(crate) type RelativePath = Path; /// [`PathBuf`] that is relative to the package root in [`PackageCache`]. pub(crate) type RelativePathBuf = PathBuf; +#[derive(CacheKey)] +pub(crate) struct FileCacheKey { + /// Timestamp when the file was last modified before the (cached) check. + file_last_modified: FileTime, + /// Permissions of the file before the (cached) check. + file_permissions_mode: u32, +} + +impl FileCacheKey { + pub(crate) fn from_path(path: &Path) -> io::Result { + // Construct a cache key for the file + let metadata = path.metadata()?; + + #[cfg(unix)] + let permissions = metadata.permissions().mode(); + #[cfg(windows)] + let permissions: u32 = metadata.permissions().readonly().into(); + + Ok(FileCacheKey { + file_last_modified: FileTime::from_last_modification_time(&metadata), + file_permissions_mode: permissions, + }) + } +} + /// Cache. /// /// `Cache` holds everything required to display the diagnostics for a single @@ -40,23 +71,26 @@ pub(crate) type RelativePathBuf = PathBuf; /// This type manages the cache file, reading it from disk and writing it back /// to disk (if required). #[derive(Debug)] -pub(crate) struct Cache { +pub(crate) struct Cache { /// Location of the cache. path: PathBuf, /// Package cache read from disk. - package: PackageCache, + package: PackageCache, /// Changes made compared to the (current) `package`. /// /// Files that are linted, but are not in `package.files` or are in /// `package.files` but are outdated. This gets merged with `package.files` /// when the cache is written back to disk in [`Cache::store`]. - new_files: Mutex>, + new_files: Mutex>>, /// The "current" timestamp used as cache for the updates of /// [`FileCache::last_seen`] last_seen_cache: u64, } -impl Cache { +impl Cache +where + for<'a> Data: Serialize + Deserialize<'a>, +{ /// Open or create a new cache. /// /// `package_root` is the path to root of the package that is contained @@ -65,12 +99,12 @@ impl Cache { /// /// Finally `settings` is used to ensure we don't open a cache for different /// settings. It also defines the directory where to store the cache. - pub(crate) fn open(package_root: PathBuf, settings: &Settings) -> Cache { + pub(crate) fn open(name: &str, package_root: PathBuf, settings: &Settings) -> Self { debug_assert!(package_root.is_absolute(), "package root not canonicalized"); let mut buf = itoa::Buffer::new(); let key = Path::new(buf.format(cache_key(&package_root, settings))); - let path = PathBuf::from_iter([&settings.cache_dir, Path::new("content"), key]); + let path = PathBuf::from_iter([&settings.cache_dir, Path::new(name), key]); let file = match File::open(&path) { Ok(file) => file, @@ -84,7 +118,8 @@ impl Cache { } }; - let mut package: PackageCache = match bincode::deserialize_from(BufReader::new(file)) { + let mut package: PackageCache = match bincode::deserialize_from(BufReader::new(file)) + { Ok(package) => package, Err(err) => { warn_user!("Failed parse cache file '{}': {err}", path.display()); @@ -105,7 +140,7 @@ impl Cache { } /// Create an empty `Cache`. - fn empty(path: PathBuf, package_root: PathBuf) -> Cache { + fn empty(path: PathBuf, package_root: PathBuf) -> Self { let package = PackageCache { package_root, files: HashMap::new(), @@ -114,7 +149,7 @@ impl Cache { } #[allow(clippy::cast_possible_truncation)] - fn new(path: PathBuf, package: PackageCache) -> Cache { + fn new(path: PathBuf, package: PackageCache) -> Self { Cache { path, package, @@ -169,7 +204,10 @@ impl Cache { /// /// This returns `None` if `key` differs from the cached key or if the /// cache doesn't contain results for the file. - pub(crate) fn get(&self, path: &RelativePath, key: &T) -> Option<&FileCache> { + pub(crate) fn get(&self, path: &RelativePath, key: &FileCacheKey) -> Option<&Data> + where + Data: Debug, + { let file = self.package.files.get(path)?; let mut hasher = CacheKeyHasher::new(); @@ -182,51 +220,18 @@ impl Cache { file.last_seen.store(self.last_seen_cache, Ordering::SeqCst); - Some(file) + Some(&file.data) } /// Add or update a file cache at `path` relative to the package root. - pub(crate) fn update( - &self, - path: RelativePathBuf, - key: T, - messages: &[Message], - imports: &ImportMap, - notebook_index: Option<&NotebookIndex>, - ) { - let source = if let Some(msg) = messages.first() { - msg.file.source_text().to_owned() - } else { - String::new() // No messages, no need to keep the source! - }; - - let messages = messages - .iter() - .map(|msg| { - // Make sure that all message use the same source file. - assert!( - msg.file == messages.first().unwrap().file, - "message uses a different source file" - ); - CacheMessage { - kind: msg.kind.clone(), - range: msg.range, - fix: msg.fix.clone(), - noqa_offset: msg.noqa_offset, - } - }) - .collect(); - + pub(crate) fn update(&self, path: RelativePathBuf, key: &FileCacheKey, data: Data) { let mut hasher = CacheKeyHasher::new(); key.cache_key(&mut hasher); let file = FileCache { key: hasher.finish(), last_seen: AtomicU64::new(self.last_seen_cache), - imports: imports.clone(), - messages, - source, - notebook_index: notebook_index.cloned(), + data, }; self.new_files.lock().unwrap().insert(path, file); } @@ -234,19 +239,19 @@ impl Cache { /// On disk representation of a cache of a package. #[derive(Deserialize, Debug, Serialize)] -struct PackageCache { +struct PackageCache { /// Path to the root of the package. /// /// Usually this is a directory, but it can also be a single file in case of /// single file "packages", e.g. scripts. package_root: PathBuf, /// Mapping of source file path to it's cached data. - files: HashMap, + files: HashMap>, } /// On disk representation of the cache per source file. #[derive(Deserialize, Debug, Serialize)] -pub(crate) struct FileCache { +pub(crate) struct FileCache { /// Key that determines if the cached item is still valid. key: u64, /// Timestamp when we last linted this file. @@ -254,55 +259,8 @@ pub(crate) struct FileCache { /// Represented as the number of milliseconds since Unix epoch. This will /// break in 1970 + ~584 years (~2554). last_seen: AtomicU64, - /// Imports made. - imports: ImportMap, - /// Diagnostic messages. - messages: Vec, - /// Source code of the file. - /// - /// # Notes - /// - /// This will be empty if `messages` is empty. - source: String, - /// Notebook index if this file is a Jupyter Notebook. - notebook_index: Option, -} -impl FileCache { - /// Convert the file cache into `Diagnostics`, using `path` as file name. - pub(crate) fn as_diagnostics(&self, path: &Path) -> Diagnostics { - let messages = if self.messages.is_empty() { - Vec::new() - } else { - let file = SourceFileBuilder::new(path.to_string_lossy(), &*self.source).finish(); - self.messages - .iter() - .map(|msg| Message { - kind: msg.kind.clone(), - range: msg.range, - fix: msg.fix.clone(), - file: file.clone(), - noqa_offset: msg.noqa_offset, - }) - .collect() - }; - let notebook_indexes = if let Some(notebook_index) = self.notebook_index.as_ref() { - FxHashMap::from_iter([(path.to_string_lossy().to_string(), notebook_index.clone())]) - } else { - FxHashMap::default() - }; - Diagnostics::new(messages, self.imports.clone(), notebook_indexes) - } -} - -/// On disk representation of a diagnostic message. -#[derive(Deserialize, Debug, Serialize)] -struct CacheMessage { - kind: DiagnosticKind, - /// Range into the message's [`FileCache::source`]. - range: TextRange, - fix: Option, - noqa_offset: TextSize, + data: Data, } /// Returns a hash key based on the `package_root`, `settings` and the crate @@ -316,9 +274,9 @@ fn cache_key(package_root: &Path, settings: &Settings) -> u64 { } /// Initialize the cache at the specified `Path`. -pub(crate) fn init(path: &Path) -> Result<()> { +pub(crate) fn init(name: &str, path: &Path) -> Result<()> { // Create the cache directories. - fs::create_dir_all(path.join("content"))?; + fs::create_dir_all(path.join(name))?; // Add the CACHEDIR.TAG. if !cachedir::is_tagged(path)? { @@ -335,40 +293,139 @@ pub(crate) fn init(path: &Path) -> Result<()> { Ok(()) } +pub(crate) trait PackageCaches { + fn get(&self, package_root: &Path) -> Option<&Cache>; + + fn store(self) -> anyhow::Result<()>; +} + +impl PackageCaches for Option +where + T: PackageCaches, +{ + fn get(&self, package_root: &Path) -> Option<&Cache> { + match self { + None => None, + Some(caches) => caches.get(package_root), + } + } + + fn store(self) -> Result<()> { + match self { + None => Ok(()), + Some(caches) => caches.store(), + } + } +} + +pub(crate) struct PackageCacheMap<'a, Data>(FxHashMap<&'a Path, Cache>); + +impl<'a, Data> PackageCacheMap<'a, Data> +where + for<'b> Data: Send + Serialize + Deserialize<'b>, +{ + pub(crate) fn init( + name: &str, + pyproject_config: &PyprojectConfig, + package_roots: &FxHashMap<&'a Path, Option<&'a Path>>, + resolver: &Resolver, + ) -> Self { + fn init_cache(name: &str, path: &Path) { + if let Err(e) = cache::init(name, path) { + error!("Failed to initialize cache at {}: {e:?}", path.display()); + } + } + + match pyproject_config.strategy { + PyprojectDiscoveryStrategy::Fixed => { + init_cache(name, &pyproject_config.settings.cache_dir); + } + PyprojectDiscoveryStrategy::Hierarchical => { + for settings in + std::iter::once(&pyproject_config.settings).chain(resolver.settings()) + { + init_cache(name, &settings.cache_dir); + } + } + } + + Self( + package_roots + .iter() + .map(|(package, package_root)| package_root.unwrap_or(package)) + .unique() + .par_bridge() + .map(|cache_root| { + let settings = resolver.resolve(cache_root, pyproject_config); + let cache = Cache::open(name, cache_root.to_path_buf(), settings); + (cache_root, cache) + }) + .collect(), + ) + } +} + +impl PackageCaches for PackageCacheMap<'_, Data> +where + for<'a> Data: Send + Serialize + Deserialize<'a>, +{ + fn get(&self, package_root: &Path) -> Option<&Cache> { + let cache = self.0.get(package_root); + + if cache.is_none() { + debug!("No cache found for {}", package_root.display()); + } + + cache + } + + fn store(self) -> Result<()> { + self.0 + .into_par_iter() + .try_for_each(|(_, cache)| cache.store()) + } +} + #[cfg(test)] mod tests { - use filetime::{set_file_mtime, FileTime}; - use ruff_linter::settings::types::UnsafeFixes; use std::env::temp_dir; use std::fs; use std::io; use std::io::Write; use std::path::{Path, PathBuf}; + use std::sync::atomic::AtomicU64; use std::time::SystemTime; + use anyhow::Result; + use filetime::{set_file_mtime, FileTime}; use itertools::Itertools; + use serde::{Deserialize, Serialize}; + use test_case::test_case; + use ruff_cache::CACHE_DIR_NAME; use ruff_linter::settings::flags; + use ruff_linter::settings::types::UnsafeFixes; + use ruff_linter::source_kind::SourceKind; + use ruff_python_ast::imports::ImportMap; + use ruff_python_ast::PySourceType; + use ruff_workspace::Settings; + use crate::cache::format::FormatCache; + use crate::cache::lint::{LintCache, LintCacheData}; use crate::cache::RelativePathBuf; - use crate::cache::{self, Cache, FileCache}; + use crate::cache::{self, FileCache}; + use crate::commands::format::{ + format_path, format_source, FormatCommandError, FormatMode, FormatResult, + }; use crate::diagnostics::{lint_path, Diagnostics}; - use std::sync::atomic::AtomicU64; - - use anyhow::Result; - use ruff_python_ast::imports::ImportMap; - - use ruff_workspace::Settings; - use test_case::test_case; - #[test_case("../ruff_linter/resources/test/fixtures", "ruff_tests/cache_same_results_ruff_linter"; "ruff_linter_fixtures")] #[test_case("../ruff_notebook/resources/test/fixtures", "ruff_tests/cache_same_results_ruff_notebook"; "ruff_notebook_fixtures")] fn same_results(package_root: &str, cache_dir_path: &str) { let mut cache_dir = temp_dir(); cache_dir.push(cache_dir_path); let _ = fs::remove_dir_all(&cache_dir); - cache::init(&cache_dir).unwrap(); + cache::init("lint", &cache_dir).unwrap(); let settings = Settings { cache_dir, @@ -376,7 +433,7 @@ mod tests { }; let package_root = fs::canonicalize(package_root).unwrap(); - let cache = Cache::open(package_root.clone(), &settings); + let cache = LintCache::open("lint", package_root.clone(), &settings); assert_eq!(cache.new_files.lock().unwrap().len(), 0); let mut paths = Vec::new(); @@ -429,7 +486,7 @@ mod tests { cache.store().unwrap(); - let cache = Cache::open(package_root.clone(), &settings); + let cache = LintCache::open("lint", package_root.clone(), &settings); assert_ne!(cache.package.files.len(), 0); parse_errors.sort(); @@ -470,12 +527,12 @@ mod tests { let source: &[u8] = b"a = 1\n\n__all__ = list([\"a\", \"b\"])\n"; let test_cache = TestCache::new("cache_adds_file_on_lint"); - let cache = test_cache.open(); + let cache: LintCache = test_cache.open("lint"); test_cache.write_source_file("source.py", source); assert_eq!(cache.new_files.lock().unwrap().len(), 0); cache.store().unwrap(); - let cache = test_cache.open(); + let cache: LintCache = test_cache.open("lint"); test_cache .lint_file_with_cache("source.py", &cache) @@ -490,24 +547,24 @@ mod tests { } #[test] - fn cache_adds_files_on_lint() { + fn cache_adds_files_on_format() { let source: &[u8] = b"a = 1\n\n__all__ = list([\"a\", \"b\"])\n"; - let test_cache = TestCache::new("cache_adds_files_on_lint"); - let cache = test_cache.open(); + let test_cache = TestCache::new("cache_adds_files_on_format"); + let cache: FormatCache = test_cache.open("format"); test_cache.write_source_file("source_1.py", source); test_cache.write_source_file("source_2.py", source); assert_eq!(cache.new_files.lock().unwrap().len(), 0); cache.store().unwrap(); - let cache = test_cache.open(); + let cache: FormatCache = test_cache.open("format"); test_cache - .lint_file_with_cache("source_1.py", &cache) - .expect("Failed to lint test file"); + .format_file_with_cache("source_1.py", &cache) + .expect("Failed to format test file"); test_cache - .lint_file_with_cache("source_2.py", &cache) - .expect("Failed to lint test file"); + .format_file_with_cache("source_2.py", &cache) + .expect("Failed to format test file"); assert_eq!( cache.new_files.lock().unwrap().len(), 2, @@ -522,7 +579,7 @@ mod tests { let source: &[u8] = b"a = 1\n\n__all__ = list([\"a\", \"b\"])\n"; let test_cache = TestCache::new("cache_invalidated_on_file_modified_time"); - let cache = test_cache.open(); + let cache: LintCache = test_cache.open("lint"); let source_path = test_cache.write_source_file("source.py", source); assert_eq!(cache.new_files.lock().unwrap().len(), 0); @@ -531,7 +588,7 @@ mod tests { .expect("Failed to lint test file"); cache.store().unwrap(); - let cache = test_cache.open(); + let cache: LintCache = test_cache.open("lint"); // Update the modified time of the file to a time in the future set_file_mtime( @@ -581,7 +638,7 @@ mod tests { let source: &[u8] = b"a = 1\n\n__all__ = list([\"a\", \"b\"])\n"; let test_cache = TestCache::new("cache_invalidated_on_permission_change"); - let cache = test_cache.open(); + let cache: LintCache = test_cache.open("lint"); let path = test_cache.write_source_file("source.py", source); assert_eq!(cache.new_files.lock().unwrap().len(), 0); @@ -590,7 +647,7 @@ mod tests { .unwrap(); cache.store().unwrap(); - let cache = test_cache.open(); + let cache: LintCache = test_cache.open("lint"); // Flip the permissions on the file #[cfg(unix)] @@ -617,7 +674,7 @@ mod tests { #[test] fn cache_removes_stale_files_on_store() { let test_cache = TestCache::new("cache_removes_stale_files_on_store"); - let mut cache = test_cache.open(); + let mut cache: LintCache = test_cache.open("lint"); // Add a file to the cache that hasn't been linted or seen since the '70s! let old_path_key = RelativePathBuf::from("old.py"); @@ -626,10 +683,12 @@ mod tests { FileCache { key: 123, last_seen: AtomicU64::new(123), - imports: ImportMap::new(), - messages: Vec::new(), - source: String::new(), - notebook_index: None, + data: LintCacheData { + imports: ImportMap::new(), + messages: Vec::new(), + source: String::new(), + notebook_index: None, + }, }, ); @@ -646,10 +705,11 @@ mod tests { // Storing the cache should remove the old (`old.py`) file. cache.store().unwrap(); // So we when we open the cache again it shouldn't contain `old.py`. - let cache = test_cache.open(); + let cache: LintCache = test_cache.open("lint"); - assert!( - cache.package.files.keys().collect_vec() == vec![&new_path_key], + assert_eq!( + cache.package.files.keys().collect_vec(), + vec![&new_path_key], "Only the new file should be present" ); } @@ -660,11 +720,11 @@ mod tests { } impl TestCache { - fn new(name: &str) -> Self { + fn new(test_case: &str) -> Self { // Build a new cache directory and clear it let mut test_dir = temp_dir(); test_dir.push("ruff_tests/cache"); - test_dir.push(name); + test_dir.push(test_case); let _ = fs::remove_dir_all(&test_dir); @@ -672,7 +732,8 @@ mod tests { let cache_dir = test_dir.join("cache"); let package_root = test_dir.join("package"); - cache::init(&cache_dir).unwrap(); + cache::init("lint", &cache_dir).unwrap(); + cache::init("format", &cache_dir).unwrap(); fs::create_dir(package_root.clone()).unwrap(); let settings = Settings { @@ -699,14 +760,17 @@ mod tests { path } - fn open(&self) -> Cache { - Cache::open(self.package_root.clone(), &self.settings) + fn open(&self, name: &str) -> cache::Cache + where + for<'a> Data: Serialize + Deserialize<'a>, + { + cache::Cache::open(name, self.package_root.clone(), &self.settings) } fn lint_file_with_cache( &self, path: &str, - cache: &Cache, + cache: &LintCache, ) -> Result { lint_path( &self.package_root.join(path), @@ -718,6 +782,22 @@ mod tests { UnsafeFixes::Enabled, ) } + + fn format_file_with_cache( + &self, + path: &str, + cache: &FormatCache, + ) -> Result { + let file_path = self.package_root.join(path); + format_path( + &file_path, + &self.settings.formatter, + &SourceKind::Python(std::fs::read_to_string(&file_path).unwrap()), + PySourceType::Python, + FormatMode::Write, + Some(cache), + ) + } } impl Drop for TestCache { diff --git a/crates/ruff_cli/src/cache/format.rs b/crates/ruff_cli/src/cache/format.rs new file mode 100644 index 00000000000000..8d11a31b62961e --- /dev/null +++ b/crates/ruff_cli/src/cache/format.rs @@ -0,0 +1,3 @@ +use crate::cache::Cache; + +pub(crate) type FormatCache = Cache<()>; diff --git a/crates/ruff_cli/src/cache/lint.rs b/crates/ruff_cli/src/cache/lint.rs new file mode 100644 index 00000000000000..3a3c3591c1e999 --- /dev/null +++ b/crates/ruff_cli/src/cache/lint.rs @@ -0,0 +1,106 @@ +use std::path::Path; + +use rustc_hash::FxHashMap; +use serde::{Deserialize, Serialize}; + +use ruff_diagnostics::{DiagnosticKind, Fix}; +use ruff_linter::message::Message; +use ruff_notebook::NotebookIndex; +use ruff_python_ast::imports::ImportMap; +use ruff_source_file::SourceFileBuilder; +use ruff_text_size::{TextRange, TextSize}; + +use crate::cache::Cache; +use crate::diagnostics::Diagnostics; + +pub(crate) type LintCache = Cache; + +#[derive(Deserialize, Debug, Serialize)] +pub(crate) struct LintCacheData { + /// Imports made. + pub(super) imports: ImportMap, + /// Diagnostic messages. + pub(super) messages: Vec, + /// Source code of the file. + /// + /// # Notes + /// + /// This will be empty if `messages` is empty. + pub(super) source: String, + /// Notebook index if this file is a Jupyter Notebook. + pub(super) notebook_index: Option, +} + +impl LintCacheData { + pub(crate) fn from_messages( + messages: &[Message], + imports: ImportMap, + notebook_index: Option, + ) -> Self { + let source = if let Some(msg) = messages.first() { + msg.file.source_text().to_owned() + } else { + String::new() // No messages, no need to keep the source! + }; + + let messages = messages + .iter() + .map(|msg| { + // Make sure that all message use the same source file. + assert_eq!( + msg.file, + messages.first().unwrap().file, + "message uses a different source file" + ); + CacheMessage { + kind: msg.kind.clone(), + range: msg.range, + fix: msg.fix.clone(), + noqa_offset: msg.noqa_offset, + } + }) + .collect(); + + Self { + imports, + messages, + source, + notebook_index, + } + } + + /// Convert the file cache into `Diagnostics`, using `path` as file name. + pub(crate) fn as_diagnostics(&self, path: &Path) -> Diagnostics { + let messages = if self.messages.is_empty() { + Vec::new() + } else { + let file = SourceFileBuilder::new(path.to_string_lossy(), &*self.source).finish(); + self.messages + .iter() + .map(|msg| Message { + kind: msg.kind.clone(), + range: msg.range, + fix: msg.fix.clone(), + file: file.clone(), + noqa_offset: msg.noqa_offset, + }) + .collect() + }; + let notebook_indexes = if let Some(notebook_index) = self.notebook_index.as_ref() { + FxHashMap::from_iter([(path.to_string_lossy().to_string(), notebook_index.clone())]) + } else { + FxHashMap::default() + }; + Diagnostics::new(messages, self.imports.clone(), notebook_indexes) + } +} + +/// On disk representation of a diagnostic message. +#[derive(Deserialize, Debug, Serialize)] +pub(super) struct CacheMessage { + kind: DiagnosticKind, + /// Range into the message's [`FileCache::source`]. + range: TextRange, + fix: Option, + noqa_offset: TextSize, +} diff --git a/crates/ruff_cli/src/commands/check.rs b/crates/ruff_cli/src/commands/check.rs index 55e607e2df9119..74b21c34ac26fb 100644 --- a/crates/ruff_cli/src/commands/check.rs +++ b/crates/ruff_cli/src/commands/check.rs @@ -1,4 +1,3 @@ -use std::collections::HashMap; use std::fmt::Write; use std::io; use std::path::{Path, PathBuf}; @@ -7,28 +6,27 @@ use std::time::Instant; use anyhow::Result; use colored::Colorize; use ignore::Error; -use itertools::Itertools; use log::{debug, error, warn}; #[cfg(not(target_family = "wasm"))] use rayon::prelude::*; -use ruff_linter::settings::types::UnsafeFixes; use rustc_hash::FxHashMap; use ruff_diagnostics::Diagnostic; use ruff_linter::message::Message; use ruff_linter::registry::Rule; +use ruff_linter::settings::types::UnsafeFixes; use ruff_linter::settings::{flags, LinterSettings}; use ruff_linter::{fs, warn_user_once, IOError}; use ruff_python_ast::imports::ImportMap; use ruff_source_file::SourceFileBuilder; use ruff_text_size::{TextRange, TextSize}; use ruff_workspace::resolver::{ - match_exclusion, python_files_in_path, PyprojectConfig, PyprojectDiscoveryStrategy, - ResolvedFile, + match_exclusion, python_files_in_path, PyprojectConfig, ResolvedFile, }; use crate::args::CliOverrides; -use crate::cache::{self, Cache}; +use crate::cache::lint::LintCache; +use crate::cache::{PackageCacheMap, PackageCaches}; use crate::diagnostics::Diagnostics; use crate::panic::catch_unwind; @@ -52,28 +50,6 @@ pub(crate) fn check( return Ok(Diagnostics::default()); } - // Initialize the cache. - if cache.into() { - fn init_cache(path: &Path) { - if let Err(e) = cache::init(path) { - error!("Failed to initialize cache at {}: {e:?}", path.display()); - } - } - - match pyproject_config.strategy { - PyprojectDiscoveryStrategy::Fixed => { - init_cache(&pyproject_config.settings.cache_dir); - } - PyprojectDiscoveryStrategy::Hierarchical => { - for settings in - std::iter::once(&pyproject_config.settings).chain(resolver.settings()) - { - init_cache(&settings.cache_dir); - } - } - } - }; - // Discover the package root for each Python file. let package_roots = resolver.package_roots( &paths @@ -85,19 +61,16 @@ pub(crate) fn check( ); // Load the caches. - let caches = bool::from(cache).then(|| { - package_roots - .iter() - .map(|(package, package_root)| package_root.unwrap_or(package)) - .unique() - .par_bridge() - .map(|cache_root| { - let settings = resolver.resolve(cache_root, pyproject_config); - let cache = Cache::open(cache_root.to_path_buf(), settings); - (cache_root, cache) - }) - .collect::>() - }); + let caches = if bool::from(cache) { + Some(PackageCacheMap::init( + "lint", + pyproject_config, + &package_roots, + &resolver, + )) + } else { + None + }; let start = Instant::now(); let diagnostics_per_file = paths.par_iter().filter_map(|resolved_file| { @@ -122,14 +95,7 @@ pub(crate) fn check( } let cache_root = package.unwrap_or_else(|| path.parent().unwrap_or(path)); - let cache = caches.as_ref().and_then(|caches| { - if let Some(cache) = caches.get(&cache_root) { - Some(cache) - } else { - debug!("No cache found for {}", cache_root.display()); - None - } - }); + let cache = caches.get(cache_root); lint_path( path, @@ -210,11 +176,7 @@ pub(crate) fn check( all_diagnostics.messages.sort(); // Store the caches. - if let Some(caches) = caches { - caches - .into_par_iter() - .try_for_each(|(_, cache)| cache.store())?; - } + caches.store()?; let duration = start.elapsed(); debug!("Checked {:?} files in: {:?}", checked_files, duration); @@ -228,7 +190,7 @@ fn lint_path( path: &Path, package: Option<&Path>, settings: &LinterSettings, - cache: Option<&Cache>, + cache: Option<&LintCache>, noqa: flags::Noqa, fix_mode: flags::FixMode, unsafe_fixes: UnsafeFixes, @@ -266,13 +228,12 @@ mod test { use std::os::unix::fs::OpenOptionsExt; use anyhow::Result; - - use ruff_linter::settings::types::UnsafeFixes; use rustc_hash::FxHashMap; use tempfile::TempDir; use ruff_linter::message::{Emitter, EmitterContext, TextEmitter}; use ruff_linter::registry::Rule; + use ruff_linter::settings::types::UnsafeFixes; use ruff_linter::settings::{flags, LinterSettings}; use ruff_workspace::resolver::{PyprojectConfig, PyprojectDiscoveryStrategy}; use ruff_workspace::Settings; diff --git a/crates/ruff_cli/src/commands/format.rs b/crates/ruff_cli/src/commands/format.rs index 21424ed421e75c..4b068bba49e4b0 100644 --- a/crates/ruff_cli/src/commands/format.rs +++ b/crates/ruff_cli/src/commands/format.rs @@ -10,23 +10,25 @@ use colored::Colorize; use itertools::Itertools; use log::error; use rayon::iter::Either::{Left, Right}; -use rayon::iter::{IntoParallelIterator, ParallelIterator}; +use rayon::iter::{IntoParallelRefIterator, ParallelIterator}; use similar::TextDiff; use thiserror::Error; use tracing::debug; use ruff_diagnostics::SourceMap; -use ruff_linter::fs; use ruff_linter::logging::LogLevel; use ruff_linter::source_kind::{SourceError, SourceKind}; use ruff_linter::warn_user_once; +use ruff_linter::{fs, warn_user}; use ruff_python_ast::{PySourceType, SourceType}; use ruff_python_formatter::{format_module_source, FormatModuleError}; use ruff_text_size::{TextLen, TextRange, TextSize}; -use ruff_workspace::resolver::{match_exclusion, python_files_in_path}; +use ruff_workspace::resolver::{match_exclusion, python_files_in_path, ResolvedFile}; use ruff_workspace::FormatterSettings; use crate::args::{CliOverrides, FormatArguments}; +use crate::cache::format::FormatCache; +use crate::cache::{FileCacheKey, PackageCacheMap, PackageCaches}; use crate::panic::{catch_unwind, PanicError}; use crate::resolve::resolve; use crate::ExitStatus; @@ -73,9 +75,35 @@ pub(crate) fn format( return Ok(ExitStatus::Success); } + // Discover the package root for each Python file. + let package_roots = resolver.package_roots( + &paths + .iter() + .flatten() + .map(ResolvedFile::path) + .collect::>(), + &pyproject_config, + ); + + let caches = if cli.no_cache { + None + } else { + // `--no-cache` doesn't respect code changes, and so is often confusing during + // development. + #[cfg(debug_assertions)] + warn_user!("Detected debug build without --no-cache."); + + Some(PackageCacheMap::init( + "format", + &pyproject_config, + &package_roots, + &resolver, + )) + }; + let start = Instant::now(); let (mut results, mut errors): (Vec<_>, Vec<_>) = paths - .into_par_iter() + .par_iter() .filter_map(|entry| { match entry { Ok(resolved_file) => { @@ -110,6 +138,13 @@ pub(crate) fn format( } }; + let package = path + .parent() + .and_then(|parent| package_roots.get(parent).copied()) + .flatten(); + let cache_root = package.unwrap_or_else(|| path.parent().unwrap_or(path)); + let cache = caches.get(cache_root); + Some( match catch_unwind(|| { format_path( @@ -118,21 +153,22 @@ pub(crate) fn format( &unformatted, source_type, mode, + cache, ) }) { Ok(inner) => inner.map(|result| FormatPathResult { - path: resolved_file.into_path(), + path: resolved_file.path().to_path_buf(), unformatted, result, }), Err(error) => Err(FormatCommandError::Panic( - Some(resolved_file.into_path()), + Some(resolved_file.path().to_path_buf()), error, )), }, ) } - Err(err) => Some(Err(FormatCommandError::Ignore(err))), + Err(err) => Some(Err(FormatCommandError::Ignore(err.clone()))), } }) .partition_map(|result| match result { @@ -168,6 +204,8 @@ pub(crate) fn format( duration ); + caches.store()?; + // Report on any errors. for error in &errors { error!("{error}"); @@ -214,13 +252,22 @@ pub(crate) fn format( /// Format the file at the given [`Path`]. #[tracing::instrument(level="debug", skip_all, fields(path = %path.display()))] -fn format_path( +pub(crate) fn format_path( path: &Path, settings: &FormatterSettings, unformatted: &SourceKind, source_type: PySourceType, mode: FormatMode, + cache: Option<&FormatCache>, ) -> Result { + if let Some(cache) = cache { + if let Ok(cache_key) = FileCacheKey::from_path(path) { + if cache.get(path, &cache_key).is_some() { + return Ok(FormatResult::Unchanged); + } + } + } + // Format the source. let format_result = match format_source(unformatted, source_type, Some(path), settings)? { FormattedSource::Formatted(formatted) => match mode { @@ -231,13 +278,29 @@ fn format_path( formatted .write(&mut writer) .map_err(|err| FormatCommandError::Write(Some(path.to_path_buf()), err))?; + + if let Some(cache) = cache { + if let Ok(cache_key) = FileCacheKey::from_path(path) { + cache.update(path.to_path_buf(), &cache_key, ()); + } + } + FormatResult::Formatted } FormatMode::Check => FormatResult::Formatted, FormatMode::Diff => FormatResult::Diff(formatted), }, - FormattedSource::Unchanged => FormatResult::Unchanged, + FormattedSource::Unchanged => { + if let Some(cache) = cache { + if let Ok(cache_key) = FileCacheKey::from_path(path) { + cache.update(path.to_path_buf(), &cache_key, ()); + } + } + + FormatResult::Unchanged + } }; + Ok(format_result) } diff --git a/crates/ruff_cli/src/diagnostics.rs b/crates/ruff_cli/src/diagnostics.rs index c8af1330adac48..b9fb70fad14143 100644 --- a/crates/ruff_cli/src/diagnostics.rs +++ b/crates/ruff_cli/src/diagnostics.rs @@ -3,16 +3,14 @@ use std::fs::File; use std::io; use std::ops::{Add, AddAssign}; -#[cfg(unix)] -use std::os::unix::fs::PermissionsExt; use std::path::Path; use anyhow::{Context, Result}; use colored::Colorize; -use filetime::FileTime; use log::{debug, error, warn}; use rustc_hash::FxHashMap; +use crate::cache::FileCacheKey; use ruff_diagnostics::Diagnostic; use ruff_linter::linter::{lint_fix, lint_only, FixTable, FixerResult, LinterResult}; use ruff_linter::logging::DisplayParseError; @@ -23,7 +21,6 @@ use ruff_linter::settings::types::UnsafeFixes; use ruff_linter::settings::{flags, LinterSettings}; use ruff_linter::source_kind::{SourceError, SourceKind}; use ruff_linter::{fs, IOError, SyntaxError}; -use ruff_macros::CacheKey; use ruff_notebook::{Notebook, NotebookError, NotebookIndex}; use ruff_python_ast::imports::ImportMap; use ruff_python_ast::{SourceType, TomlSourceType}; @@ -31,32 +28,7 @@ use ruff_source_file::{LineIndex, SourceCode, SourceFileBuilder}; use ruff_text_size::{TextRange, TextSize}; use ruff_workspace::Settings; -use crate::cache::Cache; - -#[derive(CacheKey)] -pub(crate) struct FileCacheKey { - /// Timestamp when the file was last modified before the (cached) check. - file_last_modified: FileTime, - /// Permissions of the file before the (cached) check. - file_permissions_mode: u32, -} - -impl FileCacheKey { - fn from_path(path: &Path) -> io::Result { - // Construct a cache key for the file - let metadata = path.metadata()?; - - #[cfg(unix)] - let permissions = metadata.permissions().mode(); - #[cfg(windows)] - let permissions: u32 = metadata.permissions().readonly().into(); - - Ok(FileCacheKey { - file_last_modified: FileTime::from_last_modification_time(&metadata), - file_permissions_mode: permissions, - }) - } -} +use crate::cache::lint::{LintCache, LintCacheData}; #[derive(Debug, Default, PartialEq)] pub(crate) struct Diagnostics { @@ -212,7 +184,7 @@ pub(crate) fn lint_path( path: &Path, package: Option<&Path>, settings: &LinterSettings, - cache: Option<&Cache>, + cache: Option<&LintCache>, noqa: flags::Noqa, fix_mode: flags::FixMode, unsafe_fixes: UnsafeFixes, @@ -334,10 +306,12 @@ pub(crate) fn lint_path( if parse_error.is_none() { cache.update( relative_path.to_owned(), - key, - &messages, - &imports, - source_kind.as_ipy_notebook().map(Notebook::index), + &key, + LintCacheData::from_messages( + &messages, + imports.clone(), + source_kind.as_ipy_notebook().map(Notebook::index).cloned(), + ), ); } } diff --git a/crates/ruff_cli/tests/format.rs b/crates/ruff_cli/tests/format.rs index 5fc8fc94fe3de0..6ec13b34356816 100644 --- a/crates/ruff_cli/tests/format.rs +++ b/crates/ruff_cli/tests/format.rs @@ -110,7 +110,7 @@ fn mixed_line_endings() -> Result<()> { assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME)) .current_dir(tempdir.path()) - .args(["format", "--diff", "--isolated"]) + .args(["format", "--no-cache", "--diff", "--isolated"]) .arg("."), @r###" success: true exit_code: 0 @@ -173,7 +173,7 @@ OTHER = "OTHER" assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME)) .current_dir(tempdir.path()) - .args(["format", "--check", "--config"]) + .args(["format", "--no-cache", "--check", "--config"]) .arg(ruff_toml.file_name().unwrap()) // Explicitly pass test.py, should be formatted regardless of it being excluded by format.exclude .arg(test_path.file_name().unwrap()) @@ -322,7 +322,7 @@ format = "json" #[test] fn test_diff() { - let args = ["format", "--isolated", "--diff"]; + let args = ["format", "--no-cache", "--isolated", "--diff"]; let fixtures = Path::new("resources").join("test").join("fixtures"); let paths = [ fixtures.join("unformatted.py"), @@ -365,7 +365,7 @@ fn test_diff() { #[test] fn test_diff_no_change() { - let args = ["format", "--isolated", "--diff"]; + let args = ["format", "--no-cache", "--isolated", "--diff"]; let fixtures = Path::new("resources").join("test").join("fixtures"); let paths = [fixtures.join("unformatted.py")]; insta::with_settings!({filters => vec![ diff --git a/crates/ruff_cli/tests/integration_test.rs b/crates/ruff_cli/tests/integration_test.rs index 36a25bd29e4ca5..d6b7b712b95ba9 100644 --- a/crates/ruff_cli/tests/integration_test.rs +++ b/crates/ruff_cli/tests/integration_test.rs @@ -1305,7 +1305,7 @@ extend-unsafe-fixes = ["UP034"] .args([ "--output-format", "text", - "--no-cache", + "--no-cache", "--select", "F601,UP034", ]) @@ -1344,7 +1344,7 @@ extend-safe-fixes = ["F601"] .args([ "--output-format", "text", - "--no-cache", + "--no-cache", "--select", "F601,UP034", ]) diff --git a/ruff-main b/ruff-main new file mode 100755 index 00000000000000..be529b78c4f58a Binary files /dev/null and b/ruff-main differ