From 79a309fe54dc6b7934fb72e9f31bcb58f2e9f547 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Fri, 31 May 2024 11:20:57 -0400 Subject: [PATCH 1/4] Add some assertions about malformed paths. --- tests/test_path.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tests/test_path.py b/tests/test_path.py index bbf39b2..53f8848 100644 --- a/tests/test_path.py +++ b/tests/test_path.py @@ -571,3 +571,21 @@ def test_getinfo_missing(self, alpharep): zipfile.Path(alpharep) with self.assertRaises(KeyError): alpharep.getinfo('does-not-exist') + + @__import__('pytest').mark.skip(reason="infinite loop") + def test_malformed_paths(self): + """ + Path should handle malformed paths. + """ + data = io.BytesIO() + zf = zipfile.ZipFile(data, "w") + zf.writestr("/one-slash.txt", b"content") + zf.writestr("//two-slash.txt", b"content") + zf.writestr("../parent.txt", b"content") + zf.filename = '' + root = zipfile.Path(zf) + assert list(map(str, root.iterdir())) == [ + 'one-slash.txt', + 'two-slash.txt', + 'parent.txt', + ] From 564fcc10cdbfdaecdb33688e149827465931c9e0 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Fri, 31 May 2024 11:56:42 -0400 Subject: [PATCH 2/4] Add SanitizedNames mixin. --- zipp/__init__.py | 62 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/zipp/__init__.py b/zipp/__init__.py index b08016c..b2f5013 100644 --- a/zipp/__init__.py +++ b/zipp/__init__.py @@ -86,6 +86,68 @@ def __setstate__(self, state): super().__init__(*args, **kwargs) +class SanitizedNames: + """ + ZipFile mix-in to ensure names are sanitized. + """ + + def namelist(self): + return list(map(self._sanitize, super().namelist())) + + @staticmethod + def _sanitize(name): + r""" + Ensure a relative path with posix separators and no dot names. + + Modeled after + https://github.com/python/cpython/blob/bcc1be39cb1d04ad9fc0bd1b9193d3972835a57c/Lib/zipfile/__init__.py#L1799-L1813 + but provides consistent cross-platform behavior. + + >>> san = SanitizedNames._sanitize + >>> san('/foo/bar') + 'foo/bar' + >>> san('//foo.txt') + 'foo.txt' + >>> san('foo/.././bar.txt') + 'foo/bar.txt' + >>> san('foo../.bar.txt') + 'foo../.bar.txt' + >>> san('\\foo\\bar.txt') + 'foo/bar.txt' + >>> san('D:\\foo.txt') + 'D/foo.txt' + >>> san('\\\\server\\share\\file.txt') + 'server/share/file.txt' + >>> san('\\\\?\\GLOBALROOT\\Volume3') + '?/GLOBALROOT/Volume3' + >>> san('\\\\.\\PhysicalDrive1\\root') + 'PhysicalDrive1/root' + + Retain any trailing slash. + >>> san('abc/') + 'abc/' + + Raises a ValueError if the result is empty. + >>> san('../..') + Traceback (most recent call last): + ... + ValueError: Empty filename + """ + + def allowed(part): + return part and part not in {'..', '.'} + + # Remove the drive letter. + # Don't use ntpath.splitdrive, because that also strips UNC paths + bare = re.sub('^([A-Z]):', r'\1', name, flags=re.IGNORECASE) + clean = bare.replace('\\', '/') + parts = clean.split('/') + joined = '/'.join(filter(allowed, parts)) + if not joined: + raise ValueError("Empty filename") + return joined + '/' * name.endswith('/') + + class CompleteDirs(InitializedState, zipfile.ZipFile): """ A ZipFile subclass that ensures that implied directories From 58115d2be968644ce71ce6bcc9b79826c82a1806 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Fri, 31 May 2024 12:24:39 -0400 Subject: [PATCH 3/4] Employ SanitizedNames in CompleteDirs. Fixes broken test. --- tests/test_path.py | 1 - zipp/__init__.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/test_path.py b/tests/test_path.py index 53f8848..ace8763 100644 --- a/tests/test_path.py +++ b/tests/test_path.py @@ -572,7 +572,6 @@ def test_getinfo_missing(self, alpharep): with self.assertRaises(KeyError): alpharep.getinfo('does-not-exist') - @__import__('pytest').mark.skip(reason="infinite loop") def test_malformed_paths(self): """ Path should handle malformed paths. diff --git a/zipp/__init__.py b/zipp/__init__.py index b2f5013..f41ae08 100644 --- a/zipp/__init__.py +++ b/zipp/__init__.py @@ -148,7 +148,7 @@ def allowed(part): return joined + '/' * name.endswith('/') -class CompleteDirs(InitializedState, zipfile.ZipFile): +class CompleteDirs(InitializedState, SanitizedNames, zipfile.ZipFile): """ A ZipFile subclass that ensures that implied directories are always included in the namelist. From c18417ed2953e181728a7dac07bff88a2190abf7 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Fri, 31 May 2024 12:30:05 -0400 Subject: [PATCH 4/4] Add news fragment. --- newsfragments/119.bugfix.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 newsfragments/119.bugfix.rst diff --git a/newsfragments/119.bugfix.rst b/newsfragments/119.bugfix.rst new file mode 100644 index 0000000..6c72e2d --- /dev/null +++ b/newsfragments/119.bugfix.rst @@ -0,0 +1 @@ +Improved handling of malformed zip files. \ No newline at end of file