From fe1a82abc730a92111a1ff930a8287d1053b09bd Mon Sep 17 00:00:00 2001 From: Krishn Parasar <76171905+Krishn1412@users.noreply.github.com> Date: Thu, 12 Dec 2024 17:54:20 +0530 Subject: [PATCH 1/4] Adding max_length in secure_filename 1) adding max_length parameter which truncates the file but includes the ext. 2) This is presently set to None but can be set 255 characters. --- falcon/media/multipart.py | 9 +++++++++ falcon/util/misc.py | 13 +++++++++++-- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/falcon/media/multipart.py b/falcon/media/multipart.py index 99cd8785e..b4276eea9 100644 --- a/falcon/media/multipart.py +++ b/falcon/media/multipart.py @@ -592,6 +592,15 @@ class MultipartParseOptions: If the body part headers size exceeds this value, an instance of :class:`.MultipartParseError` will be raised. """ + max_filename_length: Optional[int] + """The maximum length of filenames in multipart forms (default ``None``). + + If the filename exceeds this value, it will be truncated while preserving + the file extension. Defaults to ``None`` for backward compatibility. + + .. note:: + Starting from Falcon 5.0, this will default to 255 characters. + """ media_handlers: Handlers """A dict-like object for configuring the media-types to handle. diff --git a/falcon/util/misc.py b/falcon/util/misc.py index ae17c12a1..62226831d 100644 --- a/falcon/util/misc.py +++ b/falcon/util/misc.py @@ -32,6 +32,7 @@ import re from typing import Any, Callable, Dict, List, Mapping, Optional, Tuple, Union import unicodedata +import os from falcon import status_codes from falcon.constants import PYPY @@ -336,7 +337,7 @@ def get_argnames(func: Callable[..., Any]) -> List[str]: return args -def secure_filename(filename: str) -> str: +def secure_filename(filename: str, max_length: int = None) -> str: """Sanitize the provided `filename` to contain only ASCII characters. Only ASCII alphanumerals, ``'.'``, ``'-'`` and ``'_'`` are allowed for @@ -357,6 +358,8 @@ def secure_filename(filename: str) -> str: Args: filename (str): Arbitrary filename input from the request, such as a multipart form filename field. + max_length (int, optional): Maximum length of the sanitized filename, + including the file extension. If None, no truncation is applied. Returns: str: The sanitized filename. @@ -373,7 +376,13 @@ def secure_filename(filename: str) -> str: filename = unicodedata.normalize('NFKD', filename) if filename.startswith('.'): filename = filename.replace('.', '_', 1) - return _UNSAFE_CHARS.sub('_', filename) + filename = _UNSAFE_CHARS.sub('_', filename) + + if max_length is not None and len(filename) > max_length: + name, ext = os.path.splitext(filename) + filename = name[:max_length - len(ext)] + ext + + return filename @_lru_cache_for_simple_logic(maxsize=64) From 626da2d0400e29546faa37c3dfc6ee7af9828dd2 Mon Sep 17 00:00:00 2001 From: Krishn Parasar <76171905+Krishn1412@users.noreply.github.com> Date: Fri, 13 Dec 2024 19:32:53 +0530 Subject: [PATCH 2/4] Updated code to handle test cases and comment --- falcon/util/misc.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/falcon/util/misc.py b/falcon/util/misc.py index 62226831d..0a512197c 100644 --- a/falcon/util/misc.py +++ b/falcon/util/misc.py @@ -29,10 +29,10 @@ import functools import http import inspect +import os import re from typing import Any, Callable, Dict, List, Mapping, Optional, Tuple, Union import unicodedata -import os from falcon import status_codes from falcon.constants import PYPY @@ -337,7 +337,7 @@ def get_argnames(func: Callable[..., Any]) -> List[str]: return args -def secure_filename(filename: str, max_length: int = None) -> str: +def secure_filename(filename: str, max_length: Optional[int] = None) -> str: """Sanitize the provided `filename` to contain only ASCII characters. Only ASCII alphanumerals, ``'.'``, ``'-'`` and ``'_'`` are allowed for @@ -380,7 +380,10 @@ def secure_filename(filename: str, max_length: int = None) -> str: if max_length is not None and len(filename) > max_length: name, ext = os.path.splitext(filename) - filename = name[:max_length - len(ext)] + ext + if max_length < len(ext): + filename = ext[:max_length] + else: + filename = name[: max_length - len(ext)] + ext return filename From 0c61a5eb20731b09426eb7d9d643ad074b10f824 Mon Sep 17 00:00:00 2001 From: Krishn Parasar <76171905+Krishn1412@users.noreply.github.com> Date: Tue, 17 Dec 2024 01:53:37 +0530 Subject: [PATCH 3/4] Updating logic and adding tests I have changed the logic as suggested in the review. For the unit tests, I have made direct function call to test the logic. --- falcon/util/misc.py | 6 +++--- tests/test_media_multipart.py | 9 +++++++++ 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/falcon/util/misc.py b/falcon/util/misc.py index 0a512197c..12a41ecd9 100644 --- a/falcon/util/misc.py +++ b/falcon/util/misc.py @@ -380,10 +380,10 @@ def secure_filename(filename: str, max_length: Optional[int] = None) -> str: if max_length is not None and len(filename) > max_length: name, ext = os.path.splitext(filename) - if max_length < len(ext): - filename = ext[:max_length] - else: + if max_length - len(ext) >= 1: filename = name[: max_length - len(ext)] + ext + else: + filename = filename[:max_length] return filename diff --git a/tests/test_media_multipart.py b/tests/test_media_multipart.py index b2ad8db3f..b56ad45f5 100644 --- a/tests/test_media_multipart.py +++ b/tests/test_media_multipart.py @@ -10,6 +10,7 @@ from falcon import testing from falcon.media.multipart import MultipartParseOptions from falcon.util import BufferedReader +from falcon.util.misc import secure_filename try: import msgpack @@ -747,6 +748,14 @@ def test_unsupported_charset(client): } +def test_max_length_filename(client): + filename = secure_filename('averylongfilename.txt', 17) + assert filename == 'averylongfile.txt' + + filename = secure_filename('file.withlongextension', 9) + assert filename == 'file.with' + + def test_filename_star(client): # NOTE(vytas): Generated by requests_toolbelt 0.9.1 on Py2 # (interestingly, one gets a "normal" filename on Py3.7). From c37a6b810b1f0c9709d71ca54c12637ee2046b12 Mon Sep 17 00:00:00 2001 From: Krishn Parasar <76171905+Krishn1412@users.noreply.github.com> Date: Mon, 23 Dec 2024 13:31:37 +0530 Subject: [PATCH 4/4] Updating test cases --- falcon/util/misc.py | 2 +- tests/test_media_multipart.py | 9 --------- tests/test_utils.py | 11 +++++++++++ 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/falcon/util/misc.py b/falcon/util/misc.py index 12a41ecd9..4d55887ec 100644 --- a/falcon/util/misc.py +++ b/falcon/util/misc.py @@ -358,7 +358,7 @@ def secure_filename(filename: str, max_length: Optional[int] = None) -> str: Args: filename (str): Arbitrary filename input from the request, such as a multipart form filename field. - max_length (int, optional): Maximum length of the sanitized filename, + max_length: Maximum length of the sanitized filename, including the file extension. If None, no truncation is applied. Returns: diff --git a/tests/test_media_multipart.py b/tests/test_media_multipart.py index b56ad45f5..b2ad8db3f 100644 --- a/tests/test_media_multipart.py +++ b/tests/test_media_multipart.py @@ -10,7 +10,6 @@ from falcon import testing from falcon.media.multipart import MultipartParseOptions from falcon.util import BufferedReader -from falcon.util.misc import secure_filename try: import msgpack @@ -748,14 +747,6 @@ def test_unsupported_charset(client): } -def test_max_length_filename(client): - filename = secure_filename('averylongfilename.txt', 17) - assert filename == 'averylongfile.txt' - - filename = secure_filename('file.withlongextension', 9) - assert filename == 'file.with' - - def test_filename_star(client): # NOTE(vytas): Generated by requests_toolbelt 0.9.1 on Py2 # (interestingly, one gets a "normal" filename on Py3.7). diff --git a/tests/test_utils.py b/tests/test_utils.py index 14da664f7..b93cfcf07 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -628,6 +628,17 @@ def test_misc_isascii(self): with pytest.warns(deprecation.DeprecatedWarning): assert misc.isascii('foobar') + @pytest.mark.parametrize( + 'filename,max_length,expected', + [ + ('averylongfilename.txt', 17, 'averylongfile.txt'), + ('short.txt', 10, 'short.txt'), + ('file.withlongextension', 9, 'file.with'), + ], + ) + def test_secure_filename_max_length(self, filename, max_length, expected): + assert misc.secure_filename(filename, max_length=max_length) == expected + @pytest.mark.parametrize( 'protocol,method',