diff --git a/poetry.lock b/poetry.lock index b9b3819b3..5e6d66e02 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. [[package]] name = "absolufy-imports" @@ -1646,17 +1646,6 @@ pillow = ">=6.2.0" pyparsing = ">=2.3.1" python-dateutil = ">=2.7" -[[package]] -name = "more-itertools" -version = "10.3.0" -description = "More routines for operating on iterables, beyond itertools" -optional = false -python-versions = ">=3.8" -files = [ - {file = "more-itertools-10.3.0.tar.gz", hash = "sha256:e5d93ef411224fbcef366a6e8ddc4c5781bc6359d43412a65dd5964e46111463"}, - {file = "more_itertools-10.3.0-py3-none-any.whl", hash = "sha256:ea6a02e24a9161e51faad17a8782b92a0df82c12c1c8886fec7f0c3fa1a1b320"}, -] - [[package]] name = "mypy" version = "0.991" @@ -1849,8 +1838,8 @@ files = [ [package.dependencies] numpy = [ {version = ">=1.20.3", markers = "python_version < \"3.10\""}, - {version = ">=1.21.0", markers = "python_version >= \"3.10\" and python_version < \"3.11\""}, {version = ">=1.23.2", markers = "python_version >= \"3.11\""}, + {version = ">=1.21.0", markers = "python_version >= \"3.10\" and python_version < \"3.11\""}, ] python-dateutil = ">=2.8.2" pytz = ">=2020.1" @@ -3715,4 +3704,4 @@ all = ["CairoSVG", "Flask", "better-exceptions", "datadog-api-client", "grafana- [metadata] lock-version = "2.0" python-versions = "^3.8, <3.12" -content-hash = "34fe45a868b57cab6892af478146f6dbf67ac94703274cc87af3ef201326a9bf" +content-hash = "7f99aff9c3e559e8bf47f5541646cbb47b38a8f6d384d442ec604bff660ae5be" diff --git a/pyproject.toml b/pyproject.toml index 5a0ffe959..adcd58044 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -72,7 +72,6 @@ prometheus-api-client = "0.5.4" requests = "^2.32.3" certifi = "^2023.7.22" regex = "2024.5.15" -more_itertools = { version = "^10.3", python = "<3.12" } 
@staticmethod
def trim_markdown(text: str, max_length: int, suffix: str = "...") -> str:
    """Truncate markdown to at most ``max_length`` characters, keeping code fences balanced.

    Only triple-backtick code fences are handled; other markdown constructs are
    truncated like plain text.

    Args:
        text: The markdown text to shorten.
        max_length: Maximum number of characters allowed in the result.
        suffix: Marker inserted where text was removed.

    Returns:
        ``text`` unchanged when it already fits. Otherwise a shortened string,
        never longer than ``max_length``. When the cut lands inside a code block
        the result ends with ``suffix`` followed by a closing fence; when there
        is no room to keep the opening fence and still close it, the unfinished
        block is dropped entirely.
    """
    if len(text) <= max_length:
        return text
    if max_length <= len(suffix):
        # Not even the suffix fits; clamp so a negative limit cannot produce
        # a from-the-end slice that is longer than requested.
        return suffix[: max(max_length, 0)]

    fence = "```"
    if fence not in text:
        return Transformer.apply_length_limit(text, max_length, suffix)

    fence_len = len(fence)
    truncate_index = max_length - len(suffix)

    # Edge case: the characters just past the cut contain part of a fence.
    # Back off a few extra characters so we never emit a mangled fence
    # such as "````".
    end_buffer_index = max(truncate_index - fence_len * 2 - 1, 0)
    if "`" in text[truncate_index:max_length] and fence in text[end_buffer_index:max_length]:
        truncate_index = end_buffer_index

    # An even number of removed fences means the cut is outside any code block.
    removed_fences = text.count(fence, truncate_index)
    if removed_fences % 2 == 0:
        return text[:truncate_index] + suffix

    # The cut is inside a code block: reserve room for a closing fence, but only
    # if the kept prefix still contains the (unclosed) opening fence.
    keep = truncate_index - fence_len
    if keep > 0 and text.count(fence, 0, keep) % 2 == 1:
        return text[:keep] + suffix + fence

    # Making room for the closing fence would cut into the opening fence (or
    # the index would go negative and slice from the end). Drop the unfinished
    # block instead: cut right before the last fence that begins ahead of the
    # truncation point.
    open_start = text.rfind(fence, 0, truncate_index + fence_len - 1)
    cut = open_start if open_start >= 0 else truncate_index
    return text[:cut] + suffix
truncator) diff --git a/src/robusta/utils/trim_markdown.py b/src/robusta/utils/trim_markdown.py deleted file mode 100644 index 10418b454..000000000 --- a/src/robusta/utils/trim_markdown.py +++ /dev/null @@ -1,57 +0,0 @@ -try: - from itertools import batched -except ImportError: # Python < 3.12 - from more_itertools import batched - -import regex - - -def trim_markdown(text: str, max_length: int, suffix: str = "...") -> str: - # This method of trimming markdown is not universal. It only takes care of correctly - # trimming block sections. Implementing a general truncation method for markdown that - # would handle all the possible tags in a correct way would be rather complex. - - trim_idx = max_length - len(suffix) - - if trim_idx <= 0: # The pathological cases. - return suffix[:max_length] - - # Process block quotes backwards in the input - for match_open, match_close in batched(regex.finditer("```", text, regex.REVERSE), 2): - open_start, open_end = match_close.span() - close_start, close_end = match_open.span() - if trim_idx >= close_end: - # Trimming point after this block quote - return text[:trim_idx] + suffix - elif trim_idx < open_start: - # Trimming point before this block quote - continue to the preceding block - continue - elif trim_idx >= open_start and trim_idx < open_start + 3: - # Trimming point inside the opening block quote tag - return text[:trim_idx].rstrip("`") + suffix - elif trim_idx >= close_start and trim_idx < close_end: - # Trimming point inside the closing block quote tag - if trim_idx - open_end >= 3: # Enough space to insert the closing tag - return text[:trim_idx - 3] + "```" + suffix - else: # Not enough space, strip the whole block - return text[:open_start] + suffix - elif trim_idx >= open_end and trim_idx < close_start: - # Trimming point inside the block quote - if trim_idx - open_end >= 3: # Enough space to insert the closing tag - return text[:trim_idx - 3] + "```" + suffix - else: # Not enough space, strip the whole block - 
return text[:open_start] + suffix - else: - # This should never happen - raise Exception( - f'Internal error in trim_markdown, text="{text[:12]}"(...), {max_length=}, suffix="{suffix}", ' - f'matched code block {open_start}..{close_end}' - ) - - # Cases when there were no code blocks in the input - if len(text) <= trim_idx: - return text - elif len(text) < max_length: - return (text[:trim_idx] + suffix)[:max_length] - else: - return text[:trim_idx] + suffix diff --git a/tests/test_trim_markdown.py b/tests/test_trim_markdown.py index c708e9c32..5801d76a0 100644 --- a/tests/test_trim_markdown.py +++ b/tests/test_trim_markdown.py @@ -1,64 +1,68 @@ import pytest -from robusta.utils.trim_markdown import trim_markdown +from robusta.core.sinks.transformer import Transformer @pytest.mark.parametrize( "max_length,expected_output", [ - (0, ""), - (1, "#"), - (2, "##"), - (3, "##"), - (4, "##"), - (5, "##"), - (6, "##"), - (7, "##"), - (8, "``````##"), - (9, "```o```##"), - (10, "```oh```##"), - (13, "```oh``` he##"), - (16, "```oh``` hello##"), - (17, "```oh``` hello ##"), - (18, "```oh``` hello ##"), - (19, "```oh``` hello ##"), - (20, "```oh``` hello ##"), - (21, "```oh``` hello ##"), - (22, "```oh``` hello ##"), - (23, "```oh``` hello ``````##"), - (24, "```oh``` hello ```w```##"), - (25, "```oh``` hello ```wo```##"), - (27, "```oh``` hello ```worl```##"), - (28, "```oh``` hello ```world```##"), - (29, "```oh``` hello ```world``` ##"), - (31, "```oh``` hello ```world``` an##"), - (39, "```oh``` hello ```world``` and then ##"), - (42, "```oh``` hello ```world``` and then ##"), - (44, "```oh``` hello ```world``` and then ``````##"), - (48, "```oh``` hello ```world``` and then ```some```##"), - (52, "```oh``` hello ```world``` and then ```somethin```##"), - (53, "```oh``` hello ```world``` and then ```something```##"), - (54, "```oh``` hello ```world``` and then ```something```##"), - (111, "```oh``` hello ```world``` and then ```something```##"), + (9, "```...```"), + (10, 
"```t...```"), + (13, "```test...```"), + (16, "```testing...```"), + (28, "```testing 12345667 so...```"), + (29, "```testing 12345667 som...```"), + (31, "```testing 12345667 some ...```"), + (35, "```testing 12345667 some more...```"), + (36, "```testing 12345667 some more ...```"), + (37, "```testing 12345667 some more text```"), + (53, "```testing 12345667 some more text```"), + (54, "```testing 12345667 some more text```"), + (111, "```testing 12345667 some more text```"), ]) def test_trim_markdown(max_length: int, expected_output: str): - text = "```oh``` hello ```world``` and then ```something```" - trimmed = trim_markdown(text, max_length, "##") + text = "```testing 12345667 some more text```" + trimmed = Transformer.trim_markdown(text, max_length, "...") + assert trimmed == expected_output + assert len(trimmed) <= max_length + +@pytest.mark.parametrize( + "max_length,expected_output", [ + (9, "```...```"), + (10, "```t...```"), + (13, "```test...```"), + (31, "```testing 12345667 some ...```"), + (36, "```testing 12345667 some more ...```"), + + # edge case, last few characters contains a partial codeblock '`' + # we cut off a few extra characters so we dont accidentally write ```` + (37, "```testing 12345667 some...```"), + (38, "```testing 12345667 some ...```"), + (39, "```testing 12345667 some m...```"), + + (40, "```testing 12345667 some more text```..."), + (43, "```testing 12345667 some more text``` so..."), + (52, "```testing 12345667 some more text``` some text a..."), + (53, "```testing 12345667 some more text``` some text af..."), + (54, "```testing 12345667 some more text``` some text aft..."), + (76, "```testing 12345667 some more text``` some text after stuff sdkljhadsflka..."), + (77, "```testing 12345667 some more text``` some text after stuff sdkljhadsflkas..."), + (78, "```testing 12345667 some more text``` some text after stuff sdkljhadsflkashdfl"), + (100, "```testing 12345667 some more text``` some text after stuff 
sdkljhadsflkashdfl"), + ]) +def test_trim_markdown_with_text(max_length: int, expected_output: str): + text = "```testing 12345667 some more text``` some text after stuff sdkljhadsflkashdfl" + trimmed = Transformer.trim_markdown(text, max_length, "...") + print(f"{trimmed}") assert trimmed == expected_output assert len(trimmed) <= max_length @pytest.mark.parametrize( "max_length,expected_output", [ - (0, ""), - (1, "$"), - (2, "$$"), (3, "$$$"), (4, "N$$$"), (5, "No$$$"), (10, "No code$$$"), - (38, "No code blocks whatsoever in this t$$$"), - (39, "No code blocks whatsoever in this te$$$"), - (40, "No code blocks whatsoever in this tex$$$"), (41, "No code blocks whatsoever in this text"), (42, "No code blocks whatsoever in this text"), (111, "No code blocks whatsoever in this text"), @@ -66,6 +70,6 @@ def test_trim_markdown(max_length: int, expected_output: str): ) def test_trim_markdown_no_code_blocks(max_length: int, expected_output: str): text = "No code blocks whatsoever in this text" - trimmed = trim_markdown(text, max_length, "$$$") + trimmed = Transformer.trim_markdown(text, max_length, "$$$") assert trimmed == expected_output assert len(trimmed) <= max_length