From 1c87218a1d47b52c8e1c48d2afdd619b12be2ff6 Mon Sep 17 00:00:00 2001 From: "avi@robusta.dev" Date: Wed, 17 Jul 2024 10:02:10 +0300 Subject: [PATCH 1/6] code markdown optomise --- src/robusta/core/sinks/transformer.py | 4 ++ src/robusta/utils/trim_markdown.py | 61 +++++---------------------- 2 files changed, 15 insertions(+), 50 deletions(-) diff --git a/src/robusta/core/sinks/transformer.py b/src/robusta/core/sinks/transformer.py index 36a3d3441..5859e2e50 100644 --- a/src/robusta/core/sinks/transformer.py +++ b/src/robusta/core/sinks/transformer.py @@ -61,6 +61,10 @@ def apply_length_limit(msg: str, max_length: int, truncator: Optional[str] = Non @staticmethod def apply_length_limit_to_markdown(msg: str, max_length: int, truncator: str = "...") -> str: try: + if len(msg) < max_length: + return msg + if '```' not in msg: + return Transformer.apply_length_limit(msg, max_length, truncator) return trim_markdown(msg, max_length, truncator) except: return Transformer.apply_length_limit(msg, max_length, truncator) diff --git a/src/robusta/utils/trim_markdown.py b/src/robusta/utils/trim_markdown.py index 10418b454..ce40a8edf 100644 --- a/src/robusta/utils/trim_markdown.py +++ b/src/robusta/utils/trim_markdown.py @@ -1,57 +1,18 @@ try: - from itertools import batched -except ImportError: # Python < 3.12 from more_itertools import batched -import regex - - def trim_markdown(text: str, max_length: int, suffix: str = "...") -> str: - # This method of trimming markdown is not universal. It only takes care of correctly - # trimming block sections. Implementing a general truncation method for markdown that - # would handle all the possible tags in a correct way would be rather complex. - - trim_idx = max_length - len(suffix) - - if trim_idx <= 0: # The pathological cases. - return suffix[:max_length] + suffix_len = len(suffix) + code_markdown_len = len('```') + tuncate_index = max_length - suffix_len - # Process block quotes backwards in the input - for match_open, match_close in batched(regex.finditer("```", text, regex.REVERSE), 2): - open_start, open_end = match_close.span() - close_start, close_end = match_open.span() - if trim_idx >= close_end: - # Trimming point after this block quote - return text[:trim_idx] + suffix - elif trim_idx < open_start: - # Trimming point before this block quote - continue to the preceding block - continue - elif trim_idx >= open_start and trim_idx < open_start + 3: - # Trimming point inside the opening block quote tag - return text[:trim_idx].rstrip("`") + suffix - elif trim_idx >= close_start and trim_idx < close_end: - # Trimming point inside the closing block quote tag - if trim_idx - open_end >= 3: # Enough space to insert the closing tag - return text[:trim_idx - 3] + "```" + suffix - else: # Not enough space, strip the whole block - return text[:open_start] + suffix - elif trim_idx >= open_end and trim_idx < close_start: - # Trimming point inside the block quote - if trim_idx - open_end >= 3: # Enough space to insert the closing tag - return text[:trim_idx - 3] + "```" + suffix - else: # Not enough space, strip the whole block - return text[:open_start] + suffix - else: - # This should never happen - raise Exception( - f'Internal error in trim_markdown, text="{text[:12]}"(...), {max_length=}, suffix="{suffix}", ' - f'matched code block {open_start}..{close_end}' - ) + # if there is a code annotation near the end of the string + if '```' in text[tuncate_index - code_markdown_len*2:tuncate_index]: + tuncate_index = tuncate_index - code_markdown_len*2 - # Cases when there were no code blocks in the input - if len(text) <= trim_idx: - return text - elif len(text) < max_length: - return (text[:trim_idx] + suffix)[:max_length] + code_annotation_truncat_count = text.count('```', __start=tuncate_index) + needs_end_markdown_string = (code_annotation_truncat_count % 2 == 1) # if there is an odd number of markdowns on the right + if needs_end_markdown_string: + return (text[:tuncate_index - code_markdown_len - suffix_len] + '```' + suffix)[:tuncate_index] else: - return text[:trim_idx] + suffix + return text[:tuncate_index - suffix_len] + suffix From 5bdbe80f8c62fd14032610cf83d0aba1bb47300d Mon Sep 17 00:00:00 2001 From: "avi@robusta.dev" Date: Wed, 17 Jul 2024 10:22:56 +0300 Subject: [PATCH 2/6] remove exception --- src/robusta/utils/trim_markdown.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/robusta/utils/trim_markdown.py b/src/robusta/utils/trim_markdown.py index ce40a8edf..bdb6132c4 100644 --- a/src/robusta/utils/trim_markdown.py +++ b/src/robusta/utils/trim_markdown.py @@ -1,5 +1,3 @@ -try: - from more_itertools import batched def trim_markdown(text: str, max_length: int, suffix: str = "...") -> str: suffix_len = len(suffix) From 2c231d034dfa1b67eb9fafdfcbe1bac76b0a9382 Mon Sep 17 00:00:00 2001 From: avi robusta Date: Wed, 17 Jul 2024 10:25:51 +0300 Subject: [PATCH 3/6] removed iter-tools --- poetry.lock | 17 +++-------------- pyproject.toml | 1 - 2 files changed, 3 insertions(+), 15 deletions(-) diff --git a/poetry.lock b/poetry.lock index b9b3819b3..5e6d66e02 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. [[package]] name = "absolufy-imports" @@ -1646,17 +1646,6 @@ pillow = ">=6.2.0" pyparsing = ">=2.3.1" python-dateutil = ">=2.7" -[[package]] -name = "more-itertools" -version = "10.3.0" -description = "More routines for operating on iterables, beyond itertools" -optional = false -python-versions = ">=3.8" -files = [ - {file = "more-itertools-10.3.0.tar.gz", hash = "sha256:e5d93ef411224fbcef366a6e8ddc4c5781bc6359d43412a65dd5964e46111463"}, - {file = "more_itertools-10.3.0-py3-none-any.whl", hash = "sha256:ea6a02e24a9161e51faad17a8782b92a0df82c12c1c8886fec7f0c3fa1a1b320"}, -] - [[package]] name = "mypy" version = "0.991" @@ -1849,8 +1838,8 @@ files = [ [package.dependencies] numpy = [ {version = ">=1.20.3", markers = "python_version < \"3.10\""}, - {version = ">=1.21.0", markers = "python_version >= \"3.10\" and python_version < \"3.11\""}, {version = ">=1.23.2", markers = "python_version >= \"3.11\""}, + {version = ">=1.21.0", markers = "python_version >= \"3.10\" and python_version < \"3.11\""}, ] python-dateutil = ">=2.8.2" pytz = ">=2020.1" @@ -3715,4 +3704,4 @@ all = ["CairoSVG", "Flask", "better-exceptions", "datadog-api-client", "grafana- [metadata] lock-version = "2.0" python-versions = "^3.8, <3.12" -content-hash = "34fe45a868b57cab6892af478146f6dbf67ac94703274cc87af3ef201326a9bf" +content-hash = "7f99aff9c3e559e8bf47f5541646cbb47b38a8f6d384d442ec604bff660ae5be" diff --git a/pyproject.toml b/pyproject.toml index 5a0ffe959..adcd58044 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -72,7 +72,6 @@ prometheus-api-client = "0.5.4" requests = "^2.32.3" certifi = "^2023.7.22" regex = "2024.5.15" -more_itertools = { version = "^10.3", python = "<3.12" } [tool.poetry.dev-dependencies] pre-commit = "^2.13.0" From 2caf575f4416659a23cdf965bf1353e40f8967c3 Mon Sep 17 00:00:00 2001 From: "avi@robusta.dev" Date: Wed, 17 Jul 2024 11:43:40 +0300 Subject: [PATCH 4/6] reformated trim_markdown --- src/robusta/core/sinks/transformer.py | 28 +++++++++++++++++++++------ src/robusta/utils/trim_markdown.py | 16 --------------- tests/test_trim_markdown.py | 4 ++-- 3 files changed, 24 insertions(+), 24 deletions(-) delete mode 100644 src/robusta/utils/trim_markdown.py diff --git a/src/robusta/core/sinks/transformer.py b/src/robusta/core/sinks/transformer.py index 5859e2e50..c56f4da28 100644 --- a/src/robusta/core/sinks/transformer.py +++ b/src/robusta/core/sinks/transformer.py @@ -27,7 +27,6 @@ def tabulate(*args, **kwargs): ScanReportBlock, TableBlock, ) -from robusta.utils.trim_markdown import trim_markdown class Transformer: @@ -58,14 +57,31 @@ def apply_length_limit(msg: str, max_length: int, truncator: Optional[str] = Non truncator = truncator or "..." return msg[: max_length - len(truncator)] + truncator + @staticmethod + def trim_markdown(text: str, max_length: int, suffix: str = "...") -> str: + if len(text) < max_length: + return text + if '```' not in text: + return Transformer.apply_length_limit(text, max_length, suffix) + suffix_len = len(suffix) + code_markdown_len = len('```') + tuncate_index = max_length - suffix_len + + # if there is a code annotation near the end of the string + if '```' in text[tuncate_index - code_markdown_len*2:tuncate_index]: + tuncate_index = tuncate_index - code_markdown_len*2 + + code_annotation_truncat_count = text.count('```', __start=tuncate_index) + needs_end_markdown_string = (code_annotation_truncat_count % 2 == 1) # if there is an odd number of markdowns on the right + if needs_end_markdown_string: + return (text[:tuncate_index - code_markdown_len - suffix_len] + '```' + suffix)[:tuncate_index] + else: + return text[:tuncate_index - suffix_len] + suffix + @staticmethod def apply_length_limit_to_markdown(msg: str, max_length: int, truncator: str = "...") -> str: try: - if len(msg) < max_length: - return msg - if '```' not in msg: - return Transformer.apply_length_limit(msg, max_length, truncator) - return trim_markdown(msg, max_length, truncator) + return Transformer.trim_markdown(msg, max_length, truncator) except: return Transformer.apply_length_limit(msg, max_length, truncator) diff --git a/src/robusta/utils/trim_markdown.py b/src/robusta/utils/trim_markdown.py deleted file mode 100644 index bdb6132c4..000000000 --- a/src/robusta/utils/trim_markdown.py +++ /dev/null @@ -1,16 +0,0 @@ - -def trim_markdown(text: str, max_length: int, suffix: str = "...") -> str: - suffix_len = len(suffix) - code_markdown_len = len('```') - tuncate_index = max_length - suffix_len - - # if there is a code annotation near the end of the string - if '```' in text[tuncate_index - code_markdown_len*2:tuncate_index]: - tuncate_index = tuncate_index - code_markdown_len*2 - - code_annotation_truncat_count = text.count('```', __start=tuncate_index) - needs_end_markdown_string = (code_annotation_truncat_count % 2 == 1) # if there is an odd number of markdowns on the right - if needs_end_markdown_string: - return (text[:tuncate_index - code_markdown_len - suffix_len] + '```' + suffix)[:tuncate_index] - else: - return text[:tuncate_index - suffix_len] + suffix diff --git a/tests/test_trim_markdown.py b/tests/test_trim_markdown.py index c708e9c32..c7be88927 100644 --- a/tests/test_trim_markdown.py +++ b/tests/test_trim_markdown.py @@ -1,6 +1,6 @@ import pytest -from robusta.utils.trim_markdown import trim_markdown +from robusta.core.sinks.transformer import Transformer @pytest.mark.parametrize( @@ -42,7 +42,7 @@ ]) def test_trim_markdown(max_length: int, expected_output: str): text = "```oh``` hello ```world``` and then ```something```" - trimmed = trim_markdown(text, max_length, "##") + trimmed = Transformer.trim_markdown(text, max_length, "##") assert trimmed == expected_output assert len(trimmed) <= max_length From b147bd745084b1f59dca06cd76ea7046f0eea99a Mon Sep 17 00:00:00 2001 From: "avi@robusta.dev" Date: Wed, 17 Jul 2024 12:05:46 +0300 Subject: [PATCH 5/6] pytest fix --- tests/test_trim_markdown.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_trim_markdown.py b/tests/test_trim_markdown.py index c7be88927..ed77a8f63 100644 --- a/tests/test_trim_markdown.py +++ b/tests/test_trim_markdown.py @@ -66,6 +66,6 @@ def test_trim_markdown(max_length: int, expected_output: str): ) def test_trim_markdown_no_code_blocks(max_length: int, expected_output: str): text = "No code blocks whatsoever in this text" - trimmed = trim_markdown(text, max_length, "$$$") + trimmed = Transformer.trim_markdown(text, max_length, "$$$") assert trimmed == expected_output assert len(trimmed) <= max_length From 97b69e9e94155cf2735c7dd21da47c44df0675d6 Mon Sep 17 00:00:00 2001 From: "avi@robusta.dev" Date: Wed, 17 Jul 2024 14:36:40 +0300 Subject: [PATCH 6/6] fix truncation --- src/robusta/core/sinks/transformer.py | 29 +++++---- tests/test_trim_markdown.py | 88 ++++++++++++++------------- 2 files changed, 63 insertions(+), 54 deletions(-) diff --git a/src/robusta/core/sinks/transformer.py b/src/robusta/core/sinks/transformer.py index c56f4da28..c3417b1ee 100644 --- a/src/robusta/core/sinks/transformer.py +++ b/src/robusta/core/sinks/transformer.py @@ -59,24 +59,29 @@ def apply_length_limit(msg: str, max_length: int, truncator: Optional[str] = Non @staticmethod def trim_markdown(text: str, max_length: int, suffix: str = "...") -> str: - if len(text) < max_length: + if len(text) <= max_length: return text + if max_length <= len(suffix): + return suffix[:max_length] if '```' not in text: return Transformer.apply_length_limit(text, max_length, suffix) + suffix_len = len(suffix) code_markdown_len = len('```') - tuncate_index = max_length - suffix_len - - # if there is a code annotation near the end of the string - if '```' in text[tuncate_index - code_markdown_len*2:tuncate_index]: - tuncate_index = tuncate_index - code_markdown_len*2 - - code_annotation_truncat_count = text.count('```', __start=tuncate_index) - needs_end_markdown_string = (code_annotation_truncat_count % 2 == 1) # if there is an odd number of markdowns on the right - if needs_end_markdown_string: - return (text[:tuncate_index - code_markdown_len - suffix_len] + '```' + suffix)[:tuncate_index] + truncate_index = max_length - suffix_len + + # edge case, last few characters contains a partial codeblock '`' character + # we shorten by a few extra characters so we don't accidentally write ```` + end_buffer_index = max(truncate_index - code_markdown_len*2 - 1, 0) + if '`' in text[truncate_index:max_length] and '```' in text[end_buffer_index:max_length]: + truncate_index = end_buffer_index + + count_removed_code_annotation = text.count('```', truncate_index, len(text)) + needs_end_code_annotation = (count_removed_code_annotation % 2 == 1) # if there is an odd number of ``` removed + if needs_end_code_annotation: + return text[:truncate_index - code_markdown_len] + suffix + '```' else: - return text[:tuncate_index - suffix_len] + suffix + return text[:truncate_index] + suffix @staticmethod def apply_length_limit_to_markdown(msg: str, max_length: int, truncator: str = "...") -> str: diff --git a/tests/test_trim_markdown.py b/tests/test_trim_markdown.py index ed77a8f63..5801d76a0 100644 --- a/tests/test_trim_markdown.py +++ b/tests/test_trim_markdown.py @@ -5,60 +5,64 @@ @pytest.mark.parametrize( "max_length,expected_output", [ - (0, ""), - (1, "#"), - (2, "##"), - (3, "##"), - (4, "##"), - (5, "##"), - (6, "##"), - (7, "##"), - (8, "``````##"), - (9, "```o```##"), - (10, "```oh```##"), - (13, "```oh``` he##"), - (16, "```oh``` hello##"), - (17, "```oh``` hello ##"), - (18, "```oh``` hello ##"), - (19, "```oh``` hello ##"), - (20, "```oh``` hello ##"), - (21, "```oh``` hello ##"), - (22, "```oh``` hello ##"), - (23, "```oh``` hello ``````##"), - (24, "```oh``` hello ```w```##"), - (25, "```oh``` hello ```wo```##"), - (27, "```oh``` hello ```worl```##"), - (28, "```oh``` hello ```world```##"), - (29, "```oh``` hello ```world``` ##"), - (31, "```oh``` hello ```world``` an##"), - (39, "```oh``` hello ```world``` and then ##"), - (42, "```oh``` hello ```world``` and then ##"), - (44, "```oh``` hello ```world``` and then ``````##"), - (48, "```oh``` hello ```world``` and then ```some```##"), - (52, "```oh``` hello ```world``` and then ```somethin```##"), - (53, "```oh``` hello ```world``` and then ```something```##"), - (54, "```oh``` hello ```world``` and then ```something```##"), - (111, "```oh``` hello ```world``` and then ```something```##"), + (9, "```...```"), + (10, "```t...```"), + (13, "```test...```"), + (16, "```testing...```"), + (28, "```testing 12345667 so...```"), + (29, "```testing 12345667 som...```"), + (31, "```testing 12345667 some ...```"), + (35, "```testing 12345667 some more...```"), + (36, "```testing 12345667 some more ...```"), + (37, "```testing 12345667 some more text```"), + (53, "```testing 12345667 some more text```"), + (54, "```testing 12345667 some more text```"), + (111, "```testing 12345667 some more text```"), ]) def test_trim_markdown(max_length: int, expected_output: str): - text = "```oh``` hello ```world``` and then ```something```" - trimmed = Transformer.trim_markdown(text, max_length, "##") + text = "```testing 12345667 some more text```" + trimmed = Transformer.trim_markdown(text, max_length, "...") + assert trimmed == expected_output + assert len(trimmed) <= max_length + +@pytest.mark.parametrize( + "max_length,expected_output", [ + (9, "```...```"), + (10, "```t...```"), + (13, "```test...```"), + (31, "```testing 12345667 some ...```"), + (36, "```testing 12345667 some more ...```"), + + # edge case, last few characters contains a partial codeblock '`' + # we cut off a few extra characters so we dont accidentally write ```` + (37, "```testing 12345667 some...```"), + (38, "```testing 12345667 some ...```"), + (39, "```testing 12345667 some m...```"), + + (40, "```testing 12345667 some more text```..."), + (43, "```testing 12345667 some more text``` so..."), + (52, "```testing 12345667 some more text``` some text a..."), + (53, "```testing 12345667 some more text``` some text af..."), + (54, "```testing 12345667 some more text``` some text aft..."), + (76, "```testing 12345667 some more text``` some text after stuff sdkljhadsflka..."), + (77, "```testing 12345667 some more text``` some text after stuff sdkljhadsflkas..."), + (78, "```testing 12345667 some more text``` some text after stuff sdkljhadsflkashdfl"), + (100, "```testing 12345667 some more text``` some text after stuff sdkljhadsflkashdfl"), + ]) +def test_trim_markdown_with_text(max_length: int, expected_output: str): + text = "```testing 12345667 some more text``` some text after stuff sdkljhadsflkashdfl" + trimmed = Transformer.trim_markdown(text, max_length, "...") + print(f"{trimmed}") assert trimmed == expected_output assert len(trimmed) <= max_length @pytest.mark.parametrize( "max_length,expected_output", [ - (0, ""), - (1, "$"), - (2, "$$"), (3, "$$$"), (4, "N$$$"), (5, "No$$$"), (10, "No code$$$"), - (38, "No code blocks whatsoever in this t$$$"), - (39, "No code blocks whatsoever in this te$$$"), - (40, "No code blocks whatsoever in this tex$$$"), (41, "No code blocks whatsoever in this text"), (42, "No code blocks whatsoever in this text"), (111, "No code blocks whatsoever in this text"),