Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Safer markdown truncating #1500

Merged
merged 6 commits into from
Jul 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 3 additions & 14 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,6 @@ prometheus-api-client = "0.5.4"
requests = "^2.32.3"
certifi = "^2023.7.22"
regex = "2024.5.15"
more_itertools = { version = "^10.3", python = "<3.12" }

[tool.poetry.dev-dependencies]
pre-commit = "^2.13.0"
Expand Down
29 changes: 27 additions & 2 deletions src/robusta/core/sinks/transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ def tabulate(*args, **kwargs):
ScanReportBlock,
TableBlock,
)
from robusta.utils.trim_markdown import trim_markdown


class Transformer:
Expand Down Expand Up @@ -58,10 +57,36 @@ def apply_length_limit(msg: str, max_length: int, truncator: Optional[str] = Non
truncator = truncator or "..."
return msg[: max_length - len(truncator)] + truncator

@staticmethod
def trim_markdown(text: str, max_length: int, suffix: str = "...") -> str:
    """Truncate markdown to at most ``max_length`` characters, closing a cut code block.

    Args:
        text: The markdown text to shorten.
        max_length: Maximum length of the returned string.
        suffix: Marker appended where the text was cut.

    Returns:
        ``text`` unchanged when it already fits; a prefix of ``suffix`` when
        ``max_length`` leaves no room for content; otherwise a shortened
        ``text`` followed by ``suffix`` and, when an odd number of ``` fences
        was cut away, a closing ``` fence.
    """
    if len(text) <= max_length:
        return text
    if max_length <= len(suffix):
        return suffix[:max_length]
    if '```' not in text:
        return Transformer.apply_length_limit(text, max_length, suffix)

    suffix_len = len(suffix)
    code_markdown_len = len('```')
    truncate_index = max_length - suffix_len

    # Edge case: the last few characters contain a partial code fence ('`').
    # Cut a few extra characters so we don't accidentally emit ````.
    end_buffer_index = max(truncate_index - code_markdown_len * 2 - 1, 0)
    if '`' in text[truncate_index:max_length] and '```' in text[end_buffer_index:max_length]:
        truncate_index = end_buffer_index

    # An odd number of removed fences means the kept part ends inside a code block.
    count_removed_code_annotation = text.count('```', truncate_index)
    needs_end_code_annotation = count_removed_code_annotation % 2 == 1

    # Only reserve room for the closing fence when that room exists. With fewer
    # than three kept characters the slice index would go negative (keeping
    # almost the whole text), and such a short prefix cannot hold an opening
    # fence anyway, so it is truncated plainly.
    if needs_end_code_annotation and truncate_index >= code_markdown_len:
        return text[:truncate_index - code_markdown_len] + suffix + '```'
    return text[:truncate_index] + suffix

@staticmethod
def apply_length_limit_to_markdown(msg: str, max_length: int, truncator: str = "...") -> str:
    """Shorten markdown ``msg`` to ``max_length``, falling back to plain truncation.

    Args:
        msg: The markdown text to shorten.
        max_length: Maximum length of the returned string.
        truncator: Marker appended where the text was cut.

    Returns:
        The result of ``Transformer.trim_markdown``; if that raises, the
        result of ``Transformer.apply_length_limit`` instead.
    """
    try:
        return Transformer.trim_markdown(msg, max_length, truncator)
    except Exception:
        # Best-effort: a failure in the markdown-aware path must not block the
        # message. KeyboardInterrupt/SystemExit are left to propagate.
        return Transformer.apply_length_limit(msg, max_length, truncator)

Expand Down
57 changes: 0 additions & 57 deletions src/robusta/utils/trim_markdown.py

This file was deleted.

92 changes: 48 additions & 44 deletions tests/test_trim_markdown.py
Original file line number Diff line number Diff line change
@@ -1,71 +1,75 @@
import pytest

from robusta.utils.trim_markdown import trim_markdown
from robusta.core.sinks.transformer import Transformer


@pytest.mark.parametrize(
    "max_length,expected_output", [
        (9, "```...```"),
        (10, "```t...```"),
        (13, "```test...```"),
        (16, "```testing...```"),
        (28, "```testing 12345667 so...```"),
        (29, "```testing 12345667 som...```"),
        (31, "```testing 12345667 some ...```"),
        (35, "```testing 12345667 some more...```"),
        (36, "```testing 12345667 some more ...```"),
        (37, "```testing 12345667 some more text```"),
        (53, "```testing 12345667 some more text```"),
        (54, "```testing 12345667 some more text```"),
        (111, "```testing 12345667 some more text```"),
    ])
def test_trim_markdown(max_length: int, expected_output: str):
    """Trim a text that is one single code block and check the fence is re-closed."""
    text = "```testing 12345667 some more text```"
    trimmed = Transformer.trim_markdown(text, max_length, "...")
    assert trimmed == expected_output
    assert len(trimmed) <= max_length

@pytest.mark.parametrize(
    "max_length,expected_output", [
        (9, "```...```"),
        (10, "```t...```"),
        (13, "```test...```"),
        (31, "```testing 12345667 some ...```"),
        (36, "```testing 12345667 some more ...```"),

        # Edge case: the last few characters contain a partial code fence '`'.
        # A few extra characters are cut so we don't accidentally write ````.
        (37, "```testing 12345667 some...```"),
        (38, "```testing 12345667 some ...```"),
        (39, "```testing 12345667 some m...```"),

        (40, "```testing 12345667 some more text```..."),
        (43, "```testing 12345667 some more text``` so..."),
        (52, "```testing 12345667 some more text``` some text a..."),
        (53, "```testing 12345667 some more text``` some text af..."),
        (54, "```testing 12345667 some more text``` some text aft..."),
        (76, "```testing 12345667 some more text``` some text after stuff sdkljhadsflka..."),
        (77, "```testing 12345667 some more text``` some text after stuff sdkljhadsflkas..."),
        (78, "```testing 12345667 some more text``` some text after stuff sdkljhadsflkashdfl"),
        (100, "```testing 12345667 some more text``` some text after stuff sdkljhadsflkashdfl"),
    ])
def test_trim_markdown_with_text(max_length: int, expected_output: str):
    """Trim a code block followed by plain text, cutting inside and after the block."""
    text = "```testing 12345667 some more text``` some text after stuff sdkljhadsflkashdfl"
    trimmed = Transformer.trim_markdown(text, max_length, "...")
    assert trimmed == expected_output
    assert len(trimmed) <= max_length


@pytest.mark.parametrize(
    "max_length,expected_output", [
        (0, ""),
        (1, "$"),
        (2, "$$"),
        (3, "$$$"),
        (4, "N$$$"),
        (5, "No$$$"),
        (10, "No code$$$"),
        (38, "No code blocks whatsoever in this t$$$"),
        (39, "No code blocks whatsoever in this te$$$"),
        (40, "No code blocks whatsoever in this tex$$$"),
        (41, "No code blocks whatsoever in this text"),
        (42, "No code blocks whatsoever in this text"),
        (111, "No code blocks whatsoever in this text"),
    ]
)
def test_trim_markdown_no_code_blocks(max_length: int, expected_output: str):
    """Trim text without any code fence, using a custom "$$$" suffix."""
    text = "No code blocks whatsoever in this text"
    trimmed = Transformer.trim_markdown(text, max_length, "$$$")
    assert trimmed == expected_output
    assert len(trimmed) <= max_length
Loading