Skip to content

Commit

Permalink
fix: don't produce <string> elements in _resolve_placeholders
Browse files Browse the repository at this point in the history
There's no such HTML element, it was probably an attempt to work around lxml's shortcomings.
  • Loading branch information
MHajoha committed Jun 18, 2024
1 parent 4adabe7 commit af68006
Show file tree
Hide file tree
Showing 3 changed files with 76 additions and 53 deletions.
78 changes: 51 additions & 27 deletions questionpy_sdk/webserver/question_ui.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
# This file is part of the QuestionPy SDK. (https://questionpy.org)
# The QuestionPy SDK is free software released under terms of the MIT license. See LICENSE.md.
# (c) Technische Universität Berlin, innoCampus <[email protected]>
from __future__ import annotations

import re
from functools import cached_property
from random import Random
from typing import Any

import lxml.html
import lxml.html.clean
from lxml import etree
from lxml.html.clean import Cleaner
from pydantic import BaseModel


Expand Down Expand Up @@ -111,6 +113,35 @@ def _int_to_roman(index: int) -> str:
return roman_num


def _add_text_before(before: etree._Element, text: str) -> None:
"""Add plain text before the given sibling.
LXML doesn't represent text as nodes, but as attributes of the parent or of the preceding node, which makes this
less trivial than one might expect.
"""
prev = before.getprevious()
if prev is None:
parent = _require_parent(before)
# The parent's 'text' attribute sets the text before the first child.
parent.text = ("" if parent.text is None else parent.text) + text
else:
prev.tail = ("" if prev.tail is None else prev.tail) + text


def _require_parent(node: etree._Element) -> etree._Element:
parent = node.getparent()
if parent is None:
msg = f"Node '{node}' on line '{node.sourceline}' somehow has no parent."
raise ValueError(msg)
return parent


def _remove_preserving_tail(node: etree._Element) -> None:
if node.tail is not None:
_add_text_before(node, node.tail)
_require_parent(node).remove(node)


class QuestionMetadata:
def __init__(self) -> None:
self.correct_response: dict[str, str] = {}
Expand All @@ -128,6 +159,7 @@ class QuestionDisplayOptions(BaseModel):

class QuestionUIRenderer:
"""General renderer for the question UI except for the formulation part."""

XHTML_NAMESPACE: str = "http://www.w3.org/1999/xhtml"
QPY_NAMESPACE: str = "http://questionpy.org/ns/question"

Expand Down Expand Up @@ -183,7 +215,7 @@ def _resolve_placeholders(self) -> None:
continue
parts = p_instruction.text.strip().split()
key = parts[0]
clean_option = parts[1] if len(parts) > 1 else "clean"
clean_option = parts[1].lower() if len(parts) > 1 else "clean"

parent = p_instruction.getparent()
if parent is None:
Expand All @@ -195,32 +227,24 @@ def _resolve_placeholders(self) -> None:

raw_value = self._placeholders[key]

if clean_option.lower() not in {"clean", "noclean"}:
if clean_option.lower() != "plain":
msg = f"clean_option expected to have value 'plain', found '{clean_option}' instead"
raise ValueError(msg)
# Treat the value as plain text
root = etree.Element("string")
root.text = etree.CDATA(raw_value)
parent.replace(p_instruction, root)
continue

if clean_option.lower() == "clean":
cleaner = Cleaner()
cleaned_value = etree.fromstring(cleaner.clean_html(raw_value))
# clean_html wraps the result in <p> or <div>
# Remove the wrapping from clean_html
content = ""
if cleaned_value.text:
content += cleaned_value.text
for child in cleaned_value:
content += etree.tostring(child, encoding="unicode", with_tail=True)
replacement = content
if clean_option == "plain":
# Treat the value as plain text.
_add_text_before(p_instruction, raw_value)
else:
replacement = raw_value

p_instruction.addnext(etree.fromstring(f"<string>{replacement}</string>"))
parent.remove(p_instruction)
# html.clean works on different element classes than etree, so we need to use different parse functions.
# Since the HTML elements are subclasses of the etree elements though, we can reuse them without dumping
# and reparsing.

# It doesn't really matter what element we wrap the fragment with, as we'll unwrap it immediately.
fragment = lxml.html.fragment_fromstring(raw_value, create_parent=True)
if clean_option != "noclean":
lxml.html.clean.clean(fragment)
if fragment.text is not None:
_add_text_before(p_instruction, fragment.text)
for child in fragment:
p_instruction.addprevious(child)

_remove_preserving_tail(p_instruction)

def _hide_unwanted_feedback(self) -> None:
"""Hides elements marked with `qpy:feedback` if the type of feedback is disabled in `options`."""
Expand Down
14 changes: 7 additions & 7 deletions tests/webserver/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,12 +76,12 @@ def normalize_element(element: etree._Element) -> etree._Element:
return element


def assert_xhtml_is_equal(xhtml1: str, xhtml2: str) -> None:
parser = etree.XMLParser(remove_blank_text=True)
tree1 = etree.fromstring(xhtml1, parser)
tree2 = etree.fromstring(xhtml2, parser)
def assert_html_is_equal(actual: str, expected: str) -> None:
parser = etree.HTMLParser(remove_blank_text=True)
actual_tree = etree.fromstring(actual, parser)
expected_tree = etree.fromstring(expected, parser)

normalize_element(tree1)
normalize_element(tree2)
normalize_element(actual_tree)
normalize_element(expected_tree)

assert etree.tostring(tree1, method="c14n") == etree.tostring(tree2, method="c14n")
assert etree.tostring(actual_tree, method="c14n") == etree.tostring(expected_tree, method="c14n")
37 changes: 18 additions & 19 deletions tests/webserver/test_question_ui.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
QuestionMetadata,
QuestionUIRenderer,
)
from tests.webserver.conftest import assert_xhtml_is_equal
from tests.webserver.conftest import assert_html_is_equal


@pytest.fixture
Expand Down Expand Up @@ -83,20 +83,19 @@ def test_should_extract_correct_metadata(xml_content: str) -> None:
}
)
def test_should_resolve_placeholders(result: str) -> None:
# TODO: remove <string> surrounding the resolved placeholder
expected = """
<div>
<div><string>My simple description.</string></div>
<span>By default cleaned parameter: <string>Value of param <b>one</b>.</string></span>
<span>Explicitly cleaned parameter: <string>Value of param <b>one</b>.</string></span>
<span>Noclean parameter: <string>Value of param <b>one</b>.<script>'Oh no, danger!'</script></string></span>
<div>My simple description.</div>
<span>By default cleaned parameter: Value of param <b>one</b>.</span>
<span>Explicitly cleaned parameter: Value of param <b>one</b>.</span>
<span>Noclean parameter: Value of param <b>one</b>.<script>'Oh no, danger!'</script></span>
<span>Plain parameter:
<string><![CDATA[Value of param <b>one</b>.<script>'Oh no, danger!'</script>]]></string>
Value of param &lt;b>one&lt;/b>.&lt;script>'Oh no, danger!'&lt;/script>
</span>
</div>
"""

assert_xhtml_is_equal(result, expected)
assert_html_is_equal(result, expected)


@pytest.mark.ui_file("feedbacks")
Expand All @@ -107,7 +106,7 @@ def test_should_hide_inline_feedback(result: str) -> None:
<span>No feedback</span>
</div>
"""
assert_xhtml_is_equal(result, expected)
assert_html_is_equal(result, expected)


@pytest.mark.ui_file("feedbacks")
Expand All @@ -119,7 +118,7 @@ def test_should_show_inline_feedback(result: str) -> None:
<span>Specific feedback</span>
</div>
"""
assert_xhtml_is_equal(result, expected)
assert_html_is_equal(result, expected)


@pytest.mark.parametrize(
Expand Down Expand Up @@ -151,7 +150,7 @@ def test_element_visibility_based_on_role(user_context: str, expected: str, xml_
renderer = QuestionUIRenderer(xml_content, {}, options)
result = renderer.html

assert_xhtml_is_equal(result, expected)
assert_html_is_equal(result, expected)


@pytest.mark.ui_file("inputs")
Expand Down Expand Up @@ -180,7 +179,7 @@ def test_should_set_input_values(result: str) -> None:
<input name="my_radio" type="radio" value="radio_value_2" class="qpy-input" checked="checked"/>
</div>
"""
assert_xhtml_is_equal(result, expected)
assert_html_is_equal(result, expected)


@pytest.mark.ui_file("inputs")
Expand All @@ -201,7 +200,7 @@ def test_should_disable_inputs(result: str) -> None:
<input name="my_radio" type="radio" value="radio_value_2" disabled="disabled" class="qpy-input"/>
</div>
"""
assert_xhtml_is_equal(result, expected)
assert_html_is_equal(result, expected)


@pytest.mark.ui_file("validations")
Expand All @@ -220,7 +219,7 @@ def test_should_soften_validations(result: str) -> None:
</div>
"""

assert_xhtml_is_equal(result, expected)
assert_html_is_equal(result, expected)


@pytest.mark.ui_file("buttons")
Expand All @@ -236,7 +235,7 @@ def test_should_defuse_buttons(result: str) -> None:
<input class="btn btn-primary qpy-input" type="button" value="Button"/>
</div>
"""
assert_xhtml_is_equal(result, expected)
assert_html_is_equal(result, expected)


@pytest.mark.skip("""1. The text directly in the root of <qpy:formulation> is not copied in render_part.
Expand All @@ -254,7 +253,7 @@ def test_should_format_floats_in_en(result: str) -> None:
Strip zeros: <span>1.1</span>
</div>
"""
assert_xhtml_is_equal(result, expected)
assert_html_is_equal(result, expected)


@pytest.mark.ui_file("shuffle")
Expand Down Expand Up @@ -287,7 +286,7 @@ def test_should_replace_shuffled_index(result: str) -> None:
</fieldset>
</div>
"""
assert_xhtml_is_equal(result, expected)
assert_html_is_equal(result, expected)


@pytest.mark.render_params(
Expand All @@ -307,7 +306,7 @@ def test_clean_up(result: str) -> None:
<regular>Normal Content</regular>
</div>
"""
assert_xhtml_is_equal(result, expected)
assert_html_is_equal(result, expected)


@pytest.mark.ui_file("qpy-urls")
Expand All @@ -324,4 +323,4 @@ def test_should_replace_qpy_urls(result: str) -> None:
</div>
"""

assert_xhtml_is_equal(result, expected)
assert_html_is_equal(result, expected)

0 comments on commit af68006

Please sign in to comment.