Skip to content

Commit

Permalink
Add functions for attribute value normalization
Browse files Browse the repository at this point in the history
closes tafia#371
  • Loading branch information
dralley committed Jun 14, 2024
1 parent a44792f commit 826f75c
Show file tree
Hide file tree
Showing 7 changed files with 533 additions and 16 deletions.
15 changes: 11 additions & 4 deletions Changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,19 @@

### New Features

- [#379]: Improved compliance with the XML attribute value normalization process by
adding `Attribute::normalized_value()` and `Attribute::normalized_value_with()`,
which ought to be used in place of `Attribute::unescape_value()` and
`Attribute::unescape_value_with()`

### Bug Fixes

### Misc Changes

- [#379]: Added tests for attribute value normalization
- [#650]: Change the type of `Event::PI` to a new dedicated `BytesPI` type.

[#379]: https://github.com/tafia/quick-xml/pull/379
[#650]: https://github.com/tafia/quick-xml/issues/650


Expand Down Expand Up @@ -57,9 +64,6 @@ resolve predefined entities.
- `quick_xml::escape::resolve_html5_entity`
- [#753]: Added parser for processing instructions: `quick_xml::reader::PiParser`.
- [#754]: Added parser for elements: `quick_xml::reader::ElementParser`.

### Bug Fixes

- [#622]: Fix wrong disregarding of not closed markup, such as lone `<`.
- [#684]: Fix incorrect position reported for `Error::IllFormed(DoubleHyphenInComment)`.
- [#684]: Fix incorrect position reported for `Error::IllFormed(MissingDoctypeName)`.
Expand Down Expand Up @@ -120,7 +124,6 @@ resolve predefined entities.
[`PayloadEvent`]: https://docs.rs/quick-xml/latest/quick_xml/de/enum.PayloadEvent.html
[`Text`]: https://docs.rs/quick-xml/latest/quick_xml/de/struct.Text.html


## 0.31.0 -- 2023-10-22

MSRV bumped to 1.56! Crate now uses Rust 2021 edition.
Expand Down Expand Up @@ -291,6 +294,10 @@ serde >= 1.0.181
- [#565]: Fix compilation error when build with serde <1.0.139


### New Tests

- [#379]: Added tests for attribute value normalization

[externally tagged]: https://serde.rs/enum-representations.html#externally-tagged
[#490]: https://github.com/tafia/quick-xml/pull/490
[#510]: https://github.com/tafia/quick-xml/issues/510
Expand Down
12 changes: 4 additions & 8 deletions benches/macrobenches.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,14 +43,13 @@ static INPUTS: &[(&str, &str)] = &[
("players.xml", PLAYERS),
];

// TODO: use fully normalized attribute values
fn parse_document_from_str(doc: &str) -> XmlResult<()> {
let mut r = Reader::from_str(doc);
loop {
match criterion::black_box(r.read_event()?) {
Event::Start(e) | Event::Empty(e) => {
for attr in e.attributes() {
criterion::black_box(attr?.decode_and_unescape_value(&r)?);
criterion::black_box(attr?.decode_and_normalize_value(&r)?);
}
}
Event::Text(e) => {
Expand All @@ -67,15 +66,14 @@ fn parse_document_from_str(doc: &str) -> XmlResult<()> {
Ok(())
}

// TODO: use fully normalized attribute values
fn parse_document_from_bytes(doc: &[u8]) -> XmlResult<()> {
let mut r = Reader::from_reader(doc);
let mut buf = Vec::new();
loop {
match criterion::black_box(r.read_event_into(&mut buf)?) {
Event::Start(e) | Event::Empty(e) => {
for attr in e.attributes() {
criterion::black_box(attr?.decode_and_unescape_value(&r)?);
criterion::black_box(attr?.decode_and_normalize_value(&r)?);
}
}
Event::Text(e) => {
Expand All @@ -93,15 +91,14 @@ fn parse_document_from_bytes(doc: &[u8]) -> XmlResult<()> {
Ok(())
}

// TODO: use fully normalized attribute values
fn parse_document_from_str_with_namespaces(doc: &str) -> XmlResult<()> {
let mut r = NsReader::from_str(doc);
loop {
match criterion::black_box(r.read_resolved_event()?) {
(resolved_ns, Event::Start(e) | Event::Empty(e)) => {
criterion::black_box(resolved_ns);
for attr in e.attributes() {
criterion::black_box(attr?.decode_and_unescape_value(&r)?);
criterion::black_box(attr?.decode_and_normalize_value(&r)?);
}
}
(resolved_ns, Event::Text(e)) => {
Expand All @@ -120,7 +117,6 @@ fn parse_document_from_str_with_namespaces(doc: &str) -> XmlResult<()> {
Ok(())
}

// TODO: use fully normalized attribute values
fn parse_document_from_bytes_with_namespaces(doc: &[u8]) -> XmlResult<()> {
let mut r = NsReader::from_reader(doc);
let mut buf = Vec::new();
Expand All @@ -129,7 +125,7 @@ fn parse_document_from_bytes_with_namespaces(doc: &[u8]) -> XmlResult<()> {
(resolved_ns, Event::Start(e) | Event::Empty(e)) => {
criterion::black_box(resolved_ns);
for attr in e.attributes() {
criterion::black_box(attr?.decode_and_unescape_value(&r)?);
criterion::black_box(attr?.decode_and_normalize_value(&r)?);
}
}
(resolved_ns, Event::Text(e)) => {
Expand Down
72 changes: 72 additions & 0 deletions benches/microbenches.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
use std::borrow::Cow;

use criterion::{self, criterion_group, criterion_main, Criterion};
use pretty_assertions::assert_eq;
use quick_xml::escape::{escape, unescape};
use quick_xml::events::attributes::Attribute;
use quick_xml::events::Event;
use quick_xml::name::QName;
use quick_xml::reader::{NsReader, Reader};
Expand Down Expand Up @@ -242,6 +245,74 @@ fn attributes(c: &mut Criterion) {
assert_eq!(count, 150);
})
});

group.finish();
}

/// Benchmarks normalizing attribute values
fn attribute_value_normalization(c: &mut Criterion) {
let mut group = c.benchmark_group("attribute_value_normalization");

group.bench_function("noop_short", |b| {
let attr = Attribute {
key: QName(b"foo"),
value: Cow::Borrowed(b"foobar"),
};
b.iter(|| {
criterion::black_box(attr.normalized_value()).unwrap();
})
});

group.bench_function("noop_long", |b| {
let attr = Attribute {
key: QName(b"foo"),
value: Cow::Borrowed(LOREM_IPSUM_TEXT.as_bytes()),
};
b.iter(|| {
criterion::black_box(attr.normalized_value()).unwrap();
})
});

group.bench_function("replacement_chars", |b| {
let attr = Attribute {
key: QName(b"foo"),
value: Cow::Borrowed(b"just a bit\n of text without\tany entities"),
};
b.iter(|| {
criterion::black_box(attr.normalized_value()).unwrap();
})
});

group.bench_function("char_reference", |b| {
let attr1 = Attribute {
key: QName(b"foo"),
value: Cow::Borrowed(b"prefix &#34;some stuff&#34;,&#x22;more stuff&#x22;"),
};
let attr2 = Attribute {
key: QName(b"foo"),
value: Cow::Borrowed(b"&#38;&#60;"),
};
b.iter(|| {
criterion::black_box(attr1.normalized_value()).unwrap();
criterion::black_box(attr2.normalized_value()).unwrap();
})
});

group.bench_function("entity_reference", |b| {
let attr1 = Attribute {
key: QName(b"foo"),
value: Cow::Borrowed(b"age &gt; 72 &amp;&amp; age &lt; 21"),
};
let attr2 = Attribute {
key: QName(b"foo"),
value: Cow::Borrowed(b"&quot;what&apos;s that?&quot;"),
};
b.iter(|| {
criterion::black_box(attr1.normalized_value()).unwrap();
criterion::black_box(attr2.normalized_value()).unwrap();
})
});

group.finish();
}

Expand Down Expand Up @@ -354,6 +425,7 @@ criterion_group!(
read_resolved_event_into,
one_event,
attributes,
attribute_value_normalization,
escaping,
unescaping,
);
Expand Down
1 change: 1 addition & 0 deletions src/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,7 @@ impl From<EscapeError> for Error {
}

impl From<AttrError> for Error {
/// Creates a new `Error::InvalidAttr` from the given error
#[inline]
fn from(error: AttrError) -> Self {
Error::InvalidAttr(error)
Expand Down
Loading

0 comments on commit 826f75c

Please sign in to comment.