Skip to content

Commit

Permalink
Merge pull request tafia#677 from Mingun/config
Browse files Browse the repository at this point in the history
Introduce `Config` struct that holds parser configuration and implement tafia#513
  • Loading branch information
Mingun authored Nov 5, 2023
2 parents b95b503 + 3875bdb commit bbc7bda
Show file tree
Hide file tree
Showing 27 changed files with 1,181 additions and 395 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ async-tokio = ["tokio"]
## # }
## let xml = to_utf16le_with_bom(r#"<?xml encoding='UTF-16'><element/>"#);
## let mut reader = Reader::from_reader(xml.as_ref());
## reader.trim_text(true);
## reader.config_mut().trim_text(true);
##
## let mut buf = Vec::new();
## let mut unsupported = false;
Expand Down
11 changes: 11 additions & 0 deletions Changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,17 @@

## Unreleased

The way to configure parser is changed. Now all configuration is contained in the
`Config` struct and can be applied at once. When `serde-types` feature is enabled,
configuration is serializable.

### New Features

- [#513]: Allow to continue parsing after getting new `Error::IllFormed`.
- [#677]: Added methods `config()` and `config_mut()` to inspect and change the parser
configuration. Previous builder methods on `Reader` / `NsReader` was replaced by
direct access to fields of config using `reader.config_mut().<...>`.

### Bug Fixes

### Misc Changes
Expand All @@ -26,7 +35,9 @@
- `Error::UnexpectedEof` replaced by `IllFormedError` in some cases
- `Error::UnexpectedToken` replaced by `IllFormedError::DoubleHyphenInComment`

[#513]: https://github.com/tafia/quick-xml/issues/513
[#675]: https://github.com/tafia/quick-xml/pull/675
[#677]: https://github.com/tafia/quick-xml/pull/677


## 0.31.0 -- 2023-10-22
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ let xml = r#"<tag1 att1 = "test">
<tag2>Test 2</tag2>
</tag1>"#;
let mut reader = Reader::from_str(xml);
reader.trim_text(true);
reader.config_mut().trim_text(true);

let mut count = 0;
let mut txt = Vec::new();
Expand Down Expand Up @@ -73,7 +73,7 @@ use std::io::Cursor;

let xml = r#"<this_tag k1="v1" k2="v2"><child>text</child></this_tag>"#;
let mut reader = Reader::from_str(xml);
reader.trim_text(true);
reader.config_mut().trim_text(true);
let mut writer = Writer::new(Cursor::new(Vec::new()));
loop {
match reader.read_event() {
Expand Down
30 changes: 20 additions & 10 deletions benches/microbenches.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ fn read_event(c: &mut Criterion) {
group.bench_function("trim_text = false", |b| {
b.iter(|| {
let mut r = Reader::from_str(SAMPLE);
r.check_end_names(false);
r.config_mut().check_end_names = false;
let mut count = criterion::black_box(0);
loop {
match r.read_event() {
Expand All @@ -49,7 +49,9 @@ fn read_event(c: &mut Criterion) {
group.bench_function("trim_text = true", |b| {
b.iter(|| {
let mut r = Reader::from_str(SAMPLE);
r.trim_text(true).check_end_names(false);
let config = r.config_mut();
config.trim_text(true);
config.check_end_names = false;
let mut count = criterion::black_box(0);
loop {
match r.read_event() {
Expand All @@ -74,7 +76,7 @@ fn read_resolved_event_into(c: &mut Criterion) {
group.bench_function("trim_text = false", |b| {
b.iter(|| {
let mut r = NsReader::from_str(SAMPLE);
r.check_end_names(false);
r.config_mut().check_end_names = false;
let mut count = criterion::black_box(0);
loop {
match r.read_resolved_event() {
Expand All @@ -93,7 +95,9 @@ fn read_resolved_event_into(c: &mut Criterion) {
group.bench_function("trim_text = true", |b| {
b.iter(|| {
let mut r = NsReader::from_str(SAMPLE);
r.trim_text(true).check_end_names(false);
let config = r.config_mut();
config.trim_text(true);
config.check_end_names = false;
let mut count = criterion::black_box(0);
loop {
match r.read_resolved_event() {
Expand All @@ -120,7 +124,9 @@ fn one_event(c: &mut Criterion) {
b.iter(|| {
let mut r = Reader::from_str(&src);
let mut nbtxt = criterion::black_box(0);
r.trim_text(true).check_end_names(false);
let config = r.config_mut();
config.trim_text(true);
config.check_end_names = false;
match r.read_event() {
Ok(Event::Start(ref e)) => nbtxt += e.len(),
something_else => panic!("Did not expect {:?}", something_else),
Expand All @@ -135,7 +141,9 @@ fn one_event(c: &mut Criterion) {
b.iter(|| {
let mut r = Reader::from_str(&src);
let mut nbtxt = criterion::black_box(0);
r.trim_text(true).check_end_names(false);
let config = r.config_mut();
config.trim_text(true);
config.check_end_names = false;
match r.read_event() {
Ok(Event::Comment(e)) => nbtxt += e.unescape().unwrap().len(),
something_else => panic!("Did not expect {:?}", something_else),
Expand All @@ -150,7 +158,9 @@ fn one_event(c: &mut Criterion) {
b.iter(|| {
let mut r = Reader::from_str(&src);
let mut nbtxt = criterion::black_box(0);
r.trim_text(true).check_end_names(false);
let config = r.config_mut();
config.trim_text(true);
config.check_end_names = false;
match r.read_event() {
Ok(Event::CData(ref e)) => nbtxt += e.len(),
something_else => panic!("Did not expect {:?}", something_else),
Expand All @@ -168,7 +178,7 @@ fn attributes(c: &mut Criterion) {
group.bench_function("with_checks = true", |b| {
b.iter(|| {
let mut r = Reader::from_str(PLAYERS);
r.check_end_names(false);
r.config_mut().check_end_names = false;
let mut count = criterion::black_box(0);
loop {
match r.read_event() {
Expand All @@ -189,7 +199,7 @@ fn attributes(c: &mut Criterion) {
group.bench_function("with_checks = false", |b| {
b.iter(|| {
let mut r = Reader::from_str(PLAYERS);
r.check_end_names(false);
r.config_mut().check_end_names = false;
let mut count = criterion::black_box(0);
loop {
match r.read_event() {
Expand All @@ -210,7 +220,7 @@ fn attributes(c: &mut Criterion) {
group.bench_function("try_get_attribute", |b| {
b.iter(|| {
let mut r = Reader::from_str(PLAYERS);
r.check_end_names(false);
r.config_mut().check_end_names = false;
let mut count = criterion::black_box(0);
loop {
match r.read_event() {
Expand Down
2 changes: 1 addition & 1 deletion compare/benches/bench.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ fn low_level_comparison(c: &mut Criterion) {
|b, input| {
b.iter(|| {
let mut r = Reader::from_reader(input.as_bytes());
r.check_end_names(false);
r.config_mut().check_end_names = false;
let mut count = criterion::black_box(0);
let mut buf = Vec::new();
loop {
Expand Down
2 changes: 1 addition & 1 deletion examples/custom_entities.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ const DATA: &str = r#"

fn main() -> Result<(), Box<dyn std::error::Error>> {
let mut reader = Reader::from_str(DATA);
reader.trim_text(true);
reader.config_mut().trim_text(true);

let mut custom_entities: HashMap<String, String> = HashMap::new();
let entity_re = Regex::new(r#"<!ENTITY\s+([^ \t\r\n]+)\s+"([^"]*)"\s*>"#)?;
Expand Down
2 changes: 1 addition & 1 deletion examples/read_buffered.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ fn main() -> Result<(), quick_xml::Error> {
use quick_xml::reader::Reader;

let mut reader = Reader::from_file("tests/documents/document.xml")?;
reader.trim_text(true);
reader.config_mut().trim_text(true);

let mut buf = Vec::new();

Expand Down
6 changes: 4 additions & 2 deletions examples/read_nodes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -105,14 +105,16 @@ fn main() -> Result<(), AppError> {
let mut translations: Vec<Translation> = Vec::new();

let mut reader = Reader::from_str(XML);
reader.trim_text(true);
let config = reader.config_mut();

config.trim_text(true);
// == Handling empty elements ==
// To simply our processing code
// we want the same events for empty elements, like:
// <DefaultSettings Language="es" Greeting="HELLO"/>
// <Text/>
reader.expand_empty_elements(true);
config.expand_empty_elements = true;

let mut buf = Vec::new();

loop {
Expand Down
2 changes: 1 addition & 1 deletion examples/read_texts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ fn main() {
<tag1>text3</tag1><tag1><tag2>text4</tag2></tag1>";

let mut reader = Reader::from_str(xml);
reader.trim_text(true);
reader.config_mut().trim_text(true);

loop {
match reader.read_event() {
Expand Down
7 changes: 3 additions & 4 deletions fuzz/fuzz_targets/fuzz_target_1.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,9 @@ where
{
let mut writer = Writer::new(Cursor::new(Vec::new()));
let mut buf = vec![];
let reader = reader
.expand_empty_elements(true)
.trim_text(true)
.trim_text_end(true);
let config = reader.config_mut();
config.expand_empty_elements = true;
config.trim_text(true);
loop {
let event_result = reader.read_event_into(&mut buf);
if let Ok(ref event) = event_result {
Expand Down
19 changes: 4 additions & 15 deletions fuzz/fuzz_targets/structured_roundtrip.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
use arbitrary::{Arbitrary, Unstructured};
use libfuzzer_sys::fuzz_target;
use quick_xml::events::{BytesCData, BytesText, Event};
use quick_xml::reader::{NsReader, Reader};
use quick_xml::reader::{Config, NsReader, Reader};
use quick_xml::writer::Writer;
use std::{hint::black_box, io::Cursor};

Expand Down Expand Up @@ -41,7 +41,7 @@ enum WriterFunc<'a> {
#[derive(Debug, arbitrary::Arbitrary)]
struct Driver<'a> {
writer_funcs: Vec<WriterFunc<'a>>,
reader_config: Vec<bool>,
reader_config: Config,
}

fn fuzz_round_trip(driver: Driver) -> quick_xml::Result<()> {
Expand Down Expand Up @@ -83,13 +83,7 @@ fn fuzz_round_trip(driver: Driver) -> quick_xml::Result<()> {
let xml = writer.into_inner().into_inner();
// The str should be valid as we just generated it, unwrapping **should** be safe.
let mut reader = Reader::from_str(std::str::from_utf8(&xml).unwrap());
let mut config_iter = driver.reader_config.iter();
reader.check_comments(*config_iter.next().unwrap_or(&false));
reader.check_end_names(*config_iter.next().unwrap_or(&false));
reader.expand_empty_elements(*config_iter.next().unwrap_or(&false));
reader.trim_markup_names_in_closing_tags(*config_iter.next().unwrap_or(&false));
reader.trim_text(*config_iter.next().unwrap_or(&false));
reader.trim_text_end(*config_iter.next().unwrap_or(&false));
*reader.config_mut() = driver.reader_config.clone();

loop {
let event = black_box(reader.read_event()?);
Expand All @@ -99,12 +93,7 @@ fn fuzz_round_trip(driver: Driver) -> quick_xml::Result<()> {
}

let mut reader = NsReader::from_reader(&xml[..]);
reader.check_comments(*config_iter.next().unwrap_or(&false));
reader.check_end_names(*config_iter.next().unwrap_or(&false));
reader.expand_empty_elements(*config_iter.next().unwrap_or(&false));
reader.trim_markup_names_in_closing_tags(*config_iter.next().unwrap_or(&false));
reader.trim_text(*config_iter.next().unwrap_or(&false));
reader.trim_text_end(*config_iter.next().unwrap_or(&false));
*reader.config_mut() = driver.reader_config;

loop {
let event = black_box(reader.read_event()?);
Expand Down
11 changes: 7 additions & 4 deletions src/de/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2476,7 +2476,7 @@ where
///
/// [`deserialize_seq`]: serde::Deserializer::deserialize_seq
/// [DoS]: https://en.wikipedia.org/wiki/Denial-of-service_attack
/// [auto-expanding feature]: Reader::expand_empty_elements
/// [auto-expanding feature]: crate::reader::Config::expand_empty_elements
#[cfg(feature = "overlapped-lists")]
pub fn event_buffer_size(&mut self, limit: Option<NonZeroUsize>) -> &mut Self {
self.limit = limit;
Expand Down Expand Up @@ -2761,7 +2761,8 @@ where
/// and use specified entity resolver.
pub fn from_str_with_resolver(source: &'de str, entity_resolver: E) -> Self {
let mut reader = Reader::from_str(source);
reader.expand_empty_elements(true);
let config = reader.config_mut();
config.expand_empty_elements = true;

Self::new(
SliceReader {
Expand Down Expand Up @@ -2803,7 +2804,8 @@ where
/// UTF-8, you can decode it first before using [`from_str`].
pub fn with_resolver(reader: R, entity_resolver: E) -> Self {
let mut reader = Reader::from_reader(reader);
reader.expand_empty_elements(true);
let config = reader.config_mut();
config.expand_empty_elements = true;

Self::new(
IoReader {
Expand Down Expand Up @@ -3709,7 +3711,8 @@ mod tests {
start_trimmer: StartTrimmer::default(),
};

reader.reader.expand_empty_elements(true);
let config = reader.reader.config_mut();
config.expand_empty_elements = true;

let mut events = Vec::new();

Expand Down
5 changes: 4 additions & 1 deletion src/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,9 @@ impl std::error::Error for SyntaxError {}
/// An error returned if parsed document is not [well-formed], for example,
/// an opened tag is not closed before end of input.
///
/// Those errors are not fatal: after encountering an error you can continue
/// parsing the document.
///
/// [well-formed]: https://www.w3.org/TR/xml11/#dt-wellformed
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum IllFormedError {
Expand Down Expand Up @@ -93,7 +96,7 @@ pub enum IllFormedError {
/// mostly artificial, but you can enable it in the [configuration].
///
/// [specification]: https://www.w3.org/TR/xml11/#sec-comments
/// [configuration]: crate::reader::Reader::check_comments
/// [configuration]: crate::reader::Config::check_comments
DoubleHyphenInComment,
}

Expand Down
12 changes: 6 additions & 6 deletions src/reader/async_tokio.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ impl<R: AsyncBufRead + Unpin> Reader<R> {
/// <tag2>Test 2</tag2>
/// </tag1>
/// "#.as_bytes());
/// reader.trim_text(true);
/// reader.config_mut().trim_text(true);
///
/// let mut count = 0;
/// let mut buf = Vec::new();
Expand Down Expand Up @@ -111,7 +111,7 @@ impl<R: AsyncBufRead + Unpin> Reader<R> {
/// </inner>
/// </outer>
/// "#.as_bytes());
/// reader.trim_text(true);
/// reader.config_mut().trim_text(true);
/// let mut buf = Vec::new();
///
/// let start = BytesStart::new("outer");
Expand Down Expand Up @@ -186,7 +186,7 @@ impl<R: AsyncBufRead + Unpin> NsReader<R> {
/// <y:tag2>Test 2</y:tag2>
/// </x:tag1>
/// "#.as_bytes());
/// reader.trim_text(true);
/// reader.config_mut().trim_text(true);
///
/// let mut count = 0;
/// let mut buf = Vec::new();
Expand Down Expand Up @@ -257,7 +257,7 @@ impl<R: AsyncBufRead + Unpin> NsReader<R> {
/// </inner>
/// </outer>
/// "#.as_bytes());
/// reader.trim_text(true);
/// reader.config_mut().trim_text(true);
/// let mut buf = Vec::new();
///
/// let ns = Namespace(b"namespace 1");
Expand Down Expand Up @@ -292,7 +292,7 @@ impl<R: AsyncBufRead + Unpin> NsReader<R> {
buf: &mut Vec<u8>,
) -> Result<Span> {
// According to the https://www.w3.org/TR/xml11/#dt-etag, end name should
// match literally the start name. See `Reader::check_end_names` documentation
// match literally the start name. See `Config::check_end_names` documentation
self.reader.read_to_end_into_async(end, buf).await
}

Expand Down Expand Up @@ -321,7 +321,7 @@ impl<R: AsyncBufRead + Unpin> NsReader<R> {
/// <y:tag2>Test 2</y:tag2>
/// </x:tag1>
/// "#.as_bytes());
/// reader.trim_text(true);
/// reader.config_mut().trim_text(true);
///
/// let mut count = 0;
/// let mut buf = Vec::new();
Expand Down
Loading

0 comments on commit bbc7bda

Please sign in to comment.