diff --git a/Cargo.toml b/Cargo.toml index 41189800..eb2f794d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -85,7 +85,7 @@ async-tokio = ["tokio"] ## # } ## let xml = to_utf16le_with_bom(r#""#); ## let mut reader = Reader::from_reader(xml.as_ref()); -## reader.trim_text(true); +## reader.config_mut().trim_text(true); ## ## let mut buf = Vec::new(); ## let mut unsupported = false; diff --git a/Changelog.md b/Changelog.md index 40cce462..dc4b422b 100644 --- a/Changelog.md +++ b/Changelog.md @@ -10,8 +10,17 @@ ## Unreleased +The way to configure parser is changed. Now all configuration is contained in the +`Config` struct and can be applied at once. When `serde-types` feature is enabled, +configuration is serializable. + ### New Features +- [#513]: Allow to continue parsing after getting new `Error::IllFormed`. +- [#677]: Added methods `config()` and `config_mut()` to inspect and change the parser + configuration. Previous builder methods on `Reader` / `NsReader` was replaced by + direct access to fields of config using `reader.config_mut().<...>`. 
+ ### Bug Fixes ### Misc Changes @@ -26,7 +35,9 @@ - `Error::UnexpectedEof` replaced by `IllFormedError` in some cases - `Error::UnexpectedToken` replaced by `IllFormedError::DoubleHyphenInComment` +[#513]: https://github.com/tafia/quick-xml/issues/513 [#675]: https://github.com/tafia/quick-xml/pull/675 +[#677]: https://github.com/tafia/quick-xml/pull/677 ## 0.31.0 -- 2023-10-22 diff --git a/README.md b/README.md index 7de9dbbf..4d51d268 100644 --- a/README.md +++ b/README.md @@ -28,7 +28,7 @@ let xml = r#" Test 2 "#; let mut reader = Reader::from_str(xml); -reader.trim_text(true); +reader.config_mut().trim_text(true); let mut count = 0; let mut txt = Vec::new(); @@ -73,7 +73,7 @@ use std::io::Cursor; let xml = r#"text"#; let mut reader = Reader::from_str(xml); -reader.trim_text(true); +reader.config_mut().trim_text(true); let mut writer = Writer::new(Cursor::new(Vec::new())); loop { match reader.read_event() { diff --git a/benches/microbenches.rs b/benches/microbenches.rs index be0df142..2f4ece04 100644 --- a/benches/microbenches.rs +++ b/benches/microbenches.rs @@ -30,7 +30,7 @@ fn read_event(c: &mut Criterion) { group.bench_function("trim_text = false", |b| { b.iter(|| { let mut r = Reader::from_str(SAMPLE); - r.check_end_names(false); + r.config_mut().check_end_names = false; let mut count = criterion::black_box(0); loop { match r.read_event() { @@ -49,7 +49,9 @@ fn read_event(c: &mut Criterion) { group.bench_function("trim_text = true", |b| { b.iter(|| { let mut r = Reader::from_str(SAMPLE); - r.trim_text(true).check_end_names(false); + let config = r.config_mut(); + config.trim_text(true); + config.check_end_names = false; let mut count = criterion::black_box(0); loop { match r.read_event() { @@ -74,7 +76,7 @@ fn read_resolved_event_into(c: &mut Criterion) { group.bench_function("trim_text = false", |b| { b.iter(|| { let mut r = NsReader::from_str(SAMPLE); - r.check_end_names(false); + r.config_mut().check_end_names = false; let mut count = 
criterion::black_box(0); loop { match r.read_resolved_event() { @@ -93,7 +95,9 @@ fn read_resolved_event_into(c: &mut Criterion) { group.bench_function("trim_text = true", |b| { b.iter(|| { let mut r = NsReader::from_str(SAMPLE); - r.trim_text(true).check_end_names(false); + let config = r.config_mut(); + config.trim_text(true); + config.check_end_names = false; let mut count = criterion::black_box(0); loop { match r.read_resolved_event() { @@ -120,7 +124,9 @@ fn one_event(c: &mut Criterion) { b.iter(|| { let mut r = Reader::from_str(&src); let mut nbtxt = criterion::black_box(0); - r.trim_text(true).check_end_names(false); + let config = r.config_mut(); + config.trim_text(true); + config.check_end_names = false; match r.read_event() { Ok(Event::Start(ref e)) => nbtxt += e.len(), something_else => panic!("Did not expect {:?}", something_else), @@ -135,7 +141,9 @@ fn one_event(c: &mut Criterion) { b.iter(|| { let mut r = Reader::from_str(&src); let mut nbtxt = criterion::black_box(0); - r.trim_text(true).check_end_names(false); + let config = r.config_mut(); + config.trim_text(true); + config.check_end_names = false; match r.read_event() { Ok(Event::Comment(e)) => nbtxt += e.unescape().unwrap().len(), something_else => panic!("Did not expect {:?}", something_else), @@ -150,7 +158,9 @@ fn one_event(c: &mut Criterion) { b.iter(|| { let mut r = Reader::from_str(&src); let mut nbtxt = criterion::black_box(0); - r.trim_text(true).check_end_names(false); + let config = r.config_mut(); + config.trim_text(true); + config.check_end_names = false; match r.read_event() { Ok(Event::CData(ref e)) => nbtxt += e.len(), something_else => panic!("Did not expect {:?}", something_else), @@ -168,7 +178,7 @@ fn attributes(c: &mut Criterion) { group.bench_function("with_checks = true", |b| { b.iter(|| { let mut r = Reader::from_str(PLAYERS); - r.check_end_names(false); + r.config_mut().check_end_names = false; let mut count = criterion::black_box(0); loop { match r.read_event() { @@ 
-189,7 +199,7 @@ fn attributes(c: &mut Criterion) { group.bench_function("with_checks = false", |b| { b.iter(|| { let mut r = Reader::from_str(PLAYERS); - r.check_end_names(false); + r.config_mut().check_end_names = false; let mut count = criterion::black_box(0); loop { match r.read_event() { @@ -210,7 +220,7 @@ fn attributes(c: &mut Criterion) { group.bench_function("try_get_attribute", |b| { b.iter(|| { let mut r = Reader::from_str(PLAYERS); - r.check_end_names(false); + r.config_mut().check_end_names = false; let mut count = criterion::black_box(0); loop { match r.read_event() { diff --git a/compare/benches/bench.rs b/compare/benches/bench.rs index 9ec1a968..04284340 100644 --- a/compare/benches/bench.rs +++ b/compare/benches/bench.rs @@ -59,7 +59,7 @@ fn low_level_comparison(c: &mut Criterion) { |b, input| { b.iter(|| { let mut r = Reader::from_reader(input.as_bytes()); - r.check_end_names(false); + r.config_mut().check_end_names = false; let mut count = criterion::black_box(0); let mut buf = Vec::new(); loop { diff --git a/examples/custom_entities.rs b/examples/custom_entities.rs index 99c59c12..b1e3334c 100644 --- a/examples/custom_entities.rs +++ b/examples/custom_entities.rs @@ -25,7 +25,7 @@ const DATA: &str = r#" fn main() -> Result<(), Box> { let mut reader = Reader::from_str(DATA); - reader.trim_text(true); + reader.config_mut().trim_text(true); let mut custom_entities: HashMap = HashMap::new(); let entity_re = Regex::new(r#""#)?; diff --git a/examples/read_buffered.rs b/examples/read_buffered.rs index 16cb2c68..4f0a20ec 100644 --- a/examples/read_buffered.rs +++ b/examples/read_buffered.rs @@ -8,7 +8,7 @@ fn main() -> Result<(), quick_xml::Error> { use quick_xml::reader::Reader; let mut reader = Reader::from_file("tests/documents/document.xml")?; - reader.trim_text(true); + reader.config_mut().trim_text(true); let mut buf = Vec::new(); diff --git a/examples/read_nodes.rs b/examples/read_nodes.rs index 07a7ad6b..9fe6667e 100644 --- 
a/examples/read_nodes.rs +++ b/examples/read_nodes.rs @@ -105,14 +105,16 @@ fn main() -> Result<(), AppError> { let mut translations: Vec = Vec::new(); let mut reader = Reader::from_str(XML); - reader.trim_text(true); + let config = reader.config_mut(); + config.trim_text(true); // == Handling empty elements == // To simply our processing code // we want the same events for empty elements, like: // // - reader.expand_empty_elements(true); + config.expand_empty_elements = true; + let mut buf = Vec::new(); loop { diff --git a/examples/read_texts.rs b/examples/read_texts.rs index c2d79f07..666d1bdb 100644 --- a/examples/read_texts.rs +++ b/examples/read_texts.rs @@ -6,7 +6,7 @@ fn main() { text3text4"; let mut reader = Reader::from_str(xml); - reader.trim_text(true); + reader.config_mut().trim_text(true); loop { match reader.read_event() { diff --git a/fuzz/fuzz_targets/fuzz_target_1.rs b/fuzz/fuzz_targets/fuzz_target_1.rs index b8dbd187..bfac5e13 100644 --- a/fuzz/fuzz_targets/fuzz_target_1.rs +++ b/fuzz/fuzz_targets/fuzz_target_1.rs @@ -17,10 +17,9 @@ where { let mut writer = Writer::new(Cursor::new(Vec::new())); let mut buf = vec![]; - let reader = reader - .expand_empty_elements(true) - .trim_text(true) - .trim_text_end(true); + let config = reader.config_mut(); + config.expand_empty_elements = true; + config.trim_text(true); loop { let event_result = reader.read_event_into(&mut buf); if let Ok(ref event) = event_result { diff --git a/fuzz/fuzz_targets/structured_roundtrip.rs b/fuzz/fuzz_targets/structured_roundtrip.rs index 4febe1cd..26eec825 100644 --- a/fuzz/fuzz_targets/structured_roundtrip.rs +++ b/fuzz/fuzz_targets/structured_roundtrip.rs @@ -3,7 +3,7 @@ use arbitrary::{Arbitrary, Unstructured}; use libfuzzer_sys::fuzz_target; use quick_xml::events::{BytesCData, BytesText, Event}; -use quick_xml::reader::{NsReader, Reader}; +use quick_xml::reader::{Config, NsReader, Reader}; use quick_xml::writer::Writer; use std::{hint::black_box, io::Cursor}; @@ -41,7 
+41,7 @@ enum WriterFunc<'a> { #[derive(Debug, arbitrary::Arbitrary)] struct Driver<'a> { writer_funcs: Vec>, - reader_config: Vec, + reader_config: Config, } fn fuzz_round_trip(driver: Driver) -> quick_xml::Result<()> { @@ -83,13 +83,7 @@ fn fuzz_round_trip(driver: Driver) -> quick_xml::Result<()> { let xml = writer.into_inner().into_inner(); // The str should be valid as we just generated it, unwrapping **should** be safe. let mut reader = Reader::from_str(std::str::from_utf8(&xml).unwrap()); - let mut config_iter = driver.reader_config.iter(); - reader.check_comments(*config_iter.next().unwrap_or(&false)); - reader.check_end_names(*config_iter.next().unwrap_or(&false)); - reader.expand_empty_elements(*config_iter.next().unwrap_or(&false)); - reader.trim_markup_names_in_closing_tags(*config_iter.next().unwrap_or(&false)); - reader.trim_text(*config_iter.next().unwrap_or(&false)); - reader.trim_text_end(*config_iter.next().unwrap_or(&false)); + *reader.config_mut() = driver.reader_config.clone(); loop { let event = black_box(reader.read_event()?); @@ -99,12 +93,7 @@ fn fuzz_round_trip(driver: Driver) -> quick_xml::Result<()> { } let mut reader = NsReader::from_reader(&xml[..]); - reader.check_comments(*config_iter.next().unwrap_or(&false)); - reader.check_end_names(*config_iter.next().unwrap_or(&false)); - reader.expand_empty_elements(*config_iter.next().unwrap_or(&false)); - reader.trim_markup_names_in_closing_tags(*config_iter.next().unwrap_or(&false)); - reader.trim_text(*config_iter.next().unwrap_or(&false)); - reader.trim_text_end(*config_iter.next().unwrap_or(&false)); + *reader.config_mut() = driver.reader_config; loop { let event = black_box(reader.read_event()?); diff --git a/src/de/mod.rs b/src/de/mod.rs index d7210109..4ce3ba99 100644 --- a/src/de/mod.rs +++ b/src/de/mod.rs @@ -2476,7 +2476,7 @@ where /// /// [`deserialize_seq`]: serde::Deserializer::deserialize_seq /// [DoS]: https://en.wikipedia.org/wiki/Denial-of-service_attack - /// [auto-expanding 
feature]: Reader::expand_empty_elements + /// [auto-expanding feature]: crate::reader::Config::expand_empty_elements #[cfg(feature = "overlapped-lists")] pub fn event_buffer_size(&mut self, limit: Option) -> &mut Self { self.limit = limit; @@ -2761,7 +2761,8 @@ where /// and use specified entity resolver. pub fn from_str_with_resolver(source: &'de str, entity_resolver: E) -> Self { let mut reader = Reader::from_str(source); - reader.expand_empty_elements(true); + let config = reader.config_mut(); + config.expand_empty_elements = true; Self::new( SliceReader { @@ -2803,7 +2804,8 @@ where /// UTF-8, you can decode it first before using [`from_str`]. pub fn with_resolver(reader: R, entity_resolver: E) -> Self { let mut reader = Reader::from_reader(reader); - reader.expand_empty_elements(true); + let config = reader.config_mut(); + config.expand_empty_elements = true; Self::new( IoReader { @@ -3709,7 +3711,8 @@ mod tests { start_trimmer: StartTrimmer::default(), }; - reader.reader.expand_empty_elements(true); + let config = reader.reader.config_mut(); + config.expand_empty_elements = true; let mut events = Vec::new(); diff --git a/src/errors.rs b/src/errors.rs index 0893edca..78658eee 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -65,6 +65,9 @@ impl std::error::Error for SyntaxError {} /// An error returned if parsed document is not [well-formed], for example, /// an opened tag is not closed before end of input. /// +/// Those errors are not fatal: after encountering an error you can continue +/// parsing the document. +/// /// [well-formed]: https://www.w3.org/TR/xml11/#dt-wellformed #[derive(Clone, Debug, PartialEq, Eq)] pub enum IllFormedError { @@ -93,7 +96,7 @@ pub enum IllFormedError { /// mostly artificial, but you can enable it in the [configuration]. 
/// /// [specification]: https://www.w3.org/TR/xml11/#sec-comments - /// [configuration]: crate::reader::Reader::check_comments + /// [configuration]: crate::reader::Config::check_comments DoubleHyphenInComment, } diff --git a/src/reader/async_tokio.rs b/src/reader/async_tokio.rs index 91af7781..207c1b64 100644 --- a/src/reader/async_tokio.rs +++ b/src/reader/async_tokio.rs @@ -49,7 +49,7 @@ impl Reader { /// Test 2 /// /// "#.as_bytes()); - /// reader.trim_text(true); + /// reader.config_mut().trim_text(true); /// /// let mut count = 0; /// let mut buf = Vec::new(); @@ -111,7 +111,7 @@ impl Reader { /// /// /// "#.as_bytes()); - /// reader.trim_text(true); + /// reader.config_mut().trim_text(true); /// let mut buf = Vec::new(); /// /// let start = BytesStart::new("outer"); @@ -186,7 +186,7 @@ impl NsReader { /// Test 2 /// /// "#.as_bytes()); - /// reader.trim_text(true); + /// reader.config_mut().trim_text(true); /// /// let mut count = 0; /// let mut buf = Vec::new(); @@ -257,7 +257,7 @@ impl NsReader { /// /// /// "#.as_bytes()); - /// reader.trim_text(true); + /// reader.config_mut().trim_text(true); /// let mut buf = Vec::new(); /// /// let ns = Namespace(b"namespace 1"); @@ -292,7 +292,7 @@ impl NsReader { buf: &mut Vec, ) -> Result { // According to the https://www.w3.org/TR/xml11/#dt-etag, end name should - // match literally the start name. See `Reader::check_end_names` documentation + // match literally the start name. 
See `Config::check_end_names` documentation self.reader.read_to_end_into_async(end, buf).await } @@ -321,7 +321,7 @@ impl NsReader { /// Test 2 /// /// "#.as_bytes()); - /// reader.trim_text(true); + /// reader.config_mut().trim_text(true); /// /// let mut count = 0; /// let mut buf = Vec::new(); diff --git a/src/reader/buffered_reader.rs b/src/reader/buffered_reader.rs index d2089b79..a614d603 100644 --- a/src/reader/buffered_reader.rs +++ b/src/reader/buffered_reader.rs @@ -283,7 +283,7 @@ impl Reader { /// Test 2 /// "#; /// let mut reader = Reader::from_str(xml); - /// reader.trim_text(true); + /// reader.config_mut().trim_text(true); /// let mut count = 0; /// let mut buf = Vec::new(); /// let mut txt = Vec::new(); @@ -368,7 +368,7 @@ impl Reader { /// /// /// "#); - /// reader.trim_text(true); + /// reader.config_mut().trim_text(true); /// let mut buf = Vec::new(); /// /// let start = BytesStart::new("outer"); @@ -390,8 +390,8 @@ impl Reader { /// [`End`]: Event::End /// [`BytesStart::to_end()`]: crate::events::BytesStart::to_end /// [`read_to_end()`]: Self::read_to_end - /// [`expand_empty_elements`]: Self::expand_empty_elements - /// [`check_end_names`]: Self::check_end_names + /// [`expand_empty_elements`]: crate::reader::Config::expand_empty_elements + /// [`check_end_names`]: crate::reader::Config::check_end_names /// [the specification]: https://www.w3.org/TR/xml11/#dt-etag pub fn read_to_end_into(&mut self, end: QName, buf: &mut Vec) -> Result { Ok(read_to_end!(self, end, buf, read_event_impl, { diff --git a/src/reader/mod.rs b/src/reader/mod.rs index e7b9c37a..48c16c75 100644 --- a/src/reader/mod.rs +++ b/src/reader/mod.rs @@ -11,153 +11,189 @@ use crate::reader::state::ReaderState; use memchr; -macro_rules! configure_methods { - ($($holder:ident)?) => { - /// Changes whether empty elements should be split into an `Open` and a `Close` event. 
- /// - /// When set to `true`, all [`Empty`] events produced by a self-closing tag like `` are - /// expanded into a [`Start`] event followed by an [`End`] event. When set to `false` (the - /// default), those tags are represented by an [`Empty`] event instead. - /// - /// Note, that setting this to `true` will lead to additional allocates that - /// needed to store tag name for an [`End`] event. However if [`check_end_names`] - /// is also set, only one additional allocation will be performed that support - /// both these options. - /// - /// (`false` by default) - /// - /// [`Empty`]: Event::Empty - /// [`Start`]: Event::Start - /// [`End`]: Event::End - /// [`check_end_names`]: Self::check_end_names - pub fn expand_empty_elements(&mut self, val: bool) -> &mut Self { - self $(.$holder)? .state.expand_empty_elements = val; - self - } +/// A struct that holds a parser configuration. +/// +/// Current parser configuration can be retrieved by calling [`Reader::config()`] +/// and changed by changing properties of the object returned by a call to +/// [`Reader::config_mut()`]. +/// +/// [`Reader::config()`]: crate::reader::Reader::config +/// [`Reader::config_mut()`]: crate::reader::Reader::config_mut +#[derive(Debug, Clone, PartialEq, Eq)] +#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] +#[cfg_attr(feature = "serde-types", derive(serde::Deserialize, serde::Serialize))] +#[non_exhaustive] +pub struct Config { + /// Whether comments should be validated. If enabled, in case of invalid comment + /// [`Error::IllFormed(DoubleHyphenInComment)`] is returned from read methods. + /// + /// When set to `true`, every [`Comment`] event will be checked for not + /// containing `--`, which [is not allowed] in XML comments. Most of the time + /// we don't want comments at all so we don't really care about comment + /// correctness, thus the default value is `false` to improve performance. 
+ /// + /// Default: `false` + /// + /// [`Error::IllFormed(DoubleHyphenInComment)`]: crate::errors::IllFormedError::DoubleHyphenInComment + /// [`Comment`]: crate::events::Event::Comment + /// [is not allowed]: https://www.w3.org/TR/xml11/#sec-comments + pub check_comments: bool, + + /// Whether mismatched closing tag names should be detected. If enabled, in + /// case of mismatch the [`Error::IllFormed(MismatchedEnd)`] is returned from + /// read methods. + /// + /// Note, that start and end tags [should match literally][spec], they cannot + /// have different prefixes even if both prefixes resolve to the same namespace. + /// The XML + /// + /// ```xml + /// + /// + /// ``` + /// + /// is not valid, even though semantically the start tag is the same as the + /// end tag. The reason is that namespaces are an extension of the original + /// XML specification (without namespaces) and it should be backward-compatible. + /// + /// When set to `false`, it won't check if a closing tag matches the corresponding + /// opening tag. For example, `` will be permitted. + /// + /// If the XML is known to be sane (already processed, etc.) this saves extra time. + /// + /// Note that the emitted [`End`] event will not be modified if this is disabled, + /// ie. it will contain the data of the mismatched end tag. + /// + /// Note, that setting this to `true` will lead to additional allocates that + /// needed to store tag name for an [`End`] event. However if [`expand_empty_elements`] + /// is also set, only one additional allocation will be performed that support + /// both these options. + /// + /// Default: `true` + /// + /// [`Error::IllFormed(MismatchedEnd)`]: crate::errors::IllFormedError::MismatchedEnd + /// [spec]: https://www.w3.org/TR/xml11/#dt-etag + /// [`End`]: crate::events::Event::End + /// [`expand_empty_elements`]: Self::expand_empty_elements + pub check_end_names: bool, - /// Changes whether whitespace before and after character data should be removed. 
- /// - /// When set to `true`, all [`Text`] events are trimmed. - /// If after that the event is empty it will not be pushed. - /// - /// Changing this option automatically changes the [`trim_text_end`] option. - /// - /// (`false` by default). - /// - ///
- /// - /// WARNING: With this option every text events will be trimmed which is - /// incorrect behavior when text events delimited by comments, processing - /// instructions or CDATA sections. To correctly trim data manually apply - /// [`BytesText::inplace_trim_start`] and [`BytesText::inplace_trim_end`] - /// only to necessary events. - ///
- /// - /// [`Text`]: Event::Text - /// [`trim_text_end`]: Self::trim_text_end - /// [`BytesText::inplace_trim_start`]: crate::events::BytesText::inplace_trim_start - /// [`BytesText::inplace_trim_end`]: crate::events::BytesText::inplace_trim_end - pub fn trim_text(&mut self, val: bool) -> &mut Self { - self $(.$holder)? .state.trim_text_start = val; - self $(.$holder)? .state.trim_text_end = val; - self - } + /// Whether empty elements should be split into an `Open` and a `Close` event. + /// + /// When set to `true`, all [`Empty`] events produced by a self-closing tag + /// like `` are expanded into a [`Start`] event followed by an [`End`] + /// event. When set to `false` (the default), those tags are represented by + /// an [`Empty`] event instead. + /// + /// Note, that setting this to `true` will lead to additional allocates that + /// needed to store tag name for an [`End`] event. However if [`check_end_names`] + /// is also set, only one additional allocation will be performed that support + /// both these options. + /// + /// Default: `false` + /// + /// [`Empty`]: crate::events::Event::Empty + /// [`Start`]: crate::events::Event::Start + /// [`End`]: crate::events::Event::End + /// [`check_end_names`]: Self::check_end_names + pub expand_empty_elements: bool, + + /// Whether trailing whitespace after the markup name are trimmed in closing + /// tags ``. + /// + /// If `true` the emitted [`End`] event is stripped of trailing whitespace + /// after the markup name. + /// + /// Note that if set to `false` and [`check_end_names`] is `true` the comparison + /// of markup names is going to fail erroneously if a closing tag contains + /// trailing whitespace. + /// + /// Default: `true` + /// + /// [`End`]: crate::events::Event::End + /// [`check_end_names`]: Self::check_end_names + pub trim_markup_names_in_closing_tags: bool, - /// Changes whether whitespace after character data should be removed. 
- /// - /// When set to `true`, trailing whitespace is trimmed in [`Text`] events. - /// If after that the event is empty it will not be pushed. - /// - /// (`false` by default). - /// - ///
- /// - /// WARNING: With this option every text events will be trimmed which is - /// incorrect behavior when text events delimited by comments, processing - /// instructions or CDATA sections. To correctly trim data manually apply - /// [`BytesText::inplace_trim_start`] and [`BytesText::inplace_trim_end`] - /// only to necessary events. - ///
- /// - /// [`Text`]: Event::Text - /// [`BytesText::inplace_trim_start`]: crate::events::BytesText::inplace_trim_start - /// [`BytesText::inplace_trim_end`]: crate::events::BytesText::inplace_trim_end - pub fn trim_text_end(&mut self, val: bool) -> &mut Self { - self $(.$holder)? .state.trim_text_end = val; - self - } + /// Whether whitespace before character data should be removed. + /// + /// When set to `true`, leading whitespace is trimmed in [`Text`] events. + /// If after that the event is empty it will not be pushed. + /// + /// Default: `false` + /// + ///
+ /// + /// WARNING: With this option every text event will be trimmed, which is + /// incorrect behavior when text events are delimited by comments, processing + /// instructions or CDATA sections. To trim data correctly, manually apply + /// [`BytesText::inplace_trim_start`] and [`BytesText::inplace_trim_end`] + /// only to the necessary events. + /// 
+ /// + /// [`Text`]: crate::events::Event::Text + /// [`BytesText::inplace_trim_start`]: crate::events::BytesText::inplace_trim_start + /// [`BytesText::inplace_trim_end`]: crate::events::BytesText::inplace_trim_end + pub trim_text_start: bool, - /// Changes whether trailing whitespaces after the markup name are trimmed in closing tags - /// ``. - /// - /// If true the emitted [`End`] event is stripped of trailing whitespace after the markup name. - /// - /// Note that if set to `false` and `check_end_names` is true the comparison of markup names is - /// going to fail erroneously if a closing tag contains trailing whitespaces. - /// - /// (`true` by default) - /// - /// [`End`]: Event::End - pub fn trim_markup_names_in_closing_tags(&mut self, val: bool) -> &mut Self { - self $(.$holder)? .state.trim_markup_names_in_closing_tags = val; - self - } + /// Whether whitespace after character data should be removed. + /// + /// When set to `true`, trailing whitespace is trimmed in [`Text`] events. + /// If after that the event is empty it will not be pushed. + /// + /// Default: `false` + /// + ///
+ /// + /// WARNING: With this option every text event will be trimmed, which is + /// incorrect behavior when text events are delimited by comments, processing + /// instructions or CDATA sections. To trim data correctly, manually apply + /// [`BytesText::inplace_trim_start`] and [`BytesText::inplace_trim_end`] + /// only to the necessary events. + /// 
+ /// + /// [`Text`]: crate::events::Event::Text + /// [`BytesText::inplace_trim_start`]: crate::events::BytesText::inplace_trim_start + /// [`BytesText::inplace_trim_end`]: crate::events::BytesText::inplace_trim_end + pub trim_text_end: bool, +} - /// Changes whether mismatched closing tag names should be detected. - /// - /// Note, that start and end tags [should match literally][spec], they cannot - /// have different prefixes even if both prefixes resolve to the same namespace. - /// The XML - /// - /// ```xml - /// - /// - /// ``` - /// - /// is not valid, even though semantically the start tag is the same as the - /// end tag. The reason is that namespaces are an extension of the original - /// XML specification (without namespaces) and it should be backward-compatible. - /// - /// When set to `false`, it won't check if a closing tag matches the corresponding opening tag. - /// For example, `` will be permitted. - /// - /// If the XML is known to be sane (already processed, etc.) this saves extra time. - /// - /// Note that the emitted [`End`] event will not be modified if this is disabled, ie. it will - /// contain the data of the mismatched end tag. - /// - /// Note, that setting this to `true` will lead to additional allocates that - /// needed to store tag name for an [`End`] event. However if [`expand_empty_elements`] - /// is also set, only one additional allocation will be performed that support - /// both these options. - /// - /// (`true` by default) - /// - /// [spec]: https://www.w3.org/TR/xml11/#dt-etag - /// [`End`]: Event::End - /// [`expand_empty_elements`]: Self::expand_empty_elements - pub fn check_end_names(&mut self, val: bool) -> &mut Self { - self $(.$holder)? .state.check_end_names = val; - self - } +impl Config { + /// Set both [`trim_text_start`] and [`trim_text_end`] to the same value. + /// + ///
+ /// + /// WARNING: With this option every text event will be trimmed, which is + /// incorrect behavior when text events are delimited by comments, processing + /// instructions or CDATA sections. To trim data correctly, manually apply + /// [`BytesText::inplace_trim_start`] and [`BytesText::inplace_trim_end`] + /// only to the necessary events. + /// 
+ /// + /// [`trim_text_start`]: Self::trim_text_start + /// [`trim_text_end`]: Self::trim_text_end + /// [`BytesText::inplace_trim_start`]: crate::events::BytesText::inplace_trim_start + /// [`BytesText::inplace_trim_end`]: crate::events::BytesText::inplace_trim_end + #[inline] + pub fn trim_text(&mut self, trim: bool) { + self.trim_text_start = trim; + self.trim_text_end = trim; + } +} - /// Changes whether comments should be validated. - /// - /// When set to `true`, every [`Comment`] event will be checked for not containing `--`, which - /// is not allowed in XML comments. Most of the time we don't want comments at all so we don't - /// really care about comment correctness, thus the default value is `false` to improve - /// performance. - /// - /// (`false` by default) - /// - /// [`Comment`]: Event::Comment - pub fn check_comments(&mut self, val: bool) -> &mut Self { - self $(.$holder)? .state.check_comments = val; - self +impl Default for Config { + fn default() -> Self { + Self { + check_comments: false, + check_end_names: true, + expand_empty_elements: false, + trim_markup_names_in_closing_tags: true, + trim_text_start: false, + trim_text_end: false, } - }; + } } +//////////////////////////////////////////////////////////////////////////////////////////////////// + macro_rules! read_event_impl { ( $self:ident, $buf:ident, @@ -205,6 +241,9 @@ macro_rules! read_event_impl { }; }; match event { + // #513: In case of ill-formed errors we already consume the wrong data + // and change the state. We can continue parsing if we wish + Err(Error::IllFormed(_)) => {} Err(_) | Ok(Event::Eof) => $self.state.state = ParseState::Exit, _ => {} } @@ -213,7 +252,7 @@ macro_rules! read_event_impl { } /// Read bytes up to `<` and skip it. 
If current byte (after skipping all space -/// characters if [`ReaderState::trim_text_start`] is `true`) is already `<`, then +/// characters if [`Config::trim_text_start`] is `true`) is already `<`, then /// returns the next event, otherwise stay at position just after the `<` symbol. /// /// Moves parser to the `OpenedTag` state. @@ -230,7 +269,7 @@ macro_rules! read_until_open { ) => {{ $self.state.state = ParseState::OpenedTag; - if $self.state.trim_text_start { + if $self.state.config.trim_text_start { $reader.skip_whitespace(&mut $self.state.offset) $(.$await)? ?; } @@ -369,7 +408,7 @@ pub type Span = Range; //////////////////////////////////////////////////////////////////////////////////////////////////// /// Possible reader states. The state transition diagram (`true` and `false` shows -/// value of [`Reader::expand_empty_elements()`] option): +/// value of [`Config::expand_empty_elements`] option): /// /// ```mermaid /// flowchart LR @@ -406,7 +445,7 @@ enum ParseState { /// [`Event::Start`] event. The next event emitted will be an [`Event::End`], /// after which reader returned to the `ClosedTag` state. /// - /// [`expand_empty_elements`]: ReaderState::expand_empty_elements + /// [`expand_empty_elements`]: Config::expand_empty_elements Empty, /// Reader enters this state when `Eof` event generated or an error occurred. /// This is the last state, the reader stay in it forever. 
@@ -480,7 +519,7 @@ impl EncodingRef { /// Test 2 /// "#; /// let mut reader = Reader::from_str(xml); -/// reader.trim_text(true); +/// reader.config_mut().trim_text(true); /// /// let mut count = 0; /// let mut txt = Vec::new(); @@ -534,7 +573,15 @@ impl Reader { } } - configure_methods!(); + /// Returns reference to the parser configuration + pub fn config(&self) -> &Config { + &self.state.config + } + + /// Returns mutable reference to the parser configuration + pub fn config_mut(&mut self) -> &mut Config { + &mut self.state.config + } } /// Getters diff --git a/src/reader/ns_reader.rs b/src/reader/ns_reader.rs index 60d56a7c..d5b79e78 100644 --- a/src/reader/ns_reader.rs +++ b/src/reader/ns_reader.rs @@ -13,7 +13,7 @@ use std::path::Path; use crate::errors::Result; use crate::events::Event; use crate::name::{LocalName, NamespaceResolver, QName, ResolveResult}; -use crate::reader::{Reader, Span, XmlSource}; +use crate::reader::{Config, Reader, Span, XmlSource}; /// A low level encoding-agnostic XML event reader that performs namespace resolution. 
/// @@ -37,7 +37,17 @@ impl NsReader { Self::new(Reader::from_reader(reader)) } - configure_methods!(reader); + /// Returns reference to the parser configuration + #[inline] + pub fn config(&self) -> &Config { + self.reader.config() + } + + /// Returns mutable reference to the parser configuration + #[inline] + pub fn config_mut(&mut self) -> &mut Config { + self.reader.config_mut() + } } /// Private methods @@ -253,7 +263,7 @@ impl NsReader { /// xmlns='root namespace' /// xmlns:p='other namespace'/> /// "); - /// reader.trim_text(true); + /// reader.config_mut().trim_text(true); /// /// match reader.read_event().unwrap() { /// Event::Empty(e) => { @@ -309,7 +319,7 @@ impl NsReader { /// Test 2 /// /// "#); - /// reader.trim_text(true); + /// reader.config_mut().trim_text(true); /// /// let mut count = 0; /// let mut buf = Vec::new(); @@ -367,7 +377,7 @@ impl NsReader { /// Test 2 /// /// "#); - /// reader.trim_text(true); + /// reader.config_mut().trim_text(true); /// /// let mut count = 0; /// let mut buf = Vec::new(); @@ -474,7 +484,7 @@ impl NsReader { /// ///
/// "#); - /// reader.trim_text(true); + /// reader.config_mut().trim_text(true); /// let mut buf = Vec::new(); /// /// let ns = Namespace(b"namespace 1"); @@ -504,12 +514,12 @@ impl NsReader { /// [`IllFormed`]: crate::errors::Error::IllFormed /// [`read_to_end()`]: Self::read_to_end /// [`BytesStart::to_end()`]: crate::events::BytesStart::to_end - /// [`expand_empty_elements`]: Self::expand_empty_elements + /// [`expand_empty_elements`]: Config::expand_empty_elements /// [the specification]: https://www.w3.org/TR/xml11/#dt-etag #[inline] pub fn read_to_end_into(&mut self, end: QName, buf: &mut Vec) -> Result { // According to the https://www.w3.org/TR/xml11/#dt-etag, end name should - // match literally the start name. See `Self::check_end_names` documentation + // match literally the start name. See `Config::check_end_names` documentation self.reader.read_to_end_into(end, buf) } } @@ -555,7 +565,7 @@ impl<'i> NsReader<&'i [u8]> { /// Test 2 /// /// "#); - /// reader.trim_text(true); + /// reader.config_mut().trim_text(true); /// /// let mut count = 0; /// let mut txt = Vec::new(); @@ -616,7 +626,7 @@ impl<'i> NsReader<&'i [u8]> { /// Test 2 /// /// "#); - /// reader.trim_text(true); + /// reader.config_mut().trim_text(true); /// /// let mut count = 0; /// let mut txt = Vec::new(); @@ -712,7 +722,7 @@ impl<'i> NsReader<&'i [u8]> { /// ///
/// "#); - /// reader.trim_text(true); + /// reader.config_mut().trim_text(true); /// /// let ns = Namespace(b"namespace 1"); /// let start = BytesStart::from_content(r#"outer xmlns="namespace 1""#, 5); @@ -740,12 +750,12 @@ impl<'i> NsReader<&'i [u8]> { /// [`End`]: Event::End /// [`IllFormed`]: crate::errors::Error::IllFormed /// [`BytesStart::to_end()`]: crate::events::BytesStart::to_end - /// [`expand_empty_elements`]: Self::expand_empty_elements + /// [`expand_empty_elements`]: Config::expand_empty_elements /// [the specification]: https://www.w3.org/TR/xml11/#dt-etag #[inline] pub fn read_to_end(&mut self, end: QName) -> Result { // According to the https://www.w3.org/TR/xml11/#dt-etag, end name should - // match literally the start name. See `Self::check_end_names` documentation + // match literally the start name. See `Config::check_end_names` documentation self.reader.read_to_end(end) } @@ -786,7 +796,7 @@ impl<'i> NsReader<&'i [u8]> { ///

For example, elements not needed to be "closed" /// /// "#); - /// reader.trim_text(true); + /// reader.config_mut().trim_text(true); /// /// let start = BytesStart::new("html"); /// let end = start.to_end().into_owned(); @@ -794,7 +804,7 @@ impl<'i> NsReader<&'i [u8]> { /// // First, we read a start event... /// assert_eq!(reader.read_event().unwrap(), Event::Start(start)); /// // ...and disable checking of end names because we expect HTML further... - /// reader.check_end_names(false); + /// reader.config_mut().check_end_names = false; /// /// // ...then, we could read text content until close tag. /// // This call will correctly handle nested elements. @@ -806,7 +816,7 @@ impl<'i> NsReader<&'i [u8]> { /// "#)); /// /// // Now we can enable checks again - /// reader.check_end_names(true); + /// reader.config_mut().check_end_names = true; /// /// // At the end we should get an Eof event, because we ate the whole XML /// assert_eq!(reader.read_event().unwrap(), Event::Eof); diff --git a/src/reader/slice_reader.rs b/src/reader/slice_reader.rs index 1cfdb36b..8f59ca04 100644 --- a/src/reader/slice_reader.rs +++ b/src/reader/slice_reader.rs @@ -54,7 +54,7 @@ impl<'a> Reader<&'a [u8]> { /// Test 2 /// /// "#); - /// reader.trim_text(true); + /// reader.config_mut().trim_text(true); /// /// let mut count = 0; /// let mut txt = Vec::new(); @@ -131,7 +131,7 @@ impl<'a> Reader<&'a [u8]> { /// /// /// "#); - /// reader.trim_text(true); + /// reader.config_mut().trim_text(true); /// /// let start = BytesStart::new("outer"); /// let end = start.to_end().into_owned(); @@ -151,8 +151,8 @@ impl<'a> Reader<&'a [u8]> { /// [`Start`]: Event::Start /// [`End`]: Event::End /// [`BytesStart::to_end()`]: crate::events::BytesStart::to_end - /// [`expand_empty_elements`]: Self::expand_empty_elements - /// [`check_end_names`]: Self::check_end_names + /// [`expand_empty_elements`]: crate::reader::Config::expand_empty_elements + /// [`check_end_names`]: 
crate::reader::Config::check_end_names /// [the specification]: https://www.w3.org/TR/xml11/#dt-etag pub fn read_to_end(&mut self, end: QName) -> Result { Ok(read_to_end!(self, end, (), read_event_impl, {})) @@ -195,7 +195,7 @@ impl<'a> Reader<&'a [u8]> { ///

For example, elements not needed to be "closed" /// /// "); - /// reader.trim_text(true); + /// reader.config_mut().trim_text(true); /// /// let start = BytesStart::new("html"); /// let end = start.to_end().into_owned(); @@ -203,7 +203,7 @@ impl<'a> Reader<&'a [u8]> { /// // First, we read a start event... /// assert_eq!(reader.read_event().unwrap(), Event::Start(start)); /// // ...and disable checking of end names because we expect HTML further... - /// reader.check_end_names(false); + /// reader.config_mut().check_end_names = false; /// /// // ...then, we could read text content until close tag. /// // This call will correctly handle nested elements. @@ -216,7 +216,7 @@ impl<'a> Reader<&'a [u8]> { /// assert!(matches!(text, Cow::Borrowed(_))); /// /// // Now we can enable checks again - /// reader.check_end_names(true); + /// reader.config_mut().check_end_names = true; /// /// // At the end we should get an Eof event, because we ate the whole XML /// assert_eq!(reader.read_event().unwrap(), Event::Eof); diff --git a/src/reader/state.rs b/src/reader/state.rs index 6169cf63..3371cf44 100644 --- a/src/reader/state.rs +++ b/src/reader/state.rs @@ -6,7 +6,7 @@ use crate::errors::{Error, IllFormedError, Result, SyntaxError}; use crate::events::{BytesCData, BytesDecl, BytesEnd, BytesStart, BytesText, Event}; #[cfg(feature = "encoding")] use crate::reader::EncodingRef; -use crate::reader::{is_whitespace, BangType, ParseState}; +use crate::reader::{is_whitespace, BangType, Config, ParseState}; use memchr; @@ -19,18 +19,8 @@ pub(super) struct ReaderState { pub offset: usize, /// Defines how to process next byte pub state: ParseState, - /// Expand empty element into an opening and closing element - pub expand_empty_elements: bool, - /// Trims leading whitespace in Text events, skip the element if text is empty - pub trim_text_start: bool, - /// Trims trailing whitespace in Text events. 
- pub trim_text_end: bool, - /// Trims trailing whitespaces from markup names in closing tags `` - pub trim_markup_names_in_closing_tags: bool, - /// Check if [`Event::End`] nodes match last [`Event::Start`] node - pub check_end_names: bool, - /// Check if comments contains `--` (false per default) - pub check_comments: bool, + /// User-defined settings that affect parsing + pub config: Config, /// All currently Started elements which didn't have a matching /// End element yet. /// @@ -68,7 +58,7 @@ impl ReaderState { pub fn emit_text<'b>(&mut self, bytes: &'b [u8]) -> Result> { let mut content = bytes; - if self.trim_text_end { + if self.config.trim_text_end { // Skip the ending '<' let len = bytes .iter() @@ -91,7 +81,7 @@ impl ReaderState { match bang_type { BangType::Comment if buf.starts_with(b"!--") => { debug_assert!(buf.ends_with(b"--")); - if self.check_comments { + if self.config.check_comments { // search if '--' not in comments if let Some(p) = memchr::memchr_iter(b'-', &buf[3..len - 2]) .position(|p| buf[3 + p + 1] == b'-') @@ -130,13 +120,13 @@ impl ReaderState { } /// Wraps content of `buf` into the [`Event::End`] event. Does the check that - /// end name matches the last opened start name if `self.check_end_names` is set. + /// end name matches the last opened start name if `self.config.check_end_names` is set. pub fn emit_end<'b>(&mut self, buf: &'b [u8]) -> Result> { // Strip the `/` character. `content` contains data between `` let content = &buf[1..]; // XML standard permits whitespaces after the markup name in closing tags. // Let's strip them from the buffer before comparing tag names. 
- let name = if self.trim_markup_names_in_closing_tags { + let name = if self.config.trim_markup_names_in_closing_tags { if let Some(pos_end_name) = content.iter().rposition(|&b| !is_whitespace(b)) { &content[..pos_end_name + 1] } else { @@ -151,7 +141,7 @@ impl ReaderState { // Get the index in self.opened_buffer of the name of the last opened tag match self.opened_starts.pop() { Some(start) => { - if self.check_end_names { + if self.config.check_end_names { let expected = &self.opened_buffer[start..]; if name != expected { let expected = decoder.decode(expected).unwrap_or_default().into_owned(); @@ -224,7 +214,7 @@ impl ReaderState { let name_len = if name_end < len { name_end } else { len - 1 }; let event = BytesStart::wrap(&content[..len - 1], name_len); - if self.expand_empty_elements { + if self.config.expand_empty_elements { self.state = ParseState::Empty; self.opened_starts.push(self.opened_buffer.len()); self.opened_buffer.extend(&content[..name_len]); @@ -273,12 +263,7 @@ impl Default for ReaderState { Self { offset: 0, state: ParseState::Init, - expand_empty_elements: false, - trim_text_start: false, - trim_text_end: false, - trim_markup_names_in_closing_tags: true, - check_end_names: true, - check_comments: false, + config: Config::default(), opened_buffer: Vec::new(), opened_starts: Vec::new(), diff --git a/src/writer.rs b/src/writer.rs index 9533721d..122eb488 100644 --- a/src/writer.rs +++ b/src/writer.rs @@ -27,7 +27,7 @@ use {crate::de::DeError, serde::Serialize}; /// /// let xml = r#"text"#; /// let mut reader = Reader::from_str(xml); -/// reader.trim_text(true); +/// reader.config_mut().trim_text(true); /// let mut writer = Writer::new(Cursor::new(Vec::new())); /// loop { /// match reader.read_event() { diff --git a/tests/encodings.rs b/tests/encodings.rs index a8b57e13..de039ed4 100644 --- a/tests/encodings.rs +++ b/tests/encodings.rs @@ -31,7 +31,7 @@ fn test_koi8_r_encoding() { let src = include_bytes!("documents/opennews_all.rss").as_ref(); 
let mut buf = vec![]; let mut r = Reader::from_reader(src); - r.trim_text(true); + r.config_mut().trim_text(true); loop { match r.read_event_into(&mut buf) { Ok(Event::Text(e)) => { diff --git a/tests/issues.rs b/tests/issues.rs index dc4103b4..da9918d2 100644 --- a/tests/issues.rs +++ b/tests/issues.rs @@ -17,7 +17,7 @@ fn issue94() { "#; let mut reader = Reader::from_reader(&data[..]); - reader.trim_text(true); + reader.config_mut().trim_text(true); loop { match reader.read_event() { Ok(Event::Eof) | Err(..) => break, @@ -115,11 +115,11 @@ mod issue514 { assert_eq!(reader.read_event().unwrap(), Event::Start(outer_start)); assert_eq!(reader.read_event().unwrap(), Event::Start(html_start)); - reader.check_end_names(false); + reader.config_mut().check_end_names = false; assert_eq!(reader.read_text(html_end.name()).unwrap(), "..."); - reader.check_end_names(true); + reader.config_mut().check_end_names = true; assert_eq!(reader.read_event().unwrap(), Event::End(outer_end)); assert_eq!(reader.read_event().unwrap(), Event::Eof); @@ -138,11 +138,11 @@ mod issue514 { assert_eq!(reader.read_event().unwrap(), Event::Start(outer_start)); assert_eq!(reader.read_event().unwrap(), Event::Start(html_start)); - reader.check_end_names(false); + reader.config_mut().check_end_names = false; assert_eq!(reader.read_text(html_end.name()).unwrap(), "..."); - reader.check_end_names(true); + reader.config_mut().check_end_names = true; match reader.read_event() { Err(Error::IllFormed(cause)) => assert_eq!( diff --git a/tests/namespaces.rs b/tests/namespaces.rs index 0f68ad53..749aed7a 100644 --- a/tests/namespaces.rs +++ b/tests/namespaces.rs @@ -9,7 +9,7 @@ use std::borrow::Cow; #[test] fn namespace() { let mut r = NsReader::from_str("in namespace!"); - r.trim_text(true); + r.config_mut().trim_text(true); // match r.read_resolved_event() { @@ -52,7 +52,7 @@ fn namespace() { #[test] fn default_namespace() { let mut r = NsReader::from_str(r#""#); - r.trim_text(true); + 
r.config_mut().trim_text(true); // match r.read_resolved_event() { @@ -91,7 +91,7 @@ fn default_namespace() { #[test] fn default_namespace_reset() { let mut r = NsReader::from_str(r#""#); - r.trim_text(true); + r.config_mut().trim_text(true); // match r.read_resolved_event() { @@ -134,7 +134,7 @@ fn attributes_empty_ns() { let src = ""; let mut r = NsReader::from_str(src); - r.trim_text(true); + r.config_mut().trim_text(true); let e = match r.read_resolved_event() { Ok((Unbound, Empty(e))) => e, @@ -173,7 +173,9 @@ fn attributes_empty_ns_expanded() { let src = ""; let mut r = NsReader::from_str(src); - r.trim_text(true).expand_empty_elements(true); + let config = r.config_mut(); + config.trim_text(true); + config.expand_empty_elements = true; { let e = match r.read_resolved_event() { Ok((Unbound, Start(e))) => e, @@ -215,7 +217,7 @@ fn default_ns_shadowing_empty() { let src = ""; let mut r = NsReader::from_str(src); - r.trim_text(true); + r.config_mut().trim_text(true); // { @@ -272,7 +274,9 @@ fn default_ns_shadowing_expanded() { let src = ""; let mut r = NsReader::from_str(src); - r.trim_text(true).expand_empty_elements(true); + let config = r.config_mut(); + config.trim_text(true); + config.expand_empty_elements = true; // { @@ -343,7 +347,7 @@ fn reserved_name() { // Name "xmlns-something" is reserved according to spec, because started with "xml" let mut r = NsReader::from_str(r#""#); - r.trim_text(true); + r.config_mut().trim_text(true); // match r.read_resolved_event() { diff --git a/tests/reader-config.rs b/tests/reader-config.rs new file mode 100644 index 00000000..7701b0d8 --- /dev/null +++ b/tests/reader-config.rs @@ -0,0 +1,825 @@ +//! Contains tests for config options of a parser. +//! +//! Each module has a name of a corresponding option and functions inside performs +//! testing of various option values. +//! +//! Please keep tests sorted (exceptions are allowed if options are tightly related). 
+ +use quick_xml::errors::{Error, IllFormedError}; +use quick_xml::events::{BytesCData, BytesEnd, BytesStart, BytesText, Event}; +use quick_xml::reader::Reader; + +mod check_comments { + use super::*; + + mod false_ { + use super::*; + use pretty_assertions::assert_eq; + + #[test] + fn empty() { + let mut reader = Reader::from_str(""); + reader.config_mut().check_comments = false; + + assert_eq!( + reader.read_event().unwrap(), + Event::Comment(BytesText::from_escaped("")) + ); + assert_eq!( + reader.read_event().unwrap(), + Event::Empty(BytesStart::new("tag")) + ); + assert_eq!(reader.read_event().unwrap(), Event::Eof); + } + + #[test] + fn normal() { + let mut reader = Reader::from_str(""); + reader.config_mut().check_comments = false; + + assert_eq!( + reader.read_event().unwrap(), + Event::Comment(BytesText::from_escaped(" comment ")) + ); + assert_eq!( + reader.read_event().unwrap(), + Event::Empty(BytesStart::new("tag")) + ); + assert_eq!(reader.read_event().unwrap(), Event::Eof); + } + + #[test] + fn dashes_inside() { + let mut reader = Reader::from_str(""); + reader.config_mut().check_comments = false; + + assert_eq!( + reader.read_event().unwrap(), + Event::Comment(BytesText::from_escaped(" comment -- ")) + ); + assert_eq!( + reader.read_event().unwrap(), + Event::Empty(BytesStart::new("tag")) + ); + assert_eq!(reader.read_event().unwrap(), Event::Eof); + } + + #[test] + fn three_dashes_in_the_end() { + let mut reader = Reader::from_str(""); + reader.config_mut().check_comments = false; + + assert_eq!( + reader.read_event().unwrap(), + Event::Comment(BytesText::from_escaped(" comment -")) + ); + assert_eq!( + reader.read_event().unwrap(), + Event::Empty(BytesStart::new("tag")) + ); + assert_eq!(reader.read_event().unwrap(), Event::Eof); + } + + #[test] + fn comment_is_gt() { + let mut reader = Reader::from_str("-->"); + reader.config_mut().check_comments = false; + + assert_eq!( + reader.read_event().unwrap(), + Event::Comment(BytesText::from_escaped(">")) 
+ ); + assert_eq!( + reader.read_event().unwrap(), + Event::Empty(BytesStart::new("tag")) + ); + assert_eq!(reader.read_event().unwrap(), Event::Eof); + } + + #[test] + fn comment_is_dash_gt() { + let mut reader = Reader::from_str("-->"); + reader.config_mut().check_comments = false; + + assert_eq!( + reader.read_event().unwrap(), + Event::Comment(BytesText::from_escaped("->")) + ); + assert_eq!( + reader.read_event().unwrap(), + Event::Empty(BytesStart::new("tag")) + ); + assert_eq!(reader.read_event().unwrap(), Event::Eof); + } + } + + mod true_ { + use super::*; + use pretty_assertions::assert_eq; + + /// XML grammar allows ``. The simplified adapted part of full grammar + /// can be tried online at https://peggyjs.org/online: + /// + /// ```pegjs + /// comment = '' + /// char = [^-]i + /// ``` + /// + /// The original grammar: https://www.w3.org/TR/xml11/#sec-comments + #[test] + fn empty() { + let mut reader = Reader::from_str(""); + reader.config_mut().check_comments = true; + + assert_eq!( + reader.read_event().unwrap(), + Event::Comment(BytesText::from_escaped("")) + ); + assert_eq!( + reader.read_event().unwrap(), + Event::Empty(BytesStart::new("tag")) + ); + assert_eq!(reader.read_event().unwrap(), Event::Eof); + } + + #[test] + fn normal() { + let mut reader = Reader::from_str(""); + reader.config_mut().check_comments = true; + + assert_eq!( + reader.read_event().unwrap(), + Event::Comment(BytesText::from_escaped(" comment ")) + ); + assert_eq!( + reader.read_event().unwrap(), + Event::Empty(BytesStart::new("tag")) + ); + assert_eq!(reader.read_event().unwrap(), Event::Eof); + } + + #[test] + fn dashes_inside() { + let mut reader = Reader::from_str(""); + reader.config_mut().check_comments = true; + + match reader.read_event() { + Err(Error::IllFormed(cause)) => { + assert_eq!(cause, IllFormedError::DoubleHyphenInComment) + } + x => panic!("Expected `Err(IllFormed(_))`, but got `{:?}`", x), + } + // #513: We want to continue parsing after the error + 
assert_eq!( + reader.read_event().unwrap(), + Event::Empty(BytesStart::new("tag")) + ); + assert_eq!(reader.read_event().unwrap(), Event::Eof); + } + + #[test] + fn three_dashes_in_the_end() { + let mut reader = Reader::from_str(""); + reader.config_mut().check_comments = true; + + match reader.read_event() { + Err(Error::IllFormed(cause)) => { + assert_eq!(cause, IllFormedError::DoubleHyphenInComment) + } + x => panic!("Expected `Err(IllFormed(_))`, but got `{:?}`", x), + } + // #513: We want to continue parsing after the error + assert_eq!( + reader.read_event().unwrap(), + Event::Empty(BytesStart::new("tag")) + ); + assert_eq!(reader.read_event().unwrap(), Event::Eof); + } + + #[test] + fn comment_is_gt() { + let mut reader = Reader::from_str("-->"); + reader.config_mut().check_comments = true; + + assert_eq!( + reader.read_event().unwrap(), + Event::Comment(BytesText::from_escaped(">")) + ); + assert_eq!( + reader.read_event().unwrap(), + Event::Empty(BytesStart::new("tag")) + ); + assert_eq!(reader.read_event().unwrap(), Event::Eof); + } + + #[test] + fn comment_is_dash_gt() { + let mut reader = Reader::from_str("-->"); + reader.config_mut().check_comments = true; + + assert_eq!( + reader.read_event().unwrap(), + Event::Comment(BytesText::from_escaped("->")) + ); + assert_eq!( + reader.read_event().unwrap(), + Event::Empty(BytesStart::new("tag")) + ); + assert_eq!(reader.read_event().unwrap(), Event::Eof); + } + } +} + +mod check_end_names { + use super::*; + + mod false_ { + use super::*; + use pretty_assertions::assert_eq; + + #[test] + fn matched_tags() { + let mut reader = Reader::from_str(""); + reader.config_mut().check_end_names = false; + + assert_eq!( + reader.read_event().unwrap(), + Event::Start(BytesStart::new("tag")) + ); + assert_eq!( + reader.read_event().unwrap(), + Event::Start(BytesStart::new("tag")) + ); + assert_eq!( + reader.read_event().unwrap(), + Event::End(BytesEnd::new("tag")) + ); + assert_eq!( + reader.read_event().unwrap(), + 
Event::End(BytesEnd::new("tag")) + ); + assert_eq!(reader.read_event().unwrap(), Event::Eof); + } + + #[test] + fn mismatched_tags() { + let mut reader = Reader::from_str(""); + reader.config_mut().check_end_names = false; + + assert_eq!( + reader.read_event().unwrap(), + Event::Start(BytesStart::new("tag")) + ); + assert_eq!( + reader.read_event().unwrap(), + Event::Start(BytesStart::new("tag")) + ); + assert_eq!( + reader.read_event().unwrap(), + Event::End(BytesEnd::new("mismatched")) + ); + assert_eq!( + reader.read_event().unwrap(), + Event::End(BytesEnd::new("tag")) + ); + assert_eq!(reader.read_event().unwrap(), Event::Eof); + } + } + + mod true_ { + use super::*; + use pretty_assertions::assert_eq; + + #[test] + fn matched_tags() { + let mut reader = Reader::from_str(""); + reader.config_mut().check_end_names = true; + + assert_eq!( + reader.read_event().unwrap(), + Event::Start(BytesStart::new("tag")) + ); + assert_eq!( + reader.read_event().unwrap(), + Event::Start(BytesStart::new("tag")) + ); + assert_eq!( + reader.read_event().unwrap(), + Event::End(BytesEnd::new("tag")) + ); + assert_eq!( + reader.read_event().unwrap(), + Event::End(BytesEnd::new("tag")) + ); + assert_eq!(reader.read_event().unwrap(), Event::Eof); + } + + #[test] + fn mismatched_tags() { + let mut reader = Reader::from_str(""); + reader.config_mut().check_end_names = true; + + assert_eq!( + reader.read_event().unwrap(), + Event::Start(BytesStart::new("tag")) + ); + assert_eq!( + reader.read_event().unwrap(), + Event::Start(BytesStart::new("tag")) + ); + match reader.read_event() { + Err(Error::IllFormed(cause)) => assert_eq!( + cause, + IllFormedError::MismatchedEnd { + expected: "tag".into(), + found: "mismatched".into(), + } + ), + x => panic!("Expected `Err(IllFormed(_))`, but got `{:?}`", x), + } + // #513: We want to continue parsing after the error + assert_eq!( + reader.read_event().unwrap(), + Event::End(BytesEnd::new("tag")) + ); + assert_eq!(reader.read_event().unwrap(),
Event::Eof); + } + } +} + +mod expand_empty_elements { + use super::*; + use pretty_assertions::assert_eq; + + /// Self-closed elements should be reported as one `Empty` event + #[test] + fn false_() { + let mut reader = Reader::from_str(""); + reader.config_mut().expand_empty_elements = false; + + assert_eq!( + reader.read_event().unwrap(), + Event::Empty(BytesStart::new("root")) + ); + assert_eq!(reader.read_event().unwrap(), Event::Eof); + } + + /// Self-closed elements should be reported as two events + #[test] + fn true_() { + let mut reader = Reader::from_str(""); + reader.config_mut().expand_empty_elements = true; + + assert_eq!( + reader.read_event().unwrap(), + Event::Start(BytesStart::new("root")) + ); + assert_eq!( + reader.read_event().unwrap(), + Event::End(BytesEnd::new("root")) + ); + assert_eq!(reader.read_event().unwrap(), Event::Eof); + } +} + +mod trim_markup_names_in_closing_tags { + use super::*; + use pretty_assertions::assert_eq; + + mod false_ { + use super::*; + use pretty_assertions::assert_eq; + + #[test] + fn check_end_names_false() { + let mut reader = Reader::from_str(""); + reader.config_mut().trim_markup_names_in_closing_tags = false; + // We need to disable checks, otherwise the error will be returned when read end + reader.config_mut().check_end_names = false; + + assert_eq!( + reader.read_event().unwrap(), + Event::Start(BytesStart::new("root")) + ); + assert_eq!( + reader.read_event().unwrap(), + Event::End(BytesEnd::new("root \t\r\n")) + ); + assert_eq!(reader.read_event().unwrap(), Event::Eof); + } + + #[test] + fn check_end_names_true() { + let mut reader = Reader::from_str(""); + reader.config_mut().trim_markup_names_in_closing_tags = false; + reader.config_mut().check_end_names = true; + + assert_eq!( + reader.read_event().unwrap(), + Event::Start(BytesStart::new("root")) + ); + match reader.read_event() { + Err(Error::IllFormed(cause)) => assert_eq!( + cause, + IllFormedError::MismatchedEnd { + expected: "root".into(), + 
found: "root \t\r\n".into(), + } + ), + x => panic!("Expected `Err(IllFormed(_))`, but got `{:?}`", x), + } + assert_eq!(reader.read_event().unwrap(), Event::Eof); + } + } + + #[test] + fn true_() { + let mut reader = Reader::from_str(""); + reader.config_mut().trim_markup_names_in_closing_tags = true; + + assert_eq!( + reader.read_event().unwrap(), + Event::Start(BytesStart::new("root")) + ); + assert_eq!( + reader.read_event().unwrap(), + Event::End(BytesEnd::new("root")) + ); + assert_eq!(reader.read_event().unwrap(), Event::Eof); + } +} + +const XML: &str = " \t\r\n\ + \t\r\n\ + \t\r\n\ + \t\r\n\ + text \t\r\n\ + \t\r\n\ + \t\r\n\ + \t\r\n\ + \t\r\n"; + +mod trim_text { + use super::*; + use pretty_assertions::assert_eq; + + #[test] + fn false_() { + let mut reader = Reader::from_str(XML); + reader.config_mut().trim_text(false); + + assert_eq!( + reader.read_event().unwrap(), + Event::Text(BytesText::new(" \t\r\n")) + ); + + assert_eq!( + reader.read_event().unwrap(), + Event::DocType(BytesText::new("root \t\r\n")) + ); + assert_eq!( + reader.read_event().unwrap(), + Event::Text(BytesText::new(" \t\r\n")) + ); + + assert_eq!( + reader.read_event().unwrap(), + Event::Start(BytesStart::from_content("root \t\r\n", 4)) + ); + assert_eq!( + reader.read_event().unwrap(), + Event::Text(BytesText::new(" \t\r\n")) + ); + + assert_eq!( + reader.read_event().unwrap(), + Event::Empty(BytesStart::from_content("empty \t\r\n", 5)) + ); + assert_eq!( + reader.read_event().unwrap(), + Event::Text(BytesText::new(" \t\r\ntext \t\r\n")) + ); + + assert_eq!( + reader.read_event().unwrap(), + Event::Comment(BytesText::from_escaped(" comment \t\r\n")) + ); + assert_eq!( + reader.read_event().unwrap(), + Event::Text(BytesText::new(" \t\r\n")) + ); + + assert_eq!( + reader.read_event().unwrap(), + Event::CData(BytesCData::new(" \t\r\ncdata \t\r\n")) + ); + assert_eq!( + reader.read_event().unwrap(), + Event::Text(BytesText::new(" \t\r\n")) + ); + + assert_eq!( + 
reader.read_event().unwrap(), + Event::PI(BytesText::new("pi \t\r\n")) + ); + assert_eq!( + reader.read_event().unwrap(), + Event::Text(BytesText::new(" \t\r\n")) + ); + + assert_eq!( + reader.read_event().unwrap(), + Event::End(BytesEnd::new("root")) + ); + assert_eq!( + reader.read_event().unwrap(), + Event::Text(BytesText::new(" \t\r\n")) + ); + + assert_eq!(reader.read_event().unwrap(), Event::Eof); + } + + #[test] + fn true_() { + let mut reader = Reader::from_str(XML); + reader.config_mut().trim_text(true); + + assert_eq!( + reader.read_event().unwrap(), + Event::DocType(BytesText::new("root \t\r\n")) + ); + assert_eq!( + reader.read_event().unwrap(), + Event::Start(BytesStart::from_content("root \t\r\n", 4)) + ); + assert_eq!( + reader.read_event().unwrap(), + Event::Empty(BytesStart::from_content("empty \t\r\n", 5)) + ); + assert_eq!( + reader.read_event().unwrap(), + Event::Text(BytesText::new("text")) + ); + assert_eq!( + reader.read_event().unwrap(), + Event::Comment(BytesText::from_escaped(" comment \t\r\n")) + ); + assert_eq!( + reader.read_event().unwrap(), + Event::CData(BytesCData::new(" \t\r\ncdata \t\r\n")) + ); + assert_eq!( + reader.read_event().unwrap(), + Event::PI(BytesText::new("pi \t\r\n")) + ); + assert_eq!( + reader.read_event().unwrap(), + Event::End(BytesEnd::new("root")) + ); + assert_eq!(reader.read_event().unwrap(), Event::Eof); + } +} + +mod trim_text_start { + use super::*; + use pretty_assertions::assert_eq; + + #[test] + fn false_() { + let mut reader = Reader::from_str(XML); + reader.config_mut().trim_text_start = false; + + assert_eq!( + reader.read_event().unwrap(), + Event::Text(BytesText::new(" \t\r\n")) + ); + + assert_eq!( + reader.read_event().unwrap(), + Event::DocType(BytesText::new("root \t\r\n")) + ); + assert_eq!( + reader.read_event().unwrap(), + Event::Text(BytesText::new(" \t\r\n")) + ); + + assert_eq!( + reader.read_event().unwrap(), + Event::Start(BytesStart::from_content("root \t\r\n", 4)) + ); + assert_eq!( + 
reader.read_event().unwrap(), + Event::Text(BytesText::new(" \t\r\n")) + ); + + assert_eq!( + reader.read_event().unwrap(), + Event::Empty(BytesStart::from_content("empty \t\r\n", 5)) + ); + assert_eq!( + reader.read_event().unwrap(), + Event::Text(BytesText::new(" \t\r\ntext \t\r\n")) + ); + + assert_eq!( + reader.read_event().unwrap(), + Event::Comment(BytesText::new(" comment \t\r\n")) + ); + assert_eq!( + reader.read_event().unwrap(), + Event::Text(BytesText::new(" \t\r\n")) + ); + + assert_eq!( + reader.read_event().unwrap(), + Event::CData(BytesCData::new(" \t\r\ncdata \t\r\n")) + ); + assert_eq!( + reader.read_event().unwrap(), + Event::Text(BytesText::new(" \t\r\n")) + ); + + assert_eq!( + reader.read_event().unwrap(), + Event::PI(BytesText::new("pi \t\r\n")) + ); + assert_eq!( + reader.read_event().unwrap(), + Event::Text(BytesText::new(" \t\r\n")) + ); + + assert_eq!( + reader.read_event().unwrap(), + Event::End(BytesEnd::new("root")) + ); + assert_eq!( + reader.read_event().unwrap(), + Event::Text(BytesText::new(" \t\r\n")) + ); + + assert_eq!(reader.read_event().unwrap(), Event::Eof); + } + + #[test] + fn true_() { + let mut reader = Reader::from_str(XML); + reader.config_mut().trim_text_start = true; + + assert_eq!( + reader.read_event().unwrap(), + Event::DocType(BytesText::new("root \t\r\n")) + ); + assert_eq!( + reader.read_event().unwrap(), + Event::Start(BytesStart::from_content("root \t\r\n", 4)) + ); + assert_eq!( + reader.read_event().unwrap(), + Event::Empty(BytesStart::from_content("empty \t\r\n", 5)) + ); + assert_eq!( + reader.read_event().unwrap(), + Event::Text(BytesText::new("text \t\r\n")) + ); + assert_eq!( + reader.read_event().unwrap(), + Event::Comment(BytesText::new(" comment \t\r\n")) + ); + assert_eq!( + reader.read_event().unwrap(), + Event::CData(BytesCData::new(" \t\r\ncdata \t\r\n")) + ); + assert_eq!( + reader.read_event().unwrap(), + Event::PI(BytesText::new("pi \t\r\n")) + ); + assert_eq!( + reader.read_event().unwrap(), + 
Event::End(BytesEnd::new("root")) + ); + assert_eq!(reader.read_event().unwrap(), Event::Eof); + } +} + +mod trim_text_end { + use super::*; + use pretty_assertions::assert_eq; + + #[test] + fn false_() { + let mut reader = Reader::from_str(XML); + reader.config_mut().trim_text_end = false; + + assert_eq!( + reader.read_event().unwrap(), + Event::Text(BytesText::new(" \t\r\n")) + ); + + assert_eq!( + reader.read_event().unwrap(), + Event::DocType(BytesText::new("root \t\r\n")) + ); + assert_eq!( + reader.read_event().unwrap(), + Event::Text(BytesText::new(" \t\r\n")) + ); + + assert_eq!( + reader.read_event().unwrap(), + Event::Start(BytesStart::from_content("root \t\r\n", 4)) + ); + assert_eq!( + reader.read_event().unwrap(), + Event::Text(BytesText::new(" \t\r\n")) + ); + + assert_eq!( + reader.read_event().unwrap(), + Event::Empty(BytesStart::from_content("empty \t\r\n", 5)) + ); + assert_eq!( + reader.read_event().unwrap(), + Event::Text(BytesText::new(" \t\r\ntext \t\r\n")) + ); + + assert_eq!( + reader.read_event().unwrap(), + Event::Comment(BytesText::from_escaped(" comment \t\r\n")) + ); + assert_eq!( + reader.read_event().unwrap(), + Event::Text(BytesText::new(" \t\r\n")) + ); + + assert_eq!( + reader.read_event().unwrap(), + Event::CData(BytesCData::new(" \t\r\ncdata \t\r\n")) + ); + assert_eq!( + reader.read_event().unwrap(), + Event::Text(BytesText::new(" \t\r\n")) + ); + + assert_eq!( + reader.read_event().unwrap(), + Event::PI(BytesText::new("pi \t\r\n")) + ); + assert_eq!( + reader.read_event().unwrap(), + Event::Text(BytesText::new(" \t\r\n")) + ); + + assert_eq!( + reader.read_event().unwrap(), + Event::End(BytesEnd::new("root")) + ); + assert_eq!( + reader.read_event().unwrap(), + Event::Text(BytesText::new(" \t\r\n")) + ); + + assert_eq!(reader.read_event().unwrap(), Event::Eof); + } + + // TODO: Enable test after rewriting parser + #[test] + #[ignore = "currently it is hard to fix incorrect behavior, but this will much easy after parser 
rewrite"] + fn true_() { + let mut reader = Reader::from_str(XML); + reader.config_mut().trim_text_end = true; + + assert_eq!( + reader.read_event().unwrap(), + Event::DocType(BytesText::new("root \t\r\n")) + ); + assert_eq!( + reader.read_event().unwrap(), + Event::Start(BytesStart::from_content("root \t\r\n", 4)) + ); + assert_eq!( + reader.read_event().unwrap(), + Event::Empty(BytesStart::from_content("empty \t\r\n", 5)) + ); + assert_eq!( + reader.read_event().unwrap(), + Event::Text(BytesText::new(" \t\r\ntext")) + ); + assert_eq!( + reader.read_event().unwrap(), + Event::Comment(BytesText::from_escaped(" comment \t\r\n")) + ); + assert_eq!( + reader.read_event().unwrap(), + Event::CData(BytesCData::new(" \t\r\ncdata \t\r\n")) + ); + assert_eq!( + reader.read_event().unwrap(), + Event::PI(BytesText::new("pi \t\r\n")) + ); + assert_eq!( + reader.read_event().unwrap(), + Event::End(BytesEnd::new("root")) + ); + assert_eq!(reader.read_event().unwrap(), Event::Eof); + } +} diff --git a/tests/test.rs b/tests/test.rs index 3104e6dd..58887b88 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -26,7 +26,7 @@ fn test_sample() { fn test_attributes_empty() { let src = ""; let mut r = Reader::from_str(src); - r.trim_text(true); + r.config_mut().trim_text(true); match r.read_event() { Ok(Empty(e)) => { let mut attrs = e.attributes(); @@ -54,7 +54,7 @@ fn test_attributes_empty() { fn test_attribute_equal() { let src = ""; let mut r = Reader::from_str(src); - r.trim_text(true); + r.config_mut().trim_text(true); match r.read_event() { Ok(Empty(e)) => { let mut attrs = e.attributes(); @@ -71,59 +71,10 @@ fn test_attribute_equal() { } } -#[test] -fn test_comment_starting_with_gt() { - let src = "-->"; - let mut r = Reader::from_str(src); - r.trim_text(true); - loop { - match r.read_event() { - Ok(Comment(e)) => { - assert_eq!(e.as_ref(), b">"); - break; - } - Ok(Eof) => panic!("Expecting Comment"), - _ => (), - } - } -} - -#[test] -fn test_no_trim() { - let mut reader = 
Reader::from_str(" text "); - - assert!(matches!(reader.read_event().unwrap(), Text(_))); - assert!(matches!(reader.read_event().unwrap(), Start(_))); - assert!(matches!(reader.read_event().unwrap(), Text(_))); - assert!(matches!(reader.read_event().unwrap(), End(_))); - assert!(matches!(reader.read_event().unwrap(), Text(_))); -} - -#[test] -fn test_trim_end() { - let mut reader = Reader::from_str(" text "); - reader.trim_text_end(true); - - assert!(matches!(reader.read_event().unwrap(), Text(_))); - assert!(matches!(reader.read_event().unwrap(), Start(_))); - assert!(matches!(reader.read_event().unwrap(), Text(_))); - assert!(matches!(reader.read_event().unwrap(), End(_))); -} - -#[test] -fn test_trim() { - let mut reader = Reader::from_str(" text "); - reader.trim_text(true); - - assert!(matches!(reader.read_event().unwrap(), Start(_))); - assert!(matches!(reader.read_event().unwrap(), Text(_))); - assert!(matches!(reader.read_event().unwrap(), End(_))); -} - #[test] fn test_clone_reader() { let mut reader = Reader::from_str("text"); - reader.trim_text(true); + reader.config_mut().trim_text(true); assert!(matches!(reader.read_event().unwrap(), Start(_))); diff --git a/tests/unit_tests.rs b/tests/unit_tests.rs index 01552a18..cbb96338 100644 --- a/tests/unit_tests.rs +++ b/tests/unit_tests.rs @@ -56,77 +56,63 @@ macro_rules! 
next_eq { #[test] fn test_start() { let mut r = Reader::from_str(""); - r.trim_text(true); + r.config_mut().trim_text(true); next_eq!(r, Start, b"a"); } #[test] fn test_start_end() { let mut r = Reader::from_str(""); - r.trim_text(true); + r.config_mut().trim_text(true); next_eq!(r, Start, b"a", End, b"a"); } #[test] fn test_start_end_with_ws() { let mut r = Reader::from_str(""); - r.trim_text(true); + r.config_mut().trim_text(true); next_eq!(r, Start, b"a", End, b"a"); } #[test] fn test_start_end_attr() { let mut r = Reader::from_str(""); - r.trim_text(true); - next_eq!(r, Start, b"a", End, b"a"); -} - -#[test] -fn test_empty() { - let mut r = Reader::from_str(""); - r.trim_text(true); - next_eq!(r, Empty, b"a"); -} - -#[test] -fn test_empty_can_be_expanded() { - let mut r = Reader::from_str(""); - r.trim_text(true).expand_empty_elements(true); + r.config_mut().trim_text(true); next_eq!(r, Start, b"a", End, b"a"); } #[test] fn test_empty_attr() { let mut r = Reader::from_str(""); - r.trim_text(true); + r.config_mut().trim_text(true); next_eq!(r, Empty, b"a"); } #[test] fn test_start_end_comment() { let mut r = Reader::from_str(" "); - r.trim_text(true); + r.config_mut().trim_text(true); next_eq!(r, Start, b"b", Empty, b"a", Empty, b"a", Comment, b"t", End, b"b"); } #[test] fn test_start_txt_end() { let mut r = Reader::from_str("test"); - r.trim_text(true); + r.config_mut().trim_text(true); next_eq!(r, Start, b"a", Text, b"test", End, b"a"); } #[test] fn test_comment() { let mut r = Reader::from_str(""); - r.trim_text(true); + r.config_mut().trim_text(true); next_eq!(r, Comment, b"test"); } #[test] fn test_xml_decl() { let mut r = Reader::from_str(""); - r.trim_text(true); + r.config_mut().trim_text(true); match r.read_event().unwrap() { Decl(ref e) => { match e.version() { @@ -157,42 +143,31 @@ fn test_xml_decl() { } } -#[test] -fn test_trim_test() { - let txt = " "; - let mut r = Reader::from_str(txt); - r.trim_text(true); - next_eq!(r, Start, b"a", Start, b"b", 
End, b"b", End, b"a"); - - let mut r = Reader::from_str(txt); - next_eq!(r, Start, b"a", Start, b"b", Text, b" ", End, b"b", End, b"a"); -} - #[test] fn test_cdata() { let mut r = Reader::from_str(""); - r.trim_text(true); + r.config_mut().trim_text(true); next_eq!(r, CData, b"test"); } #[test] fn test_cdata_open_close() { let mut r = Reader::from_str(" test]]>"); - r.trim_text(true); + r.config_mut().trim_text(true); next_eq!(r, CData, b"test <> test"); } #[test] fn test_start_attr() { let mut r = Reader::from_str(""); - r.trim_text(true); + r.config_mut().trim_text(true); next_eq!(r, Start, b"a"); } #[test] fn test_nested() { let mut r = Reader::from_str("test"); - r.trim_text(true); + r.config_mut().trim_text(true); next_eq!(r, Start, b"a", Start, b"b", Text, b"test", End, b"b", Empty, b"c", End, b"a"); } @@ -200,7 +175,7 @@ fn test_nested() { fn test_writer() -> Result<()> { let txt = include_str!("../tests/documents/test_writer.xml").trim(); let mut reader = Reader::from_str(txt); - reader.trim_text(true); + reader.config_mut().trim_text(true); let mut writer = Writer::new(Cursor::new(Vec::new())); loop { match reader.read_event()? { @@ -218,7 +193,7 @@ fn test_writer() -> Result<()> { fn test_writer_borrow() -> Result<()> { let txt = include_str!("../tests/documents/test_writer.xml").trim(); let mut reader = Reader::from_str(txt); - reader.trim_text(true); + reader.config_mut().trim_text(true); let mut writer = Writer::new(Cursor::new(Vec::new())); loop { match reader.read_event()? { @@ -236,7 +211,7 @@ fn test_writer_borrow() -> Result<()> { fn test_writer_indent() -> Result<()> { let txt = include_str!("../tests/documents/test_writer_indent.xml"); let mut reader = Reader::from_str(txt); - reader.trim_text(true); + reader.config_mut().trim_text(true); let mut writer = Writer::new_with_indent(Cursor::new(Vec::new()), b' ', 4); loop { match reader.read_event()? 
{ @@ -255,7 +230,7 @@ fn test_writer_indent() -> Result<()> { fn test_writer_indent_cdata() -> Result<()> { let txt = include_str!("../tests/documents/test_writer_indent_cdata.xml"); let mut reader = Reader::from_str(txt); - reader.trim_text(true); + reader.config_mut().trim_text(true); let mut writer = Writer::new_with_indent(Cursor::new(Vec::new()), b' ', 4); loop { match reader.read_event()? { @@ -295,7 +270,7 @@ fn test_write_attrs() -> Result<()> { let str_from = r#""#; let expected = r#""#; let mut reader = Reader::from_str(str_from); - reader.trim_text(true); + reader.config_mut().trim_text(true); let mut writer = Writer::new(Cursor::new(Vec::new())); loop { let event = match reader.read_event()? { @@ -401,7 +376,7 @@ fn test_new_xml_decl_empty() { #[test] fn test_offset_err_end_element() { let mut r = Reader::from_str(""); - r.trim_text(true); + r.config_mut().trim_text(true); match r.read_event() { Err(_) if r.buffer_position() == 0 => (), // error at char 0: no opening tag @@ -417,7 +392,7 @@ fn test_offset_err_end_element() { #[test] fn test_offset_err_comment() { let mut r = Reader::from_str(""#, - r#" - |Error: ill-formed document: forbidden string `--` was found in a comment - "#, - true, - ); - - test( - r#""#, - r#" - |Error: ill-formed document: forbidden string `--` was found in a comment - "#, - true, - ); - - // Canary test for correct comments - test( - r#""#, - r#" - |Comment( comment ) - |EmptyElement(hello) - |EndDocument - "#, - true, - ); -} - #[test] fn tabs_1() { test( @@ -383,7 +353,9 @@ fn test(input: &str, output: &str, trim: bool) { #[track_caller] fn test_bytes(input: &[u8], output: &[u8], trim: bool) { let mut reader = NsReader::from_reader(input); - reader.trim_text(trim).check_comments(true); + let config = reader.config_mut(); + config.trim_text(trim); + config.check_comments = true; let mut spec_lines = SpecIter(output).enumerate();