Skip to content

Commit

Permalink
Convert events and attributes to str and remove decoding methods
Browse files Browse the repository at this point in the history
  • Loading branch information
dralley committed Aug 29, 2022
1 parent c5068e7 commit 94fd296
Show file tree
Hide file tree
Showing 17 changed files with 487 additions and 712 deletions.
1 change: 0 additions & 1 deletion Changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,6 @@
- [#416]: `BytesStart::to_borrowed` renamed to `BytesStart::borrow`, the same method
added to all events

- [#421]: `decode_and_unescape*` methods now do one less allocation if unescaping is not required
- [#421]: Removed ability to deserialize byte arrays from serde deserializer.
XML is not able to store binary data directly, you should always use some encoding
scheme, for example, HEX or Base64
Expand Down
8 changes: 4 additions & 4 deletions benches/macrobenches.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ fn parse_document_from_str(doc: &str) -> XmlResult<()> {
match criterion::black_box(r.read_event()?) {
Event::Start(e) | Event::Empty(e) => {
for attr in e.attributes() {
criterion::black_box(attr?.decode_and_unescape_value(&r)?);
criterion::black_box(attr?.unescape_value()?);
}
}
Event::Text(e) => {
Expand All @@ -75,7 +75,7 @@ fn parse_document_from_bytes(doc: &[u8]) -> XmlResult<()> {
match criterion::black_box(r.read_event_into(&mut buf)?) {
Event::Start(e) | Event::Empty(e) => {
for attr in e.attributes() {
criterion::black_box(attr?.decode_and_unescape_value(&r)?);
criterion::black_box(attr?.unescape_value()?);
}
}
Event::Text(e) => {
Expand All @@ -101,7 +101,7 @@ fn parse_document_from_str_with_namespaces(doc: &str) -> XmlResult<()> {
(resolved_ns, Event::Start(e) | Event::Empty(e)) => {
criterion::black_box(resolved_ns);
for attr in e.attributes() {
criterion::black_box(attr?.decode_and_unescape_value(&r)?);
criterion::black_box(attr?.unescape_value()?);
}
}
(resolved_ns, Event::Text(e)) => {
Expand Down Expand Up @@ -129,7 +129,7 @@ fn parse_document_from_bytes_with_namespaces(doc: &[u8]) -> XmlResult<()> {
(resolved_ns, Event::Start(e) | Event::Empty(e)) => {
criterion::black_box(resolved_ns);
for attr in e.attributes() {
criterion::black_box(attr?.decode_and_unescape_value(&r)?);
criterion::black_box(attr?.unescape_value()?);
}
}
(resolved_ns, Event::Text(e)) => {
Expand Down
4 changes: 2 additions & 2 deletions examples/custom_entities.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
loop {
match reader.read_event() {
Ok(Event::DocType(ref e)) => {
for cap in entity_re.captures_iter(e) {
for cap in entity_re.captures_iter(e.as_bytes()) {
custom_entities.insert(
reader.decoder().decode(&cap[1])?.into_owned(),
reader.decoder().decode(&cap[2])?.into_owned(),
Expand All @@ -46,7 +46,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
.attributes()
.map(|a| {
a.unwrap()
.decode_and_unescape_value_with(&reader, |ent| {
.unescape_value_with(|ent| {
custom_entities.get(ent).map(|s| s.as_str())
})
.unwrap()
Expand Down
97 changes: 39 additions & 58 deletions src/de/map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ where
/// ```
has_value_field: bool,
/// list of fields yet to unflatten (defined as starting with $unflatten=)
unflatten_fields: Vec<&'static [u8]>,
unflatten_fields: Vec<&'static str>,
}

impl<'de, 'a, R> MapAccess<'de, 'a, R>
Expand All @@ -211,7 +211,7 @@ where
unflatten_fields: fields
.iter()
.filter(|f| f.starts_with(UNFLATTEN_PREFIX))
.map(|f| f.as_bytes())
.map(|&f| f)
.collect(),
})
}
Expand All @@ -232,15 +232,12 @@ where
// FIXME: The error positions are counted from the start of the tag name - need global position
let slice = &self.start.buf;

if let Some(a) = self.iter.next(slice).transpose()? {
if let Some(a) = self.iter.next(slice.as_bytes()).transpose()? {
// try getting map from attributes (key= "value")
let (key, value) = a.into();
self.source = ValueSource::Attribute(value.unwrap_or_default());
seed.deserialize(EscapedDeserializer::new(
Cow::Borrowed(std::str::from_utf8(&slice[key]).expect("fixme dalley")),
false,
))
.map(Some)
seed.deserialize(EscapedDeserializer::new(Cow::Borrowed(&slice[key]), false))
.map(Some)
} else {
// try getting from events (<key>value</key>)
match self.de.peek()? {
Expand Down Expand Up @@ -273,26 +270,27 @@ where
}
DeEvent::Start(e) => {
self.source = ValueSource::Nested;
let key =
if let Some(p) = self.unflatten_fields.iter().position(|f| {
e.name().as_ref().as_bytes() == &f[UNFLATTEN_PREFIX.len()..]
}) {
// Used to deserialize elements, like:
// <root>
// <xxx>test</xxx>
// </root>
//
// into
//
// struct Root {
// #[serde(rename = "$unflatten=xxx")]
// xxx: String,
// }
seed.deserialize(self.unflatten_fields.remove(p).into_deserializer())
} else {
let name = Cow::Borrowed(e.local_name().into_inner());
seed.deserialize(EscapedDeserializer::new(name, false))
};
let key = if let Some(p) = self
.unflatten_fields
.iter()
.position(|f| e.name().as_ref() == &f[UNFLATTEN_PREFIX.len()..])
{
// Used to deserialize elements, like:
// <root>
// <xxx>test</xxx>
// </root>
//
// into
//
// struct Root {
// #[serde(rename = "$unflatten=xxx")]
// xxx: String,
// }
seed.deserialize(self.unflatten_fields.remove(p).into_deserializer())
} else {
let name = Cow::Borrowed(e.local_name().into_inner());
seed.deserialize(EscapedDeserializer::new(name, false))
};
key.map(Some)
}
// Stop iteration after reaching a closing tag
Expand All @@ -315,7 +313,6 @@ where
&self.start.buf,
value,
true,
self.de.reader.decoder(),
)),
// This arm processes the following XML shape:
// <any-tag>
Expand All @@ -327,16 +324,12 @@ where
// of that events)
// This case is checked by "xml_schema_lists::element" tests in tests/serde-de.rs
ValueSource::Text => match self.de.next()? {
DeEvent::Text(e) => seed.deserialize(SimpleTypeDeserializer::from_cow(
e.into_inner(),
true,
self.de.reader.decoder(),
)),
DeEvent::CData(e) => seed.deserialize(SimpleTypeDeserializer::from_cow(
e.into_inner(),
false,
self.de.reader.decoder(),
)),
DeEvent::Text(e) => {
seed.deserialize(SimpleTypeDeserializer::from_cow(e.into_inner(), true))
}
DeEvent::CData(e) => {
seed.deserialize(SimpleTypeDeserializer::from_cow(e.into_inner(), false))
}
// SAFETY: We set `Text` only when we have seen `Text` or `CData`
_ => unreachable!(),
},
Expand Down Expand Up @@ -722,32 +715,20 @@ where
// Comment to prevent auto-formatting and keep Text and Cdata similar
e.into_inner(),
true,
self.map.de.reader.decoder(),
)
.deserialize_seq(visitor),
DeEvent::CData(e) => SimpleTypeDeserializer::from_cow(
e.into_inner(),
false,
self.map.de.reader.decoder(),
)
.deserialize_seq(visitor),
DeEvent::CData(e) => {
SimpleTypeDeserializer::from_cow(e.into_inner(), false).deserialize_seq(visitor)
}
// This is a sequence element. We cannot treat it as another flatten
// sequence if the type requires `deserialize_seq`. We instead forward
// it to the `xs:simpleType` implementation
DeEvent::Start(e) => {
let value = match self.map.de.next()? {
DeEvent::Text(e) => SimpleTypeDeserializer::from_cow(
e.into_inner(),
true,
self.map.de.reader.decoder(),
)
.deserialize_seq(visitor),
DeEvent::CData(e) => SimpleTypeDeserializer::from_cow(
e.into_inner(),
false,
self.map.de.reader.decoder(),
)
.deserialize_seq(visitor),
DeEvent::Text(e) => SimpleTypeDeserializer::from_cow(e.into_inner(), true)
.deserialize_seq(visitor),
DeEvent::CData(e) => SimpleTypeDeserializer::from_cow(e.into_inner(), false)
.deserialize_seq(visitor),
e => Err(DeError::Custom(format!("Unsupported event {:?}", e))),
};
// TODO: Maybe assert here that we expect only the matching closing tag?
Expand Down
44 changes: 26 additions & 18 deletions src/de/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ mod simple_type;
mod var;

pub use crate::errors::serialize::DeError;
use crate::escape::unescape;
use crate::{
encoding::{Decoder, Utf8BytesReader},
errors::Error,
Expand Down Expand Up @@ -490,18 +491,36 @@ where
/// |[`DeEvent::Eof`] | |Emits [`UnexpectedEof`](DeError::UnexpectedEof)
fn next_text_impl(
&mut self,
unescape: bool,
do_unescape: bool,
allow_start: bool,
) -> Result<Cow<'de, str>, DeError> {
match self.next()? {
DeEvent::Text(e) => Ok(e.decode(unescape)?),
DeEvent::CData(e) => Ok(e.decode()?),
DeEvent::Text(e) => {
if do_unescape {
Ok(match unescape(e.as_ref())? {
Cow::Borrowed(_) => e.into_inner(),
Cow::Owned(s) => Cow::Owned(s),
})
} else {
Ok(e.into_inner())
}
}
DeEvent::CData(e) => Ok(e.into_inner()),
DeEvent::Start(e) if allow_start => {
// allow one nested level
let inner = self.next()?;
let t = match inner {
DeEvent::Text(t) => t.decode(unescape)?,
DeEvent::CData(t) => t.decode()?,
DeEvent::Text(t) => {
if do_unescape {
Ok(match unescape(t.as_ref())? {
Cow::Borrowed(_) => t.into_inner(),
Cow::Owned(s) => Cow::Owned(s),
})
} else {
Ok(t.into_inner())
}
}
DeEvent::CData(t) => Ok(t.into_inner()),
DeEvent::Start(s) => {
return Err(DeError::UnexpectedStart(s.name().as_ref().to_owned()))
}
Expand All @@ -516,7 +535,7 @@ where
DeEvent::Eof => return Err(DeError::UnexpectedEof),
};
self.read_to_end(e.name())?;
Ok(t)
t
}
DeEvent::Start(e) => Err(DeError::UnexpectedStart(e.name().as_ref().to_owned())),
DeEvent::End(e) => Err(DeError::UnexpectedEnd(e.name().as_ref().to_owned())),
Expand Down Expand Up @@ -774,7 +793,7 @@ where
}

/// Trait used by the deserializer for iterating over input. This is manually
/// "specialized" for iterating over `&[u8]`.
/// "specialized" for iterating over `&str`.
///
/// You do not need to implement this trait, it is needed to abstract from
/// [borrowing](SliceReader) and [copying](IoReader) data sources and reuse code in
Expand All @@ -786,9 +805,6 @@ pub trait XmlRead<'i> {
/// Skips until end element is found. Unlike `next()` it will not allocate
/// when it cannot satisfy the lifetime.
fn read_to_end(&mut self, name: QName) -> Result<(), DeError>;

/// A copy of the reader's decoder used to decode strings.
fn decoder(&self) -> Decoder;
}

/// XML input source that reads from a std::io input stream.
Expand Down Expand Up @@ -827,10 +843,6 @@ impl<'i, R: BufRead> XmlRead<'i> for IoReader<R> {
Ok(_) => Ok(()),
}
}

fn decoder(&self) -> Decoder {
self.reader.decoder()
}
}

/// XML input source that reads from a slice of bytes and can borrow from it.
Expand Down Expand Up @@ -864,10 +876,6 @@ impl<'de> XmlRead<'de> for SliceReader<'de> {
Ok(_) => Ok(()),
}
}

fn decoder(&self) -> Decoder {
self.reader.decoder()
}
}

#[cfg(test)]
Expand Down
Loading

0 comments on commit 94fd296

Please sign in to comment.