From e77d1b2bacb0b9f47378751f1a6a7424e54f64e2 Mon Sep 17 00:00:00 2001 From: Petros Angelatos Date: Mon, 23 Sep 2024 19:09:21 +0300 Subject: [PATCH] preserve structure of timezone designation list Signed-off-by: Petros Angelatos --- utils/tzif/src/data/tzif.rs | 14 +++ utils/tzif/src/parse/tzif.rs | 161 ++++++++++++++--------------------- 2 files changed, 78 insertions(+), 97 deletions(-) diff --git a/utils/tzif/src/data/tzif.rs b/utils/tzif/src/data/tzif.rs index 288249de69b..452bbfc48e2 100644 --- a/utils/tzif/src/data/tzif.rs +++ b/utils/tzif/src/data/tzif.rs @@ -297,3 +297,17 @@ pub struct DataBlock { /// A series of [`UtLocalIndicator`] objects. pub ut_local_indicators: Vec, } + +impl DataBlock { + /// Retrieves the timezone designation at index `idx`. + pub fn time_zone_designation(&self, mut idx: usize) -> Option<&str> { + self.time_zone_designations.iter().find_map(|d| { + if idx <= d.len() { + Some(&d[idx..]) + } else { + idx -= d.len() + 1; + None + } + }) + } +} diff --git a/utils/tzif/src/parse/tzif.rs b/utils/tzif/src/parse/tzif.rs index a8fd856daaa..8853a05a5e0 100644 --- a/utils/tzif/src/parse/tzif.rs +++ b/utils/tzif/src/parse/tzif.rs @@ -376,65 +376,40 @@ where count_min_max(typecnt, typecnt, local_time_type_record(charcnt)) } -/// A series of bytes constituting an array of -/// NUL-terminated (0x00) time zone designation strings. The total -/// number of bytes is specified by the "charcnt" field in the header. -fn raw_time_zone_designations(charcnt: usize) -> impl Parser -where - Input: Stream, - Input::Error: ParseError, -{ - count_min_max(charcnt, charcnt, any()) - .map(|bytes: Vec| String::from_utf8_lossy(&bytes).into_owned()) -} - /// A series of bytes constituting an array of /// NUL-terminated (0x00) time zone designation strings. The total /// number of bytes is specified by the "charcnt" field in the /// header. 
/// -/// Splits each designation into a vector of [`String`] where each string -/// starts at an index defined by a local time type record and ends at a -/// NUL-terminator (0x00) +/// Splits the list of bytes by the NUL-terminator (0x00) character +/// and puts each designation into a [`String`]. /// /// > e.g. /// > ```text /// > "LMT\u{0}HMT\u{0}MMT\u{0}IST\u{0}+0630\u{0}" /// > ``` /// -/// Note that two designations MAY overlap if one is a suffix -/// of the other. The character encoding of time zone designation -/// strings is not specified. +/// Note that a local time type record's index might point into the middle of a +/// designation. In that case the record's designation is the specified +/// suffix. The [`DataBlock::time_zone_designation`] method can be used to +/// access the correct designation string given an index. /// +/// The character encoding of time zone designation strings is not specified. /// However, time zone designations SHOULD consist of at least three (3) and no /// more than six (6) ASCII characters from the set of alphanumerics, /// '-', and '+'. This is for compatibility with POSIX requirements /// for time zone abbreviations, so this parser enforces a UTF-8 ASCII encoding, /// to ensure compatability with Rust strings. 
-fn time_zone_designations( - charcnt: usize, - local_time_type_records: Vec, -) -> impl Parser> +fn time_zone_designations(charcnt: usize) -> impl Parser> where Input: Stream, Input::Error: ParseError, { - raw_time_zone_designations(charcnt).map(move |raw_time_zone_designations| { - let mut time_zone_designations = Vec::with_capacity(local_time_type_records.len()); - for record in &local_time_type_records { - for end_idx in record.idx..charcnt { - if raw_time_zone_designations.as_bytes()[end_idx] == b'\0' { - time_zone_designations.push( - String::from_utf8_lossy( - raw_time_zone_designations[record.idx..end_idx].as_bytes(), - ) - .into_owned(), - ); - break; - } - } - } - time_zone_designations + count_min_max(charcnt, charcnt, any()).map(|bytes: Vec| { + bytes + .split_inclusive(|b| *b == 0) + .map(|s| String::from_utf8_lossy(&s[0..s.len() - 1]).into_owned()) + .collect() }) } @@ -678,45 +653,17 @@ where Input: Stream, Input::Error: ParseError, { - ( - historic_transition_times::(header.timecnt), - transition_types(header.timecnt, header.typecnt), - local_time_type_records(header.typecnt, header.charcnt), - ) - .then( - move |(transition_times, transition_types, local_time_type_records)| { - ( - value(transition_times), - value(transition_types), - value(local_time_type_records.clone()), - time_zone_designations(header.charcnt, local_time_type_records), - leap_second_records::(header.leapcnt), - standard_wall_indicators(header.isstdcnt), - ) - }, - ) - .then( - move |( - transition_times, - transition_types, - local_time_type_records, - time_zone_designations, - leap_second_records, - standard_wall_indicators, - )| { - combine::struct_parser! 
{ - DataBlock { - transition_times: value(transition_times), - transition_types: value(transition_types), - local_time_type_records: value(local_time_type_records), - time_zone_designations: value(time_zone_designations), - leap_second_records: value(leap_second_records), - standard_wall_indicators: value(standard_wall_indicators), - ut_local_indicators: ut_local_indicators(header.isutcnt), - } - } - }, - ) + combine::struct_parser! { + DataBlock { + transition_times: historic_transition_times::(header.timecnt), + transition_types: transition_types(header.timecnt, header.typecnt), + local_time_type_records: local_time_type_records(header.typecnt, header.charcnt), + time_zone_designations: time_zone_designations(header.charcnt), + leap_second_records: leap_second_records::(header.leapcnt), + standard_wall_indicators: standard_wall_indicators(header.isstdcnt), + ut_local_indicators: ut_local_indicators(header.isutcnt), + } + } } /// Parses a `TZif` footer. @@ -1196,31 +1143,51 @@ mod test { #[test] fn parse_time_zone_designations() { assert_parse_eq!( - time_zone_designations( - 14, - vec![ - LocalTimeTypeRecord { - utoff: Seconds(35356), - is_dst: false, - idx: 0, - }, - LocalTimeTypeRecord { - utoff: Seconds(39600), - is_dst: true, - idx: 4, - }, - LocalTimeTypeRecord { - utoff: Seconds(36000), - is_dst: false, - idx: 9, - }, - ] - ), + time_zone_designations(14), "LMT\0AEDT\0AEST\0", vec!["LMT".to_owned(), "AEDT".to_owned(), "AEST".to_owned()], ); } + #[test] + fn time_zone_designation_indexing() { + let block: &[u8] = &[ + 0x00, 0x00, 0x00, 0x10, 0x01, 0x00, // local time record 0 + 0x00, 0x00, 0x00, 0x10, 0x01, 0x03, // local time record 1 + 0x00, 0x00, 0x00, 0x10, 0x01, 0x04, // local time record 2 + 0x00, 0x00, 0x00, 0x10, 0x01, 0x05, // local time record 3 + b'L', b'M', b'T', 0x00, b'A', b'E', b'D', b'T', 0x00, // timezone designations + ]; + let header = TzifHeader { + version: 0, + isutcnt: 0, + isstdcnt: 0, + leapcnt: 0, + timecnt: 0, + typecnt: 4, + 
charcnt: 9, + }; + let (block, _) = data_block::<1, _>(header).parse(block).unwrap(); + assert_eq!( + block.time_zone_designation(block.local_time_type_records[0].idx), + Some("LMT") + ); + assert_eq!( + block.time_zone_designation(block.local_time_type_records[1].idx), + Some("") + ); + assert_eq!( + block.time_zone_designation(block.local_time_type_records[2].idx), + Some("AEDT") + ); + assert_eq!( + block.time_zone_designation(block.local_time_type_records[3].idx), + Some("EDT") + ); + assert_eq!(block.time_zone_designation(8), Some("")); + assert_eq!(block.time_zone_designation(9), None); + } + #[test] fn parse_leap_second_occurrence() { const FIVE: &[u8] = 5i64.to_be_bytes().as_slice();