From 8416fe9acfe2cab2c4813f4a1a87bda556698b7d Mon Sep 17 00:00:00 2001 From: Finn Bear Date: Sun, 22 Sep 2024 13:56:35 -0700 Subject: [PATCH] 0.7.28 - reduce domain name false positives (pii). --- Cargo.toml | 2 +- src/context.rs | 12 +++++++++--- src/pii.rs | 10 +++++++--- 3 files changed, 17 insertions(+), 7 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 76a672b..acadb23 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "rustrict" authors = ["Finn Bear"] -version = "0.7.27" +version = "0.7.28" edition = "2021" license = "MIT OR Apache-2.0" repository = "https://github.com/finnbear/rustrict/" diff --git a/src/context.rs b/src/context.rs index ceb75af..055b14c 100644 --- a/src/context.rs +++ b/src/context.rs @@ -8,9 +8,9 @@ use std::time::{Duration, Instant}; /// Context is useful for taking moderation actions on a per-user basis i.e. each user would get /// their own Context. -/// +/// /// # Recommendation -/// +/// /// Use this as a reference implementation e.g. by copying and adapting it. #[derive(Clone)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] @@ -868,6 +868,12 @@ mod tests { let json = serde_json::to_value(&ctx).unwrap(); let only_safe_until = &json["only_safe_until"]; let unix = only_safe_until.as_i64().unwrap(); - assert!(unix > 1000 + SystemTime::now().duration_since(SystemTime::UNIX_EPOCH).unwrap().as_millis() as i64) + assert!( + unix > 1000 + + SystemTime::now() + .duration_since(SystemTime::UNIX_EPOCH) + .unwrap() + .as_millis() as i64 + ) } } diff --git a/src/pii.rs b/src/pii.rs index ac68c80..5266661 100644 --- a/src/pii.rs +++ b/src/pii.rs @@ -5,10 +5,10 @@ use std::borrow::Cow; lazy_static! { static ref PHONE : Regex = Regex::new(r#"(\+\d{1,2})?\s*\(?\d{3}\)?[\s\.-]*\d{3}[\s\.-]*\d{4}"#).unwrap(); static ref IP_ADDRESS : Regex = Regex::new(r#"(?:[0-9]{1,3}\.){3}[0-9]{1,3}"#).unwrap(); - static ref EMAIL_ADDRESS : Regex = Regex::new(r#"(?i)[a-z0-9_\-]{3,}\s*(@|[\[\(\s]at[\s\)\]])\s*[a-z0-9_\-]{5,}\s*(\.|dot)\s*[a-z]{2,3}"#).unwrap(); + static ref EMAIL_ADDRESS : Regex = Regex::new(r#"(?i)[a-z0-9_\-]{3,}\s*(@|[\[\(\s]at[\s\)\]])\s*[a-z0-9_\-]{5,}\s*(\.|dot)\s*(com|net|org|gov|biz|co|us|ru|uk|de|se|to|tv|io|info|online|site)"#).unwrap(); //static ref ADDRESS : Regex = Regex::new(r#"(?i)\d+[ ](?:[A-Za-z0-9\.-]+ )+(?:Avenue|Lane|Road|Boulevard|Drive|Street|Ave|Dr|Rd|Blvd|Ln|St)\.?(\s+#[0-9]{1,5})?"#).unwrap(); static ref NAME : Regex = Regex::new(r#"(?i)(real\s)?name\s+is:?\s[a-zA-Z]+(\s[a-zA-z]+)?"#).unwrap(); - static ref URL : Regex = Regex::new(r#"(?i)(https?:?/*)?[a-zA-Z0-9]{4,}\.[a-zA-Z]{2,3}"#).unwrap(); + static ref URL : Regex = Regex::new(r#"(?i)(https?:?/*)?[a-zA-Z0-9]{4,}\.(com|net|org|gov|biz|co|us|ru|uk|de|se|to|tv|io|info|online|site)"#).unwrap(); } /// Returns [`s`] with personally-identifiable information censored out, and a `true` if @@ -103,7 +103,11 @@ mod tests { for line in include_str!("./safe.txt") .lines() .chain(include_str!("./false_positives.txt").lines()) - .chain(r#"1234 Have 1234"#.lines()) + .chain( + r#"1234 Have 1234 + gmail.zzz"# + .lines(), + ) { assert!(!has_pii(line), "{line}"); }