Skip to content

Commit

Permalink
0.7.28 - reduce domain name false positives (pii).
Browse files Browse the repository at this point in the history
  • Loading branch information
finnbear committed Sep 22, 2024
1 parent 84f2693 commit 8416fe9
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 7 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]
name = "rustrict"
authors = ["Finn Bear"]
version = "0.7.27"
version = "0.7.28"
edition = "2021"
license = "MIT OR Apache-2.0"
repository = "https://github.com/finnbear/rustrict/"
Expand Down
12 changes: 9 additions & 3 deletions src/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@ use std::time::{Duration, Instant};

/// Context is useful for taking moderation actions on a per-user basis i.e. each user would get
/// their own Context.
///
///
/// # Recommendation
///
///
/// Use this as a reference implementation e.g. by copying and adapting it.
#[derive(Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
Expand Down Expand Up @@ -868,6 +868,12 @@ mod tests {
let json = serde_json::to_value(&ctx).unwrap();
let only_safe_until = &json["only_safe_until"];
let unix = only_safe_until.as_i64().unwrap();
assert!(unix > 1000 + SystemTime::now().duration_since(SystemTime::UNIX_EPOCH).unwrap().as_millis() as i64)
assert!(
unix > 1000
+ SystemTime::now()
.duration_since(SystemTime::UNIX_EPOCH)
.unwrap()
.as_millis() as i64
)
}
}
10 changes: 7 additions & 3 deletions src/pii.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@ use std::borrow::Cow;
// Regex patterns for several kinds of personally-identifiable information
// (phone number, IP address, email address, stated name, URL), declared as
// `lazy_static!` statics.
//
// NOTE(review): this listing appears to be a rendered diff. Where two
// consecutive lines define the same name (EMAIL_ADDRESS, URL), the first looks
// like the removed line and the second like its replacement — confirm against
// the actual file before relying on either.
lazy_static! {
// Optional `+` with a 1-2 digit prefix, then 3-3-4 digit groups; the first group
// may be wrapped in parentheses and groups may be separated by any run of
// whitespace, dots, or hyphens (including none).
static ref PHONE : Regex = Regex::new(r#"(\+\d{1,2})?\s*\(?\d{3}\)?[\s\.-]*\d{3}[\s\.-]*\d{4}"#).unwrap();
// Four dot-separated groups of 1-3 digits. Groups are not range-checked, so
// values above 255 (e.g. `999.999.999.999`) also match.
static ref IP_ADDRESS : Regex = Regex::new(r#"(?:[0-9]{1,3}\.){3}[0-9]{1,3}"#).unwrap();
// Case-insensitive: a 3+ character local part, then `@` or the word "at"
// delimited by brackets/parentheses/whitespace, a 5+ character domain, `.` or
// the word "dot", and finally ANY 2-3 letter suffix.
static ref EMAIL_ADDRESS : Regex = Regex::new(r#"(?i)[a-z0-9_\-]{3,}\s*(@|[\[\(\s]at[\s\)\]])\s*[a-z0-9_\-]{5,}\s*(\.|dot)\s*[a-z]{2,3}"#).unwrap();
// Same shape as the line above, but the suffix must be one of an explicit list
// of top-level domains instead of any 2-3 letters.
// NOTE(review): there is no trailing word boundary, so a longer suffix that
// merely starts with a listed entry (e.g. `.community` via `com`) still
// matches — confirm this is intended.
static ref EMAIL_ADDRESS : Regex = Regex::new(r#"(?i)[a-z0-9_\-]{3,}\s*(@|[\[\(\s]at[\s\)\]])\s*[a-z0-9_\-]{5,}\s*(\.|dot)\s*(com|net|org|gov|biz|co|us|ru|uk|de|se|to|tv|io|info|online|site)"#).unwrap();
//static ref ADDRESS : Regex = Regex::new(r#"(?i)\d+[ ](?:[A-Za-z0-9\.-]+ )+(?:Avenue|Lane|Road|Boulevard|Drive|Street|Ave|Dr|Rd|Blvd|Ln|St)\.?(\s+#[0-9]{1,5})?"#).unwrap();
// Case-insensitive: "name is" (optionally preceded by "real ", optionally
// followed by `:`), then one word and an optional second word.
// NOTE(review): the second word uses `[a-zA-z]`; the `A-z` range also covers the
// ASCII punctuation between `Z` and `a` (`[`, `\`, `]`, `^`, `_`, and the
// backtick). Presumably `[a-zA-Z]` was intended — confirm.
static ref NAME : Regex = Regex::new(r#"(?i)(real\s)?name\s+is:?\s[a-zA-Z]+(\s[a-zA-z]+)?"#).unwrap();
// Case-insensitive: optional `http`/`https` scheme (the colon and slashes are
// themselves optional), a 4+ character alphanumeric label, a literal dot, and
// ANY 2-3 letter suffix.
static ref URL : Regex = Regex::new(r#"(?i)(https?:?/*)?[a-zA-Z0-9]{4,}\.[a-zA-Z]{2,3}"#).unwrap();
// Same shape as the line above, but the suffix must be one of an explicit list
// of top-level domains. As with EMAIL_ADDRESS, nothing anchors the end of the
// suffix, so `abcd.community` would still match through `abcd.com`.
static ref URL : Regex = Regex::new(r#"(?i)(https?:?/*)?[a-zA-Z0-9]{4,}\.(com|net|org|gov|biz|co|us|ru|uk|de|se|to|tv|io|info|online|site)"#).unwrap();
}

/// Returns [`s`] with personally-identifiable information censored out, and a `true` if
Expand Down Expand Up @@ -103,7 +103,11 @@ mod tests {
for line in include_str!("./safe.txt")
.lines()
.chain(include_str!("./false_positives.txt").lines())
.chain(r#"1234 Have 1234"#.lines())
.chain(
r#"1234 Have 1234
gmail.zzz"#
.lines(),
)
{
assert!(!has_pii(line), "{line}");
}
Expand Down

0 comments on commit 8416fe9

Please sign in to comment.