From 2ebe7150bb53b083d396e50f2f5f164a888eef27 Mon Sep 17 00:00:00 2001 From: Finn Bear Date: Sun, 22 Oct 2023 10:54:44 -0700 Subject: [PATCH] Improve wordlists. --- Cargo.toml | 2 +- README.md | 2 +- src/dictionary_blacklist.txt | 1 + src/false_positives.txt | 136 +++++++++++++++++++++++++++++++++++ src/profanity.csv | 10 ++- src/test_positive.txt | 3 +- 6 files changed, 150 insertions(+), 4 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index fab504e..aa1ef64 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "rustrict" authors = ["Finn Bear"] -version = "0.7.11" +version = "0.7.12" edition = "2021" license = "MIT OR Apache-2.0" repository = "https://github.com/finnbear/rustrict/" diff --git a/README.md b/README.md index ec27a28..048a509 100644 --- a/README.md +++ b/README.md @@ -172,7 +172,7 @@ is used as a dataset. Positive accuracy is the percentage of profanity detected | Crate | Accuracy | Positive Accuracy | Negative Accuracy | Time | |-------|----------|-------------------|-------------------|------| -| [rustrict](https://crates.io/crates/rustrict) | 88.32% | 93.25% | 87.09% | 8s | +| [rustrict](https://crates.io/crates/rustrict) | 88.31% | 93.26% | 87.08% | 8s | | [censor](https://crates.io/crates/censor) | 76.16% | 72.76% | 77.01% | 23s | ## Development diff --git a/src/dictionary_blacklist.txt b/src/dictionary_blacklist.txt index 0bdceb6..25e8120 100644 --- a/src/dictionary_blacklist.txt +++ b/src/dictionary_blacklist.txt @@ -321,6 +321,7 @@ hellholes hells hemipenis hen tai +heroins hic hijackers hoars diff --git a/src/false_positives.txt b/src/false_positives.txt index 3965aca..e932b3f 100644 --- a/src/false_positives.txt +++ b/src/false_positives.txt @@ -3403,6 +3403,7 @@ czech liter czech little czech tier da holes +daddynut dadouchos daftardar dale nintendo @@ -3715,6 +3716,7 @@ disimprison disimprove disks croat disks perm +displeasured disputes croat disputes perm disputes test @@ -5258,6 +5260,7 @@ haythorn haytime hazards croat hazards perm +he rolling he she he'll headbutt @@ -5276,12 +5279,14 @@ heck chi heck hi hecks croat hecks perm +hee rolling hee she heelballs heelless heep anti heep rick heer pes +heer rolling heil hel experience hel experiencing @@ -5359,6 +5364,7 @@ henwife heptadic her bobby her pes +her rolling heraclitism heraldic herbalism @@ -5367,7 +5373,15 @@ herbals herbish herbwife herdic +hero illinois +hero illness +hero in +hero lin hero tic +hero xnxx +heroine +heroinism +heroinize herpeses herpestes herpestinae @@ -5701,7 +5715,105 @@ hoplocephalus hopper version hopper vert horn eye +horn i +horn j +horn lab +horn lac +horn lad +horn lafayette +horn lai +horn lak +horn lam +horn lan +horn lao +horn lap +horn lar +horn las +horn lat +horn laud +horn laugh +horn laun +horn laur +horn law +horn lay +horn laz +horn lea +horn leban +horn lect +horn lee +horn left +horn leg +horn lei +horn lemon +horn len +horn leo +horn leslie +horn less +horn let +horn lev +horn lew +horn lex +horn liabilities +horn liability +horn liable +horn lib +horn lice +horn lick +horn lie +horn lif +horn lig +horn like +horn lim +horn lin +horn lion +horn lip +horn liq +horn lis +horn lit +horn liv +horn lloyd +horn loa +horn lob +horn loc +horn lod +horn log +horn lol +horn lond +horn lone +horn long +horn loo +horn lop +horn lor +horn lose +horn losing +horn loss +horn lost +horn lot +horn lou +horn love +horn loving +horn low +horn lu +horn ly +horn niagara +horn nicaragua +horn nice +horn nicholas +horn nick +horn nicol +horn nike +horn nikon +horn nine +horn nintendo +horn nirvana +horn nis +horn nit horn nylon +horn xanax +horn xbox +horn xerox +horn xhtml +horn xnxx horn ya horn ye horn yi @@ -5709,6 +5821,19 @@ horn yo horn yug horn yuk hornblendic +hornie +hornification +hornified +hornify +hornily +horniness +horning +hornish +hornist +hornito +hornless +hornlet +hornlike horrible experience horrible experiencing horrible experiment @@ -9163,6 +9288,17 @@ play boys play girls playboyism pleasure chester +pleasure da +pleasure de +pleasure di +pleasure do +pleasure dr +pleasure du +pleasure dvds +pleasure dying +pleasure dylan +pleasure dyn +pleasure ed plemochoe plerotic pliss diff --git a/src/profanity.csv b/src/profanity.csv index fd7b0e8..6c76cb5 100644 --- a/src/profanity.csv +++ b/src/profanity.csv @@ -260,7 +260,7 @@ coital,0,0,1,0,0 coitis,0,0,1,0,0 coitus,0,0,2,0,0 cojones,0,0,1,0,0 -cok,2,0,2,0,1 +cok,1,0,2,0,1 coglione,2,0,1,2,0 commie,0,2,0,0,0 commis,0,2,0,0,0 @@ -299,6 +299,7 @@ cunilingus,0,0,3,0,0 cunt,0,2,2,0,0 cunts,0,2,2,0,0 cyalis,0,0,1,0,0 + daddy,0,0,1,0,0 dafuq,2,0,0,0,1 dammit,1,0,0,0,0 damn,1,0,0,0,0 @@ -626,6 +627,7 @@ grope,0,0,1,0,0 groped,0,0,1,0,0 groping,0,0,1,0,0 grostulation,0,0,1,0,0 + gspot,0,0,1,0,0 gun to school,3,0,0,0,0 gun at school,3,0,0,0,0 gunt,0,2,2,0,1 @@ -654,6 +656,7 @@ her ashes,0,1,0,0,0 her bob,0,0,3,0,0 herpes,0,0,1,0,0 her gash,0,0,1,0,0 +heroin,1,0,0,0,0 heshe,0,0,2,0,0 hifler,0,1,0,0,2 hijacker,0,1,0,0,0 @@ -692,6 +695,7 @@ hope u die,0,0,0,3,1 hopeyoualldie,0,0,0,3,0 hopeyoudie,0,0,0,3,0 horney,0,0,2,0,1 + horni,0,0,1,0,0 horniest,0,0,2,0,0 horny,0,0,3,0,0 horseshit,2,0,0,0,0 @@ -847,6 +851,7 @@ kunilingus,0,0,3,0,1 kunt,0,0,1,0,1 kyke,0,2,0,0,1 kys,0,2,0,3,0 +l3itch,0,1,1,1,1 labia,0,0,3,0,0 lactate,0,0,1,0,0 ladyboy,0,0,1,0,0 @@ -948,6 +953,7 @@ molest,0,0,1,0,0 molestation,0,0,1,0,0 molester,0,0,1,0,0 molestor,0,0,1,0,0 + mommy,0,0,1,0,0 monspubis,0,0,1,0,0 monsvenus,0,0,1,0,0 mooncricket,0,1,0,0,0 @@ -1054,6 +1060,7 @@ nolife,0,0,0,2,0 nood,0,0,2,0,2 nooky,0,0,1,0,1 nsdap,0,2,0,0,1 +nsfw,1,1,1,0,0 nsfwimages,1,1,1,0,1 nude,0,0,2,0,0 nudes,0,0,3,0,0 @@ -1168,6 +1175,7 @@ playboybunny,0,0,1,0,0 playbunny,0,0,1,0,0 playgirl,0,0,1,0,0 pleasurechest,0,0,2,0,1 +pleasured,0,0,1,0,0 pmcwagner,0,1,0,0,0 polesmoker,0,0,2,0,1 pompinara,1,0,2,1,0 diff --git a/src/test_positive.txt b/src/test_positive.txt index c52d0c6..c4e4c48 100644 --- a/src/test_positive.txt +++ b/src/test_positive.txt @@ -2649,4 +2649,5 @@ wuzzie yellowman zigabo zipperhead -zipper head \ No newline at end of file +zipper head +l3itch \ No newline at end of file