diff --git a/Cargo.toml b/Cargo.toml index 89705da..62825b5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "rustrict" authors = ["Finn Bear"] -version = "0.7.25" +version = "0.7.26" edition = "2021" license = "MIT OR Apache-2.0" repository = "https://github.com/finnbear/rustrict/" diff --git a/README.md b/README.md index 7d5d6af..dea75cf 100644 --- a/README.md +++ b/README.md @@ -132,8 +132,7 @@ If you want to add custom profanities or safe words, enable the `customize` feat } ``` -But wait, there's more! If your use-case is chat moderation, and you can store data on a per-user basis, you -might benefit from the `context` feature. +If your use-case is chat moderation, and you store data on a per-user basis, you can use `rustrict::Context` as a reference implementation: ```rust #[cfg(feature = "context")] @@ -178,7 +177,7 @@ is used as a dataset. Positive accuracy is the percentage of profanity detected | Crate | Accuracy | Positive Accuracy | Negative Accuracy | Time | |-------|----------|-------------------|-------------------|------| -| [rustrict](https://crates.io/crates/rustrict) | 79.74% | 94.00% | 76.18% | 9s | +| [rustrict](https://crates.io/crates/rustrict) | 79.74% | 94.00% | 76.19% | 9s | | [censor](https://crates.io/crates/censor) | 76.16% | 72.76% | 77.01% | 23s | ## Development diff --git a/src/character_analyzer.rs b/src/character_analyzer.rs index d6842e9..abb6266 100644 --- a/src/character_analyzer.rs +++ b/src/character_analyzer.rs @@ -49,6 +49,7 @@ fn main() { '🐿' => 20, '𒐫' => 40, '𒈙' => 35, + '༺' | '༻' => 25, _ => { let max_width = (max_width(c, &fonts) as f32 / 100f32).round() as u16; if max_width > u8::MAX as u16 { diff --git a/src/character_widths.bin b/src/character_widths.bin index 886a220..26d7de0 100644 Binary files a/src/character_widths.bin and b/src/character_widths.bin differ diff --git a/src/context.rs b/src/context.rs index 1069a00..ceb75af 100644 --- a/src/context.rs +++ b/src/context.rs @@ -8,6 +8,10 @@ use std::time::{Duration, Instant}; /// Context is useful for taking moderation actions on a per-user basis i.e. each user would get /// their own Context. +/// +/// # Recommendation +/// +/// Use this as a reference implementation e.g. by copying and adapting it. #[derive(Clone)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] #[cfg_attr(doc, doc(cfg(feature = "context")))] diff --git a/src/dictionary_extra.txt b/src/dictionary_extra.txt index 4222075..f40c2e9 100644 --- a/src/dictionary_extra.txt +++ b/src/dictionary_extra.txt @@ -1,16 +1,21 @@ #8 # of +(until 2 secs 3 secs 4 secs +45s 5 secs 6 secs 7 secs 8 secs 88 9 secs +9 is still 99 0 secs +300 bot +600 bot twinkie two secs three secs @@ -22,6 +27,7 @@ eight secs nine secs ten secs aboutit +admit it's ain't it alt an ai @@ -78,6 +84,7 @@ few secs ffa game fire cracker fire crackers +forgot it's francoitalian franco italian freakin @@ -101,6 +108,7 @@ hellen hellp h on keyboard h tier +hi @Bla hi tirp ho ho ho honkeytonk @@ -184,6 +192,7 @@ pp. 9 pussinboots puss in boots ref'd +refresh at rip saturated fat shoehorn your @@ -197,6 +206,7 @@ suicide squad superbowlxxx tally ho tally-ho +tea the test test test then i guess then talk @@ -229,6 +239,7 @@ virgin islands wassup wasn't it wouldn't it +xp or no yass yesturday zenga diff --git a/src/false_positives.txt b/src/false_positives.txt index 94c37c4..ba5962b 100644 --- a/src/false_positives.txt +++ b/src/false_positives.txt @@ -1,13 +1,18 @@ # of #8 +(until 0 secs 2 secs 3 secs +300 bot 4 secs +45s 5 secs 6 secs +600 bot 7 secs 8 secs +9 is still 9 secs a analog a analyse @@ -147,6 +152,7 @@ adipex nissan adipex pee adipex rated adiposogenital +admit it's ado lif adramelech adrammelech @@ -2749,6 +2755,14 @@ bol lock bol locks bol look bol looks +bomb china +bomb india +bomb iran +bomb israel +bomb palestine +bomb russia +bomb ukraine +bomb usage bon ed bon eric bon erik @@ -6863,6 +6877,7 @@ fore skin forebreast forget lost forget married +forgot it's fork cocktail fork commission fork cook @@ -7800,6 +7815,7 @@ heterosex heterotic hexadic hexanal +hi @Bla hi little hi tier hi tile @@ -7822,6 +7838,7 @@ highs perm highs seeks hilar hildebrandic +hill hitting hill illus hill iv hill ju @@ -9654,6 +9671,7 @@ junk until junk untitled junk unto jurisprude +just cumulative justments cumulative justments ext justments hilt @@ -9998,6 +10016,7 @@ kill twelve kill twenty kill twi kill ty +killed yourself killian killing jewel killing palestinian @@ -10351,6 +10370,7 @@ less blin less bo lets cumulative lets ext +lets fake lets hilt lets hit lets lut @@ -13515,6 +13535,7 @@ plumbaginaceous plumbum plumigerous plzz +pmsg pn lips pn nigeria pnigerophobia @@ -15134,6 +15155,7 @@ res perm res seeks resex resh aging +resh at resh hilt resh hit resh it @@ -17647,6 +17669,7 @@ tch linking tch links tch little tchincou +tea the teanal teapottykin teataster @@ -19880,6 +19903,7 @@ xnxx until xnxx untitled xnxx unto xnxx vie +xp or no ya holes yacht its yacht texts diff --git a/src/profanity.csv b/src/profanity.csv index c7faaa2..f84f379 100644 --- a/src/profanity.csv +++ b/src/profanity.csv @@ -32,6 +32,7 @@ _l_,1,0,2,0,1 8==D,0,0,2,0,1 8==o,0,0,2,0,1 9/11,0,1,0,0,0 +9/!!,0,1,0,0,1 c==3,0,0,2,0,1 c==8,0,0,2,0,1 c==B,0,0,2,0,1 @@ -49,6 +50,7 @@ agina,0,0,1,0,0 ahole,2,0,1,2,0 aholes,2,0,1,0,0 aligatorbait,0,1,0,0,0 +al qassam,0,1,0,0,1 anal,2,0,2,0,0 anilingus,0,0,3,0,0 anus,2,0,0,0,0 @@ -81,6 +83,8 @@ asslick,2,0,1,2,0 assjaber,2,0,2,0,0 asspirate,2,2,2,0,1 asswipe,2,0,0,2,0 +auchwits,0,2,0,0,1 +auchwitz,0,2,0,0,1 auschwits,0,2,0,0,0 auschwitz,0,2,0,0,1 aushwits,0,2,0,0,2 @@ -105,6 +109,7 @@ baisee,2,0,1,0,0 ballgag,0,0,2,0,1 ballgravy,0,0,2,0,1 ballicker,0,0,1,0,0 +ballcancer,0,0,2,0,1 ball licking,0,0,2,0,0 balls,0,0,1,0,0 ballsaque,0,0,2,0,2 @@ -166,6 +171,8 @@ biggerduck,1,0,1,1,1 biggestduck,1,0,1,1,1 biggyat,0,0,2,0,1 bigknob,0,0,2,0,3 + big peis,0,0,2,0,2 + big pnis,0,0,2,0,2 bihtch,0,2,2,0,1 bihtches,0,2,2,0,1 bint,1,1,0,1,1 @@ -205,6 +212,14 @@ bolocks,2,0,2,0,0 bollok,2,0,2,0,1 bolloks,2,0,2,0,1 bolox,2,0,2,0,1 +bombchina,0,2,0,0,0 +bombindia,0,2,0,0,0 +bombiran,0,2,0,0,0 +bombisrael,0,2,0,0,0 +bombpalestine,0,2,0,0,0 +bombrussia,0,2,0,0,0 +bombukraine,0,2,0,0,0 +bomb usa,0,2,0,0,0 bondage,0,0,3,0,0 bone her,0,0,1,0,1 boner,0,0,3,0,0 @@ -290,6 +305,8 @@ chingchong,0,2,0,1,0 chingchangchong,0,2,0,1,0 chinc,0,2,0,0,1 chink,0,3,0,0,0 +chiturpants,1,0,0,0,2 +chityourpants,1,0,0,0,2 choad,0,0,2,0,1 choclatepeople,0,1,0,0,1 chocolatepeople,0,1,0,0,1 @@ -325,6 +342,7 @@ comingonyou,0,0,1,0,1 comitsuicide,1,2,0,3,0 concentrationcamp,0,1,0,0,1 condom,0,0,2,0,0 +condum,0,0,1,0,1 connard,1,0,0,2,0 connasse,1,0,0,2,0 coochie,0,0,1,0,0 @@ -451,6 +469,7 @@ eat dog,0,1,0,0,0 ecchi,0,0,2,0,0 eff,2,0,2,0,1 effing,2,0,2,0,1 +eff u,2,0,2,0,1 ejaculat,0,0,2,0,0 ejaculate,0,0,2,0,0 encule,2,0,0,0,0 @@ -546,6 +565,7 @@ femdom,0,0,1,0,0 femur feeler,0,0,1,0,0 fetish,0,0,2,0,0 fets luck,0,0,1,0,2 +fggot,0,2,1,0,1 fhuck,2,0,2,0,2 fhucked,2,0,1,0,2 fhucker,2,0,2,2,2 @@ -722,6 +742,7 @@ ghetto,0,2,0,0,0 giganut,0,0,1,0,1 gigolo,0,0,1,0,0 gilipolas,0,0,2,2,0 +gimme head,0,0,2,0,2 girlfrend,0,0,1,0,0 girlfriend,0,0,1,0,0 girlontop,0,0,1,0,0 @@ -799,6 +820,7 @@ hifler,0,1,0,0,2 hijacker,0,1,0,0,0 hijo de perra,0,0,1,2,0 hijo de puta,0,0,1,2,0 +hil hitl,0,2,0,0,3 hilter,0,2,0,0,2 his ashes,0,1,0,0,0 hisrivateplace,0,0,1,0,1 @@ -879,6 +901,7 @@ idolfhatler,0,2,0,0,2 idrugkids,0,1,0,0,1 ifkdurmum,2,2,3,2,2 igger,3,3,0,0,0 +ilovekids,0,0,1,0,1 has a black hawk,1,0,1,0,2 have a black hawk,1,0,1,0,2 i have ED,0,0,1,0,1 @@ -888,9 +911,11 @@ ima negro,0,1,0,0,1 imbecil,0,0,0,2,0 imma tuch u,0,0,1,0,1 impregnate,0,0,1,0,0 +inbred,0,1,1,0,0 incel,0,1,2,1,0 incest,0,0,3,0,0 -inbred,0,1,1,0,0 +indianbeater,0,2,0,0,1 +indianhater,0,1,0,0,1 injun,0,1,0,0,0 insemin,0,0,2,0,0 inseminate,0,0,3,0,0 @@ -902,6 +927,7 @@ intercourse,0,0,2,0,1 internmentcamp,0,1,0,0,1 irection,0,0,1,0,1 isgay,0,2,0,0,0 +islamhater,0,1,0,0,1 israeldid911,0,2,0,0,1 jackass,1,0,0,1,0 jackoff,0,0,2,0,0 @@ -938,6 +964,7 @@ jumpofabridge,0,1,0,2,0 jumpoutawindow,0,1,0,2,0 junglebunny,0,1,0,0,0 jusey,0,0,1,0,1 +justcum,0,0,1,0,0 kacke,1,0,0,0,0 kafir,0,1,0,0,0 khunt,0,1,1,0,1 @@ -958,6 +985,7 @@ kinky,0,0,1,0,0 kill student,3,0,0,0,0 killallblack,0,2,0,0,0 killallblak,0,2,0,0,1 +killallmuslim,0,2,0,0,0 killallofuirl,0,2,0,0,2 killallofyouirl,0,2,0,0,1 killallstudent,0,3,0,0,0 @@ -965,6 +993,7 @@ killamerica,0,1,0,0,0 killaschool,3,0,0,0,0 killchinese,0,2,0,0,0 killchina,0,2,0,0,0 +killedyourself,0,2,0,3,0 killfamily,0,2,0,0,0 killjew,0,2,0,0,0 killmyfamily,0,2,0,0,0 @@ -1051,6 +1080,7 @@ lesbayn,0,0,1,0,1 lesbian,0,0,2,0,0 lesbin,0,0,1,0,1 lesbo,0,0,1,0,1 + letsfak,0,0,2,0,2 lezbo,0,0,2,0,1 lezzie,0,2,2,2,1 lezzo,0,2,2,2,1 @@ -1064,6 +1094,7 @@ ligmaball,0,0,2,0,1 likecoc,0,0,1,0,0 like monkey,0,1,0,2,1 like a monkey,0,1,0,2,1 + limp pns,0,0,2,0,2 los cojones,0,0,1,0,0 loser,0,0,0,2,0 lovebone,0,0,1,0,0 @@ -1134,6 +1165,7 @@ mikeock,2,0,2,1,2 mikeoxlong,2,0,2,0,2 milf,0,0,1,0,0 milfhunter,0,0,2,0,0 +milkers,0,0,1,0,1 milkyourmother,0,0,1,2,0 minesinches,1,0,1,0,1 minge,0,0,3,0,1 @@ -1237,6 +1269,7 @@ nga,2,3,0,0,2 ngga,2,3,0,0,2 ngger,3,5,0,0,2 ngiger,3,5,0,0,3 +ni55a,1,2,0,0,2 nibba,2,3,0,0,2 nicca,2,3,0,0,2 nicga,2,3,0,0,2 @@ -1283,6 +1316,7 @@ nmsl,0,1,0,2,2 nobjockey,0,0,1,0,0 nobjocky,0,0,1,0,0 nobjokey,0,0,1,0,1 + nogger,1,2,0,0,2 noggers,3,5,0,0,2 nolife,0,0,0,2,0 no nut nov,0,0,2,0,1 @@ -1315,6 +1349,8 @@ nutjob,0,0,0,1,0 nutsack,0,0,2,0,0 nuts ack,0,0,2,0,1 nutte,0,1,1,0,0 +nutted on,0,0,2,0,1 +nut on you,0,0,2,0,1 nuzi,0,2,0,0,2 nygar,1,2,0,0,3 nyger,3,5,0,0,2 @@ -1435,6 +1471,7 @@ pleasurechest,0,0,2,0,1 pleasured,0,0,1,0,0 pmalsenis,0,0,1,1,1 pmcwagner,0,1,0,0,0 + pms,0,0,1,0,1 polesmoker,0,0,2,0,1 pompinara,1,0,2,1,0 pompino,1,0,2,0,0 @@ -1527,6 +1564,7 @@ reversecowgirl,0,0,3,0,0 rice eater,0,1,0,0,0 rimjaw,0,0,1,0,1 rimjob,0,0,2,0,1 + rim me,0,0,1,0,1 rimming,0,0,1,0,1 ritard,0,1,0,1,1 romance,0,0,1,0,0 @@ -1652,6 +1690,8 @@ slut,0,2,2,0,0 smallD,2,0,2,2,2 small dick,2,0,2,2,0 small dk,2,0,2,2,2 +smash a child,0,0,2,0,1 +smash a kid,0,0,2,0,1 smashingman,0,0,2,0,1 smashingmen,0,0,2,0,1 smashingwoman,0,0,2,0,1 @@ -1709,6 +1749,7 @@ stronzo,2,0,1,0,0 stupid,0,0,0,1,0 squaw,0,1,0,0,0 squintyeye,0,1,0,0,1 +suc me dadt,0,0,2,0,2 suc my,0,0,3,0,1 succ,0,0,1,1,1 sucs,0,0,1,1,1 @@ -1721,6 +1762,7 @@ sucks,0,0,1,1,0 suckit,0,0,2,2,0 suckme,0,0,3,0,0 suckmy,0,0,3,0,0 +suckballs,0,0,3,0,0 suck your,0,0,3,0,0 suhck,0,0,1,1,2 suhckme,0,0,3,0,2 @@ -1785,6 +1827,7 @@ touched a kid,0,0,1,0,1 touched kids,0,0,1,0,1 touching a kid,0,0,1,0,1 touching kids,0,0,1,0,1 +touching little kids,0,0,1,0,1 touchherprivate,0,0,1,0,1 touchhisprivate,0,0,1,0,0 touchmyprivate,0,0,1,0,1 @@ -1911,8 +1954,11 @@ wilmadiqfit,0,0,3,0,1 wichser,0,0,1,1,0 wiseass,1,0,0,1,0 with autism,0,1,0,1,1 +withdarkskin,0,1,0,0,1 wives,0,0,1,0,0 wnker,0,0,1,0,1 +woreacondom,0,0,2,0,0 +woreacondum,0,0,2,0,1 wuss,0,0,0,1,0 wuzie,0,0,0,1,0 xrated,0,0,2,0,0 @@ -1934,8 +1980,10 @@ you monkey,0,1,0,1,0 you mother,0,0,1,1,0 your adres,0,0,0,0,2 yourass,2,0,1,0,0 +yourchit,1,0,0,2,2 yourdadleft,0,0,0,2,0 your gash,0,0,1,0,0 +yourechit,1,0,0,2,2 youre hole,0,0,2,0,3 youremother,0,0,1,1,1 youresobad,0,0,0,2,0 @@ -2050,4 +2098,17 @@ ybnюдok,1,0,1,0,0 艹尼马,2,0,0,0,0 已撸,2,0,0,0,0 打飞机,0,0,2,0,0 -纳粹,0,1,0,0,0 \ No newline at end of file +纳粹,0,1,0,0,0 +犹太必死,0,2,0,0,1 +爆炒犹太,0,2,0,0,2 +共产党,0,1,0,0,0 +共贼,0,1,0,0,0 +法轮功,0,1,0,0,0 +明慧网,0,1,0,0,1 +明慧广播电台,0,1,0,0,1 +希特勒,0,1,0,0,0 +台湾独立,0,1,0,0,0 +香港独立,0,1,0,0,0 +澳门独立,0,1,0,0,0 +他妈ㄉ王八蛋,2,0,0,2,0 +塞你老母,0,0,2,0,1 \ No newline at end of file diff --git a/src/test_positive.txt b/src/test_positive.txt index dd92680..0dc04c0 100644 --- a/src/test_positive.txt +++ b/src/test_positive.txt @@ -2717,4 +2717,9 @@ ima negro gabe_itch ᶰᵃᶻᶤ 🅝🅔ggir -m1qq3r \ No newline at end of file +m1qq3r +nutt on you kid +f # g g o t s +p§§y +kill all muslims +9/!! \ No newline at end of file