Shorten slur filter to avoid false positives (fixes #535)

This commit is contained in:
Felix Ableitner 2021-01-12 16:32:38 +01:00
parent ccd2b9eb75
commit 15710a0595
2 changed files with 6 additions and 6 deletions

View file

@ -53,7 +53,7 @@ fn test_valid_post_title() {
#[test]
fn test_slur_filter() {
let test =
"coons test dindu ladyboy tranny retardeds. Capitalized Niggerz. This is a bunch of other safe text.";
"faggot test kike tranny cocksucker retardeds. Capitalized Niggerz. This is a bunch of other safe text.";
let slur_free = "No slurs here";
assert_eq!(
remove_slurs(&test),
@ -63,13 +63,13 @@ fn test_slur_filter() {
let has_slurs_vec = vec![
"Niggerz",
"coons",
"dindu",
"ladyboy",
"cocksucker",
"faggot",
"kike",
"retardeds",
"tranny",
];
let has_slurs_err_str = "No slurs - Niggerz, coons, dindu, ladyboy, retardeds, tranny";
let has_slurs_err_str = "No slurs - Niggerz, cocksucker, faggot, kike, retardeds, tranny";
assert_eq!(slur_check(test), Err(has_slurs_vec));
assert_eq!(slur_check(slur_free), Ok(()));

View file

@ -7,7 +7,7 @@ use regex::{Regex, RegexBuilder};
lazy_static! {
static ref EMAIL_REGEX: Regex = Regex::new(r"^[a-zA-Z0-9.!#$%&*+/=?^_`{|}~-]+@[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)*$").unwrap();
static ref SLUR_REGEX: Regex = RegexBuilder::new(r"(fag(g|got|tard)?|maricos?|cock\s?sucker(s|ing)?|\bn(i|1)g(\b|g?(a|er)?(s|z)?)\b|dindu(s?)|mudslime?s?|kikes?|mongoloids?|towel\s*heads?|\bspi(c|k)s?\b|\bchinks?|niglets?|beaners?|\bnips?\b|\bcoons?\b|jungle\s*bunn(y|ies?)|jigg?aboo?s?|\bpakis?\b|rag\s*heads?|gooks?|cunts?|bitch(es|ing|y)?|puss(y|ies?)|twats?|feminazis?|whor(es?|ing)|\bslut(s|t?y)?|\btr(a|@)nn?(y|ies?)|ladyboy(s?)|\b(b|re|r)tard(ed)?s?)").case_insensitive(true).build().unwrap();
static ref SLUR_REGEX: Regex = RegexBuilder::new(r"(fag(g|got|tard)?\b|cock\s?sucker(s|ing)?|\bn(i|1)g(\b|g?(a|er)?(s|z)?)\b|mudslime?s?|kikes?|\bspi(c|k)s?\b|\bchinks?|gooks?|bitch(es|ing|y)?|whor(es?|ing)|\btr(a|@)nn?(y|ies?)|\b(b|re|r)tard(ed)?s?)").case_insensitive(true).build().unwrap();
static ref USERNAME_MATCHES_REGEX: Regex = Regex::new(r"/u/[a-zA-Z][0-9a-zA-Z_]*").unwrap();
// TODO keep this old one, it didn't work with port well tho
// static ref MENTIONS_REGEX: Regex = Regex::new(r"@(?P<name>[\w.]+)@(?P<domain>[a-zA-Z0-9._-]+\.[a-zA-Z0-9_-]+)").unwrap();