Simplify slur regex (#5442)

* Simplify logic for slur regex

* Fix tests

* clippy

* fix test

* cache regex

* longer cache duration
This commit is contained in:
Nutomic 2025-02-20 17:47:18 +00:00 committed by GitHub
parent 510b0db869
commit 544a4cc039
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
30 changed files with 159 additions and 258 deletions

1
Cargo.lock generated
View file

@ -2570,6 +2570,7 @@ dependencies = [
"lemmy_db_views",
"lemmy_utils",
"pretty_assertions",
"regex",
"serial_test",
"sitemap-rs",
"tokio",

View file

@ -31,6 +31,7 @@ anyhow = { workspace = true }
tracing = { workspace = true }
chrono = { workspace = true }
url = { workspace = true }
regex = { workspace = true }
hound = "3.5.1"
sitemap-rs = "0.2.2"
totp-rs = { version = "5.6.0", features = ["gen_secret", "otpauth"] }

View file

@ -5,7 +5,7 @@ use lemmy_api_common::{
community::BanFromCommunity,
context::LemmyContext,
send_activity::{ActivityChannel, SendActivityData},
utils::{check_expire_time, local_site_to_slur_regex},
utils::check_expire_time,
};
use lemmy_db_schema::{
source::{
@ -15,7 +15,6 @@ use lemmy_db_schema::{
CommunityPersonBan,
CommunityPersonBanForm,
},
local_site::LocalSite,
mod_log::moderator::{ModBanFromCommunity, ModBanFromCommunityForm},
person::Person,
},
@ -26,6 +25,7 @@ use lemmy_utils::{
error::{LemmyErrorExt, LemmyErrorType, LemmyResult},
utils::slurs::check_slurs,
};
use regex::Regex;
use std::io::Cursor;
use totp_rs::{Secret, TOTP};
@ -79,9 +79,7 @@ pub(crate) fn captcha_as_wav_base64(captcha: &Captcha) -> LemmyResult<String> {
}
/// Check size of report
pub(crate) fn check_report_reason(reason: &str, local_site: &LocalSite) -> LemmyResult<()> {
let slur_regex = &local_site_to_slur_regex(local_site);
pub(crate) fn check_report_reason(reason: &str, slur_regex: &Regex) -> LemmyResult<()> {
check_slurs(reason, slur_regex)?;
if reason.is_empty() {
Err(LemmyErrorType::ReportReasonRequired)?

View file

@ -3,12 +3,7 @@ use actix_web::web::Json;
use lemmy_api_common::{
context::LemmyContext,
person::SaveUserSettings,
utils::{
get_url_blocklist,
local_site_to_slur_regex,
process_markdown_opt,
send_verification_email,
},
utils::{get_url_blocklist, process_markdown_opt, send_verification_email, slur_regex},
SuccessResponse,
};
use lemmy_db_schema::{
@ -35,7 +30,7 @@ pub async fn save_user_settings(
) -> LemmyResult<Json<SuccessResponse>> {
let site_view = SiteView::read_local(&mut context.pool()).await?;
let slur_regex = local_site_to_slur_regex(&site_view.local_site);
let slur_regex = slur_regex(&context).await?;
let url_blocklist = get_url_blocklist(&context).await?;
let bio = diesel_string_update(
process_markdown_opt(&data.bio, &slur_regex, &url_blocklist, &context)

View file

@ -9,6 +9,7 @@ use lemmy_api_common::{
check_comment_deleted_or_removed,
check_community_user_action,
send_new_report_email_to_admins,
slur_regex,
},
};
use lemmy_db_schema::{
@ -27,10 +28,9 @@ pub async fn create_comment_report(
context: Data<LemmyContext>,
local_user_view: LocalUserView,
) -> LemmyResult<Json<CommentReportResponse>> {
let local_site = LocalSite::read(&mut context.pool()).await?;
let reason = data.reason.trim().to_string();
check_report_reason(&reason, &local_site)?;
let slur_regex = slur_regex(&context).await?;
check_report_reason(&reason, &slur_regex)?;
let person_id = local_user_view.person.id;
let comment_id = data.comment_id;
@ -67,6 +67,7 @@ pub async fn create_comment_report(
CommentReportView::read(&mut context.pool(), report.id, person_id).await?;
// Email the admins
let local_site = LocalSite::read(&mut context.pool()).await?;
if local_site.reports_email_admins {
send_new_report_email_to_admins(
&comment_report_view.creator.name,

View file

@ -9,6 +9,7 @@ use lemmy_api_common::{
check_community_user_action,
check_post_deleted_or_removed,
send_new_report_email_to_admins,
slur_regex,
},
};
use lemmy_db_schema::{
@ -27,10 +28,9 @@ pub async fn create_post_report(
context: Data<LemmyContext>,
local_user_view: LocalUserView,
) -> LemmyResult<Json<PostReportResponse>> {
let local_site = LocalSite::read(&mut context.pool()).await?;
let reason = data.reason.trim().to_string();
check_report_reason(&reason, &local_site)?;
let slur_regex = slur_regex(&context).await?;
check_report_reason(&reason, &slur_regex)?;
let person_id = local_user_view.person.id;
let post_id = data.post_id;
@ -62,6 +62,7 @@ pub async fn create_post_report(
let post_report_view = PostReportView::read(&mut context.pool(), report.id, person_id).await?;
// Email the admins
let local_site = LocalSite::read(&mut context.pool()).await?;
if local_site.reports_email_admins {
send_new_report_email_to_admins(
&post_report_view.creator.name,

View file

@ -3,7 +3,7 @@ use actix_web::web::{Data, Json};
use lemmy_api_common::{
context::LemmyContext,
reports::private_message::{CreatePrivateMessageReport, PrivateMessageReportResponse},
utils::send_new_report_email_to_admins,
utils::{send_new_report_email_to_admins, slur_regex},
};
use lemmy_db_schema::{
source::{
@ -21,10 +21,9 @@ pub async fn create_pm_report(
context: Data<LemmyContext>,
local_user_view: LocalUserView,
) -> LemmyResult<Json<PrivateMessageReportResponse>> {
let local_site = LocalSite::read(&mut context.pool()).await?;
let reason = data.reason.trim().to_string();
check_report_reason(&reason, &local_site)?;
let slur_regex = slur_regex(&context).await?;
check_report_reason(&reason, &slur_regex)?;
let person_id = local_user_view.person.id;
let private_message_id = data.private_message_id;
@ -50,6 +49,7 @@ pub async fn create_pm_report(
PrivateMessageReportView::read(&mut context.pool(), report.id).await?;
// Email the admins
let local_site = LocalSite::read(&mut context.pool()).await?;
if local_site.reports_email_admins {
send_new_report_email_to_admins(
&private_message_report_view.creator.name,

View file

@ -71,8 +71,8 @@ use lemmy_utils::{
spawn_try_task,
utils::{
markdown::{image_links::markdown_rewrite_image_links, markdown_check_for_blocked_urls},
slurs::{build_slur_regex, remove_slurs},
validation::clean_urls_in_text,
slurs::remove_slurs,
validation::{build_and_check_regex, clean_urls_in_text},
},
CacheLock,
CACHE_DURATION_FEDERATION,
@ -540,15 +540,22 @@ pub fn local_site_rate_limit_to_rate_limit_config(
})
}
pub fn local_site_to_slur_regex(local_site: &LocalSite) -> Option<LemmyResult<Regex>> {
build_slur_regex(local_site.slur_filter_regex.as_deref())
}
pub fn local_site_opt_to_slur_regex(local_site: &Option<LocalSite>) -> Option<LemmyResult<Regex>> {
local_site
.as_ref()
.map(local_site_to_slur_regex)
.unwrap_or(None)
/// Returns the compiled slur filter regex for the local site.
///
/// The regex is built from `LocalSite.slur_filter_regex` via `build_and_check_regex` and kept in
/// a function-local static cache that holds at most one entry (keyed by `()`) with a time-to-live
/// of `CACHE_DURATION_FEDERATION`. Because the cache is private to this function, nothing outside
/// of it can invalidate the entry.
/// NOTE(review): this presumably means an edited slur filter is only picked up once the cached
/// entry expires; confirm against the cache implementation.
///
/// # Errors
///
/// Returns an error with the message `Failed to construct regex: ...` when the cache initializer
/// (i.e. `build_and_check_regex`) fails.
pub async fn slur_regex(context: &LemmyContext) -> LemmyResult<Regex> {
static CACHE: CacheLock<Regex> = LazyLock::new(|| {
Cache::builder()
.max_capacity(1)
.time_to_live(CACHE_DURATION_FEDERATION)
.build()
});
Ok(
CACHE
// Single-entry cache: the unit key `()` always addresses the same slot.
.try_get_with((), async {
// A failed `LocalSite` read is deliberately turned into `None` rather than an error, so the
// regex is then built from "no filter configured".
// NOTE(review): what `build_and_check_regex(None)` yields is not visible here; verify.
let local_site = LocalSite::read(&mut context.pool()).await.ok();
build_and_check_regex(local_site.and_then(|s| s.slur_filter_regex).as_deref())
})
.await
// Re-wrap the initializer's error into an anyhow error carrying its display text.
.map_err(|e| anyhow::anyhow!("Failed to construct regex: {e}"))?,
)
}
pub async fn get_url_blocklist(context: &LemmyContext) -> LemmyResult<RegexSet> {
@ -1037,7 +1044,7 @@ pub fn check_conflicting_like_filters(
pub async fn process_markdown(
text: &str,
slur_regex: &Option<LemmyResult<Regex>>,
slur_regex: &Regex,
url_blocklist: &RegexSet,
context: &LemmyContext,
) -> LemmyResult<String> {
@ -1069,7 +1076,7 @@ pub async fn process_markdown(
pub async fn process_markdown_opt(
text: &Option<String>,
slur_regex: &Option<LemmyResult<Regex>>,
slur_regex: &Regex,
url_blocklist: &RegexSet,
context: &LemmyContext,
) -> LemmyResult<Option<String>> {

View file

@ -10,8 +10,8 @@ use lemmy_api_common::{
check_post_deleted_or_removed,
get_url_blocklist,
is_mod_or_admin,
local_site_to_slur_regex,
process_markdown,
slur_regex,
update_read_comments,
},
};
@ -21,7 +21,6 @@ use lemmy_db_schema::{
source::{
comment::{Comment, CommentInsertForm, CommentLike, CommentLikeForm},
comment_reply::{CommentReply, CommentReplyUpdateForm},
local_site::LocalSite,
person_comment_mention::{PersonCommentMention, PersonCommentMentionUpdateForm},
},
traits::{Crud, Likeable},
@ -38,9 +37,7 @@ pub async fn create_comment(
context: Data<LemmyContext>,
local_user_view: LocalUserView,
) -> LemmyResult<Json<CommentResponse>> {
let local_site = LocalSite::read(&mut context.pool()).await?;
let slur_regex = local_site_to_slur_regex(&local_site);
let slur_regex = slur_regex(&context).await?;
let url_blocklist = get_url_blocklist(&context).await?;
let content = process_markdown(&data.content, &slur_regex, &url_blocklist, &context).await?;
is_valid_body_field(&content, false)?;

View file

@ -6,20 +6,12 @@ use lemmy_api_common::{
comment::{CommentResponse, EditComment},
context::LemmyContext,
send_activity::{ActivityChannel, SendActivityData},
utils::{
check_community_user_action,
get_url_blocklist,
local_site_to_slur_regex,
process_markdown_opt,
},
utils::{check_community_user_action, get_url_blocklist, process_markdown_opt, slur_regex},
};
use lemmy_db_schema::{
impls::actor_language::validate_post_language,
newtypes::PostOrCommentId,
source::{
comment::{Comment, CommentUpdateForm},
local_site::LocalSite,
},
source::comment::{Comment, CommentUpdateForm},
traits::Crud,
};
use lemmy_db_views::structs::{CommentView, LocalUserView};
@ -33,8 +25,6 @@ pub async fn update_comment(
context: Data<LemmyContext>,
local_user_view: LocalUserView,
) -> LemmyResult<Json<CommentResponse>> {
let local_site = LocalSite::read(&mut context.pool()).await?;
let comment_id = data.comment_id;
let orig_comment = CommentView::read(
&mut context.pool(),
@ -63,7 +53,7 @@ pub async fn update_comment(
)
.await?;
let slur_regex = local_site_to_slur_regex(&local_site);
let slur_regex = slur_regex(&context).await?;
let url_blocklist = get_url_blocklist(&context).await?;
let content = process_markdown_opt(&data.content, &slur_regex, &url_blocklist, &context).await?;
if let Some(content) = &content {

View file

@ -10,8 +10,8 @@ use lemmy_api_common::{
generate_inbox_url,
get_url_blocklist,
is_admin,
local_site_to_slur_regex,
process_markdown_opt,
slur_regex,
},
};
use lemmy_db_schema::{
@ -54,7 +54,7 @@ pub async fn create_community(
Err(LemmyErrorType::OnlyAdminsCanCreateCommunities)?
}
let slur_regex = local_site_to_slur_regex(&local_site);
let slur_regex = slur_regex(&context).await?;
let url_blocklist = get_url_blocklist(&context).await?;
check_slurs(&data.name, &slur_regex)?;
check_slurs(&data.title, &slur_regex)?;

View file

@ -7,18 +7,12 @@ use lemmy_api_common::{
community::{CommunityResponse, EditCommunity},
context::LemmyContext,
send_activity::{ActivityChannel, SendActivityData},
utils::{
check_community_mod_action,
get_url_blocklist,
local_site_to_slur_regex,
process_markdown_opt,
},
utils::{check_community_mod_action, get_url_blocklist, process_markdown_opt, slur_regex},
};
use lemmy_db_schema::{
source::{
actor_language::{CommunityLanguage, SiteLanguage},
community::{Community, CommunityUpdateForm},
local_site::LocalSite,
},
traits::Crud,
utils::diesel_string_update,
@ -34,9 +28,7 @@ pub async fn update_community(
context: Data<LemmyContext>,
local_user_view: LocalUserView,
) -> LemmyResult<Json<CommunityResponse>> {
let local_site = LocalSite::read(&mut context.pool()).await?;
let slur_regex = local_site_to_slur_regex(&local_site);
let slur_regex = slur_regex(&context).await?;
let url_blocklist = get_url_blocklist(&context).await?;
check_slurs_opt(&data.title, &slur_regex)?;

View file

@ -11,9 +11,9 @@ use lemmy_api_common::{
check_community_user_action,
get_url_blocklist,
honeypot_check,
local_site_to_slur_regex,
process_markdown_opt,
send_webmention,
slur_regex,
},
};
use lemmy_db_schema::{
@ -21,7 +21,6 @@ use lemmy_db_schema::{
newtypes::PostOrCommentId,
source::{
community::Community,
local_site::LocalSite,
post::{Post, PostInsertForm, PostLike, PostLikeForm, PostRead, PostReadForm},
},
traits::{Crud, Likeable},
@ -48,11 +47,9 @@ pub async fn create_post(
context: Data<LemmyContext>,
local_user_view: LocalUserView,
) -> LemmyResult<Json<PostResponse>> {
let local_site = LocalSite::read(&mut context.pool()).await?;
honeypot_check(&data.honeypot)?;
let slur_regex = local_site_to_slur_regex(&local_site);
let slur_regex = slur_regex(&context).await?;
check_slurs(&data.name, &slur_regex)?;
let url_blocklist = get_url_blocklist(&context).await?;

View file

@ -11,9 +11,9 @@ use lemmy_api_common::{
utils::{
check_community_user_action,
get_url_blocklist,
local_site_to_slur_regex,
process_markdown_opt,
send_webmention,
slur_regex,
},
};
use lemmy_db_schema::{
@ -21,7 +21,6 @@ use lemmy_db_schema::{
newtypes::PostOrCommentId,
source::{
community::Community,
local_site::LocalSite,
post::{Post, PostUpdateForm},
},
traits::Crud,
@ -49,15 +48,13 @@ pub async fn update_post(
context: Data<LemmyContext>,
local_user_view: LocalUserView,
) -> LemmyResult<Json<PostResponse>> {
let local_site = LocalSite::read(&mut context.pool()).await?;
let url = diesel_url_update(data.url.as_deref())?;
let custom_thumbnail = diesel_url_update(data.custom_thumbnail.as_deref())?;
let url_blocklist = get_url_blocklist(&context).await?;
let slur_regex = local_site_to_slur_regex(&local_site);
let slur_regex = slur_regex(&context).await?;
let body = diesel_string_update(
process_markdown_opt(&data.body, &slur_regex, &url_blocklist, &context)

View file

@ -7,14 +7,13 @@ use lemmy_api_common::{
utils::{
check_private_messages_enabled,
get_url_blocklist,
local_site_to_slur_regex,
process_markdown,
send_email_to_user,
slur_regex,
},
};
use lemmy_db_schema::{
source::{
local_site::LocalSite,
person_block::PersonBlock,
private_message::{PrivateMessage, PrivateMessageInsertForm},
},
@ -31,9 +30,7 @@ pub async fn create_private_message(
context: Data<LemmyContext>,
local_user_view: LocalUserView,
) -> LemmyResult<Json<PrivateMessageResponse>> {
let local_site = LocalSite::read(&mut context.pool()).await?;
let slur_regex = local_site_to_slur_regex(&local_site);
let slur_regex = slur_regex(&context).await?;
let url_blocklist = get_url_blocklist(&context).await?;
let content = process_markdown(&data.content, &slur_regex, &url_blocklist, &context).await?;
is_valid_body_field(&content, false)?;

View file

@ -5,13 +5,10 @@ use lemmy_api_common::{
context::LemmyContext,
private_message::{EditPrivateMessage, PrivateMessageResponse},
send_activity::{ActivityChannel, SendActivityData},
utils::{get_url_blocklist, local_site_to_slur_regex, process_markdown},
utils::{get_url_blocklist, process_markdown, slur_regex},
};
use lemmy_db_schema::{
source::{
local_site::LocalSite,
private_message::{PrivateMessage, PrivateMessageUpdateForm},
},
source::private_message::{PrivateMessage, PrivateMessageUpdateForm},
traits::Crud,
};
use lemmy_db_views::structs::{LocalUserView, PrivateMessageView};
@ -25,8 +22,6 @@ pub async fn update_private_message(
context: Data<LemmyContext>,
local_user_view: LocalUserView,
) -> LemmyResult<Json<PrivateMessageResponse>> {
let local_site = LocalSite::read(&mut context.pool()).await?;
// Checking permissions
let private_message_id = data.private_message_id;
let orig_private_message = PrivateMessage::read(&mut context.pool(), private_message_id).await?;
@ -35,7 +30,7 @@ pub async fn update_private_message(
}
// Doing the update
let slur_regex = local_site_to_slur_regex(&local_site);
let slur_regex = slur_regex(&context).await?;
let url_blocklist = get_url_blocklist(&context).await?;
let content = process_markdown(&data.content, &slur_regex, &url_blocklist, &context).await?;
is_valid_body_field(&content, false)?;

View file

@ -11,8 +11,8 @@ use lemmy_api_common::{
get_url_blocklist,
is_admin,
local_site_rate_limit_to_rate_limit_config,
local_site_to_slur_regex,
process_markdown_opt,
slur_regex,
},
};
use lemmy_db_schema::{
@ -57,7 +57,7 @@ pub async fn create_site(
let inbox_url = Some(generate_inbox_url()?);
let keypair = generate_actor_keypair()?;
let slur_regex = local_site_to_slur_regex(&local_site);
let slur_regex = slur_regex(&context).await?;
let url_blocklist = get_url_blocklist(&context).await?;
let sidebar = process_markdown_opt(&data.sidebar, &slur_regex, &url_blocklist, &context).await?;
@ -149,11 +149,11 @@ fn validate_create_payload(local_site: &LocalSite, create_site: &CreateSite) ->
// Check that the slur regex compiles, and returns the regex if valid...
// Prioritize using new slur regex from the request; if not provided, use the existing regex.
let slur_regex = build_and_check_regex(
&create_site
create_site
.slur_filter_regex
.as_deref()
.or(local_site.slur_filter_regex.as_deref()),
);
)?;
site_name_length_check(&create_site.name)?;
check_slurs(&create_site.name, &slur_regex)?;

View file

@ -10,8 +10,8 @@ use lemmy_api_common::{
get_url_blocklist,
is_admin,
local_site_rate_limit_to_rate_limit_config,
local_site_to_slur_regex,
process_markdown_opt,
slur_regex,
},
};
use lemmy_db_schema::{
@ -61,7 +61,7 @@ pub async fn update_site(
SiteLanguage::update(&mut context.pool(), discussion_languages.clone(), &site).await?;
}
let slur_regex = local_site_to_slur_regex(&local_site);
let slur_regex = slur_regex(&context).await?;
let url_blocklist = get_url_blocklist(&context).await?;
let sidebar = diesel_string_update(
process_markdown_opt(&data.sidebar, &slur_regex, &url_blocklist, &context)
@ -192,11 +192,11 @@ fn validate_update_payload(local_site: &LocalSite, edit_site: &EditSite) -> Lemm
// Check that the slur regex compiles, and return the regex if valid...
// Prioritize using new slur regex from the request; if not provided, use the existing regex.
let slur_regex = build_and_check_regex(
&edit_site
edit_site
.slur_filter_regex
.as_deref()
.or(local_site.slur_filter_regex.as_deref()),
);
)?;
if let Some(name) = &edit_site.name {
// The name doesn't need to be updated, but if provided it cannot be blanked out...

View file

@ -3,13 +3,10 @@ use actix_web::web::Json;
use lemmy_api_common::{
context::LemmyContext,
tagline::{CreateTagline, TaglineResponse},
utils::{get_url_blocklist, is_admin, local_site_to_slur_regex, process_markdown},
utils::{get_url_blocklist, is_admin, process_markdown, slur_regex},
};
use lemmy_db_schema::{
source::{
local_site::LocalSite,
tagline::{Tagline, TaglineInsertForm},
},
source::tagline::{Tagline, TaglineInsertForm},
traits::Crud,
};
use lemmy_db_views::structs::LocalUserView;
@ -23,9 +20,7 @@ pub async fn create_tagline(
// Make sure user is an admin
is_admin(&local_user_view)?;
let local_site = LocalSite::read(&mut context.pool()).await?;
let slur_regex = local_site_to_slur_regex(&local_site);
let slur_regex = slur_regex(&context).await?;
let url_blocklist = get_url_blocklist(&context).await?;
let content = process_markdown(&data.content, &slur_regex, &url_blocklist, &context).await?;

View file

@ -4,13 +4,10 @@ use chrono::Utc;
use lemmy_api_common::{
context::LemmyContext,
tagline::{TaglineResponse, UpdateTagline},
utils::{get_url_blocklist, is_admin, local_site_to_slur_regex, process_markdown},
utils::{get_url_blocklist, is_admin, process_markdown, slur_regex},
};
use lemmy_db_schema::{
source::{
local_site::LocalSite,
tagline::{Tagline, TaglineUpdateForm},
},
source::tagline::{Tagline, TaglineUpdateForm},
traits::Crud,
};
use lemmy_db_views::structs::LocalUserView;
@ -24,9 +21,7 @@ pub async fn update_tagline(
// Make sure user is an admin
is_admin(&local_user_view)?;
let local_site = LocalSite::read(&mut context.pool()).await?;
let slur_regex = local_site_to_slur_regex(&local_site);
let slur_regex = slur_regex(&context).await?;
let url_blocklist = get_url_blocklist(&context).await?;
let content = process_markdown(&data.content, &slur_regex, &url_blocklist, &context).await?;

View file

@ -11,10 +11,10 @@ use lemmy_api_common::{
check_user_valid,
generate_inbox_url,
honeypot_check,
local_site_to_slur_regex,
password_length_check,
send_new_applicant_email_to_admins,
send_verification_email_if_required,
slur_regex,
},
};
use lemmy_db_schema::{
@ -100,7 +100,7 @@ pub async fn register(
.await?;
}
let slur_regex = local_site_to_slur_regex(&local_site);
let slur_regex = slur_regex(&context).await?;
check_slurs(&data.username, &slur_regex)?;
check_slurs_opt(&data.answer, &slur_regex)?;
@ -326,7 +326,7 @@ pub async fn authenticate_with_oauth(
.as_ref()
.ok_or(LemmyErrorType::RegistrationUsernameRequired)?;
let slur_regex = local_site_to_slur_regex(&local_site);
let slur_regex = slur_regex(&context).await?;
check_slurs(username, &slur_regex)?;
check_slurs_opt(&data.answer, &slur_regex)?;

View file

@ -22,13 +22,12 @@ use activitypub_federation::{
use chrono::{DateTime, Utc};
use lemmy_api_common::{
context::LemmyContext,
utils::{get_url_blocklist, is_mod_or_admin, local_site_opt_to_slur_regex, process_markdown},
utils::{get_url_blocklist, is_mod_or_admin, process_markdown, slur_regex},
};
use lemmy_db_schema::{
source::{
comment::{Comment, CommentInsertForm, CommentUpdateForm},
community::Community,
local_site::LocalSite,
person::Person,
post::Post,
},
@ -175,11 +174,10 @@ impl Object for ApubComment {
let content = read_from_string_or_source(&note.content, &note.media_type, &note.source);
let local_site = LocalSite::read(&mut context.pool()).await.ok();
let slur_regex = &local_site_opt_to_slur_regex(&local_site);
let slur_regex = slur_regex(context).await?;
let url_blocklist = get_url_blocklist(context).await?;
let content = append_attachments_to_comment(content, &note.attachment, context).await?;
let content = process_markdown(&content, slur_regex, &url_blocklist, context).await?;
let content = process_markdown(&content, &slur_regex, &url_blocklist, context).await?;
let content = markdown_rewrite_remote_links(content, context).await;
let language_id = Some(
LanguageTag::to_language_id_single(note.language.unwrap_or_default(), &mut context.pool())
@ -226,7 +224,7 @@ pub(crate) mod tests {
};
use assert_json_diff::assert_json_include;
use html2md::parse_html;
use lemmy_db_schema::source::site::Site;
use lemmy_db_schema::source::{local_site::LocalSite, site::Site};
use pretty_assertions::assert_eq;
use serial_test::serial;

View file

@ -22,9 +22,9 @@ use lemmy_api_common::{
generate_moderators_url,
generate_outbox_url,
get_url_blocklist,
local_site_opt_to_slur_regex,
process_markdown_opt,
proxy_image_link_opt_apub,
slur_regex,
},
};
use lemmy_db_schema::{
@ -33,7 +33,6 @@ use lemmy_db_schema::{
activity::ActorType,
actor_language::CommunityLanguage,
community::{Community, CommunityInsertForm, CommunityUpdateForm},
local_site::LocalSite,
},
traits::{ApubActor, Crud},
CommunityVisibility,
@ -137,11 +136,10 @@ impl Object for ApubCommunity {
async fn from_json(group: Group, context: &Data<Self::DataType>) -> LemmyResult<ApubCommunity> {
let instance_id = fetch_instance_actor_for_object(&group.id, context).await?;
let local_site = LocalSite::read(&mut context.pool()).await.ok();
let slur_regex = &local_site_opt_to_slur_regex(&local_site);
let slur_regex = slur_regex(context).await?;
let url_blocklist = get_url_blocklist(context).await?;
let sidebar = read_from_string_or_source_opt(&group.content, &None, &group.source);
let sidebar = process_markdown_opt(&sidebar, slur_regex, &url_blocklist, context).await?;
let sidebar = process_markdown_opt(&sidebar, &slur_regex, &url_blocklist, context).await?;
let sidebar = markdown_rewrite_remote_links_opt(sidebar, context).await;
let icon = proxy_image_link_opt_apub(group.icon.map(|i| i.url), context).await?;
let banner = proxy_image_link_opt_apub(group.image.map(|i| i.url), context).await?;

View file

@ -2,7 +2,6 @@ use crate::{
activities::GetActorType,
check_apub_id_valid_with_strictness,
fetcher::markdown_links::markdown_rewrite_remote_links_opt,
local_site_data_cached,
objects::read_from_string_or_source_opt,
protocol::{
objects::{instance::Instance, LanguageTag},
@ -23,12 +22,7 @@ use activitypub_federation::{
use chrono::{DateTime, Utc};
use lemmy_api_common::{
context::LemmyContext,
utils::{
get_url_blocklist,
local_site_opt_to_slur_regex,
process_markdown_opt,
proxy_image_link_opt_apub,
},
utils::{get_url_blocklist, process_markdown_opt, proxy_image_link_opt_apub, slur_regex},
};
use lemmy_db_schema::{
newtypes::InstanceId,
@ -37,7 +31,6 @@ use lemmy_db_schema::{
activity::ActorType,
actor_language::SiteLanguage,
instance::Instance as DbInstance,
local_site::LocalSite,
site::{Site, SiteInsertForm},
},
traits::Crud,
@ -127,8 +120,7 @@ impl Object for ApubSite {
verify_domains_match(expected_domain, apub.id.inner())?;
verify_is_remote_object(&apub.id, data)?;
let local_site_data = local_site_data_cached(&mut data.pool()).await?;
let slur_regex = &local_site_opt_to_slur_regex(&local_site_data.local_site);
let slur_regex = &slur_regex(data).await?;
check_slurs(&apub.name, slur_regex)?;
check_slurs_opt(&apub.summary, slur_regex)?;
@ -143,11 +135,10 @@ impl Object for ApubSite {
.ok_or(FederationError::UrlWithoutDomain)?;
let instance = DbInstance::read_or_create(&mut context.pool(), domain.to_string()).await?;
let local_site = LocalSite::read(&mut context.pool()).await.ok();
let slur_regex = &local_site_opt_to_slur_regex(&local_site);
let slur_regex = slur_regex(context).await?;
let url_blocklist = get_url_blocklist(context).await?;
let sidebar = read_from_string_or_source_opt(&apub.content, &None, &apub.source);
let sidebar = process_markdown_opt(&sidebar, slur_regex, &url_blocklist, context).await?;
let sidebar = process_markdown_opt(&sidebar, &slur_regex, &url_blocklist, context).await?;
let sidebar = markdown_rewrite_remote_links_opt(sidebar, context).await;
let icon = proxy_image_link_opt_apub(apub.icon.map(|i| i.url), context).await?;
let banner = proxy_image_link_opt_apub(apub.image.map(|i| i.url), context).await?;

View file

@ -2,7 +2,6 @@ use crate::{
activities::GetActorType,
check_apub_id_valid_with_strictness,
fetcher::markdown_links::markdown_rewrite_remote_links_opt,
local_site_data_cached,
objects::{instance::fetch_instance_actor_for_object, read_from_string_or_source_opt},
protocol::{
objects::person::{Person, UserTypes},
@ -21,16 +20,15 @@ use lemmy_api_common::{
utils::{
generate_outbox_url,
get_url_blocklist,
local_site_opt_to_slur_regex,
process_markdown_opt,
proxy_image_link_opt_apub,
slur_regex,
},
};
use lemmy_db_schema::{
sensitive::SensitiveString,
source::{
activity::ActorType,
local_site::LocalSite,
person::{Person as DbPerson, PersonInsertForm, PersonUpdateForm},
},
traits::{ApubActor, Crud},
@ -123,28 +121,26 @@ impl Object for ApubPerson {
expected_domain: &Url,
context: &Data<Self::DataType>,
) -> LemmyResult<()> {
let local_site_data = local_site_data_cached(&mut context.pool()).await?;
let slur_regex = &local_site_opt_to_slur_regex(&local_site_data.local_site);
check_slurs(&person.preferred_username, slur_regex)?;
check_slurs_opt(&person.name, slur_regex)?;
let slur_regex = slur_regex(context).await?;
check_slurs(&person.preferred_username, &slur_regex)?;
check_slurs_opt(&person.name, &slur_regex)?;
verify_domains_match(person.id.inner(), expected_domain)?;
verify_is_remote_object(&person.id, context)?;
check_apub_id_valid_with_strictness(person.id.inner(), false, context).await?;
let bio = read_from_string_or_source_opt(&person.summary, &None, &person.source);
check_slurs_opt(&bio, slur_regex)?;
check_slurs_opt(&bio, &slur_regex)?;
Ok(())
}
async fn from_json(person: Person, context: &Data<Self::DataType>) -> LemmyResult<ApubPerson> {
let instance_id = fetch_instance_actor_for_object(&person.id, context).await?;
let local_site = LocalSite::read(&mut context.pool()).await.ok();
let slur_regex = &local_site_opt_to_slur_regex(&local_site);
let slur_regex = slur_regex(context).await?;
let url_blocklist = get_url_blocklist(context).await?;
let bio = read_from_string_or_source_opt(&person.summary, &None, &person.source);
let bio = process_markdown_opt(&bio, slur_regex, &url_blocklist, context).await?;
let bio = process_markdown_opt(&bio, &slur_regex, &url_blocklist, context).await?;
let bio = markdown_rewrite_remote_links_opt(bio, context).await;
let avatar = proxy_image_link_opt_apub(person.icon.map(|i| i.url), context).await?;
let banner = proxy_image_link_opt_apub(person.image.map(|i| i.url), context).await?;

View file

@ -2,7 +2,6 @@ use crate::{
activities::{generate_to, verify_person_in_community, verify_visibility},
check_apub_id_valid_with_strictness,
fetcher::markdown_links::{markdown_rewrite_remote_links_opt, to_local_url},
local_site_data_cached,
objects::read_from_string_or_source_opt,
protocol::{
objects::{
@ -28,12 +27,11 @@ use html2text::{from_read_with_decorator, render::TrivialDecorator};
use lemmy_api_common::{
context::LemmyContext,
request::generate_post_link_metadata,
utils::{get_url_blocklist, local_site_opt_to_slur_regex, process_markdown_opt},
utils::{get_url_blocklist, process_markdown_opt, slur_regex},
};
use lemmy_db_schema::{
source::{
community::Community,
local_site::LocalSite,
person::Person,
post::{Post, PostInsertForm, PostUpdateForm},
},
@ -164,9 +162,8 @@ impl Object for ApubPost {
check_apub_id_valid_with_strictness(page.id.inner(), community.local, context).await?;
verify_person_in_community(&page.creator()?, &community, context).await?;
let local_site_data = local_site_data_cached(&mut context.pool()).await?;
let slur_regex = &local_site_opt_to_slur_regex(&local_site_data.local_site);
check_slurs_opt(&page.name, slur_regex)?;
let slur_regex = slur_regex(context).await?;
check_slurs_opt(&page.name, &slur_regex)?;
verify_domains_match(page.creator()?.inner(), page.id.inner())?;
verify_visibility(&page.to, &page.cc, &community)?;
@ -214,8 +211,6 @@ impl Object for ApubPost {
}
let first_attachment = page.attachment.first();
let local_site = LocalSite::read(&mut context.pool()).await.ok();
let url = if let Some(attachment) = first_attachment.cloned() {
Some(attachment.url())
} else if page.kind == PageType::Video {
@ -237,10 +232,10 @@ impl Object for ApubPost {
let alt_text = first_attachment.cloned().and_then(Attachment::alt_text);
let slur_regex = &local_site_opt_to_slur_regex(&local_site);
let slur_regex = slur_regex(context).await?;
let body = read_from_string_or_source_opt(&page.content, &page.media_type, &page.source);
let body = process_markdown_opt(&body, slur_regex, &url_blocklist, context).await?;
let body = process_markdown_opt(&body, &slur_regex, &url_blocklist, context).await?;
let body = markdown_rewrite_remote_links_opt(body, context).await;
let language_id = Some(
LanguageTag::to_language_id_single(page.language.unwrap_or_default(), &mut context.pool())

View file

@ -18,17 +18,11 @@ use activitypub_federation::{
use chrono::{DateTime, Utc};
use lemmy_api_common::{
context::LemmyContext,
utils::{
check_private_messages_enabled,
get_url_blocklist,
local_site_opt_to_slur_regex,
process_markdown,
},
utils::{check_private_messages_enabled, get_url_blocklist, process_markdown, slur_regex},
};
use lemmy_db_schema::{
source::{
instance::Instance,
local_site::LocalSite,
person::Person,
person_block::PersonBlock,
private_message::{PrivateMessage as DbPrivateMessage, PrivateMessageInsertForm},
@ -152,12 +146,11 @@ impl Object for ApubPrivateMessage {
{
check_private_messages_enabled(&recipient_local_user)?;
}
let local_site = LocalSite::read(&mut context.pool()).await.ok();
let slur_regex = &local_site_opt_to_slur_regex(&local_site);
let slur_regex = slur_regex(context).await?;
let url_blocklist = get_url_blocklist(context).await?;
let content = read_from_string_or_source(&note.content, &None, &note.source);
let content = process_markdown(&content, slur_regex, &url_blocklist, context).await?;
let content = process_markdown(&content, &slur_regex, &url_blocklist, context).await?;
let content = markdown_rewrite_remote_links(content, context).await;
let form = PrivateMessageInsertForm {

View file

@ -6,7 +6,6 @@ use crate::{
community_moderators::ApubCommunityModerators,
community_outbox::ApubCommunityOutbox,
},
local_site_data_cached,
objects::community::ApubCommunity,
protocol::{
objects::{Endpoints, LanguageTag},
@ -26,7 +25,7 @@ use activitypub_federation::{
},
};
use chrono::{DateTime, Utc};
use lemmy_api_common::{context::LemmyContext, utils::local_site_opt_to_slur_regex};
use lemmy_api_common::{context::LemmyContext, utils::slur_regex};
use lemmy_utils::{
error::LemmyResult,
utils::slurs::{check_slurs, check_slurs_opt},
@ -89,12 +88,11 @@ impl Group {
check_apub_id_valid_with_strictness(self.id.inner(), true, context).await?;
verify_domains_match(expected_domain, self.id.inner())?;
let local_site_data = local_site_data_cached(&mut context.pool()).await?;
let slur_regex = &local_site_opt_to_slur_regex(&local_site_data.local_site);
let slur_regex = slur_regex(context).await?;
check_slurs(&self.preferred_username, slur_regex)?;
check_slurs_opt(&self.name, slur_regex)?;
check_slurs_opt(&self.summary, slur_regex)?;
check_slurs(&self.preferred_username, &slur_regex)?;
check_slurs_opt(&self.name, &slur_regex)?;
check_slurs_opt(&self.summary, &slur_regex)?;
Ok(())
}
}

View file

@ -1,45 +1,25 @@
use crate::error::{LemmyErrorExt, LemmyErrorType, LemmyResult};
use regex::{Regex, RegexBuilder};
use regex::Regex;
pub fn remove_slurs(test: &str, slur_regex: &Option<LemmyResult<Regex>>) -> String {
if let Some(Ok(slur_regex)) = slur_regex {
slur_regex.replace_all(test, "*removed*").to_string()
} else {
test.to_string()
}
/// Returns a copy of `test` in which every match of `slur_regex` has been
/// replaced with the literal text `*removed*`.
pub fn remove_slurs(test: &str, slur_regex: &Regex) -> String {
  // `replace_all` yields a `Cow<str>`; convert it into an owned `String` for the caller.
  let censored = slur_regex.replace_all(test, "*removed*");
  censored.into_owned()
}
pub(crate) fn slur_check<'a>(
test: &'a str,
slur_regex: &'a Option<LemmyResult<Regex>>,
) -> Result<(), Vec<&'a str>> {
if let Some(Ok(slur_regex)) = slur_regex {
let mut matches: Vec<&str> = slur_regex.find_iter(test).map(|mat| mat.as_str()).collect();
pub(crate) fn slur_check<'a>(test: &'a str, slur_regex: &'a Regex) -> Result<(), Vec<&'a str>> {
let mut matches: Vec<&str> = slur_regex.find_iter(test).map(|mat| mat.as_str()).collect();
// Unique
matches.sort_unstable();
matches.dedup();
// Unique
matches.sort_unstable();
matches.dedup();
if matches.is_empty() {
Ok(())
} else {
Err(matches)
}
} else {
if matches.is_empty() {
Ok(())
} else {
Err(matches)
}
}
/// Builds a case-insensitive regex from an optional pattern string.
///
/// Returns `None` when `regex_str` is `None`. Otherwise returns `Some` wrapping the
/// result of the build, passed through `with_lemmy_type(LemmyErrorType::InvalidRegex)`.
pub fn build_slur_regex(regex_str: Option<&str>) -> Option<LemmyResult<Regex>> {
  // No pattern supplied: nothing to build.
  let pattern = regex_str?;
  let compiled = RegexBuilder::new(pattern).case_insensitive(true).build();
  Some(compiled.with_lemmy_type(LemmyErrorType::InvalidRegex))
}
pub fn check_slurs(text: &str, slur_regex: &Option<LemmyResult<Regex>>) -> LemmyResult<()> {
pub fn check_slurs(text: &str, slur_regex: &Regex) -> LemmyResult<()> {
if let Err(slurs) = slur_check(text, slur_regex) {
Err(anyhow::anyhow!("{}", slurs_vec_to_str(&slurs))).with_lemmy_type(LemmyErrorType::Slurs)
} else {
@ -47,10 +27,7 @@ pub fn check_slurs(text: &str, slur_regex: &Option<LemmyResult<Regex>>) -> Lemmy
}
}
pub fn check_slurs_opt(
text: &Option<String>,
slur_regex: &Option<LemmyResult<Regex>>,
) -> LemmyResult<()> {
pub fn check_slurs_opt(text: &Option<String>, slur_regex: &Regex) -> LemmyResult<()> {
match text {
Some(t) => check_slurs(t, slur_regex),
None => Ok(()),
@ -67,7 +44,7 @@ pub(crate) fn slurs_vec_to_str(slurs: &[&str]) -> String {
mod test {
use crate::{
error::{LemmyErrorExt, LemmyErrorType, LemmyResult},
error::LemmyResult,
utils::slurs::{remove_slurs, slur_check, slurs_vec_to_str},
};
use pretty_assertions::assert_eq;
@ -75,7 +52,7 @@ mod test {
#[test]
fn test_slur_filter() -> LemmyResult<()> {
let slur_regex = Some(RegexBuilder::new(r"(fag(g|got|tard)?\b|cock\s?sucker(s|ing)?|ni((g{2,}|q)+|[gq]{2,})[e3r]+(s|z)?|mudslime?s?|kikes?|\bspi(c|k)s?\b|\bchinks?|gooks?|bitch(es|ing|y)?|whor(es?|ing)|\btr(a|@)nn?(y|ies?)|\b(b|re|r)tard(ed)?s?)").case_insensitive(true).build().with_lemmy_type(LemmyErrorType::InvalidRegex));
let slur_regex = RegexBuilder::new(r"(fag(g|got|tard)?\b|cock\s?sucker(s|ing)?|ni((g{2,}|q)+|[gq]{2,})[e3r]+(s|z)?|mudslime?s?|kikes?|\bspi(c|k)s?\b|\bchinks?|gooks?|bitch(es|ing|y)?|whor(es?|ing)|\btr(a|@)nn?(y|ies?)|\b(b|re|r)tard(ed)?s?)").case_insensitive(true).build()?;
let test =
"faggot test kike tranny cocksucker retardeds. Capitalized Niggerz. This is a bunch of other safe text.";
let slur_free = "No slurs here";

View file

@ -221,31 +221,34 @@ fn min_length_check(item: &str, min_length: usize, min_msg: LemmyErrorType) -> L
}
/// Attempts to build a regex and check it for common errors before inserting into the DB.
pub fn build_and_check_regex(regex_str_opt: &Option<&str>) -> Option<LemmyResult<Regex>> {
pub fn build_and_check_regex(regex_str_opt: Option<&str>) -> LemmyResult<Regex> {
// Placeholder regex which doesn't match anything
// https://stackoverflow.com/a/940840
let match_nothing = RegexBuilder::new("a^")
.build()
.with_lemmy_type(LemmyErrorType::InvalidRegex);
if let Some(regex) = regex_str_opt {
if regex.is_empty() {
None
match_nothing
} else {
Some(
RegexBuilder::new(regex)
.case_insensitive(true)
.build()
.with_lemmy_type(LemmyErrorType::InvalidRegex)
.and_then(|regex| {
// NOTE: It is difficult to know, in the universe of user-crafted regex, which ones
// may match against any string text. To keep it simple, we'll match the regex
// against an innocuous string - a single number - which should help catch a regex
// that accidentally matches against all strings.
if regex.is_match("1") {
Err(LemmyErrorType::PermissiveRegex.into())
} else {
Ok(regex)
}
}),
)
RegexBuilder::new(regex)
.case_insensitive(true)
.build()
.with_lemmy_type(LemmyErrorType::InvalidRegex)
.and_then(|regex| {
// NOTE: It is difficult to know, in the universe of user-crafted regex, which ones
// may match against any string text. To keep it simple, we'll match the regex
// against an innocuous string - a single number - which should help catch a regex
// that accidentally matches against all strings.
if regex.is_match("1") {
Err(LemmyErrorType::PermissiveRegex.into())
} else {
Ok(regex)
}
})
}
} else {
None
match_nothing
}
}
@ -566,46 +569,39 @@ Line3",
}
#[test]
fn test_valid_slur_regex() {
fn test_valid_slur_regex() -> LemmyResult<()> {
let valid_regex = Some("(foo|bar)");
let result = build_and_check_regex(&valid_regex);
assert!(
result.is_some_and(|x| x.is_ok()),
"Testing regex: {:?}",
valid_regex
);
}
build_and_check_regex(valid_regex)?;
#[test]
fn test_missing_slur_regex() {
let missing_regex = None;
let result = build_and_check_regex(&missing_regex);
assert!(result.is_none());
}
let match_none = build_and_check_regex(missing_regex)?;
assert!(!match_none.is_match(""));
assert!(!match_none.is_match("a"));
#[test]
fn test_empty_slur_regex() {
let empty = Some("");
let result = build_and_check_regex(&empty);
assert!(result.is_none());
let match_none = build_and_check_regex(empty)?;
assert!(!match_none.is_match(""));
assert!(!match_none.is_match("a"));
Ok(())
}
#[test]
fn test_too_permissive_slur_regex() {
let match_everything_regexes = [
(&Some("["), LemmyErrorType::InvalidRegex),
(&Some("(foo|bar|)"), LemmyErrorType::PermissiveRegex),
(&Some(".*"), LemmyErrorType::PermissiveRegex),
(Some("["), LemmyErrorType::InvalidRegex),
(Some("(foo|bar|)"), LemmyErrorType::PermissiveRegex),
(Some(".*"), LemmyErrorType::PermissiveRegex),
];
match_everything_regexes
.iter()
.into_iter()
.for_each(|(regex_str, expected_err)| {
let result = build_and_check_regex(regex_str);
assert!(result.as_ref().is_some_and(Result::is_err));
assert!(result.is_err());
assert!(
result.is_some_and(|x| x.is_err_and(|e| e.error_type.eq(&expected_err.clone()))),
result.is_err_and(|e| e.error_type.eq(&expected_err.clone())),
"Testing regex {:?}, expected error {}",
regex_str,
expected_err