From b77ab870d47e8a9807d038bb3c2278793778d1b4 Mon Sep 17 00:00:00 2001 From: SleeplessOne1917 <28871516+SleeplessOne1917@users.noreply.github.com> Date: Tue, 10 Jun 2025 13:42:37 +0000 Subject: [PATCH] Add permitted display characters check for post titles (#5692) * Add permitted display characters check for post titles * Remove accidental formatting change * Use invisible-chars crate for validation * Adding invisible chars library (#5759) --------- Co-authored-by: Dessalines Co-authored-by: Dessalines --- .gitignore | 2 +- Cargo.lock | 12 +++++ crates/utils/Cargo.toml | 1 + crates/utils/src/utils/validation.rs | 66 +++------------------------- 4 files changed, 21 insertions(+), 60 deletions(-) diff --git a/.gitignore b/.gitignore index bed8f7d24..2545d1714 100644 --- a/.gitignore +++ b/.gitignore @@ -5,7 +5,7 @@ ansible/passwords/ # docker build files docker/lemmy_mine.hjson docker/dev/env_deploy.sh -volumes +docker/volumes # ide config .idea diff --git a/Cargo.lock b/Cargo.lock index ebe7183f5..3fd2865e6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2937,6 +2937,17 @@ dependencies = [ "generic-array", ] +[[package]] +name = "invisible-characters" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c68bbf95a074c9961fa4f8c43d172557101239c508d18f74e87e7a41d6ab4ac" +dependencies = [ + "anyhow", + "serde", + "serde_json", +] + [[package]] name = "io-extras" version = "0.18.4" @@ -4004,6 +4015,7 @@ dependencies = [ "futures", "git-version", "http 1.3.1", + "invisible-characters", "itertools 0.14.0", "markdown-it", "markdown-it-block-spoiler", diff --git a/crates/utils/Cargo.toml b/crates/utils/Cargo.toml index e7b449f12..0e488313d 100644 --- a/crates/utils/Cargo.toml +++ b/crates/utils/Cargo.toml @@ -82,6 +82,7 @@ markdown-it-footnote = "0.2.0" moka = { workspace = true, optional = true } git-version = "0.3.9" unicode-segmentation = "1.12.0" +invisible-characters = "0.1.3" [dev-dependencies] pretty_assertions = { workspace = true } diff --git a/crates/utils/src/utils/validation.rs b/crates/utils/src/utils/validation.rs index fee8f80da..8c4575410 100644 --- a/crates/utils/src/utils/validation.rs +++ b/crates/utils/src/utils/validation.rs @@ -1,5 +1,6 @@ use crate::error::{LemmyErrorExt, LemmyErrorType, LemmyResult, MAX_API_PARAM_ELEMENTS}; use clearurls::UrlCleaner; +use invisible_characters::INVISIBLE_CHARS; use itertools::Itertools; use regex::{Regex, RegexBuilder, RegexSet}; use std::sync::LazyLock; @@ -30,62 +31,6 @@ const MIN_LENGTH_BLOCKING_KEYWORD: usize = 3; const MAX_LENGTH_BLOCKING_KEYWORD: usize = 50; const TAG_NAME_MIN_LENGTH: usize = 3; const TAG_NAME_MAX_LENGTH: usize = 100; -//Invisible unicode characters, taken from https://invisible-characters.com/ -const FORBIDDEN_DISPLAY_CHARS: [char; 53] = [ - '\u{0009}', - '\u{00a0}', - '\u{00ad}', - '\u{034f}', - '\u{061c}', - '\u{115f}', - '\u{1160}', - '\u{17b4}', - '\u{17b5}', - '\u{180e}', - '\u{2000}', - '\u{2001}', - '\u{2002}', - '\u{2003}', - '\u{2004}', - '\u{2005}', - '\u{2006}', - '\u{2007}', - '\u{2008}', - '\u{2009}', - '\u{200a}', - '\u{200b}', - '\u{200c}', - '\u{200d}', - '\u{200e}', - '\u{200f}', - '\u{202f}', - '\u{205f}', - '\u{2060}', - '\u{2061}', - '\u{2062}', - '\u{2063}', - '\u{2064}', - '\u{206a}', - '\u{206b}', - '\u{206c}', - '\u{206d}', - '\u{206e}', - '\u{206f}', - '\u{3000}', - '\u{2800}', - '\u{3164}', - '\u{feff}', - '\u{ffa0}', - '\u{1d159}', - '\u{1d173}', - '\u{1d174}', - '\u{1d175}', - '\u{1d176}', - '\u{1d177}', - '\u{1d178}', - '\u{1d179}', - '\u{1d17a}', -]; fn has_newline(name: &str) -> bool { name.contains('\n') @@ -113,7 +58,7 @@ pub fn is_valid_actor_name(name: &str, actor_name_max_length: i32) -> LemmyResul fn has_3_permitted_display_chars(name: &str) -> bool { let mut num_non_fdc: i8 = 0; for c in name.chars() { - if !FORBIDDEN_DISPLAY_CHARS.contains(&c) { + if !INVISIBLE_CHARS.contains(&c) { num_non_fdc += 1; if num_non_fdc >= 3 { break; @@ -130,7 +75,7 @@ fn has_3_permitted_display_chars(name: &str) -> bool { pub fn is_valid_display_name(name: &str, actor_name_max_length: i32) -> LemmyResult<()> { let actor_name_max_length: usize = actor_name_max_length.try_into()?; let check = !name.starts_with('@') - && !name.starts_with(FORBIDDEN_DISPLAY_CHARS) + && !name.starts_with(INVISIBLE_CHARS) && name.chars().count() <= actor_name_max_length && !has_newline(name) && has_3_permitted_display_chars(name); @@ -152,7 +97,8 @@ pub fn is_valid_matrix_id(matrix_id: &str) -> LemmyResult<()> { pub fn is_valid_post_title(title: &str) -> LemmyResult<()> { let length = title.trim().chars().count(); - let check = (3..=200).contains(&length) && !has_newline(title); + let check = + (3..=200).contains(&length) && !has_newline(title) && has_3_permitted_display_chars(title); if !check { Err(LemmyErrorType::InvalidPostTitle.into()) } else { @@ -540,6 +486,8 @@ Line3", .is_ok()); assert!(is_valid_post_title(" POST TITLE 😃😃😃😃😃").is_ok()); assert!(is_valid_post_title("\n \n \n \n ").is_err()); // tabs/spaces/newlines + assert!(is_valid_post_title("\u{206a}").is_err()); // invisible chars + assert!(is_valid_post_title("\u{1f3f3}\u{fe0f}\u{200d}\u{26a7}\u{fe0f}").is_ok()); } #[test]