From acad0f260bbe5e567fddc621a5ee0fcda48b643d Mon Sep 17 00:00:00 2001 From: silverpill Date: Tue, 25 Oct 2022 23:15:29 +0000 Subject: [PATCH] Perform post content cleaning at the end of validation process --- src/mastodon_api/statuses/views.rs | 3 ++- src/models/posts/validators.rs | 17 +++++++++++++++-- src/models/profiles/validators.rs | 4 ++-- src/utils/html.rs | 24 ++++++++++++++++++++---- 4 files changed, 39 insertions(+), 9 deletions(-) diff --git a/src/mastodon_api/statuses/views.rs b/src/mastodon_api/statuses/views.rs index 05b0bfd..008290f 100644 --- a/src/mastodon_api/statuses/views.rs +++ b/src/mastodon_api/statuses/views.rs @@ -58,7 +58,6 @@ async fn create_status( let current_user = get_current_user(db_client, auth.token()).await?; let instance = config.instance(); let mut post_data = PostCreateData::try_from(status_data.into_inner())?; - post_data.clean()?; // Mentions let mention_map = find_mentioned_profiles( db_client, @@ -184,6 +183,8 @@ async fn create_status( // Remove duplicate mentions post_data.mentions.sort(); post_data.mentions.dedup(); + // Clean content + post_data.clean()?; // Create post let mut post = create_post(db_client, &current_user.id, post_data).await?; post.in_reply_to = maybe_in_reply_to.map(|mut in_reply_to| { diff --git a/src/models/posts/validators.rs b/src/models/posts/validators.rs index acd795e..a98be39 100644 --- a/src/models/posts/validators.rs +++ b/src/models/posts/validators.rs @@ -2,7 +2,7 @@ use crate::errors::ValidationError; use crate::utils::html::clean_html_strict; pub const CONTENT_MAX_SIZE: usize = 100000; -const CONTENT_ALLOWED_TAGS: [&str; 7] = [ +const CONTENT_ALLOWED_TAGS: [&str; 8] = [ "a", "br", "pre", @@ -10,8 +10,17 @@ const CONTENT_ALLOWED_TAGS: [&str; 7] = [ "strong", "em", "p", + "span", ]; +fn content_allowed_classes() -> Vec<(&'static str, Vec<&'static str>)> { + vec![ + ("a", vec!["hashtag", "mention", "u-url"]), + ("span", vec!["h-card"]), + ("p", vec!["inline-quote"]), + ] +} + pub fn 
clean_content( content: &str, ) -> Result { @@ -20,7 +29,11 @@ pub fn clean_content( if content.len() > CONTENT_MAX_SIZE { return Err(ValidationError("post is too long")); }; - let content_safe = clean_html_strict(content, &CONTENT_ALLOWED_TAGS); + let content_safe = clean_html_strict( + content, + &CONTENT_ALLOWED_TAGS, + content_allowed_classes(), + ); let content_trimmed = content_safe.trim(); if content_trimmed.is_empty() { return Err(ValidationError("post can not be empty")); diff --git a/src/models/profiles/validators.rs b/src/models/profiles/validators.rs index 5e5ce3d..8190687 100644 --- a/src/models/profiles/validators.rs +++ b/src/models/profiles/validators.rs @@ -43,7 +43,7 @@ pub fn clean_bio(bio: &str, is_remote: bool) -> Result if bio.chars().count() > BIO_MAX_LENGTH { return Err(ValidationError("bio is too long")); }; - clean_html_strict(bio, &BIO_ALLOWED_TAGS) + clean_html_strict(bio, &BIO_ALLOWED_TAGS, vec![]) }; Ok(cleaned_bio) } @@ -55,7 +55,7 @@ pub fn clean_extra_fields(extra_fields: &[ExtraField]) let mut cleaned_extra_fields = vec![]; for mut field in extra_fields.iter().cloned() { field.name = field.name.trim().to_string(); - field.value = clean_html_strict(&field.value, &BIO_ALLOWED_TAGS); + field.value = clean_html_strict(&field.value, &BIO_ALLOWED_TAGS, vec![]); if field.name.is_empty() { continue; }; diff --git a/src/utils/html.rs b/src/utils/html.rs index 92924e0..14c9e50 100644 --- a/src/utils/html.rs +++ b/src/utils/html.rs @@ -1,4 +1,4 @@ -use std::collections::HashSet; +use std::collections::{HashMap, HashSet}; use std::iter::FromIterator; use ammonia::Builder; @@ -18,11 +18,20 @@ pub fn clean_html(unsafe_html: &str) -> String { pub fn clean_html_strict( unsafe_html: &str, allowed_tags: &[&str], + allowed_classes: Vec<(&'static str, Vec<&'static str>)>, ) -> String { let allowed_tags = HashSet::from_iter(allowed_tags.iter().copied()); + let mut allowed_classes_map = HashMap::new(); + for (tag, classes) in allowed_classes { + 
allowed_classes_map.insert( + tag, + HashSet::from_iter(classes.into_iter()), + ); + }; let safe_html = Builder::default() .tags(allowed_tags) + .allowed_classes(allowed_classes_map) .link_rel(Some("noopener")) .clean(unsafe_html) .to_string(); @@ -56,9 +65,16 @@ mod tests { #[test] fn test_clean_html_strict() { - let unsafe_html = r#"

test bold with link and code

"#; - let safe_html = clean_html_strict(unsafe_html, &["a", "br", "code"]); - assert_eq!(safe_html, r#"test bold with link and code"#); + let unsafe_html = r#"

@user test bold with link and code

"#; + let safe_html = clean_html_strict( + unsafe_html, + &["a", "br", "code", "p", "span"], + vec![ + ("a", vec!["mention", "u-url"]), + ("span", vec!["h-card"]), + ], + ); + assert_eq!(safe_html, r#"

@user test bold with link and code

"#); } #[test]