From 6fc319f7dd0d84a084a8a4d126d112fb87ae8eed Mon Sep 17 00:00:00 2001 From: silverpill Date: Tue, 8 Feb 2022 21:33:05 +0000 Subject: [PATCH] Validate content of incoming Note objects --- docs/openapi.yaml | 4 +++- src/activitypub/receiver.rs | 14 +++++++++++++- src/models/posts/types.rs | 4 ++-- src/models/profiles/types.rs | 6 +++--- src/utils/html.rs | 19 ++++++++++++++++++- 5 files changed, 39 insertions(+), 8 deletions(-) diff --git a/docs/openapi.yaml b/docs/openapi.yaml index 04f99e8..5a53fd7 100644 --- a/docs/openapi.yaml +++ b/docs/openapi.yaml @@ -337,7 +337,9 @@ paths: type: object properties: status: - description: Text content of the post. + description: | + Text content of the post. + Allowed HTML tags: a, br, pre, code. type: string 'media_ids[]': description: Array of Attachment ids to be attached as media. diff --git a/src/activitypub/receiver.rs b/src/activitypub/receiver.rs index e69a4d4..2388625 100644 --- a/src/activitypub/receiver.rs +++ b/src/activitypub/receiver.rs @@ -37,6 +37,7 @@ use crate::models::relationships::queries::{ unfollow, }; use crate::models::users::queries::get_user_by_name; +use crate::utils::html::clean_html; use super::activity::{Object, Activity, create_activity_accept_follow}; use super::actor::Actor; use super::constants::AP_PUBLIC; @@ -132,6 +133,16 @@ fn get_object_id(object: Value) -> Result { Ok(object_id) } +const CONTENT_MAX_SIZE: usize = 100000; + +fn clean_note_content(content: &str) -> Result { + if content.len() > CONTENT_MAX_SIZE { + return Err(ValidationError("content is too long")); + }; + let content_safe = clean_html(content); + Ok(content_safe) +} + pub async fn process_note( config: &Config, db_client: &mut impl GenericClient, @@ -221,6 +232,7 @@ pub async fn process_note( ).await?; let content = object.content .ok_or(ValidationError("no content"))?; + let content_cleaned = clean_note_content(&content)?; let mut attachments: Vec = Vec::new(); if let Some(list) = object.attachment { let mut downloaded = vec![]; @@ -375,7 +387,7 @@ pub async fn process_note( Visibility::Direct }; let post_data = PostCreateData { - content, + content: content_cleaned, in_reply_to_id, repost_of_id: None, visibility, diff --git a/src/models/posts/types.rs b/src/models/posts/types.rs index 445cbf3..97fab8a 100644 --- a/src/models/posts/types.rs +++ b/src/models/posts/types.rs @@ -10,7 +10,7 @@ use crate::database::int_enum::{int_enum_from_sql, int_enum_to_sql}; use crate::errors::{ConversionError, DatabaseError, ValidationError}; use crate::models::attachments::types::DbMediaAttachment; use crate::models::profiles::types::DbActorProfile; -use crate::utils::html::clean_html; +use crate::utils::html::clean_html_strict; #[derive(Clone, Debug, PartialEq)] pub enum Visibility { @@ -220,7 +220,7 @@ impl PostCreateData { if self.content.chars().count() > character_limit { return Err(ValidationError("post is too long")); }; - let content_safe = clean_html(&self.content); + let content_safe = clean_html_strict(&self.content); let content_trimmed = content_safe.trim(); if content_trimmed.is_empty() { return Err(ValidationError("post can not be empty")); diff --git a/src/models/profiles/types.rs b/src/models/profiles/types.rs index 308ea00..5bbbfba 100644 --- a/src/models/profiles/types.rs +++ b/src/models/profiles/types.rs @@ -11,7 +11,7 @@ use uuid::Uuid; use crate::activitypub::actor::Actor; use crate::activitypub::views::get_actor_url; use crate::errors::ValidationError; -use crate::utils::html::clean_html; +use crate::utils::html::clean_html_strict; use super::validators::{ validate_username, validate_display_name, @@ -173,12 +173,12 @@ pub struct ProfileUpdateData { impl ProfileUpdateData { pub fn clean(&mut self) -> Result<(), ValidationError> { // Validate and clean bio - self.bio = self.bio.as_ref().map(|val| clean_html(val)); + self.bio = self.bio.as_ref().map(|val| clean_html_strict(val)); // Clean extra fields and remove fields with empty labels self.extra_fields = self.extra_fields.iter().cloned() .map(|mut field| { field.name = field.name.trim().to_string(); - field.value = clean_html(&field.value); + field.value = clean_html_strict(&field.value); field }) .filter(|field| !field.name.is_empty()) diff --git a/src/utils/html.rs b/src/utils/html.rs index a1635cc..afd88ab 100644 --- a/src/utils/html.rs +++ b/src/utils/html.rs @@ -3,6 +3,16 @@ use std::collections::HashSet; use ammonia::Builder; pub fn clean_html(unsafe_html: &str) -> String { + let safe_html = Builder::default() + .add_generic_attributes(&["class"]) + .add_tag_attributes("a", &["rel", "target"]) + .link_rel(None) + .clean(unsafe_html) + .to_string(); + safe_html +} + +pub fn clean_html_strict(unsafe_html: &str) -> String { let mut allowed_tags = HashSet::new(); allowed_tags.insert("a"); allowed_tags.insert("br"); @@ -22,8 +32,15 @@ mod tests { #[test] fn test_clean_html() { - let unsafe_html = r#"

test bold with link and code

"#; + let unsafe_html = r#"

@user test

"#; let safe_html = clean_html(unsafe_html); + assert_eq!(safe_html, r#"

@user test

"#); + } + + #[test] + fn test_clean_html_strict() { + let unsafe_html = r#"

test bold with link and code

"#; + let safe_html = clean_html_strict(unsafe_html); assert_eq!(safe_html, r#"test bold with link and code"#); } }