From 328d48ef7e611cb0e9aae0d34c5abcae3537f54f Mon Sep 17 00:00:00 2001 From: Elara Date: Tue, 30 Jan 2024 06:55:45 -0800 Subject: [PATCH] Remove invalid XML characters from RSS feeds (#4416) * Remove all characters that are disallowed by XML * Combine contiguous unicode ranges into one range --- crates/routes/src/feeds.rs | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/crates/routes/src/feeds.rs b/crates/routes/src/feeds.rs index cd9f2d1b0..9695401df 100644 --- a/crates/routes/src/feeds.rs +++ b/crates/routes/src/feeds.rs @@ -92,6 +92,23 @@ static RSS_NAMESPACE: Lazy> = Lazy::new(|| { h }); +/// Removes any characters disallowed by the XML grammar. +/// See https://www.w3.org/TR/xml/#NT-Char for details. +fn sanitize_xml(input: String) -> String { + input + .chars() + .filter(|&c| { + matches!(c, + '\u{09}' + | '\u{0A}' + | '\u{0D}' + | '\u{20}'..='\u{D7FF}' + | '\u{E000}'..='\u{FFFD}' + | '\u{10000}'..='\u{10FFFF}') + }) + .collect() +} + #[tracing::instrument(skip_all)] async fn get_all_feed( info: web::Query, @@ -256,10 +273,9 @@ async fn get_feed_user( .await?; let items = create_post_items(posts, &context.settings().get_protocol_and_hostname())?; - let channel = Channel { namespaces: RSS_NAMESPACE.clone(), - title: format!("{} - {}", site_view.site.name, person.name), + title: format!("{} - {}", sanitize_xml(site_view.site.name), person.name), link: person.actor_id.to_string(), items, ..Default::default() @@ -298,7 +314,7 @@ async fn get_feed_community( let mut channel = Channel { namespaces: RSS_NAMESPACE.clone(), - title: format!("{} - {}", site_view.site.name, community.name), + title: format!("{} - {}", sanitize_xml(site_view.site.name), community.name), link: community.actor_id.to_string(), items, ..Default::default() @@ -337,10 +353,9 @@ async fn get_feed_front( let protocol_and_hostname = context.settings().get_protocol_and_hostname(); let items = create_post_items(posts, &protocol_and_hostname)?; - let mut channel = Channel { namespaces: RSS_NAMESPACE.clone(), - title: format!("{} - Subscribed", site_view.site.name), + title: format!("{} - Subscribed", sanitize_xml(site_view.site.name)), link: protocol_and_hostname, items, ..Default::default() @@ -391,7 +406,7 @@ async fn get_feed_inbox(context: &LemmyContext, jwt: &str) -> Result