Remove invalid XML characters from RSS feeds (#4416)

* Remove all characters that are disallowed by XML

* Combine contiguous unicode ranges into one range
This commit is contained in:
Elara 2024-01-30 06:55:45 -08:00 committed by GitHub
parent a09027c4c0
commit 328d48ef7e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -92,6 +92,23 @@ static RSS_NAMESPACE: Lazy<BTreeMap<String, String>> = Lazy::new(|| {
h h
}); });
/// Strips every character that falls outside the XML `Char` production.
///
/// The permitted code points are tab, line feed, carriage return, and the
/// ranges `U+0020..=U+D7FF`, `U+E000..=U+FFFD` and `U+10000..=U+10FFFF`.
/// Everything else is dropped; the relative order of the surviving
/// characters is preserved.
/// See https://www.w3.org/TR/xml/#NT-Char for details.
fn sanitize_xml(input: String) -> String {
  // Predicate answering "may this character appear in an XML document?".
  let is_xml_char = |ch: char| -> bool {
    match ch {
      // The only control characters XML allows.
      '\u{09}' | '\u{0A}' | '\u{0D}' => true,
      // Basic Multilingual Plane up to the start of the surrogate block.
      '\u{20}'..='\u{D7FF}' => true,
      // Rest of the BMP, excluding the U+FFFE / U+FFFF non-characters.
      '\u{E000}'..='\u{FFFD}' => true,
      // All supplementary planes.
      '\u{10000}'..='\u{10FFFF}' => true,
      _ => false,
    }
  };

  // Filter the owned buffer in place rather than collecting into a new one.
  let mut cleaned = input;
  cleaned.retain(is_xml_char);
  cleaned
}
#[tracing::instrument(skip_all)] #[tracing::instrument(skip_all)]
async fn get_all_feed( async fn get_all_feed(
info: web::Query<Params>, info: web::Query<Params>,
@ -256,10 +273,9 @@ async fn get_feed_user(
.await?; .await?;
let items = create_post_items(posts, &context.settings().get_protocol_and_hostname())?; let items = create_post_items(posts, &context.settings().get_protocol_and_hostname())?;
let channel = Channel { let channel = Channel {
namespaces: RSS_NAMESPACE.clone(), namespaces: RSS_NAMESPACE.clone(),
title: format!("{} - {}", site_view.site.name, person.name), title: format!("{} - {}", sanitize_xml(site_view.site.name), person.name),
link: person.actor_id.to_string(), link: person.actor_id.to_string(),
items, items,
..Default::default() ..Default::default()
@ -298,7 +314,7 @@ async fn get_feed_community(
let mut channel = Channel { let mut channel = Channel {
namespaces: RSS_NAMESPACE.clone(), namespaces: RSS_NAMESPACE.clone(),
title: format!("{} - {}", site_view.site.name, community.name), title: format!("{} - {}", sanitize_xml(site_view.site.name), community.name),
link: community.actor_id.to_string(), link: community.actor_id.to_string(),
items, items,
..Default::default() ..Default::default()
@ -337,10 +353,9 @@ async fn get_feed_front(
let protocol_and_hostname = context.settings().get_protocol_and_hostname(); let protocol_and_hostname = context.settings().get_protocol_and_hostname();
let items = create_post_items(posts, &protocol_and_hostname)?; let items = create_post_items(posts, &protocol_and_hostname)?;
let mut channel = Channel { let mut channel = Channel {
namespaces: RSS_NAMESPACE.clone(), namespaces: RSS_NAMESPACE.clone(),
title: format!("{} - Subscribed", site_view.site.name), title: format!("{} - Subscribed", sanitize_xml(site_view.site.name)),
link: protocol_and_hostname, link: protocol_and_hostname,
items, items,
..Default::default() ..Default::default()
@ -391,7 +406,7 @@ async fn get_feed_inbox(context: &LemmyContext, jwt: &str) -> Result<Channel, Le
let mut channel = Channel { let mut channel = Channel {
namespaces: RSS_NAMESPACE.clone(), namespaces: RSS_NAMESPACE.clone(),
title: format!("{} - Inbox", site_view.site.name), title: format!("{} - Inbox", sanitize_xml(site_view.site.name)),
link: format!("{protocol_and_hostname}/inbox"), link: format!("{protocol_and_hostname}/inbox"),
items, items,
..Default::default() ..Default::default()
@ -537,11 +552,11 @@ fn create_post_items(
} }
let i = Item { let i = Item {
title: Some(sanitize_html(&p.post.name)), title: Some(sanitize_html(sanitize_xml(p.post.name).as_str())),
pub_date: Some(p.post.published.to_rfc2822()), pub_date: Some(p.post.published.to_rfc2822()),
comments: Some(post_url.clone()), comments: Some(post_url.clone()),
guid, guid,
description: Some(description), description: Some(sanitize_xml(description)),
dublin_core_ext, dublin_core_ext,
link, link,
extensions, extensions,