2022-10-25 23:15:29 +00:00
use std ::collections ::{ HashMap , HashSet } ;
2022-10-06 21:16:52 +00:00
use std ::iter ::FromIterator ;
2021-04-09 00:22:17 +00:00
use ammonia ::Builder ;
2023-02-12 21:38:22 +00:00
pub use ammonia ::{ clean_text as escape_html } ;
2022-11-29 16:46:00 +00:00
pub fn clean_html (
unsafe_html : & str ,
allowed_classes : Vec < ( & 'static str , Vec < & 'static str > ) > ,
) -> String {
let mut builder = Builder ::default ( ) ;
for ( tag , classes ) in allowed_classes . iter ( ) {
builder . add_allowed_classes ( tag , classes ) ;
} ;
let safe_html = builder
2022-10-12 23:21:54 +00:00
// Remove src from external images to prevent tracking
. set_tag_attribute_value ( " img " , " src " , " " )
2022-10-09 13:04:34 +00:00
// Always add rel="noopener"
. link_rel ( Some ( " noopener " ) )
2022-02-08 21:33:05 +00:00
. clean ( unsafe_html )
. to_string ( ) ;
safe_html
}
2022-10-06 21:16:52 +00:00
pub fn clean_html_strict (
unsafe_html : & str ,
allowed_tags : & [ & str ] ,
2022-10-25 23:15:29 +00:00
allowed_classes : Vec < ( & 'static str , Vec < & 'static str > ) > ,
2022-10-06 21:16:52 +00:00
) -> String {
let allowed_tags =
HashSet ::from_iter ( allowed_tags . iter ( ) . copied ( ) ) ;
2022-10-25 23:15:29 +00:00
let mut allowed_classes_map = HashMap ::new ( ) ;
for ( tag , classes ) in allowed_classes {
allowed_classes_map . insert (
tag ,
HashSet ::from_iter ( classes . into_iter ( ) ) ,
) ;
} ;
2021-04-09 00:22:17 +00:00
let safe_html = Builder ::default ( )
. tags ( allowed_tags )
2022-10-25 23:15:29 +00:00
. allowed_classes ( allowed_classes_map )
2022-10-09 13:04:34 +00:00
. link_rel ( Some ( " noopener " ) )
2021-04-09 00:22:17 +00:00
. clean ( unsafe_html )
. to_string ( ) ;
safe_html
}
2022-04-13 17:45:47 +00:00
/// Sanitizes `html` with an empty ammonia builder (`Builder::empty()`)
/// and returns the result as an owned `String`.
///
/// The test in this module shows the effect on a sample document: tags
/// are removed, their text content is kept, and the contents of
/// `<script>` are dropped.
pub fn clean_html_all(html: &str) -> String {
    let stripper = Builder::empty();
    let document = stripper.clean(html);
    document.to_string()
}
2021-04-09 00:22:17 +00:00
#[cfg(test)]
mod tests {
    use super::*;

    /// Allowed classes survive, `rel` is rewritten to `noopener`, and the
    /// image loses both its remote `src` and its non-allowed class.
    #[test]
    fn test_clean_html() {
        let unsafe_html = concat!(
            r#"<p><span class="h-card"><a href="https://example.com/user" class="u-url mention" rel="ugc">@<span>user</span></a></span> test</p>"#,
            r#"<p><img src="https://example.com/image.png" class="picture"></p>"#,
        );
        let expected_safe_html = concat!(
            r#"<p><span class="h-card"><a href="https://example.com/user" class="u-url mention" rel="noopener">@<span>user</span></a></span> test</p>"#,
            r#"<p><img src=""></p>"#,
        );
        let safe_html = clean_html(
            unsafe_html,
            vec![
                ("a", vec!["mention", "u-url"]),
                ("span", vec!["h-card"]),
            ],
        );
        assert_eq!(safe_html, expected_safe_html);
    }

    /// Tags outside the allow-list (`b`, `script`) are stripped, the
    /// `target` attribute is removed, and allowed classes are preserved.
    #[test]
    fn test_clean_html_strict() {
        let unsafe_html = r#"<p><span class="h-card"><a href="https://example.com/user" class="u-url mention" rel="ugc">@<span>user</span></a></span> test <b>bold</b><script>dangerous</script> with <a href="https://example.com" target="_blank" rel="noopener">link</a> and <code>code</code></p>"#;
        let safe_html = clean_html_strict(
            unsafe_html,
            &["a", "br", "code", "p", "span"],
            vec![
                ("a", vec!["mention", "u-url"]),
                ("span", vec!["h-card"]),
            ],
        );
        assert_eq!(safe_html, r#"<p><span class="h-card"><a href="https://example.com/user" class="u-url mention" rel="noopener">@<span>user</span></a></span> test bold with <a href="https://example.com" rel="noopener">link</a> and <code>code</code></p>"#);
    }

    /// All markup is removed; only the text content remains and the
    /// script body is dropped.
    #[test]
    fn test_clean_html_all() {
        let html = r#"<p>test <b>bold</b><script>dangerous</script> with <a href="https://example.com">link</a> and <code>code</code></p>"#;
        let text = clean_html_all(html);
        assert_eq!(text, "test bold with link and code");
    }
}