Improve mention and hashtag parsers

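Don't rely on whitespace and newlines to delimit mentions and hashtags: HTML tag boundaries (`<` and `>`) are now treated as delimiters too, so the `<br>` special case in the secondary regexes is no longer needed.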
This commit is contained in:
silverpill 2022-10-07 22:38:24 +00:00
parent da9bd480fb
commit 9cff428758
2 changed files with 19 additions and 11 deletions

View file

@ -3,8 +3,8 @@ use regex::{Captures, Regex};
use crate::errors::ValidationError; use crate::errors::ValidationError;
use crate::frontend::get_tag_page_url; use crate::frontend::get_tag_page_url;
const HASHTAG_RE: &str = r"(?m)(?P<before>^|\s|[\(])#(?P<tag>\S+)"; const HASHTAG_RE: &str = r"(?m)(?P<before>^|\s|>|[\(])#(?P<tag>[^\s<]+)";
const HASHTAG_SECONDARY_RE: &str = r"^(?P<tag>[0-9A-Za-z]+)(?P<after>[\.,:?\)]?(<br>)?)$"; const HASHTAG_SECONDARY_RE: &str = r"^(?P<tag>[0-9A-Za-z]+)(?P<after>[\.,:?\)]?)$";
const HASHTAG_NAME_RE: &str = r"^\w+$"; const HASHTAG_NAME_RE: &str = r"^\w+$";
/// Finds anything that looks like a hashtag /// Finds anything that looks like a hashtag
@ -66,7 +66,7 @@ mod tests {
const TEXT_WITH_TAGS: &str = concat!( const TEXT_WITH_TAGS: &str = concat!(
"@user1@server1 some text #TestTag.\n", "@user1@server1 some text #TestTag.\n",
"#TAG1 #tag1 #test_underscore #test*special ", "#TAG1 #tag1 #test_underscore #test*special ",
"more text (#tag2) text #tag3, #tag4:<br>\n", "more text (#tag2) text #tag3, #tag4:<br>",
"end with #tag5", "end with #tag5",
); );
@ -95,7 +95,7 @@ mod tests {
r#"#test_underscore #test*special "#, r#"#test_underscore #test*special "#,
r#"more text (<a class="hashtag" href="https://example.com/tag/tag2">#tag2</a>) text "#, r#"more text (<a class="hashtag" href="https://example.com/tag/tag2">#tag2</a>) text "#,
r#"<a class="hashtag" href="https://example.com/tag/tag3">#tag3</a>, "#, r#"<a class="hashtag" href="https://example.com/tag/tag3">#tag3</a>, "#,
r#"<a class="hashtag" href="https://example.com/tag/tag4">#tag4</a>:<br>"#, "\n", r#"<a class="hashtag" href="https://example.com/tag/tag4">#tag4</a>:<br>"#,
r#"end with <a class="hashtag" href="https://example.com/tag/tag5">#tag5</a>"#, r#"end with <a class="hashtag" href="https://example.com/tag/tag5">#tag5</a>"#,
); );
assert_eq!(output, expected_output); assert_eq!(output, expected_output);

View file

@ -10,8 +10,8 @@ use crate::models::profiles::types::DbActorProfile;
// See also: ACTOR_ADDRESS_RE in activitypub::actors::types // See also: ACTOR_ADDRESS_RE in activitypub::actors::types
const MENTION_RE: &str = r"@?(?P<user>[\w\.-]+)@(?P<instance>.+)"; const MENTION_RE: &str = r"@?(?P<user>[\w\.-]+)@(?P<instance>.+)";
const MENTION_SEARCH_RE: &str = r"(?m)(?P<before>^|\s|[\(])@(?P<user>[\w\.-]+)@(?P<instance>\S+)"; const MENTION_SEARCH_RE: &str = r"(?m)(?P<before>^|\s|>|[\(])@(?P<user>[\w\.-]+)@(?P<instance>[^\s<]+)";
const MENTION_SEARCH_SECONDARY_RE: &str = r"^(?P<instance>[\w\.-]+\w)(?P<after>[\.,:?\)]?(<br>)?)$"; const MENTION_SEARCH_SECONDARY_RE: &str = r"^(?P<instance>[\w\.-]+\w)(?P<after>[\.,:?\)]?)$";
/// Finds everything that looks like a mention /// Finds everything that looks like a mention
fn find_mentions( fn find_mentions(
@ -108,6 +108,7 @@ mod tests {
const INSTANCE_URL: &str = "https://server1.com"; const INSTANCE_URL: &str = "https://server1.com";
const TEXT_WITH_MENTIONS: &str = concat!( const TEXT_WITH_MENTIONS: &str = concat!(
"@user1@server1.com ", "@user1@server1.com ",
"@user_x@server1.com,<br>",
"(@user2@server2.com boosted) ", "(@user2@server2.com boosted) ",
"@user3@server2.com.\n", "@user3@server2.com.\n",
"@@invalid@server2.com ", "@@invalid@server2.com ",
@ -122,6 +123,7 @@ mod tests {
let results = find_mentions(INSTANCE_HOST, TEXT_WITH_MENTIONS); let results = find_mentions(INSTANCE_HOST, TEXT_WITH_MENTIONS);
assert_eq!(results, vec![ assert_eq!(results, vec![
"user1", "user1",
"user_x",
"user2@server2.com", "user2@server2.com",
"user3@server2.com", "user3@server2.com",
]); ]);
@ -129,13 +131,17 @@ mod tests {
#[test] #[test]
fn test_replace_mentions() { fn test_replace_mentions() {
// Local actor // Local actors
let profile_1 = DbActorProfile { let profile_1 = DbActorProfile {
username: "user1".to_string(), username: "user1".to_string(),
..Default::default() ..Default::default()
}; };
// Remote actors
let profile_2 = DbActorProfile { let profile_2 = DbActorProfile {
username: "user_x".to_string(),
..Default::default()
};
// Remote actors
let profile_3 = DbActorProfile {
username: "user2".to_string(), username: "user2".to_string(),
actor_json: Some(Actor { actor_json: Some(Actor {
id: "https://server2.com/actors/user2".to_string(), id: "https://server2.com/actors/user2".to_string(),
@ -144,7 +150,7 @@ mod tests {
}), }),
..Default::default() ..Default::default()
}; };
let profile_3 = DbActorProfile { let profile_4 = DbActorProfile {
username: "user3".to_string(), username: "user3".to_string(),
actor_json: Some(Actor { actor_json: Some(Actor {
id: "https://server2.com/actors/user3".to_string(), id: "https://server2.com/actors/user3".to_string(),
@ -155,8 +161,9 @@ mod tests {
}; };
let mention_map = HashMap::from([ let mention_map = HashMap::from([
("user1".to_string(), profile_1), ("user1".to_string(), profile_1),
("user2@server2.com".to_string(), profile_2), ("user_x".to_string(), profile_2),
("user3@server2.com".to_string(), profile_3), ("user2@server2.com".to_string(), profile_3),
("user3@server2.com".to_string(), profile_4),
]); ]);
let result = replace_mentions( let result = replace_mentions(
&mention_map, &mention_map,
@ -167,6 +174,7 @@ mod tests {
let expected_result = concat!( let expected_result = concat!(
r#"<span class="h-card"><a class="u-url mention" href="https://server1.com/users/user1">@user1</a></span> "#, r#"<span class="h-card"><a class="u-url mention" href="https://server1.com/users/user1">@user1</a></span> "#,
r#"<span class="h-card"><a class="u-url mention" href="https://server1.com/users/user_x">@user_x</a></span>,<br>"#,
r#"(<span class="h-card"><a class="u-url mention" href="https://server2.com/@user2">@user2</a></span> boosted) "#, r#"(<span class="h-card"><a class="u-url mention" href="https://server2.com/@user2">@user2</a></span> boosted) "#,
r#"<span class="h-card"><a class="u-url mention" href="https://server2.com/@user3">@user3</a></span>."#, "\n", r#"<span class="h-card"><a class="u-url mention" href="https://server2.com/@user3">@user3</a></span>."#, "\n",
r#"@@invalid@server2.com @test@server3.com@nospace@server4.com "#, r#"@@invalid@server2.com @test@server3.com@nospace@server4.com "#,