2022-10-14 17:52:05 +00:00
|
|
|
use std::collections::HashMap;
|
|
|
|
|
2022-12-18 00:38:29 +00:00
|
|
|
use regex::{Captures, Match, Regex};
|
2022-10-14 17:52:05 +00:00
|
|
|
|
2023-03-22 22:45:43 +00:00
|
|
|
use crate::activitypub::fetcher::helpers::get_post_by_object_id;
|
2023-01-17 23:14:18 +00:00
|
|
|
use crate::database::{DatabaseClient, DatabaseError};
|
2023-03-23 18:35:05 +00:00
|
|
|
use crate::models::posts::types::{Post, Visibility};
|
2022-10-14 17:52:05 +00:00
|
|
|
|
2022-10-28 15:29:48 +00:00
|
|
|
// MediaWiki-like syntax: [[url|text]]
|
2022-10-26 00:36:07 +00:00
|
|
|
// Pattern for object links written as `[[url]]` or `[[url|text]]`.
// Group `url`: one or more characters that are neither whitespace nor `|`.
// Group `text` (optional): follows a `|` and is matched lazily,
// so it stops at the first `]]` after it.
const OBJECT_LINK_SEARCH_RE: &str = r"(?m)\[\[(?P<url>[^\s\|]+)(\|(?P<text>.+?))?\]\]";
|
2022-10-14 17:52:05 +00:00
|
|
|
|
2022-12-18 00:38:29 +00:00
|
|
|
pub fn is_inside_code_block(match_: &Match, text: &str) -> bool {
|
2022-10-15 23:43:27 +00:00
|
|
|
// TODO: remove workaround.
|
|
|
|
// Perform replacement only inside text nodes during markdown parsing
|
2022-12-18 00:38:29 +00:00
|
|
|
let text_before = &text[0..match_.start()];
|
2022-10-15 23:43:27 +00:00
|
|
|
let code_open = text_before.matches("<code>").count();
|
|
|
|
let code_closed = text_before.matches("</code>").count();
|
|
|
|
code_open > code_closed
|
|
|
|
}
|
|
|
|
|
2022-10-14 17:52:05 +00:00
|
|
|
/// Finds everything that looks like an object link
|
|
|
|
fn find_object_links(text: &str) -> Vec<String> {
|
|
|
|
let link_re = Regex::new(OBJECT_LINK_SEARCH_RE).unwrap();
|
|
|
|
let mut links = vec![];
|
|
|
|
for caps in link_re.captures_iter(text) {
|
2022-12-18 00:38:29 +00:00
|
|
|
let url_match = caps.name("url").expect("should have url group");
|
|
|
|
if is_inside_code_block(&url_match, text) {
|
|
|
|
// Ignore links inside code blocks
|
2022-10-15 23:43:27 +00:00
|
|
|
continue;
|
|
|
|
};
|
2022-12-18 00:38:29 +00:00
|
|
|
let url = caps["url"].to_string();
|
2022-10-14 17:52:05 +00:00
|
|
|
if !links.contains(&url) {
|
|
|
|
links.push(url);
|
|
|
|
};
|
|
|
|
};
|
|
|
|
links
|
|
|
|
}
|
|
|
|
|
|
|
|
pub async fn find_linked_posts(
|
2023-01-17 23:14:18 +00:00
|
|
|
db_client: &impl DatabaseClient,
|
2022-10-14 17:52:05 +00:00
|
|
|
instance_url: &str,
|
|
|
|
text: &str,
|
|
|
|
) -> Result<HashMap<String, Post>, DatabaseError> {
|
|
|
|
let links = find_object_links(text);
|
|
|
|
let mut link_map: HashMap<String, Post> = HashMap::new();
|
2022-12-19 21:40:28 +00:00
|
|
|
let mut counter = 0;
|
2022-10-14 17:52:05 +00:00
|
|
|
for url in links {
|
2022-12-19 21:40:28 +00:00
|
|
|
if counter > 10 {
|
|
|
|
// Limit the number of queries
|
|
|
|
break;
|
|
|
|
// TODO: single database query
|
|
|
|
};
|
2022-12-19 21:24:19 +00:00
|
|
|
match get_post_by_object_id(
|
2022-10-14 17:52:05 +00:00
|
|
|
db_client,
|
|
|
|
instance_url,
|
|
|
|
&url,
|
2022-12-19 21:24:19 +00:00
|
|
|
).await {
|
|
|
|
Ok(post) => {
|
|
|
|
if post.repost_of_id.is_some() {
|
|
|
|
// Can't reference reposts
|
|
|
|
continue;
|
|
|
|
};
|
|
|
|
if post.visibility != Visibility::Public {
|
|
|
|
// Can't reference non-public posts
|
|
|
|
continue;
|
|
|
|
};
|
|
|
|
link_map.insert(url, post);
|
|
|
|
},
|
|
|
|
// If post doesn't exist in database, link is ignored
|
|
|
|
Err(DatabaseError::NotFound(_)) => continue,
|
|
|
|
Err(other_error) => return Err(other_error),
|
|
|
|
};
|
2022-12-19 21:40:28 +00:00
|
|
|
counter += 1;
|
2022-10-14 17:52:05 +00:00
|
|
|
};
|
|
|
|
Ok(link_map)
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn replace_object_links(
|
|
|
|
link_map: &HashMap<String, Post>,
|
|
|
|
text: &str,
|
|
|
|
) -> String {
|
|
|
|
let mention_re = Regex::new(OBJECT_LINK_SEARCH_RE).unwrap();
|
|
|
|
let result = mention_re.replace_all(text, |caps: &Captures| {
|
2022-12-18 00:38:29 +00:00
|
|
|
let url_match = caps.name("url").expect("should have url group");
|
|
|
|
if is_inside_code_block(&url_match, text) {
|
|
|
|
// Don't replace inside code blocks
|
|
|
|
return caps[0].to_string();
|
|
|
|
};
|
2022-10-14 17:52:05 +00:00
|
|
|
let url = caps["url"].to_string();
|
2022-10-26 00:36:07 +00:00
|
|
|
let link_text = caps.name("text")
|
|
|
|
.map(|match_| match_.as_str())
|
|
|
|
.unwrap_or(&url)
|
|
|
|
.to_string();
|
2022-12-18 00:38:29 +00:00
|
|
|
if link_map.contains_key(&url) {
|
2022-10-26 00:36:07 +00:00
|
|
|
return format!(r#"<a href="{0}">{1}</a>"#, url, link_text);
|
2022-10-14 17:52:05 +00:00
|
|
|
};
|
|
|
|
// Leave unchanged if post does not exist
|
|
|
|
caps[0].to_string()
|
|
|
|
});
|
|
|
|
result.to_string()
|
|
|
|
}
|
|
|
|
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    // Fixture: the same URL linked twice (bare and with custom text)
    // plus a second URL wrapped in parentheses.
    const TEXT_WITH_OBJECT_LINKS: &str = concat!(
        "test [[https://example.org/1]] link ",
        "test link with [[https://example.org/1|text]] ",
        "test ([[https://example.org/2]])",
    );

    #[test]
    fn test_is_inside_code_block() {
        let html = "abc<code>&&</code>xyz";
        let found = Regex::new("&&").unwrap().find(html).unwrap();
        assert_eq!(found.start(), 9);
        assert!(is_inside_code_block(&found, html));
    }

    #[test]
    fn test_find_object_links() {
        // Duplicates collapse and first-appearance order is kept
        let expected = vec![
            "https://example.org/1",
            "https://example.org/2",
        ];
        assert_eq!(find_object_links(TEXT_WITH_OBJECT_LINKS), expected);
    }

    #[test]
    fn test_replace_object_links() {
        let mut link_map = HashMap::new();
        for url in ["https://example.org/1", "https://example.org/2"] {
            link_map.insert(url.to_string(), Post::default());
        }
        let expected_result = concat!(
            r#"test <a href="https://example.org/1">https://example.org/1</a> link "#,
            r#"test link with <a href="https://example.org/1">text</a> "#,
            r#"test (<a href="https://example.org/2">https://example.org/2</a>)"#,
        );
        let output = replace_object_links(&link_map, TEXT_WITH_OBJECT_LINKS);
        assert_eq!(output, expected_result);
    }
}
|