Ignore mentions and hashtags inside code blocks
This commit is contained in:
parent
67b1729621
commit
2b8063990a
3 changed files with 45 additions and 6 deletions
|
@ -2,6 +2,7 @@ use regex::{Captures, Regex};
|
||||||
|
|
||||||
use crate::errors::ValidationError;
|
use crate::errors::ValidationError;
|
||||||
use crate::frontend::get_tag_page_url;
|
use crate::frontend::get_tag_page_url;
|
||||||
|
use super::links::is_inside_code_block;
|
||||||
|
|
||||||
const HASHTAG_RE: &str = r"(?m)(?P<before>^|\s|>|[\(])#(?P<tag>[^\s<]+)";
|
const HASHTAG_RE: &str = r"(?m)(?P<before>^|\s|>|[\(])#(?P<tag>[^\s<]+)";
|
||||||
const HASHTAG_SECONDARY_RE: &str = r"^(?P<tag>[0-9A-Za-z]+)(?P<after>[\.,:?\)]?)$";
|
const HASHTAG_SECONDARY_RE: &str = r"^(?P<tag>[0-9A-Za-z]+)(?P<after>[\.,:?\)]?)$";
|
||||||
|
@ -13,6 +14,11 @@ pub fn find_hashtags(text: &str) -> Vec<String> {
|
||||||
let hashtag_secondary_re = Regex::new(HASHTAG_SECONDARY_RE).unwrap();
|
let hashtag_secondary_re = Regex::new(HASHTAG_SECONDARY_RE).unwrap();
|
||||||
let mut tags = vec![];
|
let mut tags = vec![];
|
||||||
for caps in hashtag_re.captures_iter(text) {
|
for caps in hashtag_re.captures_iter(text) {
|
||||||
|
let tag_match = caps.name("tag").expect("should have tag group");
|
||||||
|
if is_inside_code_block(&tag_match, text) {
|
||||||
|
// Ignore hashtags inside code blocks
|
||||||
|
continue;
|
||||||
|
};
|
||||||
if let Some(secondary_caps) = hashtag_secondary_re.captures(&caps["tag"]) {
|
if let Some(secondary_caps) = hashtag_secondary_re.captures(&caps["tag"]) {
|
||||||
let tag_name = secondary_caps["tag"].to_string().to_lowercase();
|
let tag_name = secondary_caps["tag"].to_string().to_lowercase();
|
||||||
if !tags.contains(&tag_name) {
|
if !tags.contains(&tag_name) {
|
||||||
|
@ -28,6 +34,11 @@ pub fn replace_hashtags(instance_url: &str, text: &str, tags: &[String]) -> Stri
|
||||||
let hashtag_re = Regex::new(HASHTAG_RE).unwrap();
|
let hashtag_re = Regex::new(HASHTAG_RE).unwrap();
|
||||||
let hashtag_secondary_re = Regex::new(HASHTAG_SECONDARY_RE).unwrap();
|
let hashtag_secondary_re = Regex::new(HASHTAG_SECONDARY_RE).unwrap();
|
||||||
let result = hashtag_re.replace_all(text, |caps: &Captures| {
|
let result = hashtag_re.replace_all(text, |caps: &Captures| {
|
||||||
|
let tag_match = caps.name("tag").expect("should have tag group");
|
||||||
|
if is_inside_code_block(&tag_match, text) {
|
||||||
|
// Don't replace hashtags inside code blocks
|
||||||
|
return caps[0].to_string();
|
||||||
|
};
|
||||||
if let Some(secondary_caps) = hashtag_secondary_re.captures(&caps["tag"]) {
|
if let Some(secondary_caps) = hashtag_secondary_re.captures(&caps["tag"]) {
|
||||||
let before = caps["before"].to_string();
|
let before = caps["before"].to_string();
|
||||||
let tag = secondary_caps["tag"].to_string();
|
let tag = secondary_caps["tag"].to_string();
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
|
|
||||||
use regex::{Captures, Regex};
|
use regex::{Captures, Match, Regex};
|
||||||
use tokio_postgres::GenericClient;
|
use tokio_postgres::GenericClient;
|
||||||
|
|
||||||
use crate::database::DatabaseError;
|
use crate::database::DatabaseError;
|
||||||
|
@ -10,10 +10,10 @@ use super::types::Post;
|
||||||
// MediaWiki-like syntax: [[url|text]]
|
// MediaWiki-like syntax: [[url|text]]
|
||||||
const OBJECT_LINK_SEARCH_RE: &str = r"(?m)\[\[(?P<url>[^\s\|]+)(\|(?P<text>.+?))?\]\]";
|
const OBJECT_LINK_SEARCH_RE: &str = r"(?m)\[\[(?P<url>[^\s\|]+)(\|(?P<text>.+?))?\]\]";
|
||||||
|
|
||||||
fn is_inside_code_block(caps: &Captures, text: &str) -> bool {
|
pub fn is_inside_code_block(match_: &Match, text: &str) -> bool {
|
||||||
// TODO: remove workaround.
|
// TODO: remove workaround.
|
||||||
// Perform replacement only inside text nodes during markdown parsing
|
// Perform replacement only inside text nodes during markdown parsing
|
||||||
let text_before = &text[0..caps.name("url").unwrap().start()];
|
let text_before = &text[0..match_.start()];
|
||||||
let code_open = text_before.matches("<code>").count();
|
let code_open = text_before.matches("<code>").count();
|
||||||
let code_closed = text_before.matches("</code>").count();
|
let code_closed = text_before.matches("</code>").count();
|
||||||
code_open > code_closed
|
code_open > code_closed
|
||||||
|
@ -24,10 +24,12 @@ fn find_object_links(text: &str) -> Vec<String> {
|
||||||
let link_re = Regex::new(OBJECT_LINK_SEARCH_RE).unwrap();
|
let link_re = Regex::new(OBJECT_LINK_SEARCH_RE).unwrap();
|
||||||
let mut links = vec![];
|
let mut links = vec![];
|
||||||
for caps in link_re.captures_iter(text) {
|
for caps in link_re.captures_iter(text) {
|
||||||
let url = caps["url"].to_string();
|
let url_match = caps.name("url").expect("should have url group");
|
||||||
if is_inside_code_block(&caps, text) {
|
if is_inside_code_block(&url_match, text) {
|
||||||
|
// Ignore links inside code blocks
|
||||||
continue;
|
continue;
|
||||||
};
|
};
|
||||||
|
let url = caps["url"].to_string();
|
||||||
if !links.contains(&url) {
|
if !links.contains(&url) {
|
||||||
links.push(url);
|
links.push(url);
|
||||||
};
|
};
|
||||||
|
@ -60,12 +62,17 @@ pub fn replace_object_links(
|
||||||
) -> String {
|
) -> String {
|
||||||
let mention_re = Regex::new(OBJECT_LINK_SEARCH_RE).unwrap();
|
let mention_re = Regex::new(OBJECT_LINK_SEARCH_RE).unwrap();
|
||||||
let result = mention_re.replace_all(text, |caps: &Captures| {
|
let result = mention_re.replace_all(text, |caps: &Captures| {
|
||||||
|
let url_match = caps.name("url").expect("should have url group");
|
||||||
|
if is_inside_code_block(&url_match, text) {
|
||||||
|
// Don't replace inside code blocks
|
||||||
|
return caps[0].to_string();
|
||||||
|
};
|
||||||
let url = caps["url"].to_string();
|
let url = caps["url"].to_string();
|
||||||
let link_text = caps.name("text")
|
let link_text = caps.name("text")
|
||||||
.map(|match_| match_.as_str())
|
.map(|match_| match_.as_str())
|
||||||
.unwrap_or(&url)
|
.unwrap_or(&url)
|
||||||
.to_string();
|
.to_string();
|
||||||
if link_map.contains_key(&url) && !is_inside_code_block(caps, text) {
|
if link_map.contains_key(&url) {
|
||||||
return format!(r#"<a href="{0}">{1}</a>"#, url, link_text);
|
return format!(r#"<a href="{0}">{1}</a>"#, url, link_text);
|
||||||
};
|
};
|
||||||
// Leave unchanged if post does not exist
|
// Leave unchanged if post does not exist
|
||||||
|
@ -84,6 +91,16 @@ mod tests {
|
||||||
"test ([[https://example.org/2]])",
|
"test ([[https://example.org/2]])",
|
||||||
);
|
);
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_is_inside_code_block() {
|
||||||
|
let text = "abc<code>&&</code>xyz";
|
||||||
|
let regexp = Regex::new("&&").unwrap();
|
||||||
|
let mat = regexp.find(text).unwrap();
|
||||||
|
assert_eq!(mat.start(), 9);
|
||||||
|
let result = is_inside_code_block(&mat, text);
|
||||||
|
assert_eq!(result, true);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_find_object_links() {
|
fn test_find_object_links() {
|
||||||
let results = find_object_links(TEXT_WITH_OBJECT_LINKS);
|
let results = find_object_links(TEXT_WITH_OBJECT_LINKS);
|
||||||
|
|
|
@ -8,6 +8,7 @@ use crate::database::DatabaseError;
|
||||||
use crate::errors::ValidationError;
|
use crate::errors::ValidationError;
|
||||||
use crate::models::profiles::queries::get_profiles_by_accts;
|
use crate::models::profiles::queries::get_profiles_by_accts;
|
||||||
use crate::models::profiles::types::DbActorProfile;
|
use crate::models::profiles::types::DbActorProfile;
|
||||||
|
use super::links::is_inside_code_block;
|
||||||
|
|
||||||
// See also: ACTOR_ADDRESS_RE in activitypub::actors::types
|
// See also: ACTOR_ADDRESS_RE in activitypub::actors::types
|
||||||
const MENTION_RE: &str = r"@?(?P<username>[\w\.-]+)@(?P<hostname>.+)";
|
const MENTION_RE: &str = r"@?(?P<username>[\w\.-]+)@(?P<hostname>.+)";
|
||||||
|
@ -23,6 +24,11 @@ fn find_mentions(
|
||||||
let mention_secondary_re = Regex::new(MENTION_SEARCH_SECONDARY_RE).unwrap();
|
let mention_secondary_re = Regex::new(MENTION_SEARCH_SECONDARY_RE).unwrap();
|
||||||
let mut mentions = vec![];
|
let mut mentions = vec![];
|
||||||
for caps in mention_re.captures_iter(text) {
|
for caps in mention_re.captures_iter(text) {
|
||||||
|
let mention_match = caps.name("mention").expect("should have mention group");
|
||||||
|
if is_inside_code_block(&mention_match, text) {
|
||||||
|
// No mentions inside code blocks
|
||||||
|
continue;
|
||||||
|
};
|
||||||
if let Some(secondary_caps) = mention_secondary_re.captures(&caps["mention"]) {
|
if let Some(secondary_caps) = mention_secondary_re.captures(&caps["mention"]) {
|
||||||
let username = secondary_caps["username"].to_string();
|
let username = secondary_caps["username"].to_string();
|
||||||
let hostname = secondary_caps.name("hostname")
|
let hostname = secondary_caps.name("hostname")
|
||||||
|
@ -62,6 +68,11 @@ pub fn replace_mentions(
|
||||||
let mention_re = Regex::new(MENTION_SEARCH_RE).unwrap();
|
let mention_re = Regex::new(MENTION_SEARCH_RE).unwrap();
|
||||||
let mention_secondary_re = Regex::new(MENTION_SEARCH_SECONDARY_RE).unwrap();
|
let mention_secondary_re = Regex::new(MENTION_SEARCH_SECONDARY_RE).unwrap();
|
||||||
let result = mention_re.replace_all(text, |caps: &Captures| {
|
let result = mention_re.replace_all(text, |caps: &Captures| {
|
||||||
|
let mention_match = caps.name("mention").expect("should have mention group");
|
||||||
|
if is_inside_code_block(&mention_match, text) {
|
||||||
|
// Don't replace mentions inside code blocks
|
||||||
|
return caps[0].to_string();
|
||||||
|
};
|
||||||
if let Some(secondary_caps) = mention_secondary_re.captures(&caps["mention"]) {
|
if let Some(secondary_caps) = mention_secondary_re.captures(&caps["mention"]) {
|
||||||
let username = secondary_caps["username"].to_string();
|
let username = secondary_caps["username"].to_string();
|
||||||
let hostname = secondary_caps.name("hostname")
|
let hostname = secondary_caps.name("hostname")
|
||||||
|
|
Loading…
Reference in a new issue