Ignore mentions and hashtags inside code blocks
This commit is contained in:
parent
67b1729621
commit
2b8063990a
3 changed files with 45 additions and 6 deletions
|
@ -2,6 +2,7 @@ use regex::{Captures, Regex};
|
|||
|
||||
use crate::errors::ValidationError;
|
||||
use crate::frontend::get_tag_page_url;
|
||||
use super::links::is_inside_code_block;
|
||||
|
||||
const HASHTAG_RE: &str = r"(?m)(?P<before>^|\s|>|[\(])#(?P<tag>[^\s<]+)";
|
||||
const HASHTAG_SECONDARY_RE: &str = r"^(?P<tag>[0-9A-Za-z]+)(?P<after>[\.,:?\)]?)$";
|
||||
|
@ -13,6 +14,11 @@ pub fn find_hashtags(text: &str) -> Vec<String> {
|
|||
let hashtag_secondary_re = Regex::new(HASHTAG_SECONDARY_RE).unwrap();
|
||||
let mut tags = vec![];
|
||||
for caps in hashtag_re.captures_iter(text) {
|
||||
let tag_match = caps.name("tag").expect("should have tag group");
|
||||
if is_inside_code_block(&tag_match, text) {
|
||||
// Ignore hashtags inside code blocks
|
||||
continue;
|
||||
};
|
||||
if let Some(secondary_caps) = hashtag_secondary_re.captures(&caps["tag"]) {
|
||||
let tag_name = secondary_caps["tag"].to_string().to_lowercase();
|
||||
if !tags.contains(&tag_name) {
|
||||
|
@ -28,6 +34,11 @@ pub fn replace_hashtags(instance_url: &str, text: &str, tags: &[String]) -> Stri
|
|||
let hashtag_re = Regex::new(HASHTAG_RE).unwrap();
|
||||
let hashtag_secondary_re = Regex::new(HASHTAG_SECONDARY_RE).unwrap();
|
||||
let result = hashtag_re.replace_all(text, |caps: &Captures| {
|
||||
let tag_match = caps.name("tag").expect("should have tag group");
|
||||
if is_inside_code_block(&tag_match, text) {
|
||||
// Don't replace hashtags inside code blocks
|
||||
return caps[0].to_string();
|
||||
};
|
||||
if let Some(secondary_caps) = hashtag_secondary_re.captures(&caps["tag"]) {
|
||||
let before = caps["before"].to_string();
|
||||
let tag = secondary_caps["tag"].to_string();
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
use std::collections::HashMap;
|
||||
|
||||
use regex::{Captures, Regex};
|
||||
use regex::{Captures, Match, Regex};
|
||||
use tokio_postgres::GenericClient;
|
||||
|
||||
use crate::database::DatabaseError;
|
||||
|
@ -10,10 +10,10 @@ use super::types::Post;
|
|||
// MediaWiki-like syntax: [[url|text]]
|
||||
const OBJECT_LINK_SEARCH_RE: &str = r"(?m)\[\[(?P<url>[^\s\|]+)(\|(?P<text>.+?))?\]\]";
|
||||
|
||||
fn is_inside_code_block(caps: &Captures, text: &str) -> bool {
|
||||
pub fn is_inside_code_block(match_: &Match, text: &str) -> bool {
|
||||
// TODO: remove workaround.
|
||||
// Perform replacement only inside text nodes during markdown parsing
|
||||
let text_before = &text[0..caps.name("url").unwrap().start()];
|
||||
let text_before = &text[0..match_.start()];
|
||||
let code_open = text_before.matches("<code>").count();
|
||||
let code_closed = text_before.matches("</code>").count();
|
||||
code_open > code_closed
|
||||
|
@ -24,10 +24,12 @@ fn find_object_links(text: &str) -> Vec<String> {
|
|||
let link_re = Regex::new(OBJECT_LINK_SEARCH_RE).unwrap();
|
||||
let mut links = vec![];
|
||||
for caps in link_re.captures_iter(text) {
|
||||
let url = caps["url"].to_string();
|
||||
if is_inside_code_block(&caps, text) {
|
||||
let url_match = caps.name("url").expect("should have url group");
|
||||
if is_inside_code_block(&url_match, text) {
|
||||
// Ignore links inside code blocks
|
||||
continue;
|
||||
};
|
||||
let url = caps["url"].to_string();
|
||||
if !links.contains(&url) {
|
||||
links.push(url);
|
||||
};
|
||||
|
@ -60,12 +62,17 @@ pub fn replace_object_links(
|
|||
) -> String {
|
||||
let mention_re = Regex::new(OBJECT_LINK_SEARCH_RE).unwrap();
|
||||
let result = mention_re.replace_all(text, |caps: &Captures| {
|
||||
let url_match = caps.name("url").expect("should have url group");
|
||||
if is_inside_code_block(&url_match, text) {
|
||||
// Don't replace inside code blocks
|
||||
return caps[0].to_string();
|
||||
};
|
||||
let url = caps["url"].to_string();
|
||||
let link_text = caps.name("text")
|
||||
.map(|match_| match_.as_str())
|
||||
.unwrap_or(&url)
|
||||
.to_string();
|
||||
if link_map.contains_key(&url) && !is_inside_code_block(caps, text) {
|
||||
if link_map.contains_key(&url) {
|
||||
return format!(r#"<a href="{0}">{1}</a>"#, url, link_text);
|
||||
};
|
||||
// Leave unchanged if post does not exist
|
||||
|
@ -84,6 +91,16 @@ mod tests {
|
|||
"test ([[https://example.org/2]])",
|
||||
);
|
||||
|
||||
// Verifies `is_inside_code_block` for a match located between `<code>` and
// `</code>`, and for matches located before the opening tag and after the
// closing tag.
#[test]
fn test_is_inside_code_block() {
    let regexp = Regex::new("&&").unwrap();

    // Match located between the opening and the closing tag
    let text = "abc<code>&&</code>xyz";
    let mat = regexp.find(text).unwrap();
    assert_eq!(mat.start(), 9);
    assert!(is_inside_code_block(&mat, text));

    // One match before the opening tag, one between the tags and one after
    // the closing tag; only the middle one is preceded by an unclosed `<code>`
    let text = "&&<code>&&</code>&&";
    let results: Vec<bool> = regexp
        .find_iter(text)
        .map(|mat| is_inside_code_block(&mat, text))
        .collect();
    assert_eq!(results, vec![false, true, false]);
}
|
||||
|
||||
#[test]
|
||||
fn test_find_object_links() {
|
||||
let results = find_object_links(TEXT_WITH_OBJECT_LINKS);
|
||||
|
|
|
@ -8,6 +8,7 @@ use crate::database::DatabaseError;
|
|||
use crate::errors::ValidationError;
|
||||
use crate::models::profiles::queries::get_profiles_by_accts;
|
||||
use crate::models::profiles::types::DbActorProfile;
|
||||
use super::links::is_inside_code_block;
|
||||
|
||||
// See also: ACTOR_ADDRESS_RE in activitypub::actors::types
|
||||
const MENTION_RE: &str = r"@?(?P<username>[\w\.-]+)@(?P<hostname>.+)";
|
||||
|
@ -23,6 +24,11 @@ fn find_mentions(
|
|||
let mention_secondary_re = Regex::new(MENTION_SEARCH_SECONDARY_RE).unwrap();
|
||||
let mut mentions = vec![];
|
||||
for caps in mention_re.captures_iter(text) {
|
||||
let mention_match = caps.name("mention").expect("should have mention group");
|
||||
if is_inside_code_block(&mention_match, text) {
|
||||
// No mentions inside code blocks
|
||||
continue;
|
||||
};
|
||||
if let Some(secondary_caps) = mention_secondary_re.captures(&caps["mention"]) {
|
||||
let username = secondary_caps["username"].to_string();
|
||||
let hostname = secondary_caps.name("hostname")
|
||||
|
@ -62,6 +68,11 @@ pub fn replace_mentions(
|
|||
let mention_re = Regex::new(MENTION_SEARCH_RE).unwrap();
|
||||
let mention_secondary_re = Regex::new(MENTION_SEARCH_SECONDARY_RE).unwrap();
|
||||
let result = mention_re.replace_all(text, |caps: &Captures| {
|
||||
let mention_match = caps.name("mention").expect("should have mention group");
|
||||
if is_inside_code_block(&mention_match, text) {
|
||||
// Don't replace mentions inside code blocks
|
||||
return caps[0].to_string();
|
||||
};
|
||||
if let Some(secondary_caps) = mention_secondary_re.captures(&caps["mention"]) {
|
||||
let username = secondary_caps["username"].to_string();
|
||||
let hostname = secondary_caps.name("hostname")
|
||||
|
|
Loading…
Reference in a new issue