Parse and store hashtags

This commit is contained in:
silverpill 2021-12-07 23:28:58 +00:00
parent 3be313a0bf
commit c4fdb46df7
13 changed files with 258 additions and 18 deletions

View file

@ -0,0 +1,10 @@
-- Hashtag names; the UNIQUE constraint ensures each name is stored only once
CREATE TABLE tag (
id SERIAL PRIMARY KEY,
tag_name VARCHAR(100) UNIQUE NOT NULL
);
-- Links posts to tags; a link is removed when its post or its tag is deleted
CREATE TABLE post_tag (
post_id UUID NOT NULL REFERENCES post (id) ON DELETE CASCADE,
tag_id INTEGER NOT NULL REFERENCES tag (id) ON DELETE CASCADE,
-- A tag can be linked to a given post only once
PRIMARY KEY (post_id, tag_id)
);

View file

@ -85,6 +85,17 @@ CREATE TABLE mention (
PRIMARY KEY (post_id, profile_id)
);
-- Hashtag names; the UNIQUE constraint ensures each name is stored only once
CREATE TABLE tag (
id SERIAL PRIMARY KEY,
tag_name VARCHAR(100) UNIQUE NOT NULL
);
-- Links posts to tags; a link is removed when its post or its tag is deleted
CREATE TABLE post_tag (
post_id UUID NOT NULL REFERENCES post (id) ON DELETE CASCADE,
tag_id INTEGER NOT NULL REFERENCES tag (id) ON DELETE CASCADE,
-- A tag can be linked to a given post only once
PRIMARY KEY (post_id, tag_id)
);
CREATE TABLE oauth_token (
id SERIAL PRIMARY KEY,
owner_id UUID NOT NULL REFERENCES user_account (id) ON DELETE CASCADE,

View file

@ -171,17 +171,27 @@ pub fn create_note(
}
}).collect();
let mut recipients = vec![AP_PUBLIC.to_string()];
let mentions: Vec<Tag> = post.mentions.iter().map(|profile| {
let mut tags = vec![];
for profile in &post.mentions {
let actor_id = profile.actor_id(instance_url).unwrap();
if !profile.is_local() {
recipients.push(actor_id.clone());
};
Tag {
let tag = Tag {
name: profile.actor_address(instance_host),
tag_type: MENTION.to_string(),
href: Some(actor_id),
}
}).collect();
};
tags.push(tag);
};
for tag_name in &post.tags {
let tag = Tag {
name: format!("#{}", tag_name),
tag_type: HASHTAG.to_string(),
href: None,
};
tags.push(tag);
};
let in_reply_to_object_id = match post.in_reply_to_id {
Some(in_reply_to_id) => {
let post = in_reply_to.unwrap();
@ -208,7 +218,7 @@ pub fn create_note(
attributed_to: actor_id,
in_reply_to: in_reply_to_object_id,
content: post.content.clone(),
tag: mentions,
tag: tags,
to: recipients,
}
}

View file

@ -16,6 +16,7 @@ use crate::models::posts::queries::{
get_post_by_object_id,
delete_post,
};
use crate::models::posts::tags::normalize_tag;
use crate::models::profiles::queries::{
get_profile_by_actor_id,
get_profile_by_acct,
@ -227,9 +228,15 @@ pub async fn process_note(
}
}
let mut mentions: Vec<Uuid> = Vec::new();
let mut tags = vec![];
if let Some(list) = object.tag {
for tag in list {
if tag.tag_type == MENTION {
if tag.tag_type == HASHTAG {
// Ignore invalid tags
if let Ok(tag_name) = normalize_tag(&tag.name) {
tags.push(tag_name);
};
} else if tag.tag_type == MENTION {
if let Some(href) = tag.href {
let profile = get_or_fetch_profile_by_actor_id(
db_client,
@ -281,6 +288,7 @@ pub async fn process_note(
visibility,
attachments: attachments,
mentions: mentions,
tags: tags,
object_id: Some(object.id),
created_at: object.published,
};

View file

@ -24,4 +24,5 @@ pub const NOTE: &str = "Note";
pub const TOMBSTONE: &str = "Tombstone";
// Misc
// Value of `tag_type` that marks a tag object as a hashtag
pub const HASHTAG: &str = "Hashtag";
pub const PROPERTY_VALUE: &str = "PropertyValue";

View file

@ -27,6 +27,23 @@ impl Mention {
}
}
/// Hashtag attached to a status.
///
/// https://docs.joinmastodon.org/entities/tag/
#[derive(Serialize)]
pub struct Tag {
/// Name of the tag
name: String,
/// Link to the tag page; `from_tag_name` sets it to an empty string
url: String,
}
impl Tag {
fn from_tag_name(tag_name: String) -> Self {
Tag {
name: tag_name,
// TODO: add link to tag page
url: "".to_string(),
}
}
}
/// https://docs.joinmastodon.org/entities/status/
#[derive(Serialize)]
pub struct Status {
@ -43,6 +60,7 @@ pub struct Status {
pub reblogs_count: i32,
pub media_attachments: Vec<Attachment>,
mentions: Vec<Mention>,
tags: Vec<Tag>,
// Authorized user attributes
pub favourited: bool,
@ -63,6 +81,9 @@ impl Status {
let mentions: Vec<Mention> = post.mentions.into_iter()
.map(|item| Mention::from_profile(item, instance_url))
.collect();
let tags: Vec<Tag> = post.tags.into_iter()
.map(|tag_name| Tag::from_tag_name(tag_name))
.collect();
let account = Account::from_profile(post.author, instance_url);
let reblog = if let Some(repost_of) = post.repost_of {
let status = Status::from_post(*repost_of, instance_url);
@ -88,6 +109,7 @@ impl Status {
reblogs_count: post.repost_count,
media_attachments: attachments,
mentions: mentions,
tags: tags,
favourited: post.actions.as_ref().map_or(false, |actions| actions.favourited),
reblogged: post.actions.as_ref().map_or(false, |actions| actions.reposted),
ipfs_cid: post.ipfs_cid,
@ -118,6 +140,7 @@ impl From<StatusData> for PostCreateData {
visibility: Visibility::Public,
attachments: value.media_ids.unwrap_or(vec![]),
mentions: vec![],
tags: vec![],
object_id: None,
created_at: None,
}

View file

@ -23,6 +23,7 @@ use crate::mastodon_api::oauth::auth::get_current_user;
use crate::models::attachments::queries::set_attachment_ipfs_cid;
use crate::models::posts::helpers::can_view_post;
use crate::models::posts::mentions::{find_mentioned_profiles, replace_mentions};
use crate::models::posts::tags::{find_tags, replace_tags};
use crate::models::profiles::queries::get_followers;
use crate::models::posts::helpers::{
get_actions_for_posts,
@ -70,6 +71,8 @@ async fn create_status(
);
post_data.mentions = mention_map.values()
.map(|profile| profile.id).collect();
post_data.tags = find_tags(&post_data.content);
post_data.content = replace_tags(&post_data.content, &post_data.tags);
let post = create_post(db_client, &current_user.id, post_data).await?;
// Federate
let maybe_in_reply_to = match post.in_reply_to_id {

View file

@ -5,7 +5,11 @@ use uuid::Uuid;
use crate::errors::DatabaseError;
use crate::models::posts::helpers::get_actions_for_posts;
use crate::models::posts::queries::{RELATED_ATTACHMENTS, RELATED_MENTIONS};
use crate::models::posts::queries::{
RELATED_ATTACHMENTS,
RELATED_MENTIONS,
RELATED_TAGS,
};
use super::types::{EventType, Notification};
async fn create_notification(
@ -98,7 +102,8 @@ pub async fn get_notifications(
SELECT
notification, sender, post, post_author,
{related_attachments},
{related_mentions}
{related_mentions},
{related_tags}
FROM notification
JOIN actor_profile AS sender
ON notification.sender_id = sender.id
@ -111,6 +116,7 @@ pub async fn get_notifications(
",
related_attachments=RELATED_ATTACHMENTS,
related_mentions=RELATED_MENTIONS,
related_tags=RELATED_TAGS,
);
let rows = db_client.query(
statement.as_str(),

View file

@ -87,7 +87,14 @@ impl TryFrom<&Row> for Notification {
let db_post_author: DbActorProfile = row.try_get("post_author")?;
let db_attachments: Vec<DbMediaAttachment> = row.try_get("attachments")?;
let db_mentions: Vec<DbActorProfile> = row.try_get("mentions")?;
let post = Post::new(db_post, db_post_author, db_attachments, db_mentions)?;
let db_tags: Vec<String> = row.try_get("tags")?;
let post = Post::new(
db_post,
db_post_author,
db_attachments,
db_mentions,
db_tags,
)?;
Some(post)
},
None => None,

View file

@ -1,4 +1,5 @@
pub mod helpers;
pub mod mentions;
pub mod queries;
pub mod tags;
pub mod types;

View file

@ -105,6 +105,29 @@ pub async fn create_post(
let db_mentions: Vec<DbActorProfile> = mentions_rows.iter()
.map(|row| row.try_get("actor_profile"))
.collect::<Result<_, _>>()?;
// Create tags
transaction.execute(
"
INSERT INTO tag (tag_name)
SELECT unnest($1::text[])
ON CONFLICT (tag_name) DO NOTHING
",
&[&data.tags],
).await?;
let tags_rows = transaction.query(
"
INSERT INTO post_tag (post_id, tag_id)
SELECT $1, tag.id FROM tag WHERE tag_name = ANY($2)
RETURNING (SELECT tag_name FROM tag WHERE tag.id = tag_id)
",
&[&db_post.id, &data.tags],
).await?;
if tags_rows.len() != data.tags.len() {
return Err(DatabaseError::NotFound("tag"));
};
let db_tags: Vec<String> = tags_rows.iter()
.map(|row| row.try_get("tag_name"))
.collect::<Result<_, _>>()?;
// Update counters
let author = update_post_count(&transaction, &db_post.author_id, 1).await?;
let mut notified_users = vec![];
@ -157,7 +180,7 @@ pub async fn create_post(
};
transaction.commit().await?;
let post = Post::new(db_post, author, db_attachments, db_mentions)?;
let post = Post::new(db_post, author, db_attachments, db_mentions, db_tags)?;
Ok(post)
}
@ -175,6 +198,13 @@ pub const RELATED_MENTIONS: &str =
WHERE post_id = post.id
) AS mentions";
/// Select-list fragment that produces a `tags` array column containing
/// the names of all tags linked to the current `post` row through `post_tag`.
pub const RELATED_TAGS: &str =
"ARRAY(
SELECT tag.tag_name FROM tag
JOIN post_tag ON post_tag.tag_id = tag.id
WHERE post_tag.post_id = post.id
) AS tags";
pub async fn get_home_timeline(
db_client: &impl GenericClient,
current_user_id: &Uuid,
@ -188,7 +218,8 @@ pub async fn get_home_timeline(
SELECT
post, actor_profile,
{related_attachments},
{related_mentions}
{related_mentions},
{related_tags}
FROM post
JOIN actor_profile ON post.author_id = actor_profile.id
WHERE
@ -212,6 +243,7 @@ pub async fn get_home_timeline(
",
related_attachments=RELATED_ATTACHMENTS,
related_mentions=RELATED_MENTIONS,
related_tags=RELATED_TAGS,
visibility_public=i16::from(&Visibility::Public),
);
let rows = db_client.query(
@ -233,13 +265,15 @@ pub async fn get_posts(
SELECT
post, actor_profile,
{related_attachments},
{related_mentions}
{related_mentions},
{related_tags}
FROM post
JOIN actor_profile ON post.author_id = actor_profile.id
WHERE post.id = ANY($1)
",
related_attachments=RELATED_ATTACHMENTS,
related_mentions=RELATED_MENTIONS,
related_tags=RELATED_TAGS,
);
let rows = db_client.query(
statement.as_str(),
@ -273,7 +307,8 @@ pub async fn get_posts_by_author(
SELECT
post, actor_profile,
{related_attachments},
{related_mentions}
{related_mentions},
{related_tags}
FROM post
JOIN actor_profile ON post.author_id = actor_profile.id
WHERE {condition}
@ -281,6 +316,7 @@ pub async fn get_posts_by_author(
",
related_attachments=RELATED_ATTACHMENTS,
related_mentions=RELATED_MENTIONS,
related_tags=RELATED_TAGS,
condition=condition,
);
let rows = db_client.query(
@ -302,13 +338,15 @@ pub async fn get_post_by_id(
SELECT
post, actor_profile,
{related_attachments},
{related_mentions}
{related_mentions},
{related_tags}
FROM post
JOIN actor_profile ON post.author_id = actor_profile.id
WHERE post.id = $1
",
related_attachments=RELATED_ATTACHMENTS,
related_mentions=RELATED_MENTIONS,
related_tags=RELATED_TAGS,
);
let maybe_row = db_client.query_opt(
statement.as_str(),
@ -368,7 +406,8 @@ pub async fn get_thread(
SELECT
post, actor_profile,
{related_attachments},
{related_mentions}
{related_mentions},
{related_tags}
FROM post
JOIN context ON post.id = context.id
JOIN actor_profile ON post.author_id = actor_profile.id
@ -377,6 +416,7 @@ pub async fn get_thread(
",
related_attachments=RELATED_ATTACHMENTS,
related_mentions=RELATED_MENTIONS,
related_tags=RELATED_TAGS,
condition=condition,
);
let rows = db_client.query(
@ -401,13 +441,15 @@ pub async fn get_post_by_object_id(
SELECT
post, actor_profile,
{related_attachments},
{related_mentions}
{related_mentions},
{related_tags}
FROM post
JOIN actor_profile ON post.author_id = actor_profile.id
WHERE post.object_id = $1
",
related_attachments=RELATED_ATTACHMENTS,
related_mentions=RELATED_MENTIONS,
related_tags=RELATED_TAGS,
);
let maybe_row = db_client.query_opt(
statement.as_str(),
@ -427,13 +469,15 @@ pub async fn get_post_by_ipfs_cid(
SELECT
post, actor_profile,
{related_attachments},
{related_mentions}
{related_mentions},
{related_tags}
FROM post
JOIN actor_profile ON post.author_id = actor_profile.id
WHERE post.ipfs_cid = $1
",
related_attachments=RELATED_ATTACHMENTS,
related_mentions=RELATED_MENTIONS,
related_tags=RELATED_TAGS,
);
let result = db_client.query_opt(
statement.as_str(),

108
src/models/posts/tags.rs Normal file
View file

@ -0,0 +1,108 @@
use regex::{Captures, Regex};
use crate::errors::ConversionError;
// Hashtag candidate: `#` at the start of a line or after whitespace,
// followed by a run of non-whitespace characters (captured as `tag`)
const HASHTAG_RE: &str = r"(?m)(?P<before>^|\s)#(?P<tag>\S+)";
// Candidate body: ASCII letters and digits (captured as `tag`), optionally
// followed by `.`, `<br>` or `.<br>` (captured as `after`)
const HASHTAG_SECONDARY_RE: &str = r"^(?P<tag>[0-9A-Za-z]+)(?P<after>(\.|<br>|\.<br>)?)$";
// Tag name: one or more word characters
const HASHTAG_NAME_RE: &str = r"^\w+$";
/// Finds anything that looks like a hashtag.
///
/// Returns lowercased tag names without the leading `#`, in order of
/// first appearance and without duplicates.
///
/// A candidate is skipped if it does not consist solely of ASCII letters
/// and digits (optionally followed by `.`, `<br>` or `.<br>`), or if its
/// name is longer than the `tag.tag_name` column allows: storing such
/// a name would make the database reject the whole post.
pub fn find_tags(text: &str) -> Vec<String> {
    // Must not exceed the size of the tag.tag_name column (VARCHAR(100))
    const TAG_NAME_MAX_LEN: usize = 100;
    let hashtag_re = Regex::new(HASHTAG_RE).unwrap();
    let hashtag_secondary_re = Regex::new(HASHTAG_SECONDARY_RE).unwrap();
    let mut tags = vec![];
    for caps in hashtag_re.captures_iter(text) {
        if let Some(secondary_caps) = hashtag_secondary_re.captures(&caps["tag"]) {
            let tag_name = secondary_caps["tag"].to_lowercase();
            // The name is ASCII-only here, so byte length equals character count
            if tag_name.len() > TAG_NAME_MAX_LEN {
                continue;
            };
            if !tags.contains(&tag_name) {
                tags.push(tag_name);
            };
        };
    };
    tags
}
/// Replaces hashtags with links.
///
/// Only hashtags whose lowercased name is present in `tags` are turned
/// into links; every other hashtag-like fragment is left untouched.
/// The link text keeps the original letter case, while the link target
/// uses the lowercased name.
pub fn replace_tags(text: &str, tags: &[String]) -> String {
    let primary_re = Regex::new(HASHTAG_RE).unwrap();
    let secondary_re = Regex::new(HASHTAG_SECONDARY_RE).unwrap();
    let replaced = primary_re.replace_all(text, |caps: &Captures| {
        // Fallback: emit the matched fragment exactly as it was
        let unchanged = || caps[0].to_string();
        let secondary_caps = match secondary_re.captures(&caps["tag"]) {
            Some(secondary_caps) => secondary_caps,
            None => return unchanged(),
        };
        let display_name = &secondary_caps["tag"];
        let tag_name = display_name.to_lowercase();
        if !tags.contains(&tag_name) {
            return unchanged();
        };
        // Leading whitespace and trailing punctuation stay outside of the link
        format!(
            r#"{}<a class="hashtag" href="/tag/{}">#{}</a>{}"#,
            &caps["before"],
            tag_name,
            display_name,
            &secondary_caps["after"],
        )
    });
    replaced.into_owned()
}
/// Converts a tag name into its normalized (lowercase) form.
///
/// Leading `#` characters are removed before validation.
///
/// # Errors
///
/// Returns `ConversionError` if the name is empty, contains anything
/// other than word characters, or is longer than 100 characters after
/// normalization (the size of the `tag.tag_name` column).
pub fn normalize_tag(tag: &str) -> Result<String, ConversionError> {
    // Must not exceed the size of the tag.tag_name column (VARCHAR(100))
    const TAG_NAME_MAX_LEN: usize = 100;
    let hashtag_name_re = Regex::new(HASHTAG_NAME_RE).unwrap();
    let tag_name = tag.trim_start_matches('#');
    if !hashtag_name_re.is_match(tag_name) {
        return Err(ConversionError);
    };
    let normalized = tag_name.to_lowercase();
    // Lowercasing can change the number of characters,
    // so the limit is checked on the final value
    if normalized.chars().count() > TAG_NAME_MAX_LEN {
        return Err(ConversionError);
    };
    Ok(normalized)
}
#[cfg(test)]
mod tests {
    use super::*;

    // Input shared by the find and replace tests: contains a tag followed
    // by punctuation, the same tag in different letter cases, and two
    // candidates with characters that are not allowed in local tags
    const TEXT_WITH_TAGS: &str = concat!(
        "@user1@server1 some text #TestTag.\n",
        "#TAG1 #tag1 #test_underscore #test*special ",
        "more text #tag2",
    );

    #[test]
    fn test_find_tags() {
        let found = find_tags(TEXT_WITH_TAGS);
        let expected = vec!["testtag", "tag1", "tag2"];
        assert_eq!(found, expected);
    }

    #[test]
    fn test_replace_tags() {
        let known_tags = find_tags(TEXT_WITH_TAGS);
        let replaced = replace_tags(TEXT_WITH_TAGS, &known_tags);
        let expected = concat!(
            r#"@user1@server1 some text <a class="hashtag" href="/tag/testtag">#TestTag</a>."#, "\n",
            r#"<a class="hashtag" href="/tag/tag1">#TAG1</a> <a class="hashtag" href="/tag/tag1">#tag1</a> "#,
            r#"#test_underscore #test*special "#,
            r#"more text <a class="hashtag" href="/tag/tag2">#tag2</a>"#,
        );
        assert_eq!(replaced, expected);
    }

    #[test]
    fn test_normalize_tag() {
        let normalized = normalize_tag("#ActivityPub").unwrap();
        assert_eq!(normalized, "activitypub");
    }
}

View file

@ -86,6 +86,7 @@ pub struct Post {
pub repost_count: i32,
pub attachments: Vec<DbMediaAttachment>,
pub mentions: Vec<DbActorProfile>,
pub tags: Vec<String>,
pub object_id: Option<String>,
pub ipfs_cid: Option<String>,
pub token_id: Option<i32>,
@ -102,6 +103,7 @@ impl Post {
db_author: DbActorProfile,
db_attachments: Vec<DbMediaAttachment>,
db_mentions: Vec<DbActorProfile>,
db_tags: Vec<String>,
) -> Result<Self, ConversionError> {
// Consistency checks
if db_post.author_id != db_author.id {
@ -122,6 +124,7 @@ impl Post {
repost_count: db_post.repost_count,
attachments: db_attachments,
mentions: db_mentions,
tags: db_tags,
object_id: db_post.object_id,
ipfs_cid: db_post.ipfs_cid,
token_id: db_post.token_id,
@ -160,6 +163,7 @@ impl Default for Post {
repost_count: 0,
attachments: vec![],
mentions: vec![],
tags: vec![],
object_id: None,
ipfs_cid: None,
token_id: None,
@ -180,7 +184,8 @@ impl TryFrom<&Row> for Post {
let db_profile: DbActorProfile = row.try_get("actor_profile")?;
let db_attachments: Vec<DbMediaAttachment> = row.try_get("attachments")?;
let db_mentions: Vec<DbActorProfile> = row.try_get("mentions")?;
let post = Self::new(db_post, db_profile, db_attachments, db_mentions)?;
let db_tags: Vec<String> = row.try_get("tags")?;
let post = Self::new(db_post, db_profile, db_attachments, db_mentions, db_tags)?;
Ok(post)
}
}
@ -193,6 +198,7 @@ pub struct PostCreateData {
pub visibility: Visibility,
pub attachments: Vec<Uuid>,
pub mentions: Vec<Uuid>,
pub tags: Vec<String>,
pub object_id: Option<String>,
pub created_at: Option<DateTime<Utc>>,
}
@ -223,6 +229,7 @@ mod tests {
visibility: Visibility::Public,
attachments: vec![],
mentions: vec![],
tags: vec![],
object_id: None,
created_at: None,
};
@ -238,6 +245,7 @@ mod tests {
visibility: Visibility::Public,
attachments: vec![],
mentions: vec![],
tags: vec![],
object_id: None,
created_at: None,
};