2022-05-02 00:24:44 +00:00
|
|
|
use std::collections::HashMap;
|
|
|
|
use std::path::Path;
|
|
|
|
|
2022-07-28 14:09:57 +00:00
|
|
|
use serde_json::{Value as JsonValue};
|
2022-05-02 00:24:44 +00:00
|
|
|
use tokio_postgres::GenericClient;
|
|
|
|
use uuid::Uuid;
|
|
|
|
|
2022-07-16 01:49:27 +00:00
|
|
|
use crate::activitypub::{
|
2022-07-28 14:09:57 +00:00
|
|
|
activity::{Attachment, Link, Object},
|
2022-07-16 01:49:27 +00:00
|
|
|
constants::AP_PUBLIC,
|
|
|
|
fetcher::fetchers::fetch_file,
|
|
|
|
fetcher::helpers::{
|
|
|
|
get_or_import_profile_by_actor_id,
|
|
|
|
import_profile_by_actor_address,
|
|
|
|
ImportError,
|
|
|
|
},
|
|
|
|
identifiers::{parse_local_actor_id, parse_local_object_id},
|
|
|
|
receiver::{parse_array, parse_property_value},
|
|
|
|
vocabulary::{DOCUMENT, HASHTAG, IMAGE, MENTION, NOTE},
|
2022-05-02 00:24:44 +00:00
|
|
|
};
|
|
|
|
use crate::config::Instance;
|
2022-07-28 14:09:57 +00:00
|
|
|
use crate::errors::{ConversionError, DatabaseError, ValidationError};
|
2022-05-02 00:24:44 +00:00
|
|
|
use crate::models::attachments::queries::create_attachment;
|
|
|
|
use crate::models::posts::mentions::mention_to_address;
|
|
|
|
use crate::models::posts::queries::{
|
|
|
|
create_post,
|
|
|
|
get_post_by_id,
|
|
|
|
get_post_by_object_id,
|
|
|
|
};
|
|
|
|
use crate::models::posts::tags::normalize_tag;
|
|
|
|
use crate::models::posts::types::{Post, PostCreateData, Visibility};
|
|
|
|
use crate::models::profiles::queries::get_profile_by_acct;
|
2022-02-13 18:55:37 +00:00
|
|
|
use crate::models::profiles::types::DbActorProfile;
|
2022-05-02 00:24:44 +00:00
|
|
|
use crate::models::users::queries::get_user_by_name;
|
|
|
|
use crate::utils::html::clean_html;
|
|
|
|
|
|
|
|
fn get_note_author_id(object: &Object) -> Result<String, ValidationError> {
|
|
|
|
let attributed_to = object.attributed_to.as_ref()
|
|
|
|
.ok_or(ValidationError("unattributed note"))?;
|
|
|
|
let author_id = parse_array(attributed_to)
|
|
|
|
.map_err(|_| ValidationError("invalid attributedTo property"))?
|
|
|
|
.get(0)
|
|
|
|
.ok_or(ValidationError("invalid attributedTo property"))?
|
|
|
|
.to_string();
|
|
|
|
Ok(author_id)
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Maximum accepted size of note content, in bytes.
/// `get_note_content` compares it against `String::len` of the content
/// (including any appended object link) before the HTML is cleaned.
const CONTENT_MAX_SIZE: usize = 100000;
|
2022-02-13 18:55:37 +00:00
|
|
|
|
2022-07-28 14:09:57 +00:00
|
|
|
fn parse_object_url(value: &JsonValue) -> Result<String, ConversionError> {
|
|
|
|
let object_url = match value {
|
|
|
|
JsonValue::String(string) => string.to_owned(),
|
|
|
|
other_value => {
|
|
|
|
let links: Vec<Link> = parse_property_value(other_value)?;
|
|
|
|
if let Some(link) = links.get(0) {
|
|
|
|
link.href.clone()
|
|
|
|
} else {
|
|
|
|
return Err(ConversionError);
|
|
|
|
}
|
|
|
|
},
|
|
|
|
};
|
|
|
|
Ok(object_url)
|
|
|
|
}
|
|
|
|
|
2022-05-11 12:50:36 +00:00
|
|
|
pub fn get_note_content(object: &Object) -> Result<String, ValidationError> {
|
2022-07-28 14:09:57 +00:00
|
|
|
let mut content = object.content.as_ref()
|
2022-05-12 15:38:40 +00:00
|
|
|
// Lemmy pages and PeerTube videos have "name" property
|
|
|
|
.or(object.name.as_ref())
|
2022-07-28 14:09:57 +00:00
|
|
|
.ok_or(ValidationError("no content"))?
|
|
|
|
.to_owned();
|
|
|
|
if object.object_type != NOTE {
|
|
|
|
if let Some(ref value) = object.url {
|
|
|
|
// Append link to object
|
|
|
|
let object_url = parse_object_url(value)
|
|
|
|
.map_err(|_| ValidationError("invalid object URL"))?;
|
|
|
|
content += &format!(
|
|
|
|
r#"<p><a href="{0}" target="_blank" rel="noopener">{0}</a></p>"#,
|
|
|
|
object_url,
|
|
|
|
);
|
|
|
|
};
|
|
|
|
};
|
2022-05-02 00:24:44 +00:00
|
|
|
if content.len() > CONTENT_MAX_SIZE {
|
|
|
|
return Err(ValidationError("content is too long"));
|
|
|
|
};
|
2022-07-28 14:09:57 +00:00
|
|
|
let content_safe = clean_html(&content);
|
2022-05-02 00:24:44 +00:00
|
|
|
Ok(content_safe)
|
|
|
|
}
|
|
|
|
|
|
|
|
fn get_note_visibility(
|
2022-02-13 18:55:37 +00:00
|
|
|
author: &DbActorProfile,
|
|
|
|
primary_audience: Vec<String>,
|
|
|
|
secondary_audience: Vec<String>,
|
|
|
|
) -> Visibility {
|
|
|
|
if primary_audience.contains(&AP_PUBLIC.to_string()) ||
|
|
|
|
secondary_audience.contains(&AP_PUBLIC.to_string()) {
|
2022-06-14 23:41:01 +00:00
|
|
|
return Visibility::Public;
|
|
|
|
};
|
|
|
|
let maybe_followers = author.actor_json.as_ref()
|
|
|
|
.and_then(|actor| actor.followers.as_ref());
|
|
|
|
if let Some(followers) = maybe_followers {
|
|
|
|
if primary_audience.contains(followers) ||
|
|
|
|
secondary_audience.contains(followers) {
|
|
|
|
return Visibility::Followers;
|
|
|
|
};
|
|
|
|
};
|
|
|
|
let maybe_subscribers = author.actor_json.as_ref()
|
|
|
|
.and_then(|actor| actor.subscribers.as_ref());
|
|
|
|
if let Some(subscribers) = maybe_subscribers {
|
|
|
|
if primary_audience.contains(subscribers) ||
|
|
|
|
secondary_audience.contains(subscribers) {
|
|
|
|
return Visibility::Subscribers;
|
|
|
|
};
|
|
|
|
};
|
|
|
|
Visibility::Direct
|
2022-02-13 18:55:37 +00:00
|
|
|
}
|
|
|
|
|
2022-05-02 00:24:44 +00:00
|
|
|
pub async fn handle_note(
|
|
|
|
db_client: &mut impl GenericClient,
|
|
|
|
instance: &Instance,
|
|
|
|
media_dir: &Path,
|
|
|
|
object: Object,
|
|
|
|
redirects: &HashMap<String, String>,
|
|
|
|
) -> Result<Post, ImportError> {
|
|
|
|
if object.object_type != NOTE {
|
|
|
|
// Could be Page (in Lemmy) or some other type
|
|
|
|
log::warn!("processing object of type {}", object.object_type);
|
|
|
|
};
|
|
|
|
|
|
|
|
let author_id = get_note_author_id(&object)?;
|
|
|
|
let author = get_or_import_profile_by_actor_id(
|
|
|
|
db_client,
|
|
|
|
instance,
|
|
|
|
media_dir,
|
|
|
|
&author_id,
|
2022-07-09 21:24:37 +00:00
|
|
|
).await.map_err(|err| {
|
|
|
|
log::warn!("failed to import {} ({})", author_id, err);
|
|
|
|
err
|
|
|
|
})?;
|
2022-05-02 00:24:44 +00:00
|
|
|
let content = get_note_content(&object)?;
|
|
|
|
|
|
|
|
let mut attachments: Vec<Uuid> = Vec::new();
|
|
|
|
if let Some(value) = object.attachment {
|
|
|
|
let list: Vec<Attachment> = parse_property_value(&value)
|
|
|
|
.map_err(|_| ValidationError("invalid attachment property"))?;
|
|
|
|
let mut downloaded = vec![];
|
|
|
|
for attachment in list {
|
|
|
|
if attachment.attachment_type != DOCUMENT &&
|
|
|
|
attachment.attachment_type != IMAGE
|
|
|
|
{
|
|
|
|
log::warn!(
|
|
|
|
"skipping attachment of type {}",
|
|
|
|
attachment.attachment_type,
|
|
|
|
);
|
|
|
|
continue;
|
|
|
|
};
|
|
|
|
let attachment_url = attachment.url
|
|
|
|
.ok_or(ValidationError("attachment URL is missing"))?;
|
|
|
|
let (file_name, media_type) = fetch_file(&attachment_url, media_dir).await
|
|
|
|
.map_err(|_| ValidationError("failed to fetch attachment"))?;
|
|
|
|
log::info!("downloaded attachment {}", attachment_url);
|
|
|
|
downloaded.push((
|
|
|
|
file_name,
|
|
|
|
attachment.media_type.or(media_type),
|
|
|
|
));
|
|
|
|
};
|
|
|
|
for (file_name, media_type) in downloaded {
|
|
|
|
let db_attachment = create_attachment(
|
|
|
|
db_client,
|
|
|
|
&author.id,
|
|
|
|
file_name,
|
|
|
|
media_type,
|
|
|
|
).await?;
|
|
|
|
attachments.push(db_attachment.id);
|
|
|
|
};
|
|
|
|
};
|
|
|
|
let mut mentions: Vec<Uuid> = Vec::new();
|
|
|
|
let mut tags = vec![];
|
|
|
|
if let Some(list) = object.tag {
|
|
|
|
for tag in list {
|
|
|
|
if tag.tag_type == HASHTAG {
|
|
|
|
if let Some(tag_name) = tag.name {
|
|
|
|
// Ignore invalid tags
|
|
|
|
if let Ok(tag_name) = normalize_tag(&tag_name) {
|
2022-07-10 20:57:09 +00:00
|
|
|
if !tags.contains(&tag_name) {
|
|
|
|
tags.push(tag_name);
|
|
|
|
};
|
2022-05-02 00:24:44 +00:00
|
|
|
};
|
|
|
|
};
|
|
|
|
} else if tag.tag_type == MENTION {
|
|
|
|
// Try to find profile by actor ID.
|
|
|
|
if let Some(href) = tag.href {
|
2022-07-16 01:49:27 +00:00
|
|
|
if let Ok(username) = parse_local_actor_id(&instance.url(), &href) {
|
2022-05-02 00:24:44 +00:00
|
|
|
let user = get_user_by_name(db_client, &username).await?;
|
|
|
|
if !mentions.contains(&user.id) {
|
|
|
|
mentions.push(user.id);
|
|
|
|
};
|
|
|
|
continue;
|
|
|
|
};
|
2022-07-09 20:29:42 +00:00
|
|
|
// NOTE: `href` attribute is usually actor ID
|
2022-05-02 00:24:44 +00:00
|
|
|
// but also can be actor URL (profile link).
|
|
|
|
match get_or_import_profile_by_actor_id(
|
|
|
|
db_client,
|
|
|
|
instance,
|
|
|
|
media_dir,
|
|
|
|
&href,
|
|
|
|
).await {
|
|
|
|
Ok(profile) => {
|
|
|
|
if !mentions.contains(&profile.id) {
|
|
|
|
mentions.push(profile.id);
|
|
|
|
};
|
|
|
|
continue;
|
|
|
|
},
|
|
|
|
Err(error) => {
|
2022-07-10 12:41:01 +00:00
|
|
|
log::warn!(
|
|
|
|
"failed to find mentioned profile by ID {}: {}",
|
|
|
|
href,
|
|
|
|
error,
|
|
|
|
);
|
2022-05-02 00:24:44 +00:00
|
|
|
},
|
|
|
|
};
|
|
|
|
};
|
|
|
|
// Try to find profile by actor address
|
|
|
|
let tag_name = match tag.name {
|
|
|
|
Some(name) => name,
|
|
|
|
None => {
|
|
|
|
log::warn!("failed to parse mention");
|
|
|
|
continue;
|
|
|
|
},
|
|
|
|
};
|
|
|
|
if let Ok(actor_address) = mention_to_address(
|
|
|
|
&instance.host(),
|
|
|
|
&tag_name,
|
|
|
|
) {
|
|
|
|
let profile = match get_profile_by_acct(
|
|
|
|
db_client,
|
|
|
|
&actor_address.acct(),
|
|
|
|
).await {
|
|
|
|
Ok(profile) => profile,
|
|
|
|
Err(DatabaseError::NotFound(_)) => {
|
|
|
|
match import_profile_by_actor_address(
|
|
|
|
db_client,
|
|
|
|
instance,
|
|
|
|
media_dir,
|
|
|
|
&actor_address,
|
|
|
|
).await {
|
|
|
|
Ok(profile) => profile,
|
|
|
|
Err(ImportError::FetchError(error)) => {
|
|
|
|
// Ignore mention if fetcher fails
|
2022-07-10 12:41:01 +00:00
|
|
|
log::warn!(
|
|
|
|
"failed to find mentioned profile {}: {}",
|
|
|
|
actor_address.acct(),
|
|
|
|
error,
|
|
|
|
);
|
2022-05-02 00:24:44 +00:00
|
|
|
continue;
|
|
|
|
},
|
|
|
|
Err(other_error) => {
|
|
|
|
return Err(other_error);
|
|
|
|
},
|
|
|
|
}
|
|
|
|
},
|
|
|
|
Err(other_error) => return Err(other_error.into()),
|
|
|
|
};
|
|
|
|
if !mentions.contains(&profile.id) {
|
|
|
|
mentions.push(profile.id);
|
|
|
|
};
|
|
|
|
} else {
|
|
|
|
log::warn!("failed to parse mention {}", tag_name);
|
|
|
|
};
|
|
|
|
};
|
|
|
|
};
|
|
|
|
};
|
|
|
|
let in_reply_to_id = match object.in_reply_to {
|
|
|
|
Some(object_id) => {
|
2022-07-16 01:49:27 +00:00
|
|
|
match parse_local_object_id(&instance.url(), &object_id) {
|
2022-05-02 00:24:44 +00:00
|
|
|
Ok(post_id) => {
|
|
|
|
// Local post
|
|
|
|
let post = get_post_by_id(db_client, &post_id).await?;
|
|
|
|
Some(post.id)
|
|
|
|
},
|
|
|
|
Err(_) => {
|
|
|
|
let note_id = redirects.get(&object_id)
|
|
|
|
.unwrap_or(&object_id);
|
|
|
|
let post = get_post_by_object_id(db_client, note_id).await?;
|
|
|
|
Some(post.id)
|
|
|
|
},
|
|
|
|
}
|
|
|
|
},
|
|
|
|
None => None,
|
|
|
|
};
|
|
|
|
let primary_audience = match object.to {
|
|
|
|
Some(value) => {
|
|
|
|
parse_array(&value)
|
|
|
|
.map_err(|_| ValidationError("invalid 'to' property value"))?
|
|
|
|
},
|
|
|
|
None => vec![],
|
|
|
|
};
|
|
|
|
let secondary_audience = match object.cc {
|
|
|
|
Some(value) => {
|
|
|
|
parse_array(&value)
|
|
|
|
.map_err(|_| ValidationError("invalid 'cc' property value"))?
|
|
|
|
},
|
|
|
|
None => vec![],
|
|
|
|
};
|
|
|
|
let visibility = get_note_visibility(
|
|
|
|
&author,
|
|
|
|
primary_audience,
|
|
|
|
secondary_audience,
|
|
|
|
);
|
|
|
|
if visibility != Visibility::Public {
|
|
|
|
log::warn!(
|
|
|
|
"processing note with visibility {:?} attributed to {}",
|
|
|
|
visibility,
|
|
|
|
author.username,
|
|
|
|
);
|
|
|
|
};
|
|
|
|
let post_data = PostCreateData {
|
|
|
|
content: content,
|
|
|
|
in_reply_to_id,
|
|
|
|
repost_of_id: None,
|
|
|
|
visibility,
|
|
|
|
attachments: attachments,
|
|
|
|
mentions: mentions,
|
|
|
|
tags: tags,
|
|
|
|
object_id: Some(object.id),
|
|
|
|
created_at: object.published,
|
|
|
|
};
|
|
|
|
let post = create_post(db_client, &author.id, post_data).await?;
|
|
|
|
Ok(post)
|
|
|
|
}
|
|
|
|
|
2022-02-13 18:55:37 +00:00
|
|
|
#[cfg(test)]
mod tests {
    use serde_json::json;
    use crate::activitypub::{
        activity::Object,
        actors::types::Actor,
        vocabulary::NOTE,
    };
    use super::*;

    // Content of a plain note is returned as is.
    #[test]
    fn test_get_note_content() {
        let note = Object {
            content: Some("test".to_string()),
            object_type: NOTE.to_string(),
            ..Default::default()
        };
        assert_eq!(get_note_content(&note).unwrap(), "test");
    }

    // For non-note objects `content` wins over `name`,
    // and a link to the object is appended.
    #[test]
    fn test_get_note_content_from_video() {
        let links = json!([{
            "type": "Link",
            "mediaType": "text/html",
            "href": "https://example.org/xyz",
        }]);
        let video = Object {
            name: Some("test-name".to_string()),
            content: Some("test-content".to_string()),
            object_type: "Video".to_string(),
            url: Some(links),
            ..Default::default()
        };
        let expected_content = r#"test-content<p><a href="https://example.org/xyz" target="_blank" rel="noopener">https://example.org/xyz</a></p>"#;
        assert_eq!(get_note_content(&video).unwrap(), expected_content);
    }

    #[test]
    fn test_get_note_visibility_public() {
        let author = DbActorProfile::default();
        let visibility = get_note_visibility(
            &author,
            vec![AP_PUBLIC.to_string()],
            vec![],
        );
        assert_eq!(visibility, Visibility::Public);
    }

    #[test]
    fn test_get_note_visibility_followers() {
        let author_followers = "https://example.com/users/author/followers";
        let actor = Actor {
            followers: Some(author_followers.to_string()),
            ..Default::default()
        };
        let author = DbActorProfile {
            actor_json: Some(actor),
            ..Default::default()
        };
        let visibility = get_note_visibility(
            &author,
            vec![author_followers.to_string()],
            vec![],
        );
        assert_eq!(visibility, Visibility::Followers);
    }

    #[test]
    fn test_get_note_visibility_subscribers() {
        let author_followers = "https://example.com/users/author/followers";
        let author_subscribers = "https://example.com/users/author/subscribers";
        let actor = Actor {
            followers: Some(author_followers.to_string()),
            subscribers: Some(author_subscribers.to_string()),
            ..Default::default()
        };
        let author = DbActorProfile {
            actor_json: Some(actor),
            ..Default::default()
        };
        let visibility = get_note_visibility(
            &author,
            vec![author_subscribers.to_string()],
            vec![],
        );
        assert_eq!(visibility, Visibility::Subscribers);
    }

    // An audience with no known collection results in a direct message.
    #[test]
    fn test_get_note_visibility_direct() {
        let author = DbActorProfile::default();
        let visibility = get_note_visibility(
            &author,
            vec!["https://example.com/users/1".to_string()],
            vec![],
        );
        assert_eq!(visibility, Visibility::Direct);
    }
}
|