Deserialize object tag to Vec<Value>

This commit is contained in:
silverpill 2023-02-28 00:20:59 +00:00
parent 849b201ab9
commit 721238d897
3 changed files with 239 additions and 221 deletions

View file

@ -254,7 +254,7 @@ pub async fn import_post(
// Fetch parent object on next iteration // Fetch parent object on next iteration
queue.push(object_id.to_owned()); queue.push(object_id.to_owned());
}; };
for object_id in get_object_links(&object)? { for object_id in get_object_links(&object) {
// Fetch linked objects after fetching current thread // Fetch linked objects after fetching current thread
queue.insert(0, object_id); queue.insert(0, object_id);
}; };

View file

@ -199,30 +199,26 @@ pub async fn get_object_attachments(
pub fn get_object_links( pub fn get_object_links(
object: &Object, object: &Object,
) -> Result<Vec<String>, HandlerError> { ) -> Vec<String> {
let mut links = vec![]; let mut links = vec![];
if let Some(ref value) = object.tag { for tag_value in object.tag.clone() {
let list: Vec<JsonValue> = parse_property_value(value) let tag_type = tag_value["type"].as_str().unwrap_or(HASHTAG);
.map_err(|_| ValidationError("invalid tag property"))?; if tag_type == LINK {
for tag_value in list { let tag: LinkTag = match serde_json::from_value(tag_value) {
let tag_type = tag_value["type"].as_str().unwrap_or(HASHTAG); Ok(tag) => tag,
if tag_type == LINK { Err(_) => {
let tag: LinkTag = match serde_json::from_value(tag_value) { log::warn!("invalid link tag");
Ok(tag) => tag,
Err(_) => {
log::warn!("invalid link tag");
continue;
},
};
if tag.media_type != AP_MEDIA_TYPE &&
tag.media_type != AS_MEDIA_TYPE
{
// Unknown media type
continue; continue;
}; },
if !links.contains(&tag.href) { };
links.push(tag.href); if tag.media_type != AP_MEDIA_TYPE &&
}; tag.media_type != AS_MEDIA_TYPE
{
// Unknown media type
continue;
};
if !links.contains(&tag.href) {
links.push(tag.href);
}; };
}; };
}; };
@ -231,7 +227,7 @@ pub fn get_object_links(
links.push(object_id.to_owned()); links.push(object_id.to_owned());
}; };
}; };
Ok(links) links
} }
pub async fn get_object_tags( pub async fn get_object_tags(
@ -246,220 +242,216 @@ pub async fn get_object_tags(
let mut hashtags = vec![]; let mut hashtags = vec![];
let mut links = vec![]; let mut links = vec![];
let mut emojis = vec![]; let mut emojis = vec![];
if let Some(ref value) = object.tag { for tag_value in object.tag.clone() {
let list: Vec<JsonValue> = parse_property_value(value) let tag_type = tag_value["type"].as_str().unwrap_or(HASHTAG);
.map_err(|_| ValidationError("invalid tag property"))?; if tag_type == HASHTAG {
for tag_value in list { let tag: Tag = match serde_json::from_value(tag_value) {
let tag_type = tag_value["type"].as_str().unwrap_or(HASHTAG); Ok(tag) => tag,
if tag_type == HASHTAG { Err(_) => {
let tag: Tag = match serde_json::from_value(tag_value) { log::warn!("invalid hashtag");
Ok(tag) => tag, continue;
Err(_) => { },
log::warn!("invalid hashtag"); };
continue; if let Some(tag_name) = tag.name {
}, // Ignore invalid tags
}; if let Ok(tag_name) = normalize_hashtag(&tag_name) {
if let Some(tag_name) = tag.name { if !hashtags.contains(&tag_name) {
// Ignore invalid tags hashtags.push(tag_name);
if let Ok(tag_name) = normalize_hashtag(&tag_name) {
if !hashtags.contains(&tag_name) {
hashtags.push(tag_name);
};
}; };
}; };
} else if tag_type == MENTION { };
let tag: Tag = match serde_json::from_value(tag_value) { } else if tag_type == MENTION {
Ok(tag) => tag, let tag: Tag = match serde_json::from_value(tag_value) {
Err(_) => { Ok(tag) => tag,
log::warn!("invalid mention"); Err(_) => {
continue; log::warn!("invalid mention");
}, continue;
}; },
// Try to find profile by actor ID. };
if let Some(href) = tag.href { // Try to find profile by actor ID.
if let Ok(username) = parse_local_actor_id(&instance.url(), &href) { if let Some(href) = tag.href {
let user = get_user_by_name(db_client, &username).await?; if let Ok(username) = parse_local_actor_id(&instance.url(), &href) {
if !mentions.contains(&user.id) { let user = get_user_by_name(db_client, &username).await?;
mentions.push(user.id); if !mentions.contains(&user.id) {
}; mentions.push(user.id);
continue;
}; };
// NOTE: `href` attribute is usually actor ID
// but also can be actor URL (profile link).
match get_or_import_profile_by_actor_id(
db_client,
&instance,
&media_dir,
&href,
).await {
Ok(profile) => {
if !mentions.contains(&profile.id) {
mentions.push(profile.id);
};
continue;
},
Err(error) => {
log::warn!(
"failed to find mentioned profile by ID {}: {}",
href,
error,
);
},
};
};
// Try to find profile by actor address
let tag_name = match tag.name {
Some(name) => name,
None => {
log::warn!("failed to parse mention");
continue;
},
};
if let Ok(actor_address) = mention_to_address(&tag_name) {
let profile = match get_or_import_profile_by_actor_address(
db_client,
&instance,
&media_dir,
&actor_address,
).await {
Ok(profile) => profile,
Err(error @ (
HandlerError::FetchError(_) |
HandlerError::DatabaseError(DatabaseError::NotFound(_))
)) => {
// Ignore mention if fetcher fails
// Ignore mention if local address is not valid
log::warn!(
"failed to find mentioned profile {}: {}",
actor_address,
error,
);
continue;
},
Err(other_error) => return Err(other_error),
};
if !mentions.contains(&profile.id) {
mentions.push(profile.id);
};
} else {
log::warn!("failed to parse mention {}", tag_name);
};
} else if tag_type == LINK {
let tag: LinkTag = match serde_json::from_value(tag_value) {
Ok(tag) => tag,
Err(_) => {
log::warn!("invalid link tag");
continue;
},
};
if tag.media_type != AP_MEDIA_TYPE &&
tag.media_type != AS_MEDIA_TYPE
{
// Unknown media type
continue; continue;
}; };
let href = redirects.get(&tag.href).unwrap_or(&tag.href); // NOTE: `href` attribute is usually actor ID
let linked = get_post_by_object_id( // but also can be actor URL (profile link).
match get_or_import_profile_by_actor_id(
db_client, db_client,
&instance.url(),
href,
).await?;
if !links.contains(&linked.id) {
links.push(linked.id);
};
} else if tag_type == EMOJI {
let tag: EmojiTag = match serde_json::from_value(tag_value) {
Ok(tag) => tag,
Err(error) => {
log::warn!("invalid emoji tag: {}", error);
continue;
},
};
if emojis.len() >= EMOJIS_MAX_NUM {
log::warn!("too many emojis");
continue;
};
let tag_name = tag.name.trim_matches(':');
if validate_emoji_name(tag_name).is_err() {
log::warn!("invalid emoji name");
continue;
};
let maybe_emoji_id = match get_emoji_by_remote_object_id(
db_client,
&tag.id,
).await {
Ok(emoji) => {
if emoji.updated_at >= tag.updated {
// Emoji already exists and is up to date
if !emojis.contains(&emoji.id) {
emojis.push(emoji.id);
};
continue;
};
if emoji.emoji_name != tag_name {
log::warn!("emoji name can't be changed");
continue;
};
Some(emoji.id)
},
Err(DatabaseError::NotFound("emoji")) => None,
Err(other_error) => return Err(other_error.into()),
};
let (file_name, file_size, maybe_media_type) = match fetch_file(
&instance, &instance,
&tag.icon.url,
tag.icon.media_type.as_deref(),
EMOJI_MAX_SIZE,
&media_dir, &media_dir,
&href,
).await { ).await {
Ok(file) => file, Ok(profile) => {
Err(error) => { if !mentions.contains(&profile.id) {
log::warn!("failed to fetch emoji: {}", error); mentions.push(profile.id);
};
continue; continue;
}, },
}; Err(error) => {
let media_type = match maybe_media_type {
Some(media_type) if EMOJI_MEDIA_TYPES.contains(&media_type.as_str()) => {
media_type
},
_ => {
log::warn!( log::warn!(
"unexpected emoji media type: {:?}", "failed to find mentioned profile by ID {}: {}",
maybe_media_type, href,
error,
);
},
};
};
// Try to find profile by actor address
let tag_name = match tag.name {
Some(name) => name,
None => {
log::warn!("failed to parse mention");
continue;
},
};
if let Ok(actor_address) = mention_to_address(&tag_name) {
let profile = match get_or_import_profile_by_actor_address(
db_client,
&instance,
&media_dir,
&actor_address,
).await {
Ok(profile) => profile,
Err(error @ (
HandlerError::FetchError(_) |
HandlerError::DatabaseError(DatabaseError::NotFound(_))
)) => {
// Ignore mention if fetcher fails
// Ignore mention if local address is not valid
log::warn!(
"failed to find mentioned profile {}: {}",
actor_address,
error,
); );
continue; continue;
}, },
Err(other_error) => return Err(other_error),
}; };
log::info!("downloaded emoji {}", tag.icon.url); if !mentions.contains(&profile.id) {
let image = EmojiImage { file_name, file_size, media_type }; mentions.push(profile.id);
let emoji = if let Some(emoji_id) = maybe_emoji_id {
update_emoji(
db_client,
&emoji_id,
image,
&tag.updated,
).await?
} else {
let hostname = get_hostname(&tag.id)
.map_err(|_| ValidationError("invalid emoji ID"))?;
create_emoji(
db_client,
tag_name,
Some(&hostname),
image,
Some(&tag.id),
&tag.updated,
).await?
};
if !emojis.contains(&emoji.id) {
emojis.push(emoji.id);
}; };
} else { } else {
log::warn!( log::warn!("failed to parse mention {}", tag_name);
"skipping tag of type {}",
tag_type,
);
}; };
} else if tag_type == LINK {
let tag: LinkTag = match serde_json::from_value(tag_value) {
Ok(tag) => tag,
Err(_) => {
log::warn!("invalid link tag");
continue;
},
};
if tag.media_type != AP_MEDIA_TYPE &&
tag.media_type != AS_MEDIA_TYPE
{
// Unknown media type
continue;
};
let href = redirects.get(&tag.href).unwrap_or(&tag.href);
let linked = get_post_by_object_id(
db_client,
&instance.url(),
href,
).await?;
if !links.contains(&linked.id) {
links.push(linked.id);
};
} else if tag_type == EMOJI {
let tag: EmojiTag = match serde_json::from_value(tag_value) {
Ok(tag) => tag,
Err(error) => {
log::warn!("invalid emoji tag: {}", error);
continue;
},
};
if emojis.len() >= EMOJIS_MAX_NUM {
log::warn!("too many emojis");
continue;
};
let tag_name = tag.name.trim_matches(':');
if validate_emoji_name(tag_name).is_err() {
log::warn!("invalid emoji name");
continue;
};
let maybe_emoji_id = match get_emoji_by_remote_object_id(
db_client,
&tag.id,
).await {
Ok(emoji) => {
if emoji.updated_at >= tag.updated {
// Emoji already exists and is up to date
if !emojis.contains(&emoji.id) {
emojis.push(emoji.id);
};
continue;
};
if emoji.emoji_name != tag_name {
log::warn!("emoji name can't be changed");
continue;
};
Some(emoji.id)
},
Err(DatabaseError::NotFound("emoji")) => None,
Err(other_error) => return Err(other_error.into()),
};
let (file_name, file_size, maybe_media_type) = match fetch_file(
&instance,
&tag.icon.url,
tag.icon.media_type.as_deref(),
EMOJI_MAX_SIZE,
&media_dir,
).await {
Ok(file) => file,
Err(error) => {
log::warn!("failed to fetch emoji: {}", error);
continue;
},
};
let media_type = match maybe_media_type {
Some(media_type) if EMOJI_MEDIA_TYPES.contains(&media_type.as_str()) => {
media_type
},
_ => {
log::warn!(
"unexpected emoji media type: {:?}",
maybe_media_type,
);
continue;
},
};
log::info!("downloaded emoji {}", tag.icon.url);
let image = EmojiImage { file_name, file_size, media_type };
let emoji = if let Some(emoji_id) = maybe_emoji_id {
update_emoji(
db_client,
&emoji_id,
image,
&tag.updated,
).await?
} else {
let hostname = get_hostname(&tag.id)
.map_err(|_| ValidationError("invalid emoji ID"))?;
create_emoji(
db_client,
tag_name,
Some(&hostname),
image,
Some(&tag.id),
&tag.updated,
).await?
};
if !emojis.contains(&emoji.id) {
emojis.push(emoji.id);
};
} else {
log::warn!(
"skipping tag of type {}",
tag_type,
);
}; };
}; };
if let Some(ref object_id) = object.quote_url { if let Some(ref object_id) = object.quote_url {

View file

@ -1,7 +1,12 @@
use std::collections::HashMap; use std::collections::HashMap;
use chrono::{DateTime, Utc}; use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize}; use serde::{
Deserialize,
Deserializer,
Serialize,
de::{Error as DeserializerError},
};
use serde_json::Value; use serde_json::Value;
use super::constants::{ use super::constants::{
@ -10,6 +15,7 @@ use super::constants::{
W3ID_DATA_INTEGRITY_CONTEXT, W3ID_DATA_INTEGRITY_CONTEXT,
W3ID_SECURITY_CONTEXT, W3ID_SECURITY_CONTEXT,
}; };
use super::receiver::parse_property_value;
use super::vocabulary::HASHTAG; use super::vocabulary::HASHTAG;
#[derive(Deserialize, Serialize)] #[derive(Deserialize, Serialize)]
@ -82,6 +88,20 @@ pub struct EmojiTag {
pub updated: DateTime<Utc>, pub updated: DateTime<Utc>,
} }
fn deserialize_value_array<'de, D>(
deserializer: D,
) -> Result<Vec<Value>, D::Error>
where D: Deserializer<'de>
{
let maybe_value: Option<Value> = Option::deserialize(deserializer)?;
let values = if let Some(value) = maybe_value {
parse_property_value(&value).map_err(DeserializerError::custom)?
} else {
vec![]
};
Ok(values)
}
#[derive(Deserialize)] #[derive(Deserialize)]
#[cfg_attr(test, derive(Default))] #[cfg_attr(test, derive(Default))]
#[serde(rename_all = "camelCase")] #[serde(rename_all = "camelCase")]
@ -102,7 +122,13 @@ pub struct Object {
pub in_reply_to: Option<String>, pub in_reply_to: Option<String>,
pub content: Option<String>, pub content: Option<String>,
pub quote_url: Option<String>, pub quote_url: Option<String>,
pub tag: Option<Value>,
#[serde(
default,
deserialize_with = "deserialize_value_array",
)]
pub tag: Vec<Value>,
pub to: Option<Value>, pub to: Option<Value>,
pub updated: Option<DateTime<Utc>>, pub updated: Option<DateTime<Utc>>,
pub url: Option<Value>, pub url: Option<Value>,