diff --git a/Cargo.lock b/Cargo.lock index 0289d3544..87aa85496 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -10,9 +10,9 @@ checksum = "8f27d075294830fcab6f66e320dab524bc6d048f4a151698e153205559113772" [[package]] name = "activitypub_federation" -version = "0.6.1" +version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee819cada736b6e26c59706f9e6ff89a48060e635c0546ff984d84baefc8c13a" +checksum = "ce5c105760d36108026acde9cb779d8ef4714d5e551f248a9e8e0369b6671b78" dependencies = [ "activitystreams-kinds", "actix-web", diff --git a/Cargo.toml b/Cargo.toml index 9a8bac1fd..6c8d167cc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -89,7 +89,7 @@ lemmy_api_common = { version = "=0.19.6-beta.7", path = "./crates/api_common" } lemmy_routes = { version = "=0.19.6-beta.7", path = "./crates/routes" } lemmy_db_views = { version = "=0.19.6-beta.7", path = "./crates/db_views" } lemmy_federate = { version = "=0.19.6-beta.7", path = "./crates/federate" } -activitypub_federation = { version = "0.6.1", default-features = false, features = [ +activitypub_federation = { version = "0.6.2", default-features = false, features = [ "actix-web", ] } diesel = "2.2.6" diff --git a/crates/api/src/post/get_link_metadata.rs b/crates/api/src/post/get_link_metadata.rs index a777cab17..e680b29c5 100644 --- a/crates/api/src/post/get_link_metadata.rs +++ b/crates/api/src/post/get_link_metadata.rs @@ -16,7 +16,7 @@ pub async fn get_link_metadata( _local_user_view: LocalUserView, ) -> LemmyResult> { let url = Url::parse(&data.url).with_lemmy_type(LemmyErrorType::InvalidUrl)?; - let metadata = fetch_link_metadata(&url, &context).await?; + let metadata = fetch_link_metadata(&url, &context, false).await?; Ok(Json(GetSiteMetadataResponse { metadata })) } diff --git a/crates/api_common/src/request.rs b/crates/api_common/src/request.rs index a70a685ef..ba9a1eb0a 100644 --- a/crates/api_common/src/request.rs +++ b/crates/api_common/src/request.rs @@ -15,20 +15,23 @@ use lemmy_db_schema::source::{ site::Site, }; use lemmy_utils::{ - error::{LemmyError, LemmyErrorExt, LemmyErrorType, LemmyResult}, + error::{FederationError, LemmyError, LemmyErrorExt, LemmyErrorType, LemmyResult}, settings::structs::{PictrsImageMode, Settings}, REQWEST_TIMEOUT, VERSION, }; use mime::{Mime, TEXT_HTML}; use reqwest::{ - header::{CONTENT_TYPE, RANGE}, + header::{CONTENT_TYPE, LOCATION, RANGE}, + redirect::Policy, Client, ClientBuilder, Response, }; use reqwest_middleware::ClientWithMiddleware; use serde::{Deserialize, Serialize}; +use std::net::IpAddr; +use tokio::net::lookup_host; use tracing::{info, warn}; use url::Url; use urlencoding::encode; @@ -41,12 +44,45 @@ pub fn client_builder(settings: &Settings) -> ClientBuilder { .user_agent(user_agent.clone()) .timeout(REQWEST_TIMEOUT) .connect_timeout(REQWEST_TIMEOUT) + .redirect(Policy::none()) .use_rustls_tls() } /// Fetches metadata for the given link and optionally generates thumbnail. #[tracing::instrument(skip_all)] -pub async fn fetch_link_metadata(url: &Url, context: &LemmyContext) -> LemmyResult { +pub async fn fetch_link_metadata( + url: &Url, + context: &LemmyContext, + recursion: bool, +) -> LemmyResult { + if url.scheme() != "http" && url.scheme() != "https" { + return Err(LemmyErrorType::InvalidUrl.into()); + } + + // Resolve the domain and throw an error if it points to any internal IP, + // using logic from nightly IpAddr::is_global. + if !cfg!(debug_assertions) { + // TODO: Replace with IpAddr::is_global() once stabilized + // https://doc.rust-lang.org/std/net/enum.IpAddr.html#method.is_global + let domain = url.domain().ok_or(FederationError::UrlWithoutDomain)?; + let invalid_ip = lookup_host((domain.to_owned(), 80)) + .await? + .any(|addr| match addr.ip() { + IpAddr::V4(addr) => { + addr.is_private() || addr.is_link_local() || addr.is_loopback() || addr.is_multicast() + } + IpAddr::V6(addr) => { + addr.is_loopback() + || addr.is_multicast() + || ((addr.segments()[0] & 0xfe00) == 0xfc00) // is_unique_local + || ((addr.segments()[0] & 0xffc0) == 0xfe80) // is_unicast_link_local + } + }); + if invalid_ip { + return Err(LemmyErrorType::InvalidUrl.into()); + } + } + info!("Fetching site metadata for url: {}", url); // We only fetch the first MB of data in order to not waste bandwidth especially for large // binary files. This high limit is particularly needed for youtube, which includes a lot of @@ -63,6 +99,16 @@ pub async fn fetch_link_metadata(url: &Url, context: &LemmyContext) -> LemmyResu .await? .error_for_status()?; + // Manually follow one redirect, using internal IP check. Further redirects are ignored. + let location = response + .headers() + .get(LOCATION) + .and_then(|l| l.to_str().ok()); + if let (Some(location), false) = (location, recursion) { + let url = location.parse()?; + return Box::pin(fetch_link_metadata(&url, context, true)).await; + } + let mut content_type: Option = response .headers() .get(CONTENT_TYPE) @@ -150,7 +196,9 @@ pub async fn generate_post_link_metadata( context: Data, ) -> LemmyResult<()> { let metadata = match &post.url { - Some(url) => fetch_link_metadata(url, &context).await.unwrap_or_default(), + Some(url) => fetch_link_metadata(url, &context, false) + .await + .unwrap_or_default(), _ => Default::default(), }; @@ -498,7 +546,7 @@ mod tests { async fn test_link_metadata() -> LemmyResult<()> { let context = LemmyContext::init_test_context().await; let sample_url = Url::parse("https://gitlab.com/IzzyOnDroid/repo/-/wikis/FAQ")?; - let sample_res = fetch_link_metadata(&sample_url, &context).await?; + let sample_res = fetch_link_metadata(&sample_url, &context, false).await?; assert_eq!( Some("FAQ · Wiki · IzzyOnDroid / repo · GitLab".to_string()), sample_res.opengraph_data.title diff --git a/crates/apub/src/objects/comment.rs b/crates/apub/src/objects/comment.rs index fd168e370..02e323510 100644 --- a/crates/apub/src/objects/comment.rs +++ b/crates/apub/src/objects/comment.rs @@ -3,7 +3,7 @@ use crate::{ check_apub_id_valid_with_strictness, fetcher::markdown_links::markdown_rewrite_remote_links, mentions::collect_non_local_mentions, - objects::{append_attachments_to_comment, read_from_string_or_source, verify_is_remote_object}, + objects::{append_attachments_to_comment, read_from_string_or_source}, protocol::{ objects::{note::Note, LanguageTag}, InCommunity, @@ -13,7 +13,10 @@ use crate::{ use activitypub_federation::{ config::Data, kinds::object::NoteType, - protocol::{values::MediaTypeMarkdownOrHtml, verification::verify_domains_match}, + protocol::{ + values::MediaTypeMarkdownOrHtml, + verification::{verify_domains_match, verify_is_remote_object}, + }, traits::Object, }; use chrono::{DateTime, Utc}; diff --git a/crates/apub/src/objects/instance.rs b/crates/apub/src/objects/instance.rs index 754172fe2..722e2205e 100644 --- a/crates/apub/src/objects/instance.rs +++ b/crates/apub/src/objects/instance.rs @@ -1,4 +1,3 @@ -use super::verify_is_remote_object; use crate::{ activities::GetActorType, check_apub_id_valid_with_strictness, @@ -15,7 +14,10 @@ use activitypub_federation::{ config::Data, fetch::object_id::ObjectId, kinds::actor::ApplicationType, - protocol::{values::MediaTypeHtml, verification::verify_domains_match}, + protocol::{ + values::MediaTypeHtml, + verification::{verify_domains_match, verify_is_remote_object}, + }, traits::{Actor, Object}, }; use chrono::{DateTime, Utc}; diff --git a/crates/apub/src/objects/mod.rs b/crates/apub/src/objects/mod.rs index f837f7ad3..b679636a3 100644 --- a/crates/apub/src/objects/mod.rs +++ b/crates/apub/src/objects/mod.rs @@ -1,16 +1,8 @@ use crate::protocol::{objects::page::Attachment, Source}; -use activitypub_federation::{ - config::Data, - fetch::object_id::ObjectId, - protocol::values::MediaTypeMarkdownOrHtml, - traits::Object, -}; -use anyhow::anyhow; +use activitypub_federation::{config::Data, protocol::values::MediaTypeMarkdownOrHtml}; use html2md::parse_html; use lemmy_api_common::context::LemmyContext; use lemmy_utils::error::LemmyResult; -use serde::Deserialize; -use std::fmt::Debug; pub mod comment; pub mod community; @@ -62,22 +54,3 @@ pub(crate) async fn append_attachments_to_comment( Ok(content) } - -/// When for example a Post is made in a remote community, the community will send it back, -/// wrapped in Announce. If we simply receive this like any other federated object, overwrite the -/// existing, local Post. In particular, it will set the field local = false, so that the object -/// can't be fetched from the Activitypub HTTP endpoint anymore (which only serves local objects). -pub(crate) fn verify_is_remote_object( - id: &ObjectId, - context: &Data, -) -> LemmyResult<()> -where - T: Object + Debug + Send + 'static, - for<'de2> ::Kind: Deserialize<'de2>, -{ - if id.is_local(context) { - Err(anyhow!("cant accept local object from remote instance").into()) - } else { - Ok(()) - } -} diff --git a/crates/apub/src/objects/person.rs b/crates/apub/src/objects/person.rs index 50f8e8563..9b659fa2f 100644 --- a/crates/apub/src/objects/person.rs +++ b/crates/apub/src/objects/person.rs @@ -1,4 +1,3 @@ -use super::verify_is_remote_object; use crate::{ activities::GetActorType, check_apub_id_valid_with_strictness, @@ -13,7 +12,7 @@ use crate::{ }; use activitypub_federation::{ config::Data, - protocol::verification::verify_domains_match, + protocol::verification::{verify_domains_match, verify_is_remote_object}, traits::{Actor, Object}, }; use chrono::{DateTime, Utc}; diff --git a/crates/apub/src/objects/post.rs b/crates/apub/src/objects/post.rs index e5ea5838a..72d351e80 100644 --- a/crates/apub/src/objects/post.rs +++ b/crates/apub/src/objects/post.rs @@ -3,7 +3,7 @@ use crate::{ check_apub_id_valid_with_strictness, fetcher::markdown_links::{markdown_rewrite_remote_links_opt, to_local_url}, local_site_data_cached, - objects::{read_from_string_or_source_opt, verify_is_remote_object}, + objects::read_from_string_or_source_opt, protocol::{ objects::{ page::{Attachment, AttributedTo, Hashtag, HashtagType, Page, PageType}, @@ -16,7 +16,10 @@ use crate::{ }; use activitypub_federation::{ config::Data, - protocol::{values::MediaTypeMarkdownOrHtml, verification::verify_domains_match}, + protocol::{ + values::MediaTypeMarkdownOrHtml, + verification::{verify_domains_match, verify_is_remote_object}, + }, traits::Object, }; use anyhow::anyhow; diff --git a/crates/apub/src/objects/private_message.rs b/crates/apub/src/objects/private_message.rs index 521419c82..aaa8e99f8 100644 --- a/crates/apub/src/objects/private_message.rs +++ b/crates/apub/src/objects/private_message.rs @@ -1,4 +1,3 @@ -use super::verify_is_remote_object; use crate::{ check_apub_id_valid_with_strictness, fetcher::markdown_links::markdown_rewrite_remote_links, @@ -10,7 +9,10 @@ use crate::{ }; use activitypub_federation::{ config::Data, - protocol::{values::MediaTypeHtml, verification::verify_domains_match}, + protocol::{ + values::MediaTypeHtml, + verification::{verify_domains_match, verify_is_remote_object}, + }, traits::Object, }; use chrono::{DateTime, Utc};