From a57658d99c385522328ba8ae5ddaf5d85ffa1475 Mon Sep 17 00:00:00 2001 From: Nick Webster Date: Fri, 1 Sep 2023 02:36:39 +1200 Subject: [PATCH] Adding a new config flag to disable pictrs caching for thumbnails (#3897) * add logic to prevent downloading remote pictrs images * apply formatting * Do not attempt a pictrs fetch if the remote image is also on a pictrs instance * Do not attempt a pictrs fetch if the remote image is also on a pictrs instance and cache_federated_images is false * Generalising the no caching option to handle all remote images * rustfmt * Return None if the URL is not an image * Updating defaults.hjson * fixing typo * Fixing typo * Skip cloning the Url unless we need to * using a HEAD request for checking the content type, saving bandwidth/improving perf * Removing early returns * Switching back to GET requests for Content-Type because pictrs does not handle HEAD requests * Simplifying logic and using metadata_image instead of url if we do not get a pictrs thumbnail * Removing unused import * Return None as a thumbnail if caching is disabled * formatting --------- Co-authored-by: Djones4822 --- config/defaults.hjson | 2 + crates/api_common/src/request.rs | 107 ++++++++++++++------------- crates/utils/src/error.rs | 1 + crates/utils/src/settings/structs.rs | 4 + docker/lemmy.hjson | 1 + 5 files changed, 62 insertions(+), 53 deletions(-) diff --git a/config/defaults.hjson b/config/defaults.hjson index 1e5597a11..9511dc5bf 100644 --- a/config/defaults.hjson +++ b/config/defaults.hjson @@ -43,6 +43,8 @@ url: "http://localhost:8080/" # Set a custom pictrs API key. ( Required for deleting images ) api_key: "string" + # Cache remote images + cache_remote_images: true } # Email sending configuration. All options except login/password are mandatory email: { diff --git a/crates/api_common/src/request.rs b/crates/api_common/src/request.rs index 90364a978..55a9be1ff 100644 --- a/crates/api_common/src/request.rs +++ b/crates/api_common/src/request.rs @@ -123,24 +123,29 @@ pub(crate) async fn fetch_pictrs( let pictrs_config = settings.pictrs_config()?; is_image_content_type(client, image_url).await?; - let fetch_url = format!( - "{}image/download?url={}", - pictrs_config.url, - utf8_percent_encode(image_url.as_str(), NON_ALPHANUMERIC) // TODO this might not be needed - ); + if pictrs_config.cache_remote_images { + // fetch remote non-pictrs images for persistent thumbnail link + let fetch_url = format!( + "{}image/download?url={}", + pictrs_config.url, + utf8_percent_encode(image_url.as_str(), NON_ALPHANUMERIC) // TODO this might not be needed + ); - let response = client - .get(&fetch_url) - .timeout(REQWEST_TIMEOUT) - .send() - .await?; + let response = client + .get(&fetch_url) + .timeout(REQWEST_TIMEOUT) + .send() + .await?; - let response: PictrsResponse = response.json().await.map_err(LemmyError::from)?; + let response: PictrsResponse = response.json().await.map_err(LemmyError::from)?; - if response.msg == "ok" { - Ok(response) + if response.msg == "ok" { + Ok(response) + } else { + Err(LemmyErrorType::PictrsResponseError(response.msg))? + } } else { - Err(LemmyErrorType::PictrsResponseError(response.msg))? + Err(LemmyErrorType::PictrsCachingDisabled)? } } @@ -185,7 +190,7 @@ pub async fn purge_image_from_pictrs( } /// Both are options, since the URL might be either an html page, or an image -/// Returns the SiteMetadata, and a Pictrs URL, if there is a picture associated +/// Returns the SiteMetadata, and an image URL, if there is a picture associated #[tracing::instrument(skip_all)] pub async fn fetch_site_data( client: &ClientWithMiddleware, @@ -200,50 +205,46 @@ pub async fn fetch_site_data( // Warning, this may ignore SSL errors let metadata_option = fetch_site_metadata(client, url).await.ok(); if !include_image { - return (metadata_option, None); + (metadata_option, None) + } else { + let thumbnail_url = + fetch_pictrs_url_from_site_metadata(client, &metadata_option, settings, url) + .await + .ok(); + (metadata_option, thumbnail_url) } - - let missing_pictrs_file = - |r: PictrsResponse| r.files.first().expect("missing pictrs file").file.clone(); - - // Fetch pictrs thumbnail - let pictrs_hash = match &metadata_option { - Some(metadata_res) => match &metadata_res.image { - // Metadata, with image - // Try to generate a small thumbnail if there's a full sized one from post-links - Some(metadata_image) => fetch_pictrs(client, settings, metadata_image) - .await - .map(missing_pictrs_file), - // Metadata, but no image - None => fetch_pictrs(client, settings, url) - .await - .map(missing_pictrs_file), - }, - // No metadata, try to fetch the URL as an image - None => fetch_pictrs(client, settings, url) - .await - .map(missing_pictrs_file), - }; - - // The full urls are necessary for federation - let pictrs_thumbnail = pictrs_hash - .map(|p| { - Url::parse(&format!( - "{}/pictrs/image/{}", - settings.get_protocol_and_hostname(), - p - )) - .ok() - }) - .ok() - .flatten(); - - (metadata_option, pictrs_thumbnail.map(Into::into)) } None => (None, None), } } +async fn fetch_pictrs_url_from_site_metadata( + client: &ClientWithMiddleware, + metadata_option: &Option, + settings: &Settings, + url: &Url, +) -> Result { + let pictrs_res = match metadata_option { + Some(metadata_res) => match &metadata_res.image { + // Metadata, with image + // Try to generate a small thumbnail if there's a full sized one from post-links + Some(metadata_image) => fetch_pictrs(client, settings, metadata_image).await, + // Metadata, but no image + None => fetch_pictrs(client, settings, url).await, + }, + // No metadata, try to fetch the URL as an image + None => fetch_pictrs(client, settings, url).await, + }?; + + Url::parse(&format!( + "{}/pictrs/image/{}", + settings.get_protocol_and_hostname(), + pictrs_res.files.first().expect("missing pictrs file").file + )) + .map(Into::into) + .map_err(Into::into) +} + #[tracing::instrument(skip_all)] async fn is_image_content_type(client: &ClientWithMiddleware, url: &Url) -> Result<(), LemmyError> { let response = client.get(url.as_str()).send().await?; diff --git a/crates/utils/src/error.rs b/crates/utils/src/error.rs index e1b370387..ac7e386f5 100644 --- a/crates/utils/src/error.rs +++ b/crates/utils/src/error.rs @@ -87,6 +87,7 @@ pub enum LemmyErrorType { SiteMetadataPageIsNotDoctypeHtml, PictrsResponseError(String), PictrsPurgeResponseError(String), + PictrsCachingDisabled, ImageUrlMissingPathSegments, ImageUrlMissingLastPathSegment, PictrsApiKeyNotProvided, diff --git a/crates/utils/src/settings/structs.rs b/crates/utils/src/settings/structs.rs index 16a27b93a..01b968621 100644 --- a/crates/utils/src/settings/structs.rs +++ b/crates/utils/src/settings/structs.rs @@ -62,6 +62,10 @@ pub struct PictrsConfig { /// Set a custom pictrs API key. ( Required for deleting images ) #[default(None)] pub api_key: Option, + + /// Cache remote images + #[default(true)] + pub cache_remote_images: bool, } #[derive(Debug, Deserialize, Serialize, Clone, SmartDefault, Document)] diff --git a/docker/lemmy.hjson b/docker/lemmy.hjson index f7ffcc1e3..4756ddadf 100644 --- a/docker/lemmy.hjson +++ b/docker/lemmy.hjson @@ -21,6 +21,7 @@ pictrs: { url: "http://pictrs:8080/" # api_key: "API_KEY" + cache_remote_images: true } #opentelemetry_url: "http://otel:4137"