2021-11-13 17:37:31 +00:00
|
|
|
use std::path::Path;
|
2022-07-14 17:40:34 +00:00
|
|
|
use std::time::Duration;
|
2021-04-09 00:22:17 +00:00
|
|
|
|
2022-12-03 10:46:24 +00:00
|
|
|
use reqwest::{Client, Method, Proxy, RequestBuilder};
|
2021-04-09 00:22:17 +00:00
|
|
|
use serde_json::Value;
|
|
|
|
|
2023-01-14 23:31:11 +00:00
|
|
|
use crate::activitypub::{
|
|
|
|
actors::types::Actor,
|
|
|
|
constants::AP_MEDIA_TYPE,
|
|
|
|
types::Object,
|
|
|
|
};
|
2021-11-15 19:25:43 +00:00
|
|
|
use crate::config::Instance;
|
2022-10-23 16:04:02 +00:00
|
|
|
use crate::http_signatures::create::{
|
|
|
|
create_http_signature,
|
|
|
|
HttpSignatureError,
|
|
|
|
};
|
2023-01-14 00:46:49 +00:00
|
|
|
use crate::utils::files::{
|
|
|
|
save_file,
|
|
|
|
sniff_media_type,
|
|
|
|
SUPPORTED_MEDIA_TYPES,
|
|
|
|
};
|
2022-10-18 09:52:32 +00:00
|
|
|
use crate::utils::urls::guess_protocol;
|
2023-01-10 21:26:42 +00:00
|
|
|
use crate::webfinger::types::{ActorAddress, JsonResourceDescriptor};
|
2021-04-09 00:22:17 +00:00
|
|
|
|
2022-07-14 17:40:34 +00:00
|
|
|
// Connect timeout for HTTP clients created by the fetcher, in seconds
const FETCHER_CONNECTION_TIMEOUT: u64 = 30;
|
|
|
|
|
2021-04-09 00:22:17 +00:00
|
|
|
#[derive(thiserror::Error, Debug)]
|
|
|
|
pub enum FetchError {
|
2021-11-15 19:25:43 +00:00
|
|
|
#[error(transparent)]
|
2022-10-23 16:04:02 +00:00
|
|
|
SignatureError(#[from] HttpSignatureError),
|
2021-11-15 19:25:43 +00:00
|
|
|
|
2021-04-09 00:22:17 +00:00
|
|
|
#[error(transparent)]
|
|
|
|
RequestError(#[from] reqwest::Error),
|
|
|
|
|
2022-10-24 21:19:48 +00:00
|
|
|
#[error("json parse error: {0}")]
|
2021-04-09 00:22:17 +00:00
|
|
|
JsonParseError(#[from] serde_json::Error),
|
|
|
|
|
2022-07-10 20:15:44 +00:00
|
|
|
#[error(transparent)]
|
2022-08-18 21:01:35 +00:00
|
|
|
FileError(#[from] std::io::Error),
|
2021-04-09 00:22:17 +00:00
|
|
|
|
2023-01-26 13:52:30 +00:00
|
|
|
#[error("too many objects")]
|
|
|
|
RecursionError,
|
|
|
|
|
2021-04-09 00:22:17 +00:00
|
|
|
#[error("{0}")]
|
|
|
|
OtherError(&'static str),
|
|
|
|
}
|
|
|
|
|
2022-10-19 18:39:47 +00:00
|
|
|
fn build_client(instance: &Instance) -> reqwest::Result<Client> {
|
|
|
|
let mut client_builder = Client::builder();
|
2022-07-14 17:40:34 +00:00
|
|
|
let connect_timeout = Duration::from_secs(FETCHER_CONNECTION_TIMEOUT);
|
2022-10-19 18:39:47 +00:00
|
|
|
if let Some(ref proxy_url) = instance.proxy_url {
|
|
|
|
let proxy = Proxy::all(proxy_url)?;
|
|
|
|
client_builder = client_builder.proxy(proxy);
|
|
|
|
};
|
|
|
|
client_builder
|
2022-07-14 17:40:34 +00:00
|
|
|
.connect_timeout(connect_timeout)
|
|
|
|
.build()
|
|
|
|
}
|
|
|
|
|
2022-12-03 10:46:24 +00:00
|
|
|
fn build_request(
|
|
|
|
instance: &Instance,
|
|
|
|
client: Client,
|
|
|
|
method: Method,
|
|
|
|
url: &str,
|
|
|
|
) -> RequestBuilder {
|
|
|
|
let mut request_builder = client.request(method, url);
|
|
|
|
if !instance.is_private {
|
|
|
|
// Public instance should set User-Agent header
|
|
|
|
request_builder = request_builder
|
|
|
|
.header(reqwest::header::USER_AGENT, instance.agent());
|
|
|
|
};
|
|
|
|
request_builder
|
|
|
|
}
|
|
|
|
|
2021-11-15 19:25:43 +00:00
|
|
|
/// Sends GET request to fetch AP object
|
|
|
|
async fn send_request(
|
|
|
|
instance: &Instance,
|
|
|
|
url: &str,
|
|
|
|
query_params: &[(&str, &str)],
|
|
|
|
) -> Result<String, FetchError> {
|
2022-10-19 18:39:47 +00:00
|
|
|
let client = build_client(instance)?;
|
2022-12-03 10:46:24 +00:00
|
|
|
let mut request_builder = build_request(instance, client, Method::GET, url)
|
|
|
|
.header(reqwest::header::ACCEPT, AP_MEDIA_TYPE);
|
|
|
|
|
2021-11-15 19:25:43 +00:00
|
|
|
if !query_params.is_empty() {
|
|
|
|
request_builder = request_builder.query(query_params);
|
|
|
|
};
|
2021-11-18 14:56:52 +00:00
|
|
|
if !instance.is_private {
|
|
|
|
// Only public instance can send signed request
|
|
|
|
let headers = create_http_signature(
|
|
|
|
Method::GET,
|
|
|
|
url,
|
|
|
|
"",
|
|
|
|
&instance.actor_key,
|
|
|
|
&instance.actor_key_id(),
|
|
|
|
)?;
|
|
|
|
request_builder = request_builder
|
|
|
|
.header("Host", headers.host)
|
|
|
|
.header("Date", headers.date)
|
|
|
|
.header("Signature", headers.signature);
|
|
|
|
};
|
2021-11-15 19:25:43 +00:00
|
|
|
|
|
|
|
let data = request_builder
|
|
|
|
.send().await?
|
|
|
|
.error_for_status()?
|
|
|
|
.text().await?;
|
|
|
|
Ok(data)
|
|
|
|
}
|
|
|
|
|
2022-05-02 00:01:57 +00:00
|
|
|
pub async fn fetch_file(
|
2022-10-19 18:39:47 +00:00
|
|
|
instance: &Instance,
|
2022-05-02 00:01:57 +00:00
|
|
|
url: &str,
|
2023-01-04 20:04:10 +00:00
|
|
|
maybe_media_type: Option<&str>,
|
2023-01-20 00:18:50 +00:00
|
|
|
file_max_size: usize,
|
2022-05-02 00:01:57 +00:00
|
|
|
output_dir: &Path,
|
2023-01-19 21:57:29 +00:00
|
|
|
) -> Result<(String, usize, Option<String>), FetchError> {
|
2022-10-19 18:39:47 +00:00
|
|
|
let client = build_client(instance)?;
|
2022-12-03 10:46:24 +00:00
|
|
|
let request_builder =
|
|
|
|
build_request(instance, client, Method::GET, url);
|
2022-12-03 11:16:40 +00:00
|
|
|
let response = request_builder.send().await?.error_for_status()?;
|
2022-09-01 12:21:25 +00:00
|
|
|
if let Some(file_size) = response.content_length() {
|
2023-01-20 00:18:50 +00:00
|
|
|
let file_size: usize = file_size.try_into()
|
|
|
|
.expect("value should be within bounds");
|
2023-01-16 16:54:57 +00:00
|
|
|
if file_size > file_max_size {
|
2022-09-01 12:21:25 +00:00
|
|
|
return Err(FetchError::OtherError("file is too large"));
|
|
|
|
};
|
|
|
|
};
|
2022-05-02 00:01:57 +00:00
|
|
|
let file_data = response.bytes().await?;
|
2023-01-19 21:57:29 +00:00
|
|
|
let file_size = file_data.len();
|
|
|
|
if file_size > file_max_size {
|
2022-08-18 22:50:40 +00:00
|
|
|
return Err(FetchError::OtherError("file is too large"));
|
|
|
|
};
|
2023-01-04 20:04:10 +00:00
|
|
|
let maybe_media_type = maybe_media_type
|
|
|
|
.map(|media_type| media_type.to_string())
|
|
|
|
// Sniff media type if not provided
|
|
|
|
.or(sniff_media_type(&file_data))
|
2023-01-14 00:46:49 +00:00
|
|
|
// Remove media type if it is not supported to prevent XSS
|
|
|
|
.filter(|media_type| {
|
|
|
|
if SUPPORTED_MEDIA_TYPES.contains(&media_type.as_str()) {
|
|
|
|
true
|
|
|
|
} else {
|
|
|
|
log::info!(
|
|
|
|
"unsupported media type {}: {}",
|
|
|
|
media_type,
|
|
|
|
url,
|
|
|
|
);
|
|
|
|
false
|
|
|
|
}
|
|
|
|
});
|
2023-01-06 01:14:01 +00:00
|
|
|
let file_name = save_file(
|
|
|
|
file_data.to_vec(),
|
|
|
|
output_dir,
|
|
|
|
maybe_media_type.as_deref(),
|
|
|
|
)?;
|
2023-01-19 21:57:29 +00:00
|
|
|
Ok((file_name, file_size, maybe_media_type))
|
2022-05-02 00:01:57 +00:00
|
|
|
}
|
|
|
|
|
2022-07-08 23:14:23 +00:00
|
|
|
pub async fn perform_webfinger_query(
|
2021-11-15 19:25:43 +00:00
|
|
|
instance: &Instance,
|
2022-05-02 22:50:07 +00:00
|
|
|
actor_address: &ActorAddress,
|
2022-07-08 23:14:23 +00:00
|
|
|
) -> Result<String, FetchError> {
|
2022-10-04 18:34:00 +00:00
|
|
|
let webfinger_account_uri = format!("acct:{}", actor_address);
|
2022-05-02 22:50:07 +00:00
|
|
|
let webfinger_url = format!(
|
2022-10-18 09:52:32 +00:00
|
|
|
"{}://{}/.well-known/webfinger",
|
|
|
|
guess_protocol(&actor_address.hostname),
|
2022-10-09 14:26:58 +00:00
|
|
|
actor_address.hostname,
|
2022-05-02 22:50:07 +00:00
|
|
|
);
|
2022-10-19 18:39:47 +00:00
|
|
|
let client = build_client(instance)?;
|
2022-12-03 10:46:24 +00:00
|
|
|
let request_builder =
|
|
|
|
build_request(instance, client, Method::GET, &webfinger_url);
|
2022-02-08 19:51:40 +00:00
|
|
|
let webfinger_data = request_builder
|
2021-04-09 00:22:17 +00:00
|
|
|
.query(&[("resource", webfinger_account_uri)])
|
|
|
|
.send().await?
|
2021-11-07 13:43:20 +00:00
|
|
|
.error_for_status()?
|
2021-04-09 00:22:17 +00:00
|
|
|
.text().await?;
|
|
|
|
let jrd: JsonResourceDescriptor = serde_json::from_str(&webfinger_data)?;
|
2022-07-08 23:14:23 +00:00
|
|
|
let link = jrd.links.into_iter()
|
2021-04-09 00:22:17 +00:00
|
|
|
.find(|link| link.rel == "self")
|
|
|
|
.ok_or(FetchError::OtherError("self link not found"))?;
|
2022-07-08 23:14:23 +00:00
|
|
|
let actor_url = link.href
|
2021-04-09 00:22:17 +00:00
|
|
|
.ok_or(FetchError::OtherError("account href not found"))?;
|
2022-07-08 23:14:23 +00:00
|
|
|
Ok(actor_url)
|
2021-04-09 00:22:17 +00:00
|
|
|
}
|
|
|
|
|
2022-05-22 00:07:01 +00:00
|
|
|
pub async fn fetch_actor(
|
2022-05-22 17:14:07 +00:00
|
|
|
instance: &Instance,
|
|
|
|
actor_url: &str,
|
|
|
|
) -> Result<Actor, FetchError> {
|
|
|
|
let actor_json = send_request(instance, actor_url, &[]).await?;
|
2022-07-09 20:35:04 +00:00
|
|
|
let actor: Actor = serde_json::from_str(&actor_json)?;
|
|
|
|
if actor.id != actor_url {
|
|
|
|
log::warn!("redirected from {} to {}", actor_url, actor.id);
|
|
|
|
};
|
2022-05-22 17:14:07 +00:00
|
|
|
Ok(actor)
|
|
|
|
}
|
|
|
|
|
2021-10-09 23:59:45 +00:00
|
|
|
pub async fn fetch_object(
|
2021-11-18 14:37:48 +00:00
|
|
|
instance: &Instance,
|
2021-10-09 23:59:45 +00:00
|
|
|
object_url: &str,
|
|
|
|
) -> Result<Object, FetchError> {
|
2021-11-18 14:37:48 +00:00
|
|
|
let object_json = send_request(instance, object_url, &[]).await?;
|
2021-10-09 23:59:45 +00:00
|
|
|
let object_value: Value = serde_json::from_str(&object_json)?;
|
|
|
|
let object: Object = serde_json::from_value(object_value)?;
|
|
|
|
Ok(object)
|
|
|
|
}
|