Implement image pre-processing on upload

asonix 2022-09-25 15:17:33 -05:00
parent f98fec7d2a
commit 50d118a2a7
10 changed files with 190 additions and 29 deletions

Cargo.lock (generated)

@@ -1580,6 +1580,7 @@ dependencies = [
 "serde",
 "serde_cbor",
 "serde_json",
 "serde_urlencoded",
 "sha2",
 "sled",
 "storage-path-generator",


@ -40,11 +40,12 @@ once_cell = "1.4.0"
opentelemetry = { version = "0.18", features = ["rt-tokio"] }
opentelemetry-otlp = "0.11"
pin-project-lite = "0.2.7"
quick-xml = { version = "0.24.1", features = ["serialize"] }
rusty-s3 = "0.3.2"
serde = { version = "1.0", features = ["derive"] }
serde_cbor = "0.11.2"
serde_json = "1.0"
quick-xml = { version = "0.24.1", features = ["serialize"] }
serde_urlencoded = "0.7.1"
sha2 = "0.10.0"
sled = { version = "0.34.7" }
storage-path-generator = "0.1.0"


@@ -9,7 +9,7 @@ _a simple image hosting service_
## Usage
### Running
```
pict-rs 0.4.0-alpha.1
pict-rs 0.4.0-alpha.7
asonix <asonix@asonix.dog>
A simple image hosting service
@@ -77,6 +77,9 @@ OPTIONS:
    -h, --help
            Print help information

    --media-cache-duration <MEDIA_CACHE_DURATION>
            How long, in hours, to keep media ingested through the "cached" endpoint

    --media-enable-silent-video <MEDIA_ENABLE_SILENT_VIDEO>
            Whether to enable GIF and silent MP4 uploads. Full videos are unsupported
@@ -98,11 +101,14 @@ OPTIONS:
    --media-max-width <MEDIA_MAX_WIDTH>
            The maximum width, in pixels, for uploaded media

    --media-preprocess-steps <MEDIA_PREPROCESS_STEPS>
            Optional pre-processing steps for uploaded media

    --media-skip-validate-imports <MEDIA_SKIP_VALIDATE_IMPORTS>
            Whether to validate media on the "import" endpoint

    --worker-id <WORKER_ID>
            ID of this pict-rs node. Doesn't do much yet

SUBCOMMANDS:
    filesystem    Run pict-rs with filesystem storage
@@ -327,7 +333,7 @@ set.
A secure API key can be generated by any password generator.
- `POST /internal/import` for uploading an image while preserving the filename as the first alias.
The upload format and response format are the same as the `POST /image` endpoint.
- `POST /internal/purge?alias={alias} Purge a file by its alias. This removes all aliases and
- `POST /internal/purge?alias={alias}` Purge a file by its alias. This removes all aliases and
files associated with the query.
This endpoint returns the following JSON
@@ -337,10 +343,13 @@ A secure API key can be generated by any password generator.
"aliases": ["asdf.png"]
}
```
- `GET /internal/aliases?alias={alias} Get the aliases for a file by its alias
- `GET /internal/aliases?alias={alias}` Get the aliases for a file by its alias
- `?alias={alias}` get aliases by alias
This endpoint returns the same JSON as the purge endpoint
- `DELETE /internal/variants` Queue a cleanup for generated variants of uploaded images.
If any of the cleaned variants are fetched again, they will be re-generated.
Additionally, all endpoints support setting deadlines, after which the request will cease
processing. To enable deadlines for your requests, you can set the `X-Request-Deadline` header to an


@@ -115,6 +115,13 @@ path = '/mnt'
## Media Processing Configuration
[media]
## Optional: preprocessing steps for uploaded images
# environment variable: PICTRS__MEDIA__PREPROCESS_STEPS
# default: empty
#
# This configuration is the same format as the process endpoint's query arguments
preprocess_steps = 'crop=16x9&resize=1200&blur=0.2'
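The value is ordinary URL-encoded form data, so it decodes with `serde_urlencoded` (the dependency added to Cargo.toml above). A minimal sketch, using only the example value shown here:

```rust
// Minimal sketch: decode a preprocess_steps value into the ordered
// (operation, argument) pairs the config loader works with.
fn main() {
    let steps: Vec<(String, String)> =
        serde_urlencoded::from_str("crop=16x9&resize=1200&blur=0.2")
            .expect("valid query string");

    assert_eq!(steps[0], ("crop".to_string(), "16x9".to_string()));
    assert_eq!(steps[1], ("resize".to_string(), "1200".to_string()));
    assert_eq!(steps[2], ("blur".to_string(), "0.2".to_string()));
}
```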
## Optional: max media width (in pixels)
# environment variable: PICTRS__MEDIA__MAX_WIDTH
# default: 10,000
@@ -189,15 +196,36 @@ cache_capacity = 67108864
# available options: filesystem, object_storage
type = 'object_storage'
## Required: endpoint at which the object storage exists
# environment variable: PICTRS__STORE__ENDPOINT
# default: empty
#
# examples:
# - `http://localhost:9000` # minio
# - `https://s3.dualstack.eu-west-1.amazonaws.com` # s3
endpoint = 'http://minio:9000'
## Optional: How to format object storage requests
# environment variable: PICTRS__STORE__USE_PATH_STYLE
# default: false
#
# When this is true, objects will be fetched from http{s}://{endpoint}:{port}/{bucket_name}/{object}
# When false, objects will be fetched from http{s}://{bucket_name}.{endpoint}:{port}/{object}
#
# Set to true when using minio
use_path_style = false
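For illustration, a sketch of how the two styles shape the final URL; the endpoint, bucket, and object names here are hypothetical:

```rust
// Sketch: path-style vs virtual-hosted-style object URLs.
fn object_url(path_style: bool) -> String {
    let (endpoint, bucket, object) = ("minio:9000", "pict-rs", "abc.png");

    if path_style {
        // Bucket in the path -- what minio expects.
        format!("http://{endpoint}/{bucket}/{object}")
    } else {
        // Bucket as a subdomain -- the AWS S3 default.
        format!("http://{bucket}.{endpoint}/{object}")
    }
}

fn main() {
    assert_eq!(object_url(true), "http://minio:9000/pict-rs/abc.png");
    assert_eq!(object_url(false), "http://pict-rs.minio:9000/abc.png");
}
```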
## Required: object storage bucket name
# environment variable: PICTRS__STORE__BUCKET_NAME
# default: empty
bucket_name = 'BUCKET_NAME'
bucket_name = 'pict-rs'
## Required: object storage region
# environment variable: PICTRS__STORE__REGION
# default: empty
region = 'REGION'
#
# When using minio, this can be set to `minio`
region = 'minio'
## Required: object storage access key
# environment variable: PICTRS__STORE__ACCESS_KEY
@@ -209,11 +237,6 @@ access_key = 'ACCESS_KEY'
# default: empty
secret_key = 'SECRET_KEY'
## Optional: object storage security token
# environment variable: PICTRS__STORE__SECURITY_TOKEN
# default: empty
security_token = 'SECURITY_TOKEN'
## Optional: object storage session token
# environment variable: PICTRS__STORE__SESSION_TOKEN
# default: empty


@@ -45,6 +45,7 @@ impl Args {
            address,
            api_key,
            worker_id,
            media_preprocess_steps,
            media_skip_validate_imports,
            media_max_width,
            media_max_height,
@@ -62,6 +63,7 @@ impl Args {
                worker_id,
            };

            let media = Media {
                preprocess_steps: media_preprocess_steps,
                skip_validate_imports: media_skip_validate_imports,
                max_width: media_max_width,
                max_height: media_max_height,
@@ -299,6 +301,8 @@ struct OldDb {
#[derive(Debug, Default, serde::Serialize)]
#[serde(rename_all = "snake_case")]
struct Media {
    #[serde(skip_serializing_if = "Option::is_none")]
    preprocess_steps: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    max_width: Option<usize>,
    #[serde(skip_serializing_if = "Option::is_none")]
@@ -383,9 +387,16 @@ struct Run {
    #[clap(long)]
    api_key: Option<String>,

    /// ID of this pict-rs node. Doesn't do much yet
    #[clap(long)]
    worker_id: Option<String>,

    /// Optional pre-processing steps for uploaded media.
    ///
    /// All still images will be put through these steps before saving
    #[clap(long)]
    media_preprocess_steps: Option<String>,

    /// Whether to validate media on the "import" endpoint
    #[clap(long)]
    media_skip_validate_imports: Option<bool>,


@@ -2,6 +2,7 @@ use crate::{
    config::primitives::{ImageFormat, LogFormat, Store, Targets},
    serde_str::Serde,
};
use once_cell::sync::OnceCell;
use std::{collections::BTreeSet, net::SocketAddr, path::PathBuf};
use url::Url;
@@ -86,6 +87,9 @@ pub(crate) struct OldDb {
#[derive(Clone, Debug, serde::Deserialize, serde::Serialize)]
#[serde(rename_all = "snake_case")]
pub(crate) struct Media {
    #[serde(skip_serializing_if = "Option::is_none")]
    pub(crate) preprocess_steps: Option<String>,
    pub(crate) max_width: usize,
    pub(crate) max_height: usize,
@@ -106,6 +110,25 @@ pub(crate) struct Media {
    pub(crate) cache_duration: i64,
}
impl Media {
    pub(crate) fn preprocess_steps(&self) -> Option<&[(String, String)]> {
        // Parsed once and cached; the configuration string never changes at
        // runtime, so every call shares the same parse.
        static PREPROCESS_STEPS: OnceCell<Vec<(String, String)>> = OnceCell::new();

        if let Some(steps) = &self.preprocess_steps {
            let steps = PREPROCESS_STEPS
                .get_or_try_init(|| serde_urlencoded::from_str::<Vec<(String, String)>>(steps))
                .expect("Invalid preprocess_steps configuration")
                .as_slice();

            Some(steps)
        } else {
            None
        }
    }
}
#[derive(Clone, Debug, serde::Deserialize, serde::Serialize)]
#[serde(rename_all = "snake_case")]
pub(crate) struct Sled {


@@ -121,6 +121,14 @@ impl ImageFormat {
            Self::Webp => "WEBP",
        }
    }

    pub(crate) fn as_ext(self) -> &'static str {
        match self {
            Self::Jpeg => ".jpeg",
            Self::Png => ".png",
            Self::Webp => ".webp",
        }
    }
}
impl From<Filesystem> for Store {


@@ -1,4 +1,5 @@
use crate::{
    either::Either,
    error::{Error, UploadError},
    magick::ValidInputType,
    repo::{Alias, AliasRepo, DeleteToken, FullRepo, HashRepo},
@@ -32,15 +33,15 @@ where
{
    futures_util::pin_mut!(stream);

    let mut total_len = 0;
    let mut buf = Vec::new();

    tracing::debug!("Reading stream to memory");
    while let Some(res) = stream.next().await {
        let bytes = res?;
        total_len += bytes.len();
        buf.push(bytes);
    }

    let total_len = buf.iter().fold(0, |acc, item| acc + item.len());

    let bytes_mut = buf
        .iter()
        .fold(BytesMut::with_capacity(total_len), |mut acc, item| {
@@ -77,7 +78,22 @@ where
    )
    .await?;

    let hasher_reader = Hasher::new(validated_reader, Sha256::new());

    // Only still images are preprocessed; video types have no ImageFormat
    // and pass through untouched.
    let processed_reader = if let Some(operations) = CONFIG.media.preprocess_steps() {
        if let Some(format) = input_type.to_format() {
            let (_, magick_args) = crate::processor::build_chain(operations, format.as_ext())?;

            let processed_reader =
                crate::magick::process_image_async_read(validated_reader, magick_args, format)?;

            Either::left(processed_reader)
        } else {
            Either::right(validated_reader)
        }
    } else {
        Either::right(validated_reader)
    };

    let hasher_reader = Hasher::new(processed_reader, Sha256::new());
    let hasher = hasher_reader.hasher();

    let identifier = store.save_async_read(hasher_reader).await?;
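`Either` keeps the two branches unifiable: whether or not preprocessing runs, `Hasher::new` receives a single concrete reader type. pict-rs brings its own `either` module; this standalone sketch shows the assumed shape of that pattern:

```rust
use std::pin::Pin;
use std::task::{Context, Poll};
use tokio::io::{AsyncRead, ReadBuf};

// Sketch of an Either reader: poll_read delegates to whichever branch
// is held, so both arms of an `if` can produce one return type.
enum Either<L, R> {
    Left(L),
    Right(R),
}

impl<L, R> AsyncRead for Either<L, R>
where
    L: AsyncRead + Unpin,
    R: AsyncRead + Unpin,
{
    fn poll_read(
        mut self: Pin<&mut Self>,
        cx: &mut Context<'_>,
        buf: &mut ReadBuf<'_>,
    ) -> Poll<std::io::Result<()>> {
        match &mut *self {
            Either::Left(l) => Pin::new(l).poll_read(cx, buf),
            Either::Right(r) => Pin::new(r).poll_read(cx, buf),
        }
    }
}
```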


@@ -39,7 +39,7 @@ pub(crate) enum ValidInputType {
}

impl ValidInputType {
    fn as_str(&self) -> &'static str {
    fn as_str(self) -> &'static str {
        match self {
            Self::Mp4 => "MP4",
            Self::Gif => "GIF",
@@ -49,7 +49,7 @@ impl ValidInputType {
        }
    }

    pub(crate) fn as_ext(&self) -> &'static str {
    pub(crate) fn as_ext(self) -> &'static str {
        match self {
            Self::Mp4 => ".mp4",
            Self::Gif => ".gif",
@@ -59,7 +59,7 @@ impl ValidInputType {
        }
    }

    fn is_mp4(&self) -> bool {
    fn is_mp4(self) -> bool {
        matches!(self, Self::Mp4)
    }
@@ -70,6 +70,15 @@ impl ValidInputType {
            ImageFormat::Webp => ValidInputType::Webp,
        }
    }

    // Only still image types map to an ImageFormat; Mp4 and Gif return
    // None, which skips upload preprocessing for them.
    pub(crate) fn to_format(self) -> Option<ImageFormat> {
        match self {
            Self::Jpeg => Some(ImageFormat::Jpeg),
            Self::Png => Some(ImageFormat::Png),
            Self::Webp => Some(ImageFormat::Webp),
            _ => None,
        }
    }
}
#[derive(Debug)]
@@ -256,6 +265,19 @@ pub(crate) async fn input_type_bytes(input: Bytes) -> Result<ValidInputType, Err
    details_bytes(input, None).await?.validate_input()
}

fn process_image(args: Vec<String>, format: ImageFormat) -> std::io::Result<Process> {
    let command = "magick";
    let convert_args = ["convert", "-"]; // "-" reads the input image from stdin
    let last_arg = format!("{}:-", format.as_magick_format()); // write output to stdout

    Process::spawn(
        Command::new(command)
            .args(convert_args)
            .args(args)
            .arg(last_arg),
    )
}
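For a JPEG with a single hypothetical `-resize 1200` step (the real arguments come from `crate::processor::build_chain`), the spawned invocation is equivalent to this sketch:

```rust
use std::process::Stdio;
use tokio::process::{Child, Command};

// Sketch (step arguments are hypothetical): the ImageMagick invocation
// assembled above, reading the original from stdin ("-") and writing
// the converted image to stdout ("jpeg:-").
fn spawn_convert() -> std::io::Result<Child> {
    Command::new("magick")
        .args(["convert", "-"])    // input from stdin
        .args(["-resize", "1200"]) // hypothetical build_chain output
        .arg("jpeg:-")             // converted output to stdout
        .stdin(Stdio::piped())
        .stdout(Stdio::piped())
        .spawn()
}
```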
#[instrument(name = "Spawning process command")]
pub(crate) fn process_image_store_read<S: Store + 'static>(
    store: S,
@@ -263,18 +285,16 @@ pub(crate) fn process_image_store_read<S: Store + 'static>(
    args: Vec<String>,
    format: ImageFormat,
) -> std::io::Result<impl AsyncRead + Unpin> {
    let command = "magick";
    let convert_args = ["convert", "-"];
    let last_arg = format!("{}:-", format.as_magick_format());
    Ok(process_image(args, format)?.store_read(store, identifier))
}

    let process = Process::spawn(
        Command::new(command)
            .args(convert_args)
            .args(args)
            .arg(last_arg),
    )?;
    Ok(process.store_read(store, identifier))

#[instrument(name = "Spawning process command", skip(async_read))]
pub(crate) fn process_image_async_read<A: AsyncRead + Unpin + 'static>(
    async_read: A,
    args: Vec<String>,
    format: ImageFormat,
) -> std::io::Result<impl AsyncRead + Unpin> {
    Ok(process_image(args, format)?.pipe_async_read(async_read))
}
impl Details {


@@ -152,6 +152,55 @@ impl Process {
        }
    }

    pub(crate) fn pipe_async_read<A: AsyncRead + Unpin + 'static>(
        mut self,
        mut async_read: A,
    ) -> impl AsyncRead + Unpin {
        let mut stdin = self.child.stdin.take().expect("stdin exists");
        let stdout = self.child.stdout.take().expect("stdout exists");

        let (tx, rx) = tracing::trace_span!(parent: None, "Create channel")
            .in_scope(channel::<std::io::Error>);

        let span = tracing::info_span!(parent: None, "Background process task from bytes");
        span.follows_from(Span::current());

        let mut child = self.child;

        let handle = tracing::trace_span!(parent: None, "Spawn task").in_scope(|| {
            actix_rt::spawn(
                async move {
                    // Copy the input into the child's stdin, forwarding any
                    // I/O error to the reader through the channel.
                    if let Err(e) = tokio::io::copy(&mut async_read, &mut stdin).await {
                        let _ = tx.send(e);
                        return;
                    }

                    // Close stdin so the child sees EOF and can exit.
                    drop(stdin);

                    match child.wait().await {
                        Ok(status) => {
                            if !status.success() {
                                let _ = tx.send(std::io::Error::new(
                                    std::io::ErrorKind::Other,
                                    &StatusError,
                                ));
                            }
                        }
                        Err(e) => {
                            let _ = tx.send(e);
                        }
                    }
                }
                .instrument(span),
            )
        });

        // Hand stdout back as the output stream; the receiver surfaces any
        // error from the background task.
        ProcessRead {
            inner: stdout,
            err_recv: rx,
            err_closed: false,
            handle: DropHandle { inner: handle },
        }
    }
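The essential moves above: take the child's pipes, copy the input into stdin on a background task, drop stdin so the child sees EOF, then hand stdout back as the output reader while the channel carries any error. A self-contained sketch of the same technique in plain tokio, with `cat` standing in for `magick`:

```rust
use std::process::Stdio;
use tokio::io::{AsyncRead, AsyncReadExt};
use tokio::process::Command;

// Sketch: stream `input` through a child process and collect its output
// without buffering the whole payload anywhere.
async fn pipe_through_cat<A>(mut input: A) -> std::io::Result<Vec<u8>>
where
    A: AsyncRead + Unpin + Send + 'static,
{
    let mut child = Command::new("cat")
        .stdin(Stdio::piped())
        .stdout(Stdio::piped())
        .spawn()?;

    let mut stdin = child.stdin.take().expect("stdin piped");
    let mut stdout = child.stdout.take().expect("stdout piped");

    // Feed the child in the background; dropping stdin closes the pipe,
    // which is how the child learns the input is finished.
    let feeder = tokio::spawn(async move {
        let _ = tokio::io::copy(&mut input, &mut stdin).await;
        drop(stdin);
    });

    let mut output = Vec::new();
    stdout.read_to_end(&mut output).await?;

    feeder.await.ok();
    child.wait().await?;
    Ok(output)
}
```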
    #[tracing::instrument]
    pub(crate) fn store_read<S: Store + 'static>(
        mut self,