Compare commits

...

82 commits
v0.5.7 ... main

Author SHA1 Message Date
asonix dd01548309 Fix backgrounded query boolean parsing 2024-06-05 12:54:05 -05:00
asonix f05eed2e36 Prepare 0.5.15 2024-06-04 11:49:13 -05:00
asonix 69470c8a38 Update defaults.toml 2024-06-04 11:34:28 -05:00
asonix 4e75764110 Add configuration option to control request logging 2024-06-03 22:15:52 -05:00
asonix 6ef9dc404f Update flake 2024-06-03 21:47:39 -05:00
asonix af3a6a260a Update dependencies (minor & point) 2024-06-03 21:40:08 -05:00
asonix 286bc8b97a Log for completed requests 2024-06-03 21:38:41 -05:00
asonix 8cf8b2bc05 Remove proxies row before deleting alias
This fixes the issue of invalidating the proxies alias foreign key
2024-06-03 16:29:13 -05:00
asonix 1c4e343d9d Prepare 0.5.14 2024-05-20 22:23:08 -05:00
asonix d03cc63d2b ffprobe: handle files with empty stream json 2024-05-20 22:08:54 -05:00
asonix 260f9a158a Update dependencies (minor & point) 2024-05-19 10:39:21 -05:00
asonix a7c78cd54e Merge pull request 'Update rustls for tokio-postgres' (#58) from asonix/update-tokio-postgres-rustls into main
Reviewed-on: https://git.asonix.dog/asonix/pict-rs/pulls/58
2024-05-19 15:35:47 +00:00
asonix d7dc2e506d Merge branch 'main' into asonix/update-tokio-postgres-rustls 2024-05-19 10:21:12 -05:00
asonix e48f60a6c6 Merge pull request 'Update rustls for actix-web' (#61) from asonix/update-actix-web-rustls into main
Reviewed-on: https://git.asonix.dog/asonix/pict-rs/pulls/61
2024-05-19 15:18:37 +00:00
asonix 9d01aeb82c Update rustls for actix-web
includes update for rustls-channel-resolver
2024-05-19 10:08:48 -05:00
asonix bddfb3c9d0 Merge branch 'main' into asonix/update-tokio-postgres-rustls 2024-05-19 09:40:45 -05:00
asonix 7ae3c0c776 Merge pull request 'Update reqwest to 0.12' (#59) from asonix/update-reqwest into main
Reviewed-on: https://git.asonix.dog/asonix/pict-rs/pulls/59
2024-05-19 14:37:50 +00:00
asonix 983e9ce151 Merge branch 'main' into asonix/update-reqwest 2024-05-19 09:36:54 -05:00
asonix 9302062b26 Merge pull request 'Update metrics-exporter-prometheus' (#60) from asonix/update-metrics-exporter-prometheus into main
Reviewed-on: https://git.asonix.dog/asonix/pict-rs/pulls/60
2024-05-19 14:35:27 +00:00
asonix 33e72266f5 Add public mechanism for installing aws-lc-rs 2024-05-03 23:05:17 -05:00
asonix 39da69b1aa Use tokio-postgres-generic-rustls 2024-05-03 22:39:30 -05:00
asonix 64b8635059 Update rustls for tokio-postgres
This doesn't update rustls for actix-web (0.22), or rustls for reqwest (0.21)
2024-05-03 22:39:30 -05:00
asonix d45e3fa386 Remove unused 'remove' repo method 2024-05-03 22:35:20 -05:00
asonix bfd4fd4689 Remove unused StatusError type 2024-05-03 22:34:18 -05:00
asonix 89f3c447a8 clippy 2024-05-01 14:57:03 -05:00
asonix 46cfbf99a5 Update metrics-exporter-prometheus
This pulls in hyper 1 and http 1
2024-05-01 14:50:20 -05:00
asonix 58529a2eb2 Update reqwest to 0.12
This pulls in hyper 1 and http 1, but removes rustls 0.21
2024-05-01 14:46:29 -05:00
asonix 700aeb90e0 Fix time deprecation warnings 2024-05-01 14:33:07 -05:00
asonix ff39c30cc8 Update direct base64 dependency 2024-05-01 14:32:26 -05:00
asonix 9561c578dc Update dependencies (minor & point) 2024-05-01 14:30:22 -05:00
asonix dc7bdf7eeb Update flake.lock 2024-04-21 21:02:42 -05:00
asonix 33ba045ee1 Apparently imagemagick needs a shell to delegate to ffmpeg properly 2024-04-21 21:02:31 -05:00
asonix f082e48ed8 Attempt to set up nix-based docker for pict-rs
There's a bug when converting APNG files to WEBP files, which
imagemagick delegates to ffmpeg. When doing 'nix build' and running the
result, or running pict-rs in the dev shell, it works fine. In the
container, this doesn't work at all. imagemagick complains that there's
no media to convert, implying ffmpeg has output a zero-sized file.

This work is helping to narrow down exactly what pict-rs needs to run,
though. This still needs to be tested against h264, h265, vp8, vp9 and
av1.
2024-04-21 14:31:03 -05:00
asonix 97159e0030 Update release document 2024-04-15 21:17:40 -05:00
asonix 6d40fbee47 Prepare 0.5.13 2024-04-15 15:31:31 -05:00
asonix c4e99ef539 Add ability to disable colorized logs 2024-04-15 15:16:10 -05:00
asonix 3428c31f16 Use tokio channels again 2024-04-14 20:06:58 -05:00
asonix 4bb3bad703 Prepare 0.5.12 2024-04-05 13:05:16 -05:00
asonix 4021458be8 Prevent divided-by-zero for empty BytesStreams 2024-04-05 12:57:40 -05:00
asonix eca3697410 Add panic boundaries for background jobs 2024-04-05 12:57:32 -05:00
asonix d41fca5b6c Don't let the doctests step on each other via /tmp 2024-04-04 14:39:30 -05:00
asonix e3183c923f Remove dev-dependency on tokio-uring - unneeded 2024-04-04 12:53:08 -05:00
asonix d97cfe2a64 Remove 'armed' from NotificationEntryInner by only creating them when needed 2024-04-03 13:22:34 -05:00
asonix cef9a68307 Update dependencies (minor & point) 2024-04-01 18:08:57 -05:00
asonix 5f9efb2e1a Prepare 0.5.11 2024-04-01 18:08:46 -05:00
asonix dfb38c7144 Merge pull request 'Background variant processing' (#56) from asonix/backgrounded-variants into main
Reviewed-on: https://git.asonix.dog/asonix/pict-rs/pulls/56
2024-04-01 22:17:30 +00:00
asonix a3bce4c2d3 clippy 2024-04-01 17:06:36 -05:00
asonix c013f697fd Update readme with new API information 2024-04-01 17:01:52 -05:00
asonix 960f6487b7 Queue generation jobs 2024-03-31 20:26:15 -05:00
asonix cd6fb84cc4 Add timeout, metrics back to processor 2024-03-31 16:34:50 -05:00
asonix 056b96d0ad Rename thumbnail_args to variant_args 2024-03-31 16:23:34 -05:00
asonix 74885f2932 Share notification map between sled, postgres 2024-03-31 16:00:23 -05:00
asonix d9d5ac5388 Make postgres work 2024-03-30 14:11:12 -05:00
asonix 612e4017d5 Postgres compiles 2024-03-30 12:10:31 -05:00
asonix b43a435e64 Broken!!!!! 2024-03-30 09:36:31 -05:00
asonix 6e9239fa36 Move variant methods into variant repo trait 2024-03-28 12:04:40 -05:00
asonix 525deffd8d Merge pull request 'Add per-upload limits and per-upload preprocess steps' (#55) from asonix/per-upload-limits-and-operations into main
Reviewed-on: https://git.asonix.dog/asonix/pict-rs/pulls/55
2024-03-28 01:17:32 +00:00
asonix fe5a5723be Merge branch 'main' into asonix/per-upload-limits-and-operations 2024-03-27 19:20:50 -05:00
asonix 3211ce459e Update dependencies (minor & point) 2024-03-27 19:11:41 -05:00
asonix 4b46f1ae2a Use stable actix-form-data 2024-03-27 19:10:58 -05:00
asonix 55bc4b64c1 Add per-upload validations and per-upload preprocess steps 2024-03-27 19:00:54 -05:00
asonix 84a882392a Start threading upload configuration into ingest 2024-03-27 16:57:22 -05:00
asonix 5f850f8c86 Prepare 0.5.10 release 2024-03-23 18:59:48 -05:00
asonix bcc7773433 Update dependencies (minor & point) 2024-03-23 18:49:48 -05:00
asonix 793d3c0c70 Cleanup temporary directory on launch by default 2024-03-23 18:42:12 -05:00
asonix 34b9919428 Fix release document 2x combo 2024-03-11 13:58:57 -05:00
asonix dacfc43c44 Fix release document 2024-03-11 13:57:53 -05:00
asonix 2ead3e00e2 Revert "Zigbuild with tokio-unstable"
This reverts commit b7f508207f.
2024-03-11 13:29:52 -05:00
asonix df04ca9b12 Prepare 0.5.9 2024-03-11 13:29:15 -05:00
asonix 9178e3ef9f Don't spawn_local without tokio-unstable 2024-03-11 13:20:26 -05:00
asonix b7f508207f Zigbuild with tokio-unstable 2024-03-11 13:20:12 -05:00
asonix d8d1ce1634 Don't mention tokio_uring 2024-03-10 23:02:03 -05:00
asonix 7021c50156 Prepare 0.5.8 2024-03-10 22:48:11 -05:00
asonix 6f95c72070 Add new feature for testing with errors, test & fix job retries 2024-03-10 22:02:27 -05:00
asonix 286279cdf5 Control warn level for long polls via cargo feature 2024-03-10 20:07:59 -05:00
asonix 996fe0686b Update opentelemetry dependencies 2024-03-10 16:03:46 -05:00
asonix 5b1f4219fa Update dependencies (minor & point) 2024-03-10 16:00:54 -05:00
asonix dff588aafd Switch from deadpool to bb8 2024-03-10 15:59:08 -05:00
asonix 4976fcb2eb Remove unneeded code 2024-03-09 22:53:46 -06:00
asonix aa4582a3f8 Remove unused store method 2024-03-09 15:38:39 -06:00
asonix e302df7e39 Add more poll timers, spawn process from background threads 2024-03-09 15:19:13 -06:00
asonix 9fe586b9dd Implement retries for jobs, start warning on long polls 2024-03-09 12:15:23 -06:00
72 changed files with 4017 additions and 2334 deletions

Cargo.lock (generated, 1628 changed lines)

File diff suppressed because it is too large.


@ -1,7 +1,7 @@
[package]
name = "pict-rs"
description = "A simple image hosting service"
version = "0.5.7"
version = "0.5.15"
authors = ["asonix <asonix@asonix.dog>"]
license = "AGPL-3.0"
readme = "README.md"
@ -15,48 +15,51 @@ strip = true
[features]
default = []
io-uring = ["dep:tokio-uring", "sled/io_uring", "actix-web/experimental-io-uring"]
poll-timer-warnings = []
random-errors = ["dep:nanorand"]
[dependencies]
actix-form-data = "0.7.0-beta.6"
actix-web = { version = "4.0.0", default-features = false, features = ["rustls-0_22"] }
actix-form-data = "0.7.0-beta.7"
actix-web = { version = "4.6.0", default-features = false, features = ["rustls-0_23"] }
async-trait = "0.1.51"
barrel = { version = "0.7.0", features = ["pg"] }
base64 = "0.21.0"
base64 = "0.22.0"
bb8 = "0.8.3"
blurhash-update = "0.1.0"
clap = { version = "4.0.2", features = ["derive"] }
color-eyre = "0.6"
config = { version = "0.14.0", default-features = false, features = ["json", "ron", "toml", "yaml"] }
console-subscriber = "0.2"
dashmap = "5.1.0"
deadpool = { version = "0.9.5", features = ["rt_tokio_1"] }
diesel = { version = "2.1.1", features = ["postgres_backend", "serde_json", "time", "uuid"] }
diesel-async = { version = "0.4.1", features = ["postgres", "deadpool"] }
diesel-async = { version = "0.4.1", features = ["bb8", "postgres"] }
diesel-derive-enum = { version = "2.1.0", features = ["postgres"] }
flume = "0.11.0"
futures-core = "0.3"
hex = "0.4.3"
md-5 = "0.10.5"
metrics = "0.22.0"
metrics-exporter-prometheus = { version = "0.13.0", default-features = false, features = ["http-listener"] }
metrics-exporter-prometheus = { version = "0.14.0", default-features = false, features = ["http-listener"] }
mime = "0.3.1"
opentelemetry_sdk = { version = "0.21", features = ["rt-tokio"] }
opentelemetry = { version = "0.21" }
opentelemetry-otlp = "0.14"
nanorand = { version = "0.7", optional = true }
opentelemetry_sdk = { version = "0.22", features = ["rt-tokio"] }
opentelemetry = "0.22"
opentelemetry-otlp = "0.15"
pin-project-lite = "0.2.7"
refinery = { version = "0.8.10", features = ["tokio-postgres", "postgres"] }
reqwest = { version = "0.11.18", default-features = false, features = ["json", "rustls-tls", "stream"] }
reqwest-middleware = "0.2.2"
reqwest-tracing = { version = "0.4.5" }
# pinned to tokio-postgres-rustls
rustls = "0.22.0"
reqwest = { version = "0.12.0", default-features = false, features = ["json", "rustls-tls", "stream"] }
reqwest-middleware = "0.3.0"
reqwest-tracing = "0.5.0"
# pinned to tokio-postgres-generic-rustls
# pinned to actix-web
rustls = "0.23"
# pinned to rustls
rustls-channel-resolver = "0.2.0"
rustls-channel-resolver = "0.3.0"
# pinned to rustls
rustls-pemfile = "2.0.0"
rusty-s3 = "0.5.0"
serde = { version = "1.0", features = ["derive"] }
serde-tuple-vec-map = "1.0.1"
serde_json = "1.0"
serde-tuple-vec-map = "1.0.1"
serde_urlencoded = "0.7.1"
sha2 = "0.10.0"
sled = { version = "0.34.7" }
@ -66,7 +69,7 @@ thiserror = "1.0"
time = { version = "0.3.0", features = ["serde", "serde-well-known"] }
tokio = { version = "1", features = ["full", "tracing"] }
tokio-postgres = { version = "0.7.10", features = ["with-uuid-1", "with-time-0_3", "with-serde_json-1"] }
tokio-postgres-rustls = "0.11.0"
tokio-postgres-generic-rustls = { version = "0.1.0", default-features = false, features = ["aws-lc-rs"] }
tokio-uring = { version = "0.4", optional = true, features = ["bytes"] }
tokio-util = { version = "0.7", default-features = false, features = [
"codec",
@ -76,7 +79,7 @@ toml = "0.8.0"
tracing = "0.1.15"
tracing-error = "0.2.0"
tracing-log = "0.2.0"
tracing-opentelemetry = "0.22"
tracing-opentelemetry = "0.23"
tracing-subscriber = { version = "0.3.0", features = [
"ansi",
"env-filter",
@ -91,9 +94,6 @@ uuid = { version = "1", features = ["serde", "std", "v4", "v7"] }
webpki-roots = "0.26.0"
[dependencies.tracing-actix-web]
version = "0.7.8"
version = "0.7.10"
default-features = false
features = ["emit_event_on_error", "opentelemetry_0_21"]
[dev-dependencies]
tokio-uring = { version = "0.4", features = ["bytes"] }
features = ["opentelemetry_0_22"]


@ -220,8 +220,7 @@ More information is available in the [Ubuntu and Debian docs](./docs/ubuntu-and-
##### Compile from Source
pict-rs can be compiled from source using a recent version of the rust compiler. I do development
and produce releases on 1.72. pict-rs also requires the `protoc` protobuf compiler to be present at
build-time in order to enable use of [`tokio-console`](https://github.com/tokio-rs/console).
and produce releases on 1.75
Like the Binary Download option, `imagemagick`, `ffmpeg`, and `exiftool` must be installed for
pict-rs to run properly.
@ -254,9 +253,27 @@ Example:
### API
pict-rs offers the following endpoints:
- `POST /image` for uploading an image. Uploaded content must be valid multipart/form-data with an
- `POST /image?{args}` for uploading an image. Uploaded content must be valid multipart/form-data with an
image array located within the `images[]` key
The {args} query serves multiple purposes for image uploads. The first is to provide
request-level validations for the uploaded media. Available keys are as follows:
- max_width: maximum width, in pixels, allowed for the uploaded media
- max_height: maximum height, in pixels, allowed for the uploaded media
- max_area: maximum area, in pixels, allowed for the uploaded media
- max_frame_count: maximum number of frames permitted for animations and videos
- max_file_size: maximum size, in megabytes, allowed
- allow_image: whether to permit still images in the upload
- allow_animation: whether to permit animations in the upload
- allow_video: whether to permit video in the upload
These validations apply in addition to the validations specified in the pict-rs configuration,
so uploaded media will be rejected if any of the validations fail.
The second purpose of the {args} query is to provide preprocess steps for the uploaded image.
The format is the same as in the process.{ext} endpoint. Images uploaded with these steps
provided will be processed before they are saved.
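For example, a sketch of such an upload with curl (the host, port, and filename here are assumptions):

```bash
# Hypothetical upload: cap dimensions, forbid video, and blur the image before it is saved.
curl -fsS \
  -F 'images[]=@example.png' \
  'http://localhost:8080/image?max_width=1000&max_height=1000&allow_video=false&blur=2.5'
```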
This endpoint returns the following JSON structure on success with a 201 Created status
```json
{
@ -295,7 +312,9 @@ pict-rs offers the following endpoints:
"msg": "ok"
}
```
- `POST /image/backgrounded` Upload an image, like the `/image` endpoint, but don't wait to validate and process it.
- `POST /image/backgrounded?{args}` Upload an image, like the `/image` endpoint, but don't wait to validate and process it.
The {args} query uses the same format as the inline image upload endpoint.
This endpoint returns the following JSON structure on success with a 202 Accepted status
```json
{


@ -4,6 +4,7 @@ read_only = false
danger_dummy_mode = false
max_file_count = 1
temporary_directory = "/tmp"
cleanup_temporary_directory = true
[client]
timeout = 30
@ -15,6 +16,8 @@ concurrency = 32
format = "normal"
targets = "info"
log_spans = false
no_ansi = false
log_requests = false
[tracing.console]
buffer_capacity = 102400
@ -46,7 +49,7 @@ proxy = "7d"
[media.magick]
max_width = 10000
max_height = 10000
max_area = 40000000
max_area = 20000
memory = 256
map = 512
disk = 1024


@ -5,11 +5,11 @@
"systems": "systems"
},
"locked": {
"lastModified": 1705309234,
"narHash": "sha256-uNRRNRKmJyCRC/8y1RqBkqWBLM034y4qN7EprSdmgyA=",
"lastModified": 1710146030,
"narHash": "sha256-SZ5L6eA7HJ/nmkzGG7/ISclqe6oZdOZTNoesiInkXPQ=",
"owner": "numtide",
"repo": "flake-utils",
"rev": "1ef2e671c3b0c19053962c07dbda38332dcebf26",
"rev": "b1d9ab70662946ef0850d488da1c9019f3a9752a",
"type": "github"
},
"original": {
@ -20,11 +20,11 @@
},
"nixpkgs": {
"locked": {
"lastModified": 1705133751,
"narHash": "sha256-rCIsyE80jgiOU78gCWN3A0wE0tR2GI5nH6MlS+HaaSQ=",
"lastModified": 1717196966,
"narHash": "sha256-yZKhxVIKd2lsbOqYd5iDoUIwsRZFqE87smE2Vzf6Ck0=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "9b19f5e77dd906cb52dade0b7bd280339d2a1f3d",
"rev": "57610d2f8f0937f39dbd72251e9614b1561942d8",
"type": "github"
},
"original": {


@ -15,13 +15,29 @@
in
{
packages = rec {
imagemagick7_pict-rs = pkgs.callPackage ./nix/pkgs/imagemagick_pict-rs {};
ffmpeg6_pict-rs = pkgs.callPackage ./nix/pkgs/ffmpeg_pict-rs {};
pict-rs = pkgs.callPackage ./pict-rs.nix {
inherit (pkgs.darwin.apple_sdk.frameworks) Security;
inherit imagemagick7_pict-rs ffmpeg6_pict-rs;
};
default = pict-rs;
};
docker = pkgs.dockerTools.buildLayeredImage {
name = "pict-rs";
tag = "latest";
contents = [ pkgs.tini self.packages.${system}.pict-rs pkgs.bash ];
config = {
Entrypoint = [ "/bin/tini" "--" "/bin/pict-rs" ];
Cmd = [ "run" ];
};
};
apps = rec {
dev = flake-utils.lib.mkApp { drv = self.packages.${system}.pict-rs; };
default = dev;
@ -33,11 +49,13 @@
cargo-outdated
certstrap
clippy
curl
diesel-cli
exiftool
ffmpeg_6-full
garage
imagemagick
self.packages.${system}.imagemagick7_pict-rs
self.packages.${system}.ffmpeg6_pict-rs
jq
minio-client
rust-analyzer
rustc


@ -0,0 +1,5 @@
{ ffmpeg_6-headless }:
ffmpeg_6-headless.override {
withWebp = true;
}


@ -0,0 +1,23 @@
{ imagemagick7 }:
imagemagick7.override {
bzip2Support = true;
zlibSupport = true;
libX11Support = false;
libXtSupport = false;
fontconfigSupport = false;
freetypeSupport = false;
libjpegSupport = true;
djvulibreSupport = false;
lcms2Support = false;
openexrSupport = false;
libjxlSupport = true;
libpngSupport = true;
liblqr1Support = false;
librsvgSupport = false;
libtiffSupport = false;
libxml2Support = false;
openjpegSupport = true;
libwebpSupport = true;
libheifSupport = true;
}


@ -1,6 +1,6 @@
{ exiftool
, ffmpeg_6-full
, imagemagick
, ffmpeg6_pict-rs
, imagemagick7_pict-rs
, lib
, makeWrapper
, nixosTests
@ -11,7 +11,7 @@
rustPlatform.buildRustPackage {
pname = "pict-rs";
version = "0.5.7";
version = "0.5.15";
src = ./.;
cargoLock = {
@ -27,7 +27,7 @@ rustPlatform.buildRustPackage {
postInstall = ''
wrapProgram $out/bin/pict-rs \
--prefix PATH : "${lib.makeBinPath [ imagemagick ffmpeg_6-full exiftool ]}"
--prefix PATH : "${lib.makeBinPath [ imagemagick7_pict-rs ffmpeg6_pict-rs exiftool ]}"
'';
passthru.tests = { inherit (nixosTests) pict-rs; };


@ -37,6 +37,11 @@ max_file_count = 1
# default: The system's advertised temporary directory ("/tmp" on most linuxes)
temporary_directory = "/tmp"
## Optional: whether to delete the contents of $temporary_directory/pict-rs on launch
# environment variable: PICTRS__SERVER__CLEANUP_TEMPORARY_DIRECTORY
# default: true
cleanup_temporary_directory = true
## Optional: path to server certificate to enable TLS
# environment variable: PICTRS__SERVER__CERTIFICATE
# default: empty
@ -93,6 +98,16 @@ targets = 'info'
# default: false
log_spans = false
## Optional: whether to disable colorized log output
# environment variable: PICTRS__TRACING__LOGGING__NO_ANSI
# default: false
no_ansi = false
## Optional: whether to log upon request completion
# environment variable: PICTRS__TRACING__LOGGING__LOG_REQUESTS
# default: false
log_requests = false
## Console configuration
[tracing.console]

releases/0.5.10.md (new file, 31 lines)

@ -0,0 +1,31 @@
# pict-rs 0.5.10
## Overview
pict-rs 0.5.10 is a small release with changes to how pict-rs handles temporary files.
### Changes
- [Temporary File Cleanup](#temporary-file-cleanup)
## Upgrade Notes
There are no significant changes from 0.5.9. Upgrading should be as simple as pulling the new
version.
## Descriptions
### Temporary File Cleanup
pict-rs now nests its temporary files inside a `pict-rs` toplevel temporary folder. This is useful
because pict-rs 0.5.10 introduces a new behavior: it will completely delete that folder and its
contents on launch. If you are running multiple copies of pict-rs on the same host and they share
your temporary folder, this might cause problems. In that scenario, this behavior can be disabled by
setting `PICTRS__SERVER__CLEANUP_TEMPORARY_DIRECTORY=false` or passing
`--no-cleanup-temporary-directory` on the commandline.
This new behavior has been introduced in order to better clean up after crashes. If pict-rs is
killed while processing media, maybe due to an OOM, it will leave files behind in the temporary
directory. This can cause the temporary directory to grow, leading to memory or disk problems.
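For reference, a minimal sketch of opting out via the configuration file (the `[server]` key matches the `cleanup_temporary_directory` option shown in the config diffs below):

```toml
[server]
cleanup_temporary_directory = false
```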

releases/0.5.11.md (new file, 82 lines)

@ -0,0 +1,82 @@
# pict-rs 0.5.11
pict-rs is a simple image hosting microservice, designed to handle storing and retrieving images,
animations, and videos, as well as providing basic image processing functionality.
## Overview
pict-rs 0.5.11 introduces new per-upload media validations, and new per-upload media processing.
These features will enable applications to be more precise about their media requirements, such as
allowing different media types and sizes for different endpoints, or pre-processing certain media to
optimize for size.
### Features
- [Upload Validations](#upload-validations)
- [Upload Processing](#upload-processing)
### Changes
- [Backgrounded Variants](#backgrounded-variants)
## Upgrade Notes
For postgres-based installations, a small migration will be run when pict-rs 0.5.11 first launches
to create a new notifications table. No manual intervention is required. Upgrading should be as
simple as pulling a new version of pict-rs.
## Descriptions
### Upload Validations
When ingesting media using `POST /image`, `POST /image/backgrounded`, `POST /internal/import`, or
`GET /image/download`, validations can now be applied per-upload. These can be provided in the
request query. The following query parameters are supported:
- max_width: maximum width, in pixels, allowed for the uploaded media
- max_height: maximum height, in pixels, allowed for the uploaded media
- max_area: maximum area, in pixels, allowed for the uploaded media
- max_frame_count: maximum number of frames permitted for animations and videos
- max_file_size: maximum size, in megabytes, allowed
- allow_image: whether to permit still images in the upload
- allow_animation: whether to permit animations in the upload
- allow_video: whether to permit video in the upload
An example request could look like this: `POST /image/backgrounded?max_area=3200&allow_video=false`
Validations are performed in addition to the validations specified in the pict-rs configuration, so
if uploaded media violates any of the validations, it will fail to ingest.
### Upload Processing
In a similar vein to the upload validations, preprocessing steps can now be applied on a per-upload
basis. These are also provided as query parameters, and will be applied _instead of_ the configured
preprocess steps. The preprocess query parameters are provided and processed the same way as in the
`GET /image/process.{ext}` endpoint.
An example request could be `POST /image/backgrounded?blur=2.5&resize=300`, which would blur the
uploaded image and fit it inside a 300x300 box before saving it.
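As a concrete sketch (assuming a local pict-rs on `localhost:8080` and a file named `example.png`), that example becomes the following curl invocation:

```bash
# Hypothetical backgrounded upload applying per-upload preprocess steps.
curl -fsS \
  -F 'images[]=@example.png' \
  'http://localhost:8080/image/backgrounded?blur=2.5&resize=300'
```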
### Backgrounded Variants
When serving images from the /process.{ext} endpoint, pict-rs will now queue the processing to
happen via the job queue, rather than processing media inline. It will still wait up to 30 seconds
for the processing to be complete, and return the processed image the same way it always has.
If processing exceeds 30 seconds, pict-rs will return a timeout error, but the processing will
continue in the background. The same variant can be requested again, and it will wait for the same
background process to complete, rather than trying to process the variant a second time.
pict-rs has historically had a method of reducing variant processing to prevent two requests for the
same variant from doing the same work, but this was only effective in environments that only ran 1
copy of pict-rs. In environments that had multiple replicas, each one could end up processing the
same variant if it was requested more than once at a time. This has been solved by using postgres as
a notification system to enable globally unique processing for a given variant.
In sled-based configurations there shouldn't be a noticeable difference, aside from the 30-second
timeout on variant endpoints.
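A minimal sketch of the request-side behavior described above (assumed names, not pict-rs's actual code): the request waits up to 30 seconds for the queued job and maps a timeout to an error, while the job itself keeps running in the background.

```rust
use std::time::Duration;

// Wait up to 30 seconds for a queued variant job to signal completion; on
// timeout the request fails, but the background job is not cancelled here.
async fn wait_for_variant<F, T>(job_completion: F) -> Result<T, &'static str>
where
    F: std::future::Future<Output = T>,
{
    tokio::time::timeout(Duration::from_secs(30), job_completion)
        .await
        .map_err(|_| "variant processing timed out; it will finish in the background")
}
```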

releases/0.5.12.md (new file, 46 lines)

@ -0,0 +1,46 @@
# pict-rs 0.5.12
pict-rs is a simple image hosting microservice, designed to handle storing and retrieving images,
animations, and videos, as well as providing basic image processing functionality.
## Overview
pict-rs 0.5.12 is a bugfix release to remove two issues that, when compounded, would cause pict-rs
to fail to process media.
### Fixes
- [Panic Handling in Background Jobs](#panic-handling-in-background-jobs)
- [BytesStream Divide-by-Zero](#bytes-stream-divide-by-zero)
## Upgrade Notes
There are no significant differences from 0.5.11. Upgrading should be as simple as pulling a new
version of pict-rs.
## Descriptions
### Panic Handling in Background Jobs
pict-rs makes an effort to never use explicitly panicking code, but since there's no static way to
guarantee that a given function won't panic, pict-rs needs to be able to deal with that. pict-rs
0.5.12 now wraps invocations of jobs in spawned tasks, which can catch and report panics that happen
in background jobs.
Previously, a panic in a background job would bring down that thread's job processor, which resulted
in future jobs never being processed. Now job processing should properly continue after panics
occur.
### BytesStream Divide-by-Zero
Part of my rework of BytesStream recently included adding debug logs around how many byte chunks
were in a given stream, and their average length. Unfortunately, if there were no bytes in the
stream, this would cause the "average chunk length" calculation to divide by 0. In previous versions
of pict-rs, this would generally result in a failed request for processed media, but in pict-rs
0.5.11 this would end up killing the background jobs processor.
This specific panic has been fixed by ensuring we divide by the number of chunks or 1, whichever is
greater.
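The guard itself is tiny, mirroring the `bs.len() / bs.chunks_len().max(1)` change visible in the source diff further down:

```rust
// Average chunk length without risking a divide-by-zero for an empty stream.
fn average_chunk_len(total_len: usize, chunk_count: usize) -> usize {
    total_len / chunk_count.max(1)
}
```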

releases/0.5.13.md (new file, 62 lines)

@ -0,0 +1,62 @@
# pict-rs 0.5.13
pict-rs is a simple image hosting microservice, designed to handle storing and retrieving images,
animations, and videos, as well as providing basic image processing functionality.
## Overview
pict-rs 0.5.13 is a maintenance release aiming to enable better logging in some scenarios.
### Features
- [Colorless Logging](#colorless-logging)
### Changes
- [Remove Flume](#remove-flume)
## Upgrade Notes
There are no significant changes from 0.5.12. Upgrading should be as simple as pulling a new version
of pict-rs.
## Descriptions
### Colorless Logging
When opting to use the `json` logger, the tracing subscriber automatically disables colored output.
This didn't remove colors from errors, though, and pict-rs hasn't had a way to disable colors while
using other log formats. pict-rs 0.5.13 introduces a new configuration value to remove colored
output from all logs regardless of logging format.
With pict-rs.toml
```toml
[tracing.logging]
no_ansi = true
```
With environment variables
```bash
PICTRS__TRACING__LOGGING__NO_ANSI=true
```
With commandline flags
```bash
pict-rs --no-log-ansi run
```
Colors in logs can be useful, so I imagine this option won't be used much. There has been a request
for this functionality, though, and it costs little to maintain.
### Remove Flume
Recently I've been debugging a memory usage issue in another project of mine. I wasn't able to fully
track down the cause, but I did notice that removing the
[flume channel library](https://github.com/zesterer/flume) seemed to make the leak go away. Since I
also use flume in pict-rs, I'm opting to replace it with tokio's native channel implementation. This
may or may not improve memory usage, but it does reduce the dependency count and therefore build time
for pict-rs.

releases/0.5.14.md (new file, 28 lines)

@ -0,0 +1,28 @@
# pict-rs 0.5.14
pict-rs is a simple image hosting microservice, designed to handle storing and retrieving images,
animations, and videos, as well as providing basic image processing functionality.
## Overview
pict-rs 0.5.14 includes a bugfix for identifying certain MOV videos, as well as updated dependencies.
### Fixes
- [Empty Stream Parsing](#empty-stream-parsing)
## Upgrade Notes
There are no significant changes from 0.5.13. Upgrading should be as simple as pulling a new version
of pict-rs.
## Descriptions
### Empty Stream Parsing
Certain videos, when identified with ffprobe, contain stream json objects with no fields. This would
cause pict-rs to fail to parse the information for these videos, as it expects streams to at least
contain a codec field. In pict-rs 0.5.14, empty streams are now considered valid and are simply
ignored.
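A minimal sketch of how `{}` entries can be tolerated with serde (illustrative only; pict-rs's actual `FfMpegStream` enum, including its new `Empty {}` variant, appears in the source diff below):

```rust
// Variants are tried in order: a known stream must carry a codec field, and
// anything else (including `{}`) falls through to Empty and is ignored.
#[derive(Debug, serde::Deserialize)]
#[serde(untagged)]
enum Stream {
    Known { codec_name: String },
    Empty {},
}

fn main() {
    let streams: Vec<Stream> =
        serde_json::from_str(r#"[{"codec_name":"hevc"}, {}, {}]"#).unwrap();
    // Only the first entry carries useful information; the rest are Empty.
    println!("{streams:?}");
}
```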

releases/0.5.15.md (new file, 58 lines)

@ -0,0 +1,58 @@
# pict-rs 0.5.15
pict-rs is a simple image hosting microservice, designed to handle storing and retrieving images,
animations, and videos, as well as providing basic image processing functionality.
## Overview
pict-rs 0.5.15 includes a bugfix for cleaning proxied media, updated dependencies, and a new option
to log requests.
### Fixes
- [Proxied Media Cleanup](#proxied-media-cleanup)
### Additions
- [Request Logging](#request-logging)
## Upgrade Notes
There are no significant changes from 0.5.14. Upgrading should be as simple as pulling a new version
of pict-rs.
## Descriptions
### Proxied Media Cleanup
At some point, the cleanup logic for proxied media got flipped around to try removing the internal
alias before removing the proxy record. This works fine with a sled backend, but not with a
postgres backend, and postgres would complain about invalidating a foreign key relationship.
pict-rs 0.5.15 fixes this by ensuring that the related proxy record is cleaned first.
### Request Logging
A new configuration option has been added to pict-rs to get more information about what it is
doing. By default, pict-rs only logs what it considers to be errors, but when
`log_requests` is enabled, it will also log information about successful requests. This can help
with debugging without enabling full debug logs or resorting to logging spans.
It can be configured via toml
```toml
[tracing.logging]
log_requests = true
```
via environment variables
```bash
PICTRS__TRACING__LOGGING__LOG_REQUESTS=true
```
or via the commandline
```bash
pict-rs --log-requests run
```

releases/0.5.8.md (new file, 84 lines)

@ -0,0 +1,84 @@
# pict-rs 0.5.8
## Overview
pict-rs 0.5.8 improves reliability of deletions by allowing background tasks to be retried.
Otherwise changes are fairly minor.
### Changes
- [Improved Task Reliability](#improved-task-reliability)
- [Improved Latency](#improved-latency)
## Upgrade Notes
There is a small repo format migration between 0.5.7 and 0.5.8. For sled it's simply opening a new
tree; for postgres it involves adding a new column to the job_queue table. These changes will
automatically apply when launching pict-rs 0.5.8. Upgrading should be as simple as pulling a new
version of pict-rs.
## Configuration Notes
Check your configurations to make sure you haven't enabled the tokio-console integration unless
you're using it. In my local testing, I've found the console subscriber to use a significant amount
of CPU. While it is very useful for debugging, it shouldn't be used generally in production.
The relevant configuration values are `PICTRS__TRACING__CONSOLE__ADDRESS` with environment variables
or `[tracing.console] address = ""` in the toml.
## Packaging Notes
While I have never recommended packaging pict-rs with non-default crate features enabled, and the
binaries and containers I provide enable only the default features, there are two new crate features
in this release that I would advise against enabling in downstream packaging environments.
The new features are `poll-timer-warnings` and `random-errors`. These are each described below if
you want to learn about them, but as a general recommendation, do not enable non-default features
when packaging pict-rs (yes, I'm talking to you, `grawlinson` from the AUR).
The other optional feature, `io-uring`, is considered less stable. It's possible that folks will
find it works alright, and maybe Arch can enable it since they can assume recent kernels, but I
don't personally test much with `io-uring`. It exists mostly as a historical curiosity. Please
consider carefully before enabling io-uring for pict-rs.
## Descriptions
### Improved Task Reliability
pict-rs 0.5.8 adds the ability for tasks to be retried. pict-rs generally spawns background tasks to
handle things like image deletion or other cleanup operations. Until now, if a background task
failed, the only indication would be a warning that appeared in the logs. These warnings are
generally descriptive and help track the error source, but end users aren't notified, and the repo
or store state can become inconsistent.
With the newly added ability to retry tasks, operations should be completed more reliably. By
default, a failed task will be retried after a 2 minute wait, and if it continues to fail, it will
be retried up to five times. If a task fails after 5 retries, an additional warning will be output
to the log.
In order to test this, I've added a new optional crate feature called `random-errors`, which will
inject errors into various pict-rs operations randomly. This feature should never be enabled in
production scenarios, and two warnings will be printed when launching pict-rs if it was compiled
with this feature enabled.
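A minimal sketch of the retry policy described above (a 2-minute wait between attempts, up to five retries); this is illustrative and not pict-rs's actual job-queue code:

```rust
use std::time::Duration;

// Run a fallible job, retrying after a 2-minute wait, up to 5 retries total.
async fn run_with_retries<F, Fut, T, E>(mut job: F) -> Result<T, E>
where
    F: FnMut() -> Fut,
    Fut: std::future::Future<Output = Result<T, E>>,
{
    let mut retries = 0;
    loop {
        match job().await {
            Ok(value) => return Ok(value),
            Err(_) if retries < 5 => {
                retries += 1;
                tokio::time::sleep(Duration::from_secs(2 * 60)).await;
            }
            // After the final retry the error is surfaced (pict-rs logs a warning here).
            Err(err) => return Err(err),
        }
    }
}
```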
### Improved Latency
pict-rs 0.5.8 implements a couple new techniques to improve system latency.
1. The postgres connection pooling library has been swapped from deadpool to bb8. Not only does this
(slightly) improve connection pool access times, but it also means pict-rs is no longer pinned
to an outdated version of deadpool.
2. Processes like ffmpeg, imagemagick, and exiftool are now spawned from background threads,
rather than from within the webserver threads. This is notable, since the act of spawning a
process ends up using a good amount of time, and prevents other requests from being handled
until the spawning has completed.
3. pict-rs now has the ability to monitor polling times for futures. By default, any task pict-rs
spawns itself will be monitored to report polling times, and a trait has been added to enable
easily tracking more polling times in the future. These polling times will appear in the
prometheus metrics, as well as in logs at DEBUG or TRACE visibility. There's an optional crate
feature called `poll-timer-warnings` that will upgrade some of these logs to WARN visibility.
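Regarding point 2, a minimal sketch (an assumed helper, not pict-rs's actual code) of moving the spawn itself off the request-handling threads:

```rust
// Do the fork/exec work on a blocking thread so webserver threads aren't
// stalled while the OS spawns the child process.
async fn spawn_off_thread(
    program: String,
    args: Vec<String>,
) -> std::io::Result<std::process::Child> {
    tokio::task::spawn_blocking(move || {
        std::process::Command::new(program).args(args).spawn()
    })
    .await
    .expect("spawn_blocking task panicked")
}
```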

releases/0.5.9.md (new file, 32 lines)

@ -0,0 +1,32 @@
# pict-rs 0.5.9
## Overview
pict-rs 0.5.9 is a bugfix release for 0.5.8. All deployments on 0.5.8 should upgrade to 0.5.9.
### Fixes
- [Fix Postgres Pooling](#fix-postgres-pooling)
- [Fix io-uring feature](#fix-io-uring-feature)
## Upgrade Notes
There are no significant changes from 0.5.8. Upgrading should be as simple as pulling the new
version.
## Descriptions
### Fix Postgres Pooling
When pict-rs 0.5.8 was built without the `tokio_unstable` flag, it would use tokio's `spawn_local`
utility from outside a LocalSet, leading to panics and timeouts. This release replaces the use of
`spawn_local` with `spawn` in that scenario.
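A sketch of the shape of that fix (an assumed helper, not the actual pict-rs code): keep `spawn_local` only when built with the `tokio_unstable` cfg, and fall back to the `Send`-bound `spawn` otherwise.

```rust
// Built with tokio_unstable: the original spawn_local path is kept.
#[cfg(tokio_unstable)]
pub fn spawn_task<F>(future: F) -> tokio::task::JoinHandle<F::Output>
where
    F: std::future::Future + 'static,
    F::Output: 'static,
{
    tokio::task::spawn_local(future)
}

// Built without tokio_unstable: use the ordinary Send-bound spawn instead.
#[cfg(not(tokio_unstable))]
pub fn spawn_task<F>(future: F) -> tokio::task::JoinHandle<F::Output>
where
    F: std::future::Future + Send + 'static,
    F::Output: Send + 'static,
{
    tokio::task::spawn(future)
}
```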
### Fix io-uring Feature
As mentioned in the 0.5.8 release notes, io-uring is not considered to be a stable feature. However,
0.5.9 should make it usable again. Instead of manually launching a tokio_uring runtime, pict-rs once
again relies on actix-rt to configure a System for use with io-uring.


@ -98,7 +98,8 @@ async fn read_rgba_command<S>(
(MAGICK_CONFIGURE_PATH, state.policy_dir.as_os_str()),
];
let process = Process::run("magick", &args, &envs, state.config.media.process_timeout)?
let process = Process::run("magick", &args, &envs, state.config.media.process_timeout)
.await?
.add_extras(temporary_path);
Ok(process)


@ -1,14 +1,7 @@
use actix_web::web::Bytes;
use futures_core::Stream;
use std::{
collections::{vec_deque::IntoIter, VecDeque},
convert::Infallible,
pin::Pin,
task::{Context, Poll},
};
use std::collections::{vec_deque::IntoIter, VecDeque};
use streem::IntoStreamer;
use tokio::io::AsyncRead;
use tokio_util::bytes::Buf;
#[derive(Clone, Debug)]
pub(crate) struct BytesStream {
@ -36,12 +29,13 @@ impl BytesStream {
while let Some(bytes) = stream.try_next().await? {
tracing::trace!("try_from_stream: looping");
bs.add_bytes(bytes);
crate::sync::cooperate().await;
}
tracing::debug!(
"BytesStream with {} chunks, avg length {}",
bs.chunks_len(),
bs.len() / bs.chunks_len()
bs.len() / bs.chunks_len().max(1)
);
Ok(bs)
@ -64,21 +58,14 @@ impl BytesStream {
self.total_len == 0
}
pub(crate) fn into_reader(self) -> BytesReader {
BytesReader { inner: self.inner }
pub(crate) fn into_io_stream(self) -> impl Stream<Item = std::io::Result<Bytes>> {
crate::stream::error_injector(streem::from_fn(move |yielder| async move {
for bytes in self {
crate::sync::cooperate().await;
yielder.yield_ok(bytes).await;
}
}))
}
pub(crate) fn into_io_stream(self) -> IoStream {
IoStream { inner: self.inner }
}
}
pub(crate) struct IoStream {
inner: VecDeque<Bytes>,
}
pub(crate) struct BytesReader {
inner: VecDeque<Bytes>,
}
impl IntoIterator for BytesStream {
@ -89,57 +76,3 @@ impl IntoIterator for BytesStream {
self.inner.into_iter()
}
}
impl Stream for BytesStream {
type Item = Result<Bytes, Infallible>;
fn poll_next(self: Pin<&mut Self>, _: &mut Context<'_>) -> Poll<Option<Self::Item>> {
Poll::Ready(self.get_mut().inner.pop_front().map(Ok))
}
fn size_hint(&self) -> (usize, Option<usize>) {
(self.inner.len(), Some(self.inner.len()))
}
}
impl Stream for IoStream {
type Item = std::io::Result<Bytes>;
fn poll_next(self: Pin<&mut Self>, _: &mut Context<'_>) -> Poll<Option<Self::Item>> {
Poll::Ready(self.get_mut().inner.pop_front().map(Ok))
}
fn size_hint(&self) -> (usize, Option<usize>) {
(self.inner.len(), Some(self.inner.len()))
}
}
impl AsyncRead for BytesReader {
fn poll_read(
mut self: Pin<&mut Self>,
_: &mut Context<'_>,
buf: &mut tokio::io::ReadBuf<'_>,
) -> Poll<std::io::Result<()>> {
while buf.remaining() > 0 {
tracing::trace!("bytes reader: looping");
if let Some(bytes) = self.inner.front_mut() {
if bytes.is_empty() {
self.inner.pop_front();
continue;
}
let upper_bound = buf.remaining().min(bytes.len());
let slice = &bytes[..upper_bound];
buf.put_slice(slice);
bytes.advance(upper_bound);
} else {
break;
}
}
Poll::Ready(Ok(()))
}
}


@ -1,172 +0,0 @@
use crate::{
details::Details,
error::{Error, UploadError},
repo::Hash,
};
use dashmap::{mapref::entry::Entry, DashMap};
use flume::{r#async::RecvFut, Receiver, Sender};
use std::{
future::Future,
path::PathBuf,
pin::Pin,
sync::Arc,
task::{Context, Poll},
};
use tracing::Span;
type OutcomeReceiver = Receiver<(Details, Arc<str>)>;
type ProcessMapKey = (Hash, PathBuf);
type ProcessMapInner = DashMap<ProcessMapKey, OutcomeReceiver>;
#[derive(Debug, Default, Clone)]
pub(crate) struct ProcessMap {
process_map: Arc<ProcessMapInner>,
}
impl ProcessMap {
pub(super) fn new() -> Self {
Self::default()
}
pub(super) async fn process<Fut>(
&self,
hash: Hash,
path: PathBuf,
fut: Fut,
) -> Result<(Details, Arc<str>), Error>
where
Fut: Future<Output = Result<(Details, Arc<str>), Error>>,
{
let key = (hash.clone(), path.clone());
let (sender, receiver) = flume::bounded(1);
let entry = self.process_map.entry(key.clone());
let (state, span) = match entry {
Entry::Vacant(vacant) => {
vacant.insert(receiver);
let span = tracing::info_span!(
"Processing image",
hash = ?hash,
path = ?path,
completed = &tracing::field::Empty,
);
metrics::counter!(crate::init_metrics::PROCESS_MAP_INSERTED).increment(1);
(CancelState::Sender { sender }, span)
}
Entry::Occupied(receiver) => {
let span = tracing::info_span!(
"Waiting for processed image",
hash = ?hash,
path = ?path,
);
let receiver = receiver.get().clone().into_recv_async();
(CancelState::Receiver { receiver }, span)
}
};
CancelSafeProcessor {
cancel_token: CancelToken {
span,
key,
state,
process_map: self.clone(),
},
fut,
}
.await
}
fn remove(&self, key: &ProcessMapKey) -> Option<OutcomeReceiver> {
self.process_map.remove(key).map(|(_, v)| v)
}
}
struct CancelToken {
span: Span,
key: ProcessMapKey,
state: CancelState,
process_map: ProcessMap,
}
enum CancelState {
Sender {
sender: Sender<(Details, Arc<str>)>,
},
Receiver {
receiver: RecvFut<'static, (Details, Arc<str>)>,
},
}
impl CancelState {
const fn is_sender(&self) -> bool {
matches!(self, Self::Sender { .. })
}
}
pin_project_lite::pin_project! {
struct CancelSafeProcessor<F> {
cancel_token: CancelToken,
#[pin]
fut: F,
}
}
impl<F> Future for CancelSafeProcessor<F>
where
F: Future<Output = Result<(Details, Arc<str>), Error>>,
{
type Output = Result<(Details, Arc<str>), Error>;
fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
let this = self.as_mut().project();
let span = &this.cancel_token.span;
let process_map = &this.cancel_token.process_map;
let state = &mut this.cancel_token.state;
let key = &this.cancel_token.key;
let fut = this.fut;
span.in_scope(|| match state {
CancelState::Sender { sender } => {
let res = std::task::ready!(fut.poll(cx));
if process_map.remove(key).is_some() {
metrics::counter!(crate::init_metrics::PROCESS_MAP_REMOVED).increment(1);
}
if let Ok(tup) = &res {
let _ = sender.try_send(tup.clone());
}
Poll::Ready(res)
}
CancelState::Receiver { ref mut receiver } => Pin::new(receiver)
.poll(cx)
.map(|res| res.map_err(|_| UploadError::Canceled.into())),
})
}
}
impl Drop for CancelToken {
fn drop(&mut self) {
if self.state.is_sender() {
let completed = self.process_map.remove(&self.key).is_none();
self.span.record("completed", completed);
if !completed {
metrics::counter!(crate::init_metrics::PROCESS_MAP_REMOVED).increment(1);
}
}
}
}


@ -18,6 +18,8 @@ impl Args {
log_format,
log_targets,
log_spans,
log_requests,
no_log_ansi,
console_address,
console_buffer_capacity,
opentelemetry_url,
@ -38,6 +40,8 @@ impl Args {
format: log_format,
targets: log_targets.map(Serde::new),
log_spans,
no_ansi: no_log_ansi,
log_requests,
},
console: Console {
address: console_address,
@ -55,6 +59,7 @@ impl Args {
address,
api_key,
temporary_directory,
no_cleanup_temporary_directory,
certificate,
private_key,
client_timeout,
@ -122,6 +127,7 @@ impl Args {
danger_dummy_mode,
max_file_count,
temporary_directory,
cleanup_temporary_directory: !no_cleanup_temporary_directory,
certificate,
private_key,
};
@ -541,6 +547,7 @@ struct Server {
max_file_count: Option<u32>,
#[serde(skip_serializing_if = "Option::is_none")]
temporary_directory: Option<PathBuf>,
cleanup_temporary_directory: bool,
#[serde(skip_serializing_if = "Option::is_none")]
certificate: Option<PathBuf>,
#[serde(skip_serializing_if = "Option::is_none")]
@ -578,6 +585,10 @@ struct Logging {
targets: Option<Serde<Targets>>,
#[serde(skip_serializing_if = "std::ops::Not::not")]
log_spans: bool,
#[serde(skip_serializing_if = "std::ops::Not::not")]
log_requests: bool,
#[serde(skip_serializing_if = "std::ops::Not::not")]
no_ansi: bool,
}
#[derive(Debug, Default, serde::Serialize)]
@ -921,6 +932,13 @@ pub(super) struct Args {
/// Whether to log opening and closing of tracing spans to stdout
#[arg(long)]
log_spans: bool,
/// Whether to log request completions at an INFO level
#[arg(long)]
log_requests: bool,
#[arg(long)]
/// Whether to disable color-codes in log output
no_log_ansi: bool,
/// Address and port to expose tokio-console metrics
#[arg(long)]
@ -973,6 +991,10 @@ struct Run {
#[arg(long)]
temporary_directory: Option<PathBuf>,
/// Whether to attempt to clean files left behind from a previous run of pict-rs
#[arg(long)]
no_cleanup_temporary_directory: bool,
/// The path to the TLS certificate. Both the certificate and the private_key must be specified
/// to enable TLS
#[arg(long)]


@ -24,6 +24,7 @@ struct ServerDefaults {
danger_dummy_mode: bool,
max_file_count: u32,
temporary_directory: PathBuf,
cleanup_temporary_directory: bool,
}
#[derive(Clone, Debug, serde::Serialize)]
@ -54,6 +55,8 @@ struct LoggingDefaults {
format: LogFormat,
targets: Serde<Targets>,
log_spans: bool,
log_requests: bool,
no_ansi: bool,
}
#[derive(Clone, Debug, serde::Serialize)]
@ -211,6 +214,7 @@ impl Default for ServerDefaults {
danger_dummy_mode: false,
max_file_count: 1,
temporary_directory: std::env::temp_dir(),
cleanup_temporary_directory: true,
}
}
}
@ -233,6 +237,8 @@ impl Default for LoggingDefaults {
format: LogFormat::Normal,
targets: "info".parse().expect("Valid targets string"),
log_spans: false,
log_requests: false,
no_ansi: false,
}
}
}


@ -119,6 +119,8 @@ pub(crate) struct Server {
pub(crate) temporary_directory: PathBuf,
pub(crate) cleanup_temporary_directory: bool,
#[serde(skip_serializing_if = "Option::is_none")]
pub(crate) certificate: Option<PathBuf>,
@ -161,6 +163,10 @@ pub(crate) struct Logging {
pub(crate) targets: Serde<Targets>,
pub(crate) log_spans: bool,
pub(crate) no_ansi: bool,
pub(crate) log_requests: bool,
}
#[derive(Clone, Debug, serde::Deserialize, serde::Serialize)]


@ -2,9 +2,7 @@ mod exiftool;
mod ffmpeg;
mod magick;
use crate::{bytes_stream::BytesStream, formats::InputFile, state::State};
use crate::{bytes_stream::BytesStream, formats::InputFile, future::WithPollTimer, state::State};
#[derive(Debug, PartialEq, Eq)]
pub(crate) struct Discovery {
@ -31,12 +29,17 @@ pub(crate) async fn discover_bytes_stream<S>(
state: &State<S>,
bytes: BytesStream,
) -> Result<Discovery, crate::error::Error> {
let discovery = ffmpeg::discover_bytes_stream(state, bytes.clone()).await?;
let discovery = ffmpeg::discover_bytes_stream(state, bytes.clone())
.with_poll_timer("discover-ffmpeg")
.await?;
let discovery = magick::confirm_bytes_stream(state, discovery, bytes.clone()).await?;
let discovery = magick::confirm_bytes_stream(state, discovery, bytes.clone())
.with_poll_timer("confirm-imagemagick")
.await?;
let discovery =
exiftool::check_reorient(discovery, bytes, state.config.media.process_timeout).await?;
let discovery = exiftool::check_reorient(discovery, bytes, state.config.media.process_timeout)
.with_poll_timer("reorient-exiftool")
.await?;
Ok(discovery)
}


@ -40,8 +40,9 @@ pub(super) async fn check_reorient(
#[tracing::instrument(level = "trace", skip_all)]
async fn needs_reorienting(input: BytesStream, timeout: u64) -> Result<bool, ExifError> {
let buf = Process::run("exiftool", &["-n", "-Orientation", "-"], &[], timeout)?
.drive_with_async_read(input.into_reader())
let buf = Process::run("exiftool", &["-n", "-Orientation", "-"], &[], timeout)
.await?
.drive_with_stream(input.into_io_stream())
.into_string()
.await?;


@ -10,6 +10,7 @@ use crate::{
AlphaCodec, AnimationFormat, ImageFormat, ImageInput, InputFile, InputVideoFormat,
Mp4AudioCodec, Mp4Codec, WebmAlphaCodec, WebmAudioCodec, WebmCodec,
},
future::WithPollTimer,
process::Process,
state::State,
};
@ -52,6 +53,7 @@ impl FfMpegStreams {
FfMpegStream::Unknown { codec_name } => {
tracing::info!("Encountered unknown stream {codec_name}");
}
FfMpegStream::Empty {} => {}
}
}
@ -134,6 +136,7 @@ enum FfMpegStream {
Audio(FfMpegAudioStream),
Video(FfMpegVideoStream),
Unknown { codec_name: String },
Empty {},
}
#[derive(Debug, serde::Deserialize)]
@ -177,7 +180,8 @@ pub(super) async fn discover_bytes_stream<S>(
bytes: BytesStream,
) -> Result<Option<Discovery>, FfMpegError> {
let output = crate::ffmpeg::with_file(&state.tmp_dir, None, |path| async move {
crate::file::write_from_async_read(&path, bytes.into_reader())
crate::file::write_from_stream(&path, bytes.into_io_stream())
.with_poll_timer("discover-ffmpeg-write-file")
.await
.map_err(FfMpegError::Write)?;
@ -197,9 +201,11 @@ pub(super) async fn discover_bytes_stream<S>(
],
&[],
state.config.media.process_timeout,
)?
)
.await?
.read()
.into_vec()
.with_poll_timer("discover-ffmpeg-into-vec")
.await
.map_err(FfMpegError::Process)
})
@ -242,7 +248,8 @@ async fn alpha_pixel_formats(timeout: u64) -> Result<HashSet<String>, FfMpegErro
],
&[],
timeout,
)?
)
.await?
.read()
.into_vec()
.await?;


@ -0,0 +1,35 @@
{
"programs": [
],
"streams": [
{
"codec_name": "hevc",
"width": 1920,
"height": 1080,
"pix_fmt": "yuv420p10le",
"nb_read_frames": "187",
"side_data_list": [
{
}
]
},
{
"codec_name": "aac",
"nb_read_frames": "135"
},
{
},
{
},
{
}
],
"format": {
"format_name": "mov,mp4,m4a,3gp,3g2,mj2"
}
}


@ -1,11 +1,11 @@
use crate::formats::{
AlphaCodec, AnimationFormat, ImageFormat, ImageInput, InputFile, InputVideoFormat, Mp4Codec,
WebmAlphaCodec, WebmCodec,
AlphaCodec, AnimationFormat, ImageFormat, ImageInput, InputFile, InputVideoFormat,
Mp4AudioCodec, Mp4Codec, WebmAlphaCodec, WebmCodec,
};
use super::{Discovery, FfMpegDiscovery, PixelFormatOutput};
fn details_tests() -> [(&'static str, Option<Discovery>); 13] {
fn details_tests() -> [(&'static str, Option<Discovery>); 14] {
[
(
"animated_webp",
@ -151,6 +151,18 @@ fn details_tests() -> [(&'static str, Option<Discovery>); 13] {
frames: None,
}),
),
(
"mov",
Some(Discovery {
input: InputFile::Video(InputVideoFormat::Mp4 {
video_codec: Mp4Codec::H265,
audio_codec: Some(Mp4AudioCodec::Aac),
}),
width: 1920,
height: 1080,
frames: Some(187),
}),
),
]
}


@ -72,8 +72,9 @@ async fn discover<S>(state: &State<S>, stream: BytesStream) -> Result<Discovery,
],
&envs,
state.config.media.process_timeout,
)?
.drive_with_async_read(stream.into_reader())
)
.await?
.drive_with_stream(stream.into_io_stream())
.into_string()
.await;


@ -82,7 +82,7 @@ pub(crate) enum UploadError {
Io(#[from] std::io::Error),
#[error("Error validating upload")]
Validation(#[from] crate::validate::ValidationError),
Validation(#[from] crate::ingest::ValidationError),
#[error("Error in store")]
Store(#[source] crate::store::StoreError),
@ -108,6 +108,15 @@ pub(crate) enum UploadError {
#[error("Error in request response")]
Request(#[from] reqwest::Error),
#[error("Invalid job popped from job queue: {1}")]
InvalidJob(#[source] serde_json::Error, String),
#[error("Invalid query supplied")]
InvalidQuery(#[source] actix_web::error::QueryPayloadError),
#[error("Invalid json supplied")]
InvalidJson(#[source] actix_web::error::JsonPayloadError),
#[error("pict-rs is in read-only mode")]
ReadOnly,
@ -164,6 +173,10 @@ pub(crate) enum UploadError {
#[error("Failed external validation")]
FailedExternalValidation,
#[cfg(feature = "random-errors")]
#[error("Randomly generated error for testing purposes")]
RandomError,
}
impl UploadError {
@ -201,6 +214,11 @@ impl UploadError {
Self::Timeout(_) | Self::AggregateTimeout => ErrorCode::STREAM_TOO_SLOW,
Self::ProcessTimeout => ErrorCode::COMMAND_TIMEOUT,
Self::FailedExternalValidation => ErrorCode::FAILED_EXTERNAL_VALIDATION,
Self::InvalidJob(_, _) => ErrorCode::INVALID_JOB,
Self::InvalidQuery(_) => ErrorCode::INVALID_QUERY,
Self::InvalidJson(_) => ErrorCode::INVALID_JSON,
#[cfg(feature = "random-errors")]
Self::RandomError => ErrorCode::RANDOM_ERROR,
}
}
@ -238,7 +256,7 @@ impl ResponseError for Error {
fn status_code(&self) -> StatusCode {
match self.kind() {
Some(UploadError::Upload(actix_form_data::Error::FileSize))
| Some(UploadError::Validation(crate::validate::ValidationError::Filesize)) => {
| Some(UploadError::Validation(crate::ingest::ValidationError::Filesize)) => {
StatusCode::PAYLOAD_TOO_LARGE
}
Some(
@ -251,6 +269,8 @@ impl ResponseError for Error {
))
| UploadError::Repo(crate::repo::RepoError::AlreadyClaimed)
| UploadError::Validation(_)
| UploadError::InvalidQuery(_)
| UploadError::InvalidJson(_)
| UploadError::UnsupportedProcessExtension
| UploadError::ReadOnly
| UploadError::FailedExternalValidation


@ -100,6 +100,9 @@ impl ErrorCode {
pub(crate) const VIDEO_DISABLED: ErrorCode = ErrorCode {
code: "video-disabled",
};
pub(crate) const MEDIA_DISALLOWED: ErrorCode = ErrorCode {
code: "media-disallowed",
};
pub(crate) const HTTP_CLIENT_ERROR: ErrorCode = ErrorCode {
code: "http-client-error",
};
@ -144,4 +147,17 @@ impl ErrorCode {
pub(crate) const FAILED_EXTERNAL_VALIDATION: ErrorCode = ErrorCode {
code: "failed-external-validation",
};
pub(crate) const INVALID_JOB: ErrorCode = ErrorCode {
code: "invalid-job",
};
pub(crate) const INVALID_QUERY: ErrorCode = ErrorCode {
code: "invalid-query",
};
pub(crate) const INVALID_JSON: ErrorCode = ErrorCode {
code: "invalid-json",
};
#[cfg(feature = "random-errors")]
pub(crate) const RANDOM_ERROR: ErrorCode = ErrorCode {
code: "random-error",
};
}


@ -1,7 +1,6 @@
use std::path::Path;
use futures_core::Stream;
use tokio::io::AsyncRead;
use tokio_util::bytes::Bytes;
#[cfg(feature = "io-uring")]
@ -10,35 +9,32 @@ pub(crate) use io_uring::File;
#[cfg(not(feature = "io-uring"))]
pub(crate) use tokio_file::File;
use crate::future::WithPollTimer;
pub(crate) async fn write_from_stream(
path: impl AsRef<Path>,
stream: impl Stream<Item = std::io::Result<Bytes>>,
) -> std::io::Result<()> {
let mut file = File::create(path).await?;
file.write_from_stream(stream).await?;
file.close().await?;
Ok(())
}
pub(crate) async fn write_from_async_read(
path: impl AsRef<Path>,
reader: impl AsyncRead,
) -> std::io::Result<()> {
let mut file = File::create(path).await?;
file.write_from_async_read(reader).await?;
let mut file = File::create(path).with_poll_timer("create-file").await?;
file.write_from_stream(stream)
.with_poll_timer("write-from-stream")
.await?;
file.close().await?;
Ok(())
}
#[cfg(not(feature = "io-uring"))]
mod tokio_file {
use crate::{store::file_store::FileError, Either};
use crate::{future::WithPollTimer, store::file_store::FileError, Either};
use actix_web::web::{Bytes, BytesMut};
use futures_core::Stream;
use std::{io::SeekFrom, path::Path};
use streem::IntoStreamer;
use tokio::io::{AsyncRead, AsyncReadExt, AsyncSeekExt, AsyncWrite, AsyncWriteExt};
use tokio_util::codec::{BytesCodec, FramedRead};
use tokio::io::{AsyncReadExt, AsyncSeekExt, AsyncWriteExt};
use tokio_util::{
bytes::Buf,
codec::{BytesCodec, FramedRead},
};
pub(crate) struct File {
inner: tokio::fs::File,
@ -68,38 +64,26 @@ mod tokio_file {
let stream = std::pin::pin!(stream);
let mut stream = stream.into_streamer();
while let Some(res) = stream.next().await {
while let Some(mut bytes) = stream.try_next().with_poll_timer("try-next").await? {
tracing::trace!("write_from_stream: looping");
let mut bytes = res?;
while bytes.has_remaining() {
self.inner
.write_buf(&mut bytes)
.with_poll_timer("write-buf")
.await?;
self.inner.write_all_buf(&mut bytes).await?;
crate::sync::cooperate().await;
}
}
Ok(())
}
pub(crate) async fn write_from_async_read<R>(&mut self, reader: R) -> std::io::Result<()>
where
R: AsyncRead,
{
let mut reader = std::pin::pin!(reader);
tokio::io::copy(&mut reader, &mut self.inner).await?;
Ok(())
}
pub(crate) async fn close(self) -> std::io::Result<()> {
Ok(())
}
pub(crate) async fn read_to_async_write<W>(&mut self, writer: &mut W) -> std::io::Result<()>
where
W: AsyncWrite + Unpin + ?Sized,
{
tokio::io::copy(&mut self.inner, writer).await?;
Ok(())
}
pub(crate) async fn read_to_stream(
mut self,
from_start: Option<u64>,
@ -137,7 +121,6 @@ mod io_uring {
path::{Path, PathBuf},
};
use streem::IntoStreamer;
use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt};
use tokio_uring::{
buf::{IoBuf, IoBufMut},
BufResult,
@ -220,98 +203,10 @@ mod io_uring {
Ok(())
}
#[tracing::instrument(level = "debug", skip_all)]
pub(crate) async fn write_from_async_read<R>(&mut self, reader: R) -> std::io::Result<()>
where
R: AsyncRead,
{
let mut reader = std::pin::pin!(reader);
let mut cursor: u64 = 0;
loop {
tracing::trace!("write_from_async_read: looping");
let max_size = 65_536;
let mut buf = Vec::with_capacity(max_size.try_into().unwrap());
let n = (&mut reader).take(max_size).read_buf(&mut buf).await?;
if n == 0 {
break;
}
let mut position = 0;
loop {
tracing::trace!("write_from_async_read: looping inner");
if position == n {
break;
}
let position_u64: u64 = position.try_into().unwrap();
let (res, slice) = self
.write_at(buf.slice(position..n), cursor + position_u64)
.await;
let n = res?;
if n == 0 {
return Err(std::io::ErrorKind::UnexpectedEof.into());
}
position += n;
buf = slice.into_inner();
}
let position: u64 = position.try_into().unwrap();
cursor += position;
}
self.inner.sync_all().await?;
Ok(())
}
pub(crate) async fn close(self) -> std::io::Result<()> {
self.inner.close().await
}
pub(crate) async fn read_to_async_write<W>(&mut self, writer: &mut W) -> std::io::Result<()>
where
W: AsyncWrite + Unpin + ?Sized,
{
let metadata = self.metadata().await?;
let size = metadata.len();
let mut cursor: u64 = 0;
loop {
tracing::trace!("read_to_async_write: looping");
if cursor == size {
break;
}
let max_size = (size - cursor).min(65_536);
let buf = BytesMut::with_capacity(max_size.try_into().unwrap());
let (res, buf): (_, BytesMut) = self.read_at(buf, cursor).await;
let n: usize = res?;
if n == 0 {
return Err(std::io::ErrorKind::UnexpectedEof.into());
}
writer.write_all(&buf[0..n]).await?;
let n: u64 = n.try_into().unwrap();
cursor += n;
}
Ok(())
}
pub(crate) async fn read_to_stream(
self,
from_start: Option<u64>,
@ -380,6 +275,8 @@ mod io_uring {
#[cfg(test)]
mod tests {
use std::io::Read;
use streem::IntoStreamer;
use tokio::io::AsyncWriteExt;
macro_rules! test_async {
($fut:expr) => {
@ -395,9 +292,16 @@ mod io_uring {
let tmp = "/tmp/read-test";
test_async!(async move {
let mut file = super::File::open(EARTH_GIF).await.unwrap();
let file = super::File::open(EARTH_GIF).await.unwrap();
let mut tmp_file = tokio::fs::File::create(tmp).await.unwrap();
file.read_to_async_write(&mut tmp_file).await.unwrap();
let stream = file.read_to_stream(None, None).await.unwrap();
let stream = std::pin::pin!(stream);
let mut stream = stream.into_streamer();
while let Some(mut bytes) = stream.try_next().await.unwrap() {
tmp_file.write_all_buf(&mut bytes).await.unwrap();
}
});
let mut source = std::fs::File::open(EARTH_GIF).unwrap();
@ -421,9 +325,12 @@ mod io_uring {
let tmp = "/tmp/write-test";
test_async!(async move {
let mut file = tokio::fs::File::open(EARTH_GIF).await.unwrap();
let file = tokio::fs::File::open(EARTH_GIF).await.unwrap();
let mut tmp_file = super::File::create(tmp).await.unwrap();
tmp_file.write_from_async_read(&mut file).await.unwrap();
tmp_file
.write_from_stream(tokio_util::io::ReaderStream::new(file))
.await
.unwrap();
});
let mut source = std::fs::File::open(EARTH_GIF).unwrap();
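The filesystem write path in this file now drains a byte stream into the file rather than copying from an AsyncRead. A rough standalone sketch of that shape, using plain tokio types, with tokio::task::yield_now standing in for the crate's cooperate helper and the poll timers omitted:

use futures_core::Stream;
use streem::IntoStreamer;
use tokio::io::AsyncWriteExt;
use tokio_util::bytes::Bytes;

async fn write_stream_to_file(
    path: &std::path::Path,
    stream: impl Stream<Item = std::io::Result<Bytes>>,
) -> std::io::Result<()> {
    let mut file = tokio::fs::File::create(path).await?;

    let stream = std::pin::pin!(stream);
    let mut stream = stream.into_streamer();

    // write each chunk fully, yielding between chunks so one large upload
    // cannot monopolize the executor thread
    while let Some(mut bytes) = stream.try_next().await? {
        file.write_all_buf(&mut bytes).await?;
        tokio::task::yield_now().await;
    }

    file.flush().await?;
    Ok(())
}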

View file

@ -59,9 +59,19 @@ pub(crate) trait WithMetrics: Future {
}
}
pub(crate) trait WithPollTimer: Future {
fn with_poll_timer(self, name: &'static str) -> PollTimer<Self>
where
Self: Sized,
{
PollTimer { name, inner: self }
}
}
impl<F> NowOrNever for F where F: Future {}
impl<F> WithMetrics for F where F: Future {}
impl<F> WithTimeout for F where F: Future {}
impl<F> WithPollTimer for F where F: Future {}
pin_project_lite::pin_project! {
pub(crate) struct MetricsFuture<F> {
@ -104,3 +114,79 @@ impl Drop for Metrics {
.record(self.start.elapsed().as_secs_f64());
}
}
pin_project_lite::pin_project! {
pub(crate) struct PollTimer<F> {
name: &'static str,
#[pin]
inner: F,
}
}
impl<F> Future for PollTimer<F>
where
F: Future,
{
type Output = F::Output;
fn poll(
self: std::pin::Pin<&mut Self>,
cx: &mut std::task::Context<'_>,
) -> std::task::Poll<Self::Output> {
let start = Instant::now();
let this = self.project();
let out = this.inner.poll(cx);
let elapsed = start.elapsed();
if elapsed > Duration::from_micros(10) {
metrics::counter!(crate::init_metrics::FUTURE_POLL_TIMER_EXCEEDED, "timer" => this.name.to_string()).increment(1);
metrics::histogram!(crate::init_metrics::FUTURE_POLL_TIMER_EXCEEDED_SECONDS, "timer" => this.name.to_string()).record(elapsed.as_secs_f64());
}
if elapsed > Duration::from_secs(1) {
#[cfg(feature = "poll-timer-warnings")]
tracing::warn!(
"Future {} polled for {} seconds",
this.name,
elapsed.as_secs()
);
#[cfg(not(feature = "poll-timer-warnings"))]
tracing::debug!(
"Future {} polled for {} seconds",
this.name,
elapsed.as_secs()
);
} else if elapsed > Duration::from_millis(1) {
#[cfg(feature = "poll-timer-warnings")]
tracing::warn!("Future {} polled for {} ms", this.name, elapsed.as_millis());
#[cfg(not(feature = "poll-timer-warnings"))]
tracing::debug!("Future {} polled for {} ms", this.name, elapsed.as_millis());
} else if elapsed > Duration::from_micros(200) {
#[cfg(feature = "poll-timer-warnings")]
tracing::debug!(
"Future {} polled for {} microseconds",
this.name,
elapsed.as_micros(),
);
#[cfg(not(feature = "poll-timer-warnings"))]
tracing::trace!(
"Future {} polled for {} microseconds",
this.name,
elapsed.as_micros(),
);
} else if elapsed > Duration::from_micros(1) {
tracing::trace!(
"Future {} polled for {} microseconds",
this.name,
elapsed.as_micros()
);
}
out
}
}
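The poll timer above records how long each individual poll of the wrapped future takes, which is what exposes futures that do blocking work on the executor. A minimal self-contained sketch of the same idea (a hypothetical TimedPoll type, not the PollTimer above; it boxes the inner future to avoid pin projection and prints instead of emitting metrics):

use std::{
    future::Future,
    pin::Pin,
    task::{Context, Poll},
    time::{Duration, Instant},
};

struct TimedPoll<F> {
    name: &'static str,
    inner: Pin<Box<F>>, // boxing sidesteps manual pin projection
}

impl<F: Future> Future for TimedPoll<F> {
    type Output = F::Output;

    fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
        let start = Instant::now();
        let out = self.inner.as_mut().poll(cx);

        let elapsed = start.elapsed();
        if elapsed > Duration::from_millis(1) {
            // a long poll means the future did blocking work between awaits
            eprintln!("{} polled for {}ms", self.name, elapsed.as_millis());
        }

        out
    }
}

#[tokio::main]
async fn main() {
    let result = TimedPoll {
        name: "sleepy",
        inner: Box::pin(async {
            // std::thread::sleep blocks the whole poll, unlike tokio::time::sleep
            std::thread::sleep(Duration::from_millis(5));
            "done"
        }),
    }
    .await;

    println!("{result}");
}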

View file

@ -2,18 +2,17 @@ mod ffmpeg;
mod magick;
use crate::{
concurrent_processor::ProcessMap,
details::Details,
error::{Error, UploadError},
formats::{ImageFormat, InputProcessableFormat, InternalVideoFormat, ProcessableFormat},
future::{WithMetrics, WithTimeout},
repo::{Hash, VariantAlreadyExists},
future::{WithMetrics, WithPollTimer, WithTimeout},
repo::{Hash, NotificationEntry, VariantAlreadyExists},
state::State,
store::Store,
};
use std::{
path::PathBuf,
future::Future,
sync::Arc,
time::{Duration, Instant},
};
@ -48,13 +47,12 @@ impl Drop for MetricsGuard {
}
}
#[tracing::instrument(skip(state, process_map, hash))]
#[tracing::instrument(skip(state, original_details, hash))]
pub(crate) async fn generate<S: Store + 'static>(
state: &State<S>,
process_map: &ProcessMap,
format: InputProcessableFormat,
thumbnail_path: PathBuf,
thumbnail_args: Vec<String>,
variant: String,
variant_args: Vec<String>,
original_details: &Details,
hash: Hash,
) -> Result<(Details, Arc<str>), Error> {
@ -67,23 +65,122 @@ pub(crate) async fn generate<S: Store + 'static>(
Ok((original_details.clone(), identifier))
} else {
let process_fut = process(
state,
format,
thumbnail_path.clone(),
thumbnail_args,
original_details,
hash.clone(),
);
let mut attempts = 0;
let tup = loop {
if attempts > 2 {
return Err(UploadError::ProcessTimeout.into());
}
let (details, identifier) = process_map
.process(hash, thumbnail_path, process_fut)
.with_timeout(Duration::from_secs(state.config.media.process_timeout * 4))
.with_metrics(crate::init_metrics::GENERATE_PROCESS)
.await
.map_err(|_| UploadError::ProcessTimeout)??;
match state
.repo
.claim_variant_processing_rights(hash.clone(), variant.clone())
.await?
{
Ok(()) => {
// process
let process_future = process(
state,
format,
variant.clone(),
variant_args,
original_details,
hash.clone(),
)
.with_poll_timer("process-future");
Ok((details, identifier))
let res = heartbeat(state, hash.clone(), variant.clone(), process_future)
.with_poll_timer("heartbeat-future")
.with_timeout(Duration::from_secs(state.config.media.process_timeout * 4))
.with_metrics(crate::init_metrics::GENERATE_PROCESS)
.await
.map_err(|_| Error::from(UploadError::ProcessTimeout));
state
.repo
.notify_variant(hash.clone(), variant.clone())
.await?;
break res???;
}
Err(entry) => {
if let Some(tuple) = wait_timeout(
hash.clone(),
variant.clone(),
entry,
state,
Duration::from_secs(20),
)
.await?
{
break tuple;
}
attempts += 1;
}
}
};
Ok(tup)
}
}
pub(crate) async fn wait_timeout<S: Store + 'static>(
hash: Hash,
variant: String,
mut entry: NotificationEntry,
state: &State<S>,
timeout: Duration,
) -> Result<Option<(Details, Arc<str>)>, Error> {
let notified = entry.notified_timeout(timeout);
if let Some(identifier) = state
.repo
.variant_identifier(hash.clone(), variant.clone())
.await?
{
let details = crate::ensure_details_identifier(state, &identifier).await?;
return Ok(Some((details, identifier)));
}
match notified.await {
Ok(()) => tracing::debug!("notified"),
Err(_) => tracing::debug!("timeout"),
}
Ok(None)
}
async fn heartbeat<S, O>(
state: &State<S>,
hash: Hash,
variant: String,
future: impl Future<Output = O>,
) -> Result<O, Error> {
let repo = state.repo.clone();
let handle = crate::sync::abort_on_drop(crate::sync::spawn("heartbeat-task", async move {
let mut interval = tokio::time::interval(Duration::from_secs(5));
loop {
interval.tick().await;
if let Err(e) = repo.variant_heartbeat(hash.clone(), variant.clone()).await {
break Error::from(e);
}
}
}));
let future = std::pin::pin!(future);
tokio::select! {
biased;
output = future => {
Ok(output)
}
res = handle => {
Err(res.map_err(|_| UploadError::Canceled)?)
}
}
}
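The generate path above now claims processing rights for a variant, runs the work under a keepalive heartbeat, and notifies waiters when it finishes; losers of the claim wait on the notification instead of duplicating the work. A rough standalone sketch of the heartbeat-plus-select part (a println stands in for repo.variant_heartbeat, and the real code spawns the keepalive task and aborts it on drop): because select! drops the unfinished branch, the keepalive stops as soon as the task completes.

use std::{future::Future, time::Duration};

async fn with_keepalive<T>(task: impl Future<Output = T>) -> T {
    let keepalive = async {
        let mut interval = tokio::time::interval(Duration::from_secs(5));
        loop {
            interval.tick().await;
            println!("still working"); // stand-in for repo.variant_heartbeat(..)
        }
    };

    tokio::select! {
        biased;
        out = task => out,
        _ = keepalive => unreachable!("the keepalive loop never completes"),
    }
}

#[tokio::main]
async fn main() {
    let answer = with_keepalive(async {
        tokio::time::sleep(Duration::from_secs(12)).await;
        42
    })
    .await;

    println!("finished with {answer}");
}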
@ -91,8 +188,8 @@ pub(crate) async fn generate<S: Store + 'static>(
async fn process<S: Store + 'static>(
state: &State<S>,
output_format: InputProcessableFormat,
thumbnail_path: PathBuf,
thumbnail_args: Vec<String>,
variant: String,
variant_args: Vec<String>,
original_details: &Details,
hash: Hash,
) -> Result<(Details, Arc<str>), Error> {
@ -118,7 +215,7 @@ async fn process<S: Store + 'static>(
let stream = state.store.to_stream(&identifier, None, None).await?;
let bytes =
crate::magick::process_image_command(state, thumbnail_args, input_format, format, quality)
crate::magick::process_image_command(state, variant_args, input_format, format, quality)
.await?
.drive_with_stream(stream)
.into_bytes_stream()
@ -140,19 +237,21 @@ async fn process<S: Store + 'static>(
)
.await?;
if let Err(VariantAlreadyExists) = state
let identifier = if let Err(VariantAlreadyExists) = state
.repo
.relate_variant_identifier(
hash,
thumbnail_path.to_string_lossy().to_string(),
&identifier,
)
.relate_variant_identifier(hash.clone(), variant.clone(), &identifier)
.await?
{
state.store.remove(&identifier).await?;
}
state.repo.relate_details(&identifier, &details).await?;
state
.repo
.variant_identifier(hash, variant)
.await?
.ok_or(UploadError::MissingIdentifier)?
} else {
state.repo.relate_details(&identifier, &details).await?;
identifier
};
guard.disarm();
@ -221,7 +320,11 @@ where
.with_stdout(|stdout| async {
state
.store
.save_async_read(stdout, media_type, Some(file_extension))
.save_stream(
tokio_util::io::ReaderStream::with_capacity(stdout, 1024 * 64),
media_type,
Some(file_extension),
)
.await
})
.await??;

View file

@ -97,7 +97,8 @@ pub(super) async fn thumbnail<S: Store>(
],
&[],
state.config.media.process_timeout,
)?
)
.await?
.wait()
.await
.map_err(FfMpegError::Process)?;

View file

@ -43,7 +43,8 @@ pub(super) async fn thumbnail_command<S>(
(MAGICK_CONFIGURE_PATH, state.policy_dir.as_os_str()),
];
let process = Process::run("magick", &args, &envs, state.config.media.process_timeout)?
let process = Process::run("magick", &args, &envs, state.config.media.process_timeout)
.await?
.add_extras(temporary_path);
Ok(process)

src/http1.rs Normal file
View file

@ -0,0 +1,3 @@
pub(crate) fn to_actix_status(status: reqwest::StatusCode) -> actix_web::http::StatusCode {
actix_web::http::StatusCode::from_u16(status.as_u16()).expect("status codes are always valid")
}
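A small usage sketch for the helper above, with the presumed motivation that reqwest 0.12 and actix-web currently pull in different major versions of the http crate, so their StatusCode types are distinct and the raw u16 is the simplest bridge:

fn main() {
    let upstream = reqwest::StatusCode::IM_A_TEAPOT;

    // the same conversion as to_actix_status, spelled out
    let downstream = actix_web::http::StatusCode::from_u16(upstream.as_u16())
        .expect("status codes are always valid");

    assert_eq!(downstream.as_u16(), 418);
}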

View file

@ -1,3 +1,6 @@
mod hasher;
mod validate;
use std::{cell::RefCell, rc::Rc, sync::Arc, time::Duration};
use crate::{
@ -5,20 +8,21 @@ use crate::{
details::Details,
error::{Error, UploadError},
formats::InternalFormat,
future::WithMetrics,
future::{WithMetrics, WithPollTimer},
repo::{Alias, ArcRepo, DeleteToken, Hash},
state::State,
store::Store,
UploadQuery,
};
use actix_web::web::Bytes;
use futures_core::Stream;
use reqwest::Body;
use tracing::{Instrument, Span};
mod hasher;
use hasher::Hasher;
pub(crate) use validate::ValidationError;
#[derive(Debug)]
pub(crate) struct Session {
repo: ArcRepo,
@ -31,6 +35,7 @@ pub(crate) struct Session {
async fn process_ingest<S>(
state: &State<S>,
stream: impl Stream<Item = Result<Bytes, Error>>,
upload_query: &UploadQuery,
) -> Result<
(
InternalFormat,
@ -47,15 +52,25 @@ where
Duration::from_secs(60),
BytesStream::try_from_stream(stream),
)
.with_poll_timer("try-from-stream")
.await
.map_err(|_| UploadError::AggregateTimeout)??;
let permit = crate::process_semaphore().acquire().await?;
tracing::trace!("Validating bytes");
let (input_type, process_read) = crate::validate::validate_bytes_stream(state, bytes).await?;
let (input_type, process_read) =
validate::validate_bytes_stream(state, bytes, &upload_query.limits)
.with_poll_timer("validate-bytes-stream")
.await?;
let process_read = if let Some(operations) = state.config.media.preprocess_steps() {
let operations = if upload_query.operations.is_empty() {
state.config.media.preprocess_steps()
} else {
Some(upload_query.operations.as_ref())
};
let process_read = if let Some(operations) = operations {
if let Some(format) = input_type.processable_format() {
let (_, magick_args) =
crate::processor::build_chain(operations, format.file_extension())?;
@ -88,18 +103,22 @@ where
state
.store
.save_async_read(
hasher_reader,
.save_stream(
tokio_util::io::ReaderStream::with_capacity(hasher_reader, 1024 * 64),
input_type.media_type(),
Some(input_type.file_extension()),
)
.with_poll_timer("save-hasher-reader")
.await
.map(move |identifier| (hash_state, identifier))
})
.with_poll_timer("save-process-stdout")
.await??;
let bytes_stream = state.store.to_bytes(&identifier, None, None).await?;
let details = Details::from_bytes_stream(state, bytes_stream).await?;
let details = Details::from_bytes_stream(state, bytes_stream)
.with_poll_timer("details-from-bytes-stream")
.await?;
drop(permit);
@ -135,8 +154,8 @@ where
let identifier = state
.store
.save_async_read(
hasher_reader,
.save_stream(
tokio_util::io::ReaderStream::with_capacity(hasher_reader, 1024 * 64),
input_type.media_type(),
Some(input_type.file_extension()),
)
@ -152,6 +171,7 @@ pub(crate) async fn ingest<S>(
state: &State<S>,
stream: impl Stream<Item = Result<Bytes, Error>>,
declared_alias: Option<Alias>,
upload_query: &UploadQuery,
) -> Result<Session, Error>
where
S: Store,
@ -159,7 +179,9 @@ where
let (input_type, identifier, details, hash_state) = if state.config.server.danger_dummy_mode {
dummy_ingest(state, stream).await?
} else {
process_ingest(state, stream).await?
process_ingest(state, stream, upload_query)
.with_poll_timer("ingest-future")
.await?
};
let mut session = Session {

View file

@ -11,8 +11,10 @@ use crate::{
AnimationFormat, AnimationOutput, ImageInput, ImageOutput, InputFile, InputVideoFormat,
InternalFormat,
},
future::WithPollTimer,
process::{Process, ProcessRead},
state::State,
UploadLimits,
};
#[derive(Debug, thiserror::Error)]
@ -37,6 +39,9 @@ pub(crate) enum ValidationError {
#[error("Video is disabled")]
VideoDisabled,
#[error("Media type wasn't allowed for this upload")]
MediaDisallowed,
}
impl ValidationError {
@ -49,6 +54,7 @@ impl ValidationError {
Self::Empty => ErrorCode::VALIDATE_FILE_EMPTY,
Self::Filesize => ErrorCode::VALIDATE_FILE_SIZE,
Self::VideoDisabled => ErrorCode::VIDEO_DISABLED,
Self::MediaDisallowed => ErrorCode::MEDIA_DISALLOWED,
}
}
}
@ -59,6 +65,7 @@ const MEGABYTES: usize = 1024 * 1024;
pub(crate) async fn validate_bytes_stream<S>(
state: &State<S>,
bytes: BytesStream,
upload_limits: &UploadLimits,
) -> Result<(InternalFormat, ProcessRead), Error> {
if bytes.is_empty() {
return Err(ValidationError::Empty.into());
@ -69,16 +76,20 @@ pub(crate) async fn validate_bytes_stream<S>(
width,
height,
frames,
} = crate::discover::discover_bytes_stream(state, bytes.clone()).await?;
} = crate::discover::discover_bytes_stream(state, bytes.clone())
.with_poll_timer("discover-bytes-stream")
.await?;
validate_upload(bytes.len(), width, height, frames, upload_limits)?;
match &input {
InputFile::Image(input) => {
InputFile::Image(input) if *upload_limits.allow_image => {
let (format, process) =
process_image_command(state, *input, bytes.len(), width, height).await?;
Ok((format, process.drive_with_async_read(bytes.into_reader())))
Ok((format, process.drive_with_stream(bytes.into_io_stream())))
}
InputFile::Animation(input) => {
InputFile::Animation(input) if *upload_limits.allow_animation => {
let (format, process) = process_animation_command(
state,
*input,
@ -89,22 +100,69 @@ pub(crate) async fn validate_bytes_stream<S>(
)
.await?;
Ok((format, process.drive_with_async_read(bytes.into_reader())))
Ok((format, process.drive_with_stream(bytes.into_io_stream())))
}
InputFile::Video(input) => {
InputFile::Video(input) if *upload_limits.allow_video => {
let (format, process_read) =
process_video(state, bytes, *input, width, height, frames.unwrap_or(1)).await?;
Ok((format, process_read))
}
_ => Err(ValidationError::MediaDisallowed.into()),
}
}
fn validate_upload(
size: usize,
width: u16,
height: u16,
frames: Option<u32>,
upload_limits: &UploadLimits,
) -> Result<(), ValidationError> {
if upload_limits
.max_width
.is_some_and(|max_width| width > *max_width)
{
return Err(ValidationError::Width);
}
if upload_limits
.max_height
.is_some_and(|max_height| height > *max_height)
{
return Err(ValidationError::Height);
}
if upload_limits
.max_frame_count
.zip(frames)
.is_some_and(|(max_frame_count, frames)| frames > *max_frame_count)
{
return Err(ValidationError::Frames);
}
if upload_limits
.max_area
.is_some_and(|max_area| u32::from(width) * u32::from(height) > *max_area)
{
return Err(ValidationError::Area);
}
if upload_limits
.max_file_size
.is_some_and(|max_file_size| size > *max_file_size * MEGABYTES)
{
return Err(ValidationError::Filesize);
}
Ok(())
}
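A hypothetical in-module test for the per-upload checks above, assuming UploadLimits and Serde are reachable as crate::UploadLimits and crate::serde_str::Serde as elsewhere in this diff:

#[cfg(test)]
mod upload_limit_tests {
    use crate::{serde_str::Serde, UploadLimits};

    #[test]
    fn width_limit_rejects_wide_uploads() {
        let limits = UploadLimits {
            max_width: Some(Serde::new(1000)),
            ..Default::default()
        };

        // 1 KiB file, 2000x500 pixels, unknown frame count
        let res = super::validate_upload(1024, 2000, 500, None, &limits);

        assert!(matches!(res, Err(super::ValidationError::Width)));
    }
}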
#[tracing::instrument(skip(state))]
async fn process_image_command<S>(
state: &State<S>,
input: ImageInput,
length: usize,
size: usize,
width: u16,
height: u16,
) -> Result<(InternalFormat, Process), Error> {
@ -119,7 +177,7 @@ async fn process_image_command<S>(
if u32::from(width) * u32::from(height) > validations.max_area {
return Err(ValidationError::Area.into());
}
if length > validations.max_file_size * MEGABYTES {
if size > validations.max_file_size * MEGABYTES {
return Err(ValidationError::Filesize.into());
}
@ -133,7 +191,7 @@ async fn process_image_command<S>(
magick::convert_image_command(state, input.format, format, quality).await?
} else {
exiftool::clear_metadata_command(state.config.media.process_timeout)?
exiftool::clear_metadata_command(state.config.media.process_timeout).await?
};
Ok((InternalFormat::Image(format), process))
@ -169,14 +227,14 @@ fn validate_animation(
async fn process_animation_command<S>(
state: &State<S>,
input: AnimationFormat,
length: usize,
size: usize,
width: u16,
height: u16,
frames: u32,
) -> Result<(InternalFormat, Process), Error> {
let validations = &state.config.media.animation;
validate_animation(length, width, height, frames, validations)?;
validate_animation(size, width, height, frames, validations)?;
let AnimationOutput {
format,
@ -188,7 +246,7 @@ async fn process_animation_command<S>(
magick::convert_animation_command(state, input, format, quality).await?
} else {
exiftool::clear_metadata_command(state.config.media.process_timeout)?
exiftool::clear_metadata_command(state.config.media.process_timeout).await?
};
Ok((InternalFormat::Animation(format), process))
@ -252,6 +310,7 @@ async fn process_video<S>(
state.config.media.process_timeout,
bytes,
)
.with_poll_timer("transcode-bytes")
.await?;
Ok((

View file

@ -0,0 +1,6 @@
use crate::{exiftool::ExifError, process::Process};
#[tracing::instrument(level = "trace", skip_all)]
pub(super) async fn clear_metadata_command(timeout: u64) -> Result<Process, ExifError> {
Ok(Process::run("exiftool", &["-all=", "-", "-out", "-"], &[], timeout).await?)
}

View file

@ -6,6 +6,7 @@ use crate::{
bytes_stream::BytesStream,
ffmpeg::FfMpegError,
formats::{InputVideoFormat, OutputVideo},
future::WithPollTimer,
process::{Process, ProcessRead},
tmp_file::TmpDir,
};
@ -22,7 +23,8 @@ pub(super) async fn transcode_bytes(
let output_path = output_file.as_os_str();
let res = crate::ffmpeg::with_file(tmp_dir, None, |input_file| async move {
crate::file::write_from_async_read(&input_file, bytes.into_reader())
crate::file::write_from_stream(&input_file, bytes.into_io_stream())
.with_poll_timer("write-from-stream")
.await
.map_err(FfMpegError::Write)?;
@ -34,6 +36,7 @@ pub(super) async fn transcode_bytes(
crf,
timeout,
)
.with_poll_timer("transcode-files")
.await?;
let tmp_file = crate::file::File::open(output_path)
@ -132,7 +135,10 @@ async fn transcode_files(
output_path,
]);
Process::run("ffmpeg", &args, &[], timeout)?.wait().await?;
Process::run("ffmpeg", &args, &[], timeout)
.await?
.wait()
.await?;
Ok(())
}

View file

@ -80,7 +80,8 @@ async fn convert<S>(
(MAGICK_CONFIGURE_PATH, state.policy_dir.as_os_str()),
];
let process = Process::run("magick", &args, &envs, state.config.media.process_timeout)?
let process = Process::run("magick", &args, &envs, state.config.media.process_timeout)
.await?
.add_extras(temporary_path);
Ok(process)

View file

@ -1,5 +1,6 @@
pub(super) fn init_metrics() {
describe_toplevel();
describe_future();
describe_queue_cleanup();
describe_payload();
describe_job();
@ -26,6 +27,21 @@ fn describe_toplevel() {
pub(crate) const FILES: &str = "pict-rs.files";
pub(crate) const BACKGROUND_UPLOAD_CLAIM: &str = "pict-rs.background.upload.claim";
fn describe_future() {
metrics::describe_counter!(
FUTURE_POLL_TIMER_EXCEEDED,
"How many times a given poll operation has lasted longer than 10 microseconds"
);
metrics::describe_histogram!(
FUTURE_POLL_TIMER_EXCEEDED_SECONDS,
"Durations for polls lasting longer than 10 microseconds"
);
}
pub(crate) const FUTURE_POLL_TIMER_EXCEEDED: &str = "pict-rs.future.poll-timer.exceeded";
pub(crate) const FUTURE_POLL_TIMER_EXCEEDED_SECONDS: &str =
"pict-rs.future.poll-timer.exceeded.seconds";
fn describe_queue_cleanup() {
metrics::describe_counter!(
CLEANUP_OUTDATED_PROXY,
@ -344,6 +360,14 @@ fn describe_postgres() {
POSTGRES_QUEUE_HEARTBEAT,
"Timings for updating the provided job's keepalive heartbeat"
);
metrics::describe_histogram!(
POSTGRES_QUEUE_RETRY,
"Timings for updating retry count for a job"
);
metrics::describe_histogram!(
POSTGRES_QUEUE_CLEANUP,
"Timings for removing jobs with no more retries"
);
metrics::describe_histogram!(
POSTGRES_QUEUE_COMPLETE,
"Timings for removing a completed job from the queue"
@ -471,6 +495,8 @@ pub(crate) const POSTGRES_QUEUE_LISTEN: &str = "pict-rs.postgres.queue.listen";
pub(crate) const POSTGRES_QUEUE_REQUEUE: &str = "pict-rs.postgres.queue.requeue";
pub(crate) const POSTGRES_QUEUE_CLAIM: &str = "pict-rs.postgres.queue.claim";
pub(crate) const POSTGRES_QUEUE_HEARTBEAT: &str = "pict-rs.postgres.queue.heartbeat";
pub(crate) const POSTGRES_QUEUE_RETRY: &str = "pict-rs.postgres.queue.retry";
pub(crate) const POSTGRES_QUEUE_CLEANUP: &str = "pict-rs.postgres.queue.cleanup";
pub(crate) const POSTGRES_QUEUE_COMPLETE: &str = "pict-rs.postgres.queue.complete";
pub(crate) const POSTGRES_STORE_MIGRATION_COUNT: &str = "pict-rs.postgres.store-migration.count";
pub(crate) const POSTGRES_STORE_MIGRATION_MARK_MIGRATED: &str =

View file

@ -1,4 +1,5 @@
use crate::config::{LogFormat, OpenTelemetry, Tracing};
use color_eyre::config::Theme;
use console_subscriber::ConsoleLayer;
use opentelemetry::KeyValue;
use opentelemetry_otlp::WithExportConfig;
@ -11,7 +12,15 @@ use tracing_subscriber::{
};
pub(super) fn init_tracing(tracing: &Tracing) -> color_eyre::Result<()> {
color_eyre::install()?;
let eyre_theme = if tracing.logging.no_ansi {
Theme::new()
} else {
Theme::dark()
};
color_eyre::config::HookBuilder::new()
.theme(eyre_theme)
.install()?;
LogTracer::init()?;
@ -23,7 +32,9 @@ pub(super) fn init_tracing(tracing: &Tracing) -> color_eyre::Result<()> {
FmtSpan::NONE
};
let format_layer = tracing_subscriber::fmt::layer().with_span_events(fmt_span);
let format_layer = tracing_subscriber::fmt::layer()
.with_span_events(fmt_span)
.with_ansi(!tracing.logging.no_ansi);
match tracing.logging.format {
LogFormat::Compact => with_format(format_layer.compact(), tracing),

View file

@ -1,7 +1,6 @@
mod backgrounded;
mod blurhash;
mod bytes_stream;
mod concurrent_processor;
mod config;
mod details;
mod discover;
@ -15,6 +14,7 @@ mod file_path;
mod formats;
mod future;
mod generate;
mod http1;
mod ingest;
mod init_metrics;
mod init_tracing;
@ -35,7 +35,6 @@ mod stream;
mod sync;
mod tls;
mod tmp_file;
mod validate;
use actix_form_data::{Field, Form, FormData, Multipart, Value};
use actix_web::{
@ -43,50 +42,46 @@ use actix_web::{
http::header::{CacheControl, CacheDirective, LastModified, Range, ACCEPT_RANGES},
web, App, HttpRequest, HttpResponse, HttpResponseBuilder, HttpServer,
};
use details::{ApiDetails, HumanDate};
use future::WithTimeout;
use futures_core::Stream;
use magick::ArcPolicyDir;
use metrics_exporter_prometheus::PrometheusBuilder;
use middleware::{Metrics, Payload};
use repo::ArcRepo;
use reqwest_middleware::{ClientBuilder, ClientWithMiddleware};
use reqwest_tracing::TracingMiddleware;
use rustls_channel_resolver::ChannelSender;
use rusty_s3::UrlStyle;
use state::State;
use std::{
marker::PhantomData,
path::Path,
path::PathBuf,
rc::Rc,
sync::{Arc, OnceLock},
time::{Duration, SystemTime},
};
use streem::IntoStreamer;
use sync::DropHandle;
use tmp_file::{ArcTmpDir, TmpDir};
use tokio::sync::Semaphore;
use tracing::Instrument;
use tracing_actix_web::TracingLogger;
use self::{
backgrounded::Backgrounded,
concurrent_processor::ProcessMap,
config::{Configuration, Operation},
details::Details,
details::{ApiDetails, Details, HumanDate},
either::Either,
error::{Error, UploadError},
formats::InputProcessableFormat,
future::{WithPollTimer, WithTimeout},
ingest::Session,
init_tracing::init_tracing,
middleware::{Deadline, Internal},
magick::ArcPolicyDir,
middleware::{Deadline, Internal, Log, Metrics, Payload},
migrate_store::migrate_store,
queue::queue_generate,
repo::{sled::SledRepo, Alias, DeleteToken, Hash, Repo, UploadId, UploadResult},
repo::{sled::SledRepo, Alias, ArcRepo, DeleteToken, Hash, Repo, UploadId, UploadResult},
serde_str::Serde,
state::State,
store::{file_store::FileStore, object_store::ObjectStore, Store},
stream::empty,
sync::DropHandle,
tls::Tls,
tmp_file::{ArcTmpDir, TmpDir},
};
pub use self::config::{ConfigSource, PictRsConfiguration};
@ -123,6 +118,7 @@ async fn ensure_details<S: Store + 'static>(
ensure_details_identifier(state, &identifier).await
}
#[tracing::instrument(skip(state))]
async fn ensure_details_identifier<S: Store + 'static>(
state: &State<S>,
identifier: &Arc<str>,
@ -147,22 +143,64 @@ async fn ensure_details_identifier<S: Store + 'static>(
}
}
#[derive(Clone, Debug, serde::Deserialize, serde::Serialize)]
#[serde(default)]
struct UploadLimits {
max_width: Option<Serde<u16>>,
max_height: Option<Serde<u16>>,
max_area: Option<Serde<u32>>,
max_frame_count: Option<Serde<u32>>,
max_file_size: Option<Serde<usize>>,
allow_image: Serde<bool>,
allow_animation: Serde<bool>,
allow_video: Serde<bool>,
}
impl Default for UploadLimits {
fn default() -> Self {
Self {
max_width: None,
max_height: None,
max_area: None,
max_frame_count: None,
max_file_size: None,
allow_image: Serde::new(true),
allow_animation: Serde::new(true),
allow_video: Serde::new(true),
}
}
}
#[derive(Clone, Default, Debug, serde::Deserialize, serde::Serialize)]
struct UploadQuery {
#[serde(flatten)]
limits: UploadLimits,
#[serde(with = "tuple_vec_map", flatten)]
operations: Vec<(String, String)>,
}
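With these types, the upload endpoints accept per-request limits and processing steps in the query string, along the lines of ?max_width=1000&allow_video=false&blur=2.5, where keys that are not limit fields (blur=2.5 here) end up in the flattened operations list. A simplified standalone sketch of how such a query deserializes, using plain field types instead of the crate's Serde<T> wrapper and dropping the operations map (serde_urlencoded is used purely for illustration):

use serde::Deserialize;

#[derive(Debug, Deserialize)]
#[serde(default)]
struct Limits {
    max_width: Option<u16>,
    max_height: Option<u16>,
    max_area: Option<u32>,
    max_file_size: Option<usize>,
    allow_image: bool,
    allow_animation: bool,
    allow_video: bool,
}

impl Default for Limits {
    fn default() -> Self {
        Self {
            max_width: None,
            max_height: None,
            max_area: None,
            max_file_size: None,
            allow_image: true,
            allow_animation: true,
            allow_video: true,
        }
    }
}

fn main() {
    let limits: Limits =
        serde_urlencoded::from_str("max_width=1000&allow_video=false").unwrap();

    assert_eq!(limits.max_width, Some(1000));
    assert!(!limits.allow_video);
    assert!(limits.allow_image); // missing fields keep their defaults
}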
struct Upload<S>(Value<Session>, PhantomData<S>);
impl<S: Store + 'static> FormData for Upload<S> {
type Item = Session;
type Error = Error;
fn form(req: &HttpRequest) -> Form<Self::Item, Self::Error> {
fn form(req: &HttpRequest) -> Result<Form<Self::Item, Self::Error>, Self::Error> {
let state = req
.app_data::<web::Data<State<S>>>()
.expect("No state in request")
.clone();
let web::Query(upload_query) = web::Query::<UploadQuery>::from_query(req.query_string())
.map_err(UploadError::InvalidQuery)?;
let upload_query = Rc::new(upload_query);
// Create a new Multipart Form validator
//
// This form is expecting a single array field, 'images' with at most 10 files in it
Form::new()
Ok(Form::new()
.max_files(state.config.server.max_file_count)
.max_file_size(state.config.media.max_file_size * MEGABYTES)
.transform_error(transform_error)
@ -170,6 +208,7 @@ impl<S: Store + 'static> FormData for Upload<S> {
"images",
Field::array(Field::file(move |filename, _, stream| {
let state = state.clone();
let upload_query = upload_query.clone();
metrics::counter!(crate::init_metrics::FILES, "upload" => "inline")
.increment(1);
@ -184,12 +223,13 @@ impl<S: Store + 'static> FormData for Upload<S> {
let stream = crate::stream::from_err(stream);
ingest::ingest(&state, stream, None).await
ingest::ingest(&state, stream, None, &upload_query).await
}
.with_poll_timer("file-upload")
.instrument(span),
)
})),
)
))
}
fn extract(value: Value<Self::Item>) -> Result<Self, Self::Error> {
@ -203,16 +243,21 @@ impl<S: Store + 'static> FormData for Import<S> {
type Item = Session;
type Error = Error;
fn form(req: &actix_web::HttpRequest) -> Form<Self::Item, Self::Error> {
fn form(req: &actix_web::HttpRequest) -> Result<Form<Self::Item, Self::Error>, Self::Error> {
let state = req
.app_data::<web::Data<State<S>>>()
.expect("No state in request")
.clone();
let web::Query(upload_query) = web::Query::<UploadQuery>::from_query(req.query_string())
.map_err(UploadError::InvalidQuery)?;
let upload_query = Rc::new(upload_query);
// Create a new Multipart Form validator for internal imports
//
// This form is expecting a single array field, 'images' with at most 10 files in it
Form::new()
Ok(Form::new()
.max_files(state.config.server.max_file_count)
.max_file_size(state.config.media.max_file_size * MEGABYTES)
.transform_error(transform_error)
@ -220,6 +265,7 @@ impl<S: Store + 'static> FormData for Import<S> {
"images",
Field::array(Field::file(move |filename, _, stream| {
let state = state.clone();
let upload_query = upload_query.clone();
metrics::counter!(crate::init_metrics::FILES, "import" => "inline")
.increment(1);
@ -234,13 +280,19 @@ impl<S: Store + 'static> FormData for Import<S> {
let stream = crate::stream::from_err(stream);
ingest::ingest(&state, stream, Some(Alias::from_existing(&filename)))
.await
ingest::ingest(
&state,
stream,
Some(Alias::from_existing(&filename)),
&upload_query,
)
.await
}
.with_poll_timer("file-import")
.instrument(span),
)
})),
)
))
}
fn extract(value: Value<Self::Item>) -> Result<Self, Self::Error>
@ -318,16 +370,16 @@ impl<S: Store + 'static> FormData for BackgroundedUpload<S> {
type Item = Backgrounded;
type Error = Error;
fn form(req: &actix_web::HttpRequest) -> Form<Self::Item, Self::Error> {
// Create a new Multipart Form validator for backgrounded uploads
//
// This form is expecting a single array field, 'images' with at most 10 files in it
fn form(req: &actix_web::HttpRequest) -> Result<Form<Self::Item, Self::Error>, Self::Error> {
let state = req
.app_data::<web::Data<State<S>>>()
.expect("No state in request")
.clone();
Form::new()
// Create a new Multipart Form validator for backgrounded uploads
//
// This form is expecting a single array field, 'images' with at most 10 files in it
Ok(Form::new()
.max_files(state.config.server.max_file_count)
.max_file_size(state.config.media.max_file_size * MEGABYTES)
.transform_error(transform_error)
@ -351,10 +403,11 @@ impl<S: Store + 'static> FormData for BackgroundedUpload<S> {
Backgrounded::proxy(&state, stream).await
}
.with_poll_timer("file-proxy")
.instrument(span),
)
})),
)
))
}
fn extract(value: Value<Self::Item>) -> Result<Self, Self::Error>
@ -369,7 +422,10 @@ impl<S: Store + 'static> FormData for BackgroundedUpload<S> {
async fn upload_backgrounded<S: Store>(
Multipart(BackgroundedUpload(value, _)): Multipart<BackgroundedUpload<S>>,
state: web::Data<State<S>>,
upload_query: web::Query<UploadQuery>,
) -> Result<HttpResponse, Error> {
let upload_query = upload_query.into_inner();
let images = value
.map()
.and_then(|mut m| m.remove("images"))
@ -386,7 +442,14 @@ async fn upload_backgrounded<S: Store>(
let upload_id = image.result.upload_id().expect("Upload ID exists");
let identifier = image.result.identifier().expect("Identifier exists");
queue::queue_ingest(&state.repo, identifier, upload_id, None).await?;
queue::queue_ingest(
&state.repo,
identifier,
upload_id,
None,
upload_query.clone(),
)
.await?;
files.push(serde_json::json!({
"upload_id": upload_id.to_string(),
@ -456,14 +519,24 @@ struct UrlQuery {
url: String,
#[serde(default)]
backgrounded: bool,
backgrounded: Serde<bool>,
}
#[derive(Debug, serde::Deserialize)]
struct DownloadQuery {
#[serde(flatten)]
url_query: UrlQuery,
#[serde(flatten)]
upload_query: UploadQuery,
}
async fn ingest_inline<S: Store + 'static>(
stream: impl Stream<Item = Result<web::Bytes, Error>>,
state: &State<S>,
upload_query: &UploadQuery,
) -> Result<(Alias, DeleteToken, Details), Error> {
let session = ingest::ingest(state, stream, None).await?;
let session = ingest::ingest(state, stream, None, upload_query).await?;
let alias = session.alias().expect("alias should exist").to_owned();
@ -477,15 +550,20 @@ async fn ingest_inline<S: Store + 'static>(
/// download an image from a URL
#[tracing::instrument(name = "Downloading file", skip(state))]
async fn download<S: Store + 'static>(
query: web::Query<UrlQuery>,
download_query: web::Query<DownloadQuery>,
state: web::Data<State<S>>,
) -> Result<HttpResponse, Error> {
let stream = download_stream(&query.url, &state).await?;
let DownloadQuery {
url_query,
upload_query,
} = download_query.into_inner();
if query.backgrounded {
do_download_backgrounded(stream, state).await
let stream = download_stream(&url_query.url, &state).await?;
if *url_query.backgrounded {
do_download_backgrounded(stream, state, upload_query).await
} else {
do_download_inline(stream, &state).await
do_download_inline(stream, &state, &upload_query).await
}
}
@ -500,7 +578,7 @@ async fn download_stream<S>(
let res = state.client.get(url).send().await?;
if !res.status().is_success() {
return Err(UploadError::Download(res.status()).into());
return Err(UploadError::Download(http1::to_actix_status(res.status())).into());
}
let stream = crate::stream::limit(
@ -515,10 +593,11 @@ async fn download_stream<S>(
async fn do_download_inline<S: Store + 'static>(
stream: impl Stream<Item = Result<web::Bytes, Error>>,
state: &State<S>,
upload_query: &UploadQuery,
) -> Result<HttpResponse, Error> {
metrics::counter!(crate::init_metrics::FILES, "download" => "inline").increment(1);
let (alias, delete_token, details) = ingest_inline(stream, state).await?;
let (alias, delete_token, details) = ingest_inline(stream, state, upload_query).await?;
Ok(HttpResponse::Created().json(&serde_json::json!({
"msg": "ok",
@ -534,6 +613,7 @@ async fn do_download_inline<S: Store + 'static>(
async fn do_download_backgrounded<S: Store + 'static>(
stream: impl Stream<Item = Result<web::Bytes, Error>>,
state: web::Data<State<S>>,
upload_query: UploadQuery,
) -> Result<HttpResponse, Error> {
metrics::counter!(crate::init_metrics::FILES, "download" => "background").increment(1);
@ -542,7 +622,7 @@ async fn do_download_backgrounded<S: Store + 'static>(
let upload_id = backgrounded.upload_id().expect("Upload ID exists");
let identifier = backgrounded.identifier().expect("Identifier exists");
queue::queue_ingest(&state.repo, identifier, upload_id, None).await?;
queue::queue_ingest(&state.repo, identifier, upload_id, None, upload_query).await?;
backgrounded.disarm();
@ -691,7 +771,7 @@ fn prepare_process(
config: &Configuration,
operations: Vec<(String, String)>,
ext: &str,
) -> Result<(InputProcessableFormat, PathBuf, Vec<String>), Error> {
) -> Result<(InputProcessableFormat, String, Vec<String>), Error> {
let operations = operations
.into_iter()
.filter(|(k, _)| config.media.filters.contains(&k.to_lowercase()))
@ -701,10 +781,9 @@ fn prepare_process(
.parse::<InputProcessableFormat>()
.map_err(|_| UploadError::UnsupportedProcessExtension)?;
let (thumbnail_path, thumbnail_args) =
self::processor::build_chain(&operations, &format.to_string())?;
let (variant, variant_args) = self::processor::build_chain(&operations, &format.to_string())?;
Ok((format, thumbnail_path, thumbnail_args))
Ok((format, variant, variant_args))
}
#[tracing::instrument(name = "Fetching derived details", skip(state))]
@ -715,7 +794,7 @@ async fn process_details<S: Store>(
) -> Result<HttpResponse, Error> {
let alias = alias_from_query(source.into(), &state).await?;
let (_, thumbnail_path, _) = prepare_process(&state.config, operations, ext.as_str())?;
let (_, variant, _) = prepare_process(&state.config, operations, ext.as_str())?;
let hash = state
.repo
@ -723,18 +802,16 @@ async fn process_details<S: Store>(
.await?
.ok_or(UploadError::MissingAlias)?;
let thumbnail_string = thumbnail_path.to_string_lossy().to_string();
if !state.config.server.read_only {
state
.repo
.accessed_variant(hash.clone(), thumbnail_string.clone())
.accessed_variant(hash.clone(), variant.clone())
.await?;
}
let identifier = state
.repo
.variant_identifier(hash, thumbnail_string)
.variant_identifier(hash, variant)
.await?
.ok_or(UploadError::MissingAlias)?;
@ -764,20 +841,16 @@ async fn not_found_hash(repo: &ArcRepo) -> Result<Option<(Alias, Hash)>, Error>
}
/// Process files
#[tracing::instrument(name = "Serving processed image", skip(state, process_map))]
#[tracing::instrument(name = "Serving processed image", skip(state))]
async fn process<S: Store + 'static>(
range: Option<web::Header<Range>>,
web::Query(ProcessQuery { source, operations }): web::Query<ProcessQuery>,
ext: web::Path<String>,
state: web::Data<State<S>>,
process_map: web::Data<ProcessMap>,
) -> Result<HttpResponse, Error> {
let alias = proxy_alias_from_query(source.into(), &state).await?;
let (format, thumbnail_path, thumbnail_args) =
prepare_process(&state.config, operations, ext.as_str())?;
let path_string = thumbnail_path.to_string_lossy().to_string();
let (format, variant, variant_args) = prepare_process(&state.config, operations, ext.as_str())?;
let (hash, alias, not_found) = if let Some(hash) = state.repo.hash(&alias).await? {
(hash, alias, false)
@ -792,13 +865,13 @@ async fn process<S: Store + 'static>(
if !state.config.server.read_only {
state
.repo
.accessed_variant(hash.clone(), path_string.clone())
.accessed_variant(hash.clone(), variant.clone())
.await?;
}
let identifier_opt = state
.repo
.variant_identifier(hash.clone(), path_string)
.variant_identifier(hash.clone(), variant.clone())
.await?;
let (details, identifier) = if let Some(identifier) = identifier_opt {
@ -810,18 +883,34 @@ async fn process<S: Store + 'static>(
return Err(UploadError::ReadOnly.into());
}
let original_details = ensure_details(&state, &alias).await?;
queue_generate(&state.repo, format, alias, variant.clone(), variant_args).await?;
generate::generate(
&state,
&process_map,
format,
thumbnail_path,
thumbnail_args,
&original_details,
hash,
)
.await?
let mut attempts = 0;
loop {
if attempts > 6 {
return Err(UploadError::ProcessTimeout.into());
}
let entry = state
.repo
.variant_waiter(hash.clone(), variant.clone())
.await?;
let opt = generate::wait_timeout(
hash.clone(),
variant.clone(),
entry,
&state,
Duration::from_secs(5),
)
.await?;
if let Some(tuple) = opt {
break tuple;
}
attempts += 1;
}
};
if let Some(public_url) = state.store.public_url(&identifier) {
@ -852,9 +941,8 @@ async fn process_head<S: Store + 'static>(
}
};
let (_, thumbnail_path, _) = prepare_process(&state.config, operations, ext.as_str())?;
let (_, variant, _) = prepare_process(&state.config, operations, ext.as_str())?;
let path_string = thumbnail_path.to_string_lossy().to_string();
let Some(hash) = state.repo.hash(&alias).await? else {
// Invalid alias
return Ok(HttpResponse::NotFound().finish());
@ -863,14 +951,11 @@ async fn process_head<S: Store + 'static>(
if !state.config.server.read_only {
state
.repo
.accessed_variant(hash.clone(), path_string.clone())
.accessed_variant(hash.clone(), variant.clone())
.await?;
}
let identifier_opt = state
.repo
.variant_identifier(hash.clone(), path_string)
.await?;
let identifier_opt = state.repo.variant_identifier(hash.clone(), variant).await?;
if let Some(identifier) = identifier_opt {
let details = ensure_details_identifier(&state, &identifier).await?;
@ -889,7 +974,7 @@ async fn process_head<S: Store + 'static>(
/// Process files
#[tracing::instrument(name = "Spawning image process", skip(state))]
async fn process_backgrounded<S: Store>(
async fn process_backgrounded<S: Store + 'static>(
web::Query(ProcessQuery { source, operations }): web::Query<ProcessQuery>,
ext: web::Path<String>,
state: web::Data<State<S>>,
@ -906,10 +991,9 @@ async fn process_backgrounded<S: Store>(
}
};
let (target_format, process_path, process_args) =
let (target_format, variant, variant_args) =
prepare_process(&state.config, operations, ext.as_str())?;
let path_string = process_path.to_string_lossy().to_string();
let Some(hash) = state.repo.hash(&source).await? else {
// Invalid alias
return Ok(HttpResponse::BadRequest().finish());
@ -917,7 +1001,7 @@ async fn process_backgrounded<S: Store>(
let identifier_opt = state
.repo
.variant_identifier(hash.clone(), path_string)
.variant_identifier(hash.clone(), variant.clone())
.await?;
if identifier_opt.is_some() {
@ -928,14 +1012,7 @@ async fn process_backgrounded<S: Store>(
return Err(UploadError::ReadOnly.into());
}
queue_generate(
&state.repo,
target_format,
source,
process_path,
process_args,
)
.await?;
queue_generate(&state.repo, target_format, source, variant, variant_args).await?;
Ok(HttpResponse::Accepted().finish())
}
@ -1209,7 +1286,7 @@ async fn proxy_alias_from_query<S: Store + 'static>(
} else if !state.config.server.read_only {
let stream = download_stream(proxy.as_str(), state).await?;
let (alias, _, _) = ingest_inline(stream, state).await?;
let (alias, _, _) = ingest_inline(stream, state, &Default::default()).await?;
state.repo.relate_url(proxy, alias.clone()).await?;
@ -1494,15 +1571,25 @@ fn build_client() -> Result<ClientWithMiddleware, Error> {
.build())
}
fn query_config() -> web::QueryConfig {
web::QueryConfig::default()
.error_handler(|err, _| Error::from(UploadError::InvalidQuery(err)).into())
}
fn json_config() -> web::JsonConfig {
web::JsonConfig::default()
.error_handler(|err, _| Error::from(UploadError::InvalidJson(err)).into())
}
fn configure_endpoints<S: Store + 'static, F: Fn(&mut web::ServiceConfig)>(
config: &mut web::ServiceConfig,
state: State<S>,
process_map: ProcessMap,
extra_config: F,
) {
config
.app_data(query_config())
.app_data(json_config())
.app_data(web::Data::new(state.clone()))
.app_data(web::Data::new(process_map.clone()))
.route("/healthz", web::get().to(healthz::<S>))
.service(
web::scope("/image")
@ -1610,12 +1697,12 @@ fn spawn_cleanup<S>(state: State<S>) {
});
}
fn spawn_workers<S>(state: State<S>, process_map: ProcessMap)
fn spawn_workers<S>(state: State<S>)
where
S: Store + 'static,
{
crate::sync::spawn("cleanup-worker", queue::process_cleanup(state.clone()));
crate::sync::spawn("process-worker", queue::process_images(state, process_map));
crate::sync::spawn("process-worker", queue::process_images(state));
}
fn watch_keys(tls: Tls, sender: ChannelSender) -> DropHandle<()> {
@ -1641,8 +1728,6 @@ async fn launch<
state: State<S>,
extra_config: F,
) -> color_eyre::Result<()> {
let process_map = ProcessMap::new();
let address = state.config.server.address;
let tls = Tls::from_config(&state.config);
@ -1652,18 +1737,16 @@ async fn launch<
let server = HttpServer::new(move || {
let extra_config = extra_config.clone();
let state = state.clone();
let process_map = process_map.clone();
spawn_workers(state.clone(), process_map.clone());
spawn_workers(state.clone());
App::new()
.wrap(Log::new(state.config.tracing.logging.log_requests))
.wrap(TracingLogger::default())
.wrap(Deadline)
.wrap(Metrics)
.wrap(Payload::new())
.configure(move |sc| {
configure_endpoints(sc, state.clone(), process_map.clone(), extra_config)
})
.configure(move |sc| configure_endpoints(sc, state.clone(), extra_config))
});
if let Some(tls) = tls {
@ -1679,7 +1762,7 @@ async fn launch<
tracing::info!("Starting pict-rs with TLS on {address}");
server.bind_rustls_0_22(address, config)?.run().await?;
server.bind_rustls_0_23(address, config)?.run().await?;
handle.abort();
let _ = handle.await;
@ -1784,7 +1867,8 @@ impl<P: AsRef<Path>, T: serde::Serialize> ConfigSource<P, T> {
/// fn main() -> Result<(), Box<dyn std::error::Error>> {
/// let configuration = pict_rs::ConfigSource::memory(serde_json::json!({
/// "server": {
/// "address": "127.0.0.1:8080"
/// "address": "127.0.0.1:8080",
/// "temporary_directory": "/tmp/t1"
/// },
/// "repo": {
/// "type": "sled",
@ -1852,6 +1936,19 @@ impl PictRsConfiguration {
Ok(self)
}
/// Install aws-lc-rs as the default crypto provider
///
/// This would happen automatically anyway unless rustls crate features get mixed up
pub fn install_crypto_provider(self) -> Self {
if rustls::crypto::aws_lc_rs::default_provider()
.install_default()
.is_err()
{
tracing::info!("rustls crypto provider already installed");
}
self
}
/// Run the pict-rs application on a tokio `LocalSet`
///
/// This must be called from within `tokio::main` directly
@ -1861,13 +1958,16 @@ impl PictRsConfiguration {
/// #[tokio::main]
/// async fn main() -> color_eyre::Result<()> {
/// let pict_rs_server = pict_rs::ConfigSource::memory(serde_json::json!({
/// "server": {
/// "temporary_directory": "/tmp/t2"
/// },
/// "repo": {
/// "type": "sled",
/// "path": "/tmp/pict-rs/run-on-localset/sled-repo",
/// "path": "/tmp/pict-rs-run-on-localset/sled-repo",
/// },
/// "store": {
/// "type": "filesystem",
/// "path": "/tmp/pict-rs/run-on-localset/files",
/// "path": "/tmp/pict-rs-run-on-localset/files",
/// },
/// }))
/// .init::<&str>(None)?
@ -1885,20 +1985,23 @@ impl PictRsConfiguration {
/// Run the pict-rs application
///
/// This must be called from within a tokio `LocalSet`, which is created by default for
/// actix-rt runtimes, and by tokio_uring
/// actix-rt runtimes
///
/// Example:
/// ```rust,ignore
/// ```rust
/// fn main() -> color_eyre::Result<()> {
/// tokio_uring::start(async move {
/// actix_web::rt::System::new().block_on(async move {
/// let pict_rs_server = pict_rs::ConfigSource::memory(serde_json::json!({
/// "server": {
/// "temporary_directory": "/tmp/t3"
/// },
/// "repo": {
/// "type": "sled",
/// "path": "/tmp/pict-rs/run/sled-repo",
/// "path": "/tmp/pict-rs-run/sled-repo",
/// },
/// "store": {
/// "type": "filesystem",
/// "path": "/tmp/pict-rs/run/files",
/// "path": "/tmp/pict-rs-run/files",
/// },
/// }))
/// .init::<&str>(None)?
@ -1911,12 +2014,21 @@ impl PictRsConfiguration {
/// }
/// ```
pub async fn run(self) -> color_eyre::Result<()> {
#[cfg(feature = "random-errors")]
tracing::error!("pict-rs has been compiled with with the 'random-errors' feature enabled.");
#[cfg(feature = "random-errors")]
tracing::error!("This is not suitable for production environments");
let PictRsConfiguration { config, operation } = self;
// describe all the metrics pict-rs produces
init_metrics::init_metrics();
let tmp_dir = TmpDir::init(&config.server.temporary_directory).await?;
let tmp_dir = TmpDir::init(
&config.server.temporary_directory,
config.server.cleanup_temporary_directory,
)
.await?;
let policy_dir = magick::write_magick_policy(&config.media, &tmp_dir).await?;
let client = build_client()?;

View file

@ -106,7 +106,8 @@ pub(crate) async fn process_image_command<S>(
(MAGICK_CONFIGURE_PATH, state.policy_dir.as_os_str()),
];
let process = Process::run("magick", &args, &envs, state.config.media.process_timeout)?
let process = Process::run("magick", &args, &envs, state.config.media.process_timeout)
.await?
.add_extras(temporary_path);
Ok(process)

View file

@ -1,9 +1,10 @@
#[cfg(feature = "io-uring")]
fn main() -> color_eyre::Result<()> {
tokio_uring::start(async move {
actix_web::rt::System::new().block_on(async move {
pict_rs::PictRsConfiguration::build_default()?
.install_tracing()?
.install_metrics()?
.install_crypto_provider()
.run()
.await
})
@ -18,6 +19,7 @@ fn main() -> color_eyre::Result<()> {
pict_rs::PictRsConfiguration::build_default()?
.install_tracing()?
.install_metrics()?
.install_crypto_provider()
.run_on_localset()
.await
})

View file

@ -1,9 +1,11 @@
mod deadline;
mod internal;
mod log;
mod metrics;
mod payload;
pub(crate) use self::deadline::Deadline;
pub(crate) use self::internal::Internal;
pub(crate) use self::log::Log;
pub(crate) use self::metrics::Metrics;
pub(crate) use self::payload::Payload;

src/middleware/log.rs Normal file
View file

@ -0,0 +1,223 @@
use std::future::{ready, Future, Ready};
use actix_web::{
body::MessageBody,
dev::{Service, ServiceRequest, ServiceResponse, Transform},
http::StatusCode,
ResponseError,
};
pub(crate) struct Log {
info: bool,
}
pub(crate) struct LogMiddleware<S> {
info: bool,
inner: S,
}
impl Log {
pub(crate) fn new(info: bool) -> Self {
Self { info }
}
}
#[derive(Debug)]
pub(crate) struct LogError {
info: bool,
error: actix_web::Error,
}
pin_project_lite::pin_project! {
pub(crate) struct LogFuture<F> {
info: bool,
#[pin]
inner: F,
}
}
pin_project_lite::pin_project! {
pub(crate) struct LogBody<B> {
info: bool,
status: Option<StatusCode>,
#[pin]
inner: B,
}
}
impl<S, B> Transform<S, ServiceRequest> for Log
where
B: MessageBody,
S: Service<ServiceRequest, Response = ServiceResponse<B>>,
S::Future: 'static,
S::Error: Into<actix_web::Error>,
{
type Response = ServiceResponse<LogBody<B>>;
type Error = actix_web::Error;
type InitError = ();
type Transform = LogMiddleware<S>;
type Future = Ready<Result<Self::Transform, Self::InitError>>;
fn new_transform(&self, service: S) -> Self::Future {
ready(Ok(LogMiddleware {
info: self.info,
inner: service,
}))
}
}
impl<S, B> Service<ServiceRequest> for LogMiddleware<S>
where
B: MessageBody,
S: Service<ServiceRequest, Response = ServiceResponse<B>>,
S::Future: 'static,
S::Error: Into<actix_web::Error>,
{
type Response = ServiceResponse<LogBody<B>>;
type Error = actix_web::Error;
type Future = LogFuture<S::Future>;
fn poll_ready(
&self,
ctx: &mut core::task::Context<'_>,
) -> std::task::Poll<Result<(), Self::Error>> {
self.inner.poll_ready(ctx).map(|res| {
res.map_err(|e| {
LogError {
info: self.info,
error: e.into(),
}
.into()
})
})
}
fn call(&self, req: ServiceRequest) -> Self::Future {
LogFuture {
info: self.info,
inner: self.inner.call(req),
}
}
}
impl<F, B, E> Future for LogFuture<F>
where
B: MessageBody,
F: Future<Output = Result<ServiceResponse<B>, E>>,
E: Into<actix_web::Error>,
{
type Output = Result<ServiceResponse<LogBody<B>>, actix_web::Error>;
fn poll(
self: std::pin::Pin<&mut Self>,
cx: &mut std::task::Context<'_>,
) -> std::task::Poll<Self::Output> {
let info = self.info;
let this = self.project();
std::task::Poll::Ready(match std::task::ready!(this.inner.poll(cx)) {
Ok(response) => {
let status = response.status();
let status = if response.response().body().size().is_eof() {
emit(status, info);
None
} else {
Some(status)
};
Ok(response.map_body(|_, inner| LogBody {
info,
status,
inner,
}))
}
Err(e) => Err(LogError {
info,
error: e.into(),
}
.into()),
})
}
}
impl<B> MessageBody for LogBody<B>
where
B: MessageBody,
{
type Error = B::Error;
fn size(&self) -> actix_web::body::BodySize {
self.inner.size()
}
fn poll_next(
self: std::pin::Pin<&mut Self>,
cx: &mut std::task::Context<'_>,
) -> std::task::Poll<Option<Result<actix_web::web::Bytes, Self::Error>>> {
let this = self.project();
let opt = std::task::ready!(this.inner.poll_next(cx));
if opt.is_none() {
if let Some(status) = this.status.take() {
emit(status, *this.info);
}
}
std::task::Poll::Ready(opt)
}
}
impl std::fmt::Display for LogError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
self.error.fmt(f)
}
}
impl std::error::Error for LogError {
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
self.error.source()
}
}
impl ResponseError for LogError {
fn status_code(&self) -> actix_web::http::StatusCode {
self.error.as_response_error().status_code()
}
fn error_response(&self) -> actix_web::HttpResponse<actix_web::body::BoxBody> {
let response = self.error.error_response();
let status = response.status();
if response.body().size().is_eof() {
emit(status, self.info);
response
} else {
response.map_body(|_, inner| {
LogBody {
info: self.info,
status: Some(status),
inner,
}
.boxed()
})
}
}
}
fn emit(status: StatusCode, info: bool) {
if status.is_server_error() {
tracing::error!("server error");
} else if status.is_client_error() {
tracing::warn!("client error");
} else if status.is_redirection() {
tracing::info!("redirected");
} else if info {
tracing::info!("completed");
} else {
tracing::debug!("completed");
}
}
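A wiring sketch for the middleware above; Log is pub(crate), so this only makes sense inside the crate, and the real server also layers tracing, deadline, metrics, and payload middleware as shown in lib.rs. Passing true mirrors the new log_requests setting and promotes completed 2xx responses from debug to info:

use actix_web::{web, App, HttpResponse, HttpServer};

use crate::middleware::Log;

async fn serve(log_requests: bool) -> std::io::Result<()> {
    HttpServer::new(move || {
        App::new()
            // the response is logged once its body finishes streaming
            .wrap(Log::new(log_requests))
            .route("/healthz", web::get().to(|| async { HttpResponse::Ok().finish() }))
    })
    .bind(("127.0.0.1", 8080))?
    .run()
    .await
}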

View file

@ -45,10 +45,10 @@ impl Drop for MetricsGuard {
}
}
async fn drain(rx: flume::Receiver<actix_web::dev::Payload>) {
async fn drain(mut rx: tokio::sync::mpsc::Receiver<actix_web::dev::Payload>) {
let mut set = JoinSet::new();
while let Ok(payload) = rx.recv_async().await {
while let Some(payload) = rx.recv().await {
tracing::trace!("drain: looping");
// draining a payload is a best-effort task - if we can't collect in 2 minutes we bail
@ -94,18 +94,18 @@ async fn drain(rx: flume::Receiver<actix_web::dev::Payload>) {
struct DrainHandle(Option<Rc<tokio::task::JoinHandle<()>>>);
pub(crate) struct Payload {
sender: flume::Sender<actix_web::dev::Payload>,
sender: tokio::sync::mpsc::Sender<actix_web::dev::Payload>,
handle: DrainHandle,
}
pub(crate) struct PayloadMiddleware<S> {
inner: S,
sender: flume::Sender<actix_web::dev::Payload>,
sender: tokio::sync::mpsc::Sender<actix_web::dev::Payload>,
_handle: DrainHandle,
}
pub(crate) struct PayloadStream {
inner: Option<actix_web::dev::Payload>,
sender: flume::Sender<actix_web::dev::Payload>,
sender: tokio::sync::mpsc::Sender<actix_web::dev::Payload>,
}
impl DrainHandle {

View file

@ -9,7 +9,7 @@ use std::{
use futures_core::Stream;
use streem::IntoStreamer;
use tokio::{
io::{AsyncRead, AsyncReadExt, AsyncWriteExt},
io::{AsyncReadExt, AsyncWriteExt},
process::{Child, ChildStdin, Command},
};
use tokio_util::{bytes::Bytes, io::ReaderStream};
@ -59,9 +59,6 @@ impl Drop for MetricsGuard {
}
}
#[derive(Debug)]
struct StatusError(ExitStatus);
pub(crate) struct Process {
command: Arc<str>,
child: Child,
@ -155,7 +152,7 @@ impl ProcessError {
}
impl Process {
pub(crate) fn run<T>(
pub(crate) async fn run<T>(
command: &str,
args: &[T],
envs: &[(&str, &OsStr)],
@ -168,15 +165,10 @@ impl Process {
tracing::debug!("{envs:?} {command} {args:?}");
let res = tracing::trace_span!(parent: None, "Create command", %command).in_scope(|| {
Self::spawn(
command.clone(),
Command::new(command.as_ref())
.args(args)
.envs(envs.iter().copied()),
timeout,
)
});
let mut cmd = Command::new(command.as_ref());
cmd.args(args).envs(envs.iter().copied());
let res = Self::spawn(command.clone(), cmd, timeout).await;
match res {
Ok(this) => Ok(this),
@ -191,16 +183,17 @@ impl Process {
}
}
fn spawn(command: Arc<str>, cmd: &mut Command, timeout: u64) -> std::io::Result<Self> {
tracing::trace_span!(parent: None, "Spawn command", %command).in_scope(|| {
let guard = MetricsGuard::guard(command.clone());
async fn spawn(command: Arc<str>, mut cmd: Command, timeout: u64) -> std::io::Result<Self> {
let guard = MetricsGuard::guard(command.clone());
let cmd = cmd
.stdin(Stdio::piped())
.stdout(Stdio::piped())
.kill_on_drop(true);
cmd.stdin(Stdio::piped())
.stdout(Stdio::piped())
.kill_on_drop(true);
cmd.spawn().map(|child| Process {
crate::sync::spawn_blocking("spawn-command", move || cmd.spawn())
.await
.expect("spawn panicked")
.map(|child| Process {
child,
command,
guard,
@ -208,7 +201,6 @@ impl Process {
extras: Box::new(()),
id: Uuid::now_v7(),
})
})
}
pub(crate) fn add_extras(self, extra: impl Extras + 'static) -> Self {
@ -251,23 +243,6 @@ impl Process {
}
}
pub(crate) fn drive_with_async_read(self, input: impl AsyncRead + 'static) -> ProcessRead {
self.drive(move |mut stdin| {
async move {
let mut input = std::pin::pin!(input);
match tokio::io::copy(&mut input, &mut stdin).await {
Ok(_) => Ok(()),
// BrokenPipe means we finished reading from Stdout, so we don't need to write
// to stdin. We'll still error out if the command failed so treat this as a
// success
Err(e) if e.kind() == std::io::ErrorKind::BrokenPipe => Ok(()),
Err(e) => Err(e),
}
}
})
}
pub(crate) fn drive_with_stream<S>(self, input: S) -> ProcessRead
where
S: Stream<Item = std::io::Result<Bytes>> + 'static,
@ -282,6 +257,7 @@ impl Process {
Err(e) if e.kind() == std::io::ErrorKind::BrokenPipe => break,
Err(e) => return Err(e),
}
crate::sync::cooperate().await;
}
Ok(())
@ -454,6 +430,16 @@ impl ProcessRead {
self,
f: impl FnOnce(BoxRead<'static>) -> Fut,
) -> Result<Fut::Output, ProcessError>
where
Fut: Future,
{
self.with_stdout_inner(f).await
}
async fn with_stdout_inner<Fut>(
self,
f: impl FnOnce(BoxRead<'static>) -> Fut,
) -> Result<Fut::Output, ProcessError>
where
Fut: Future,
{
@ -498,11 +484,3 @@ impl ProcessRead {
}
}
}
impl std::fmt::Display for StatusError {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(f, "Command failed with bad status: {}", self.0)
}
}
impl std::error::Error for StatusError {}
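Process::run is now async and pushes the actual Command::spawn call onto a blocking thread, presumably because spawning a child does blocking syscall work that would otherwise stall an async worker. A standalone sketch of just that spawning step, with made-up names:

use tokio::process::Command;

async fn spawn_off_thread(
    program: &str,
    args: &[&str],
) -> std::io::Result<tokio::process::Child> {
    let mut cmd = Command::new(program);
    cmd.args(args).kill_on_drop(true);

    // fork/exec happens on the blocking pool, not on the async worker thread
    tokio::task::spawn_blocking(move || cmd.spawn())
        .await
        .expect("spawn task panicked")
}

#[tokio::main]
async fn main() -> std::io::Result<()> {
    let mut child = spawn_off_thread("echo", &["hello"]).await?;
    let status = child.wait().await?;
    println!("exited with {status}");
    Ok(())
}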

View file

@ -91,7 +91,7 @@ impl ResizeKind {
pub(crate) fn build_chain(
args: &[(String, String)],
ext: &str,
) -> Result<(PathBuf, Vec<String>), Error> {
) -> Result<(String, Vec<String>), Error> {
fn parse<P: Processor>(key: &str, value: &str) -> Result<Option<P>, Error> {
if key == P::NAME {
return Ok(Some(P::parse(key, value).ok_or(UploadError::ParsePath)?));
@ -122,7 +122,7 @@ pub(crate) fn build_chain(
path.push(ext);
Ok((path, args))
Ok((path.to_string_lossy().to_string(), args))
}
impl Processor for Identity {

View file

@ -1,19 +1,21 @@
use crate::{
concurrent_processor::ProcessMap,
error::{Error, UploadError},
formats::InputProcessableFormat,
future::LocalBoxFuture,
future::{LocalBoxFuture, WithPollTimer},
repo::{Alias, ArcRepo, DeleteToken, Hash, JobId, UploadId},
serde_str::Serde,
state::State,
store::Store,
UploadQuery,
};
use std::{
path::PathBuf,
ops::Deref,
rc::Rc,
sync::Arc,
time::{Duration, Instant},
};
use tokio::task::JoinError;
use tracing::Instrument;
pub(crate) mod cleanup;
@ -55,11 +57,13 @@ enum Process {
identifier: String,
upload_id: Serde<UploadId>,
declared_alias: Option<Serde<Alias>>,
#[serde(default)]
upload_query: UploadQuery,
},
Generate {
target_format: InputProcessableFormat,
source: Serde<Alias>,
process_path: PathBuf,
process_path: String,
process_args: Vec<String>,
},
}
@ -157,11 +161,13 @@ pub(crate) async fn queue_ingest(
identifier: &Arc<str>,
upload_id: UploadId,
declared_alias: Option<Alias>,
upload_query: UploadQuery,
) -> Result<(), Error> {
let job = serde_json::to_value(Process::Ingest {
identifier: identifier.to_string(),
declared_alias: declared_alias.map(Serde::new),
upload_id: Serde::new(upload_id),
upload_query,
})
.map_err(UploadError::PushJob)?;
repo.push(PROCESS_QUEUE, job, None).await?;
@ -172,13 +178,13 @@ pub(crate) async fn queue_generate(
repo: &ArcRepo,
target_format: InputProcessableFormat,
source: Alias,
process_path: PathBuf,
variant: String,
process_args: Vec<String>,
) -> Result<(), Error> {
let job = serde_json::to_value(Process::Generate {
target_format,
source: Serde::new(source),
process_path,
process_path: variant,
process_args,
})
.map_err(UploadError::PushJob)?;
@ -190,8 +196,8 @@ pub(crate) async fn process_cleanup<S: Store + 'static>(state: State<S>) {
process_jobs(state, CLEANUP_QUEUE, cleanup::perform).await
}
pub(crate) async fn process_images<S: Store + 'static>(state: State<S>, process_map: ProcessMap) {
process_image_jobs(state, process_map, PROCESS_QUEUE, process::perform).await
pub(crate) async fn process_images<S: Store + 'static>(state: State<S>) {
process_jobs(state, PROCESS_QUEUE, process::perform).await
}
struct MetricsGuard {
@ -225,145 +231,170 @@ impl Drop for MetricsGuard {
}
}
pub(super) enum JobError {
Abort(Error),
Retry(Error),
}
impl AsRef<Error> for JobError {
fn as_ref(&self) -> &Error {
match self {
Self::Abort(e) | Self::Retry(e) => e,
}
}
}
impl Deref for JobError {
type Target = Error;
fn deref(&self) -> &Self::Target {
match self {
Self::Abort(e) | Self::Retry(e) => e,
}
}
}
impl From<JobError> for Error {
fn from(value: JobError) -> Self {
match value {
JobError::Abort(e) | JobError::Retry(e) => e,
}
}
}
type JobResult<T = ()> = Result<T, JobError>;
type JobFuture<'a> = LocalBoxFuture<'a, JobResult>;
trait JobContext {
type Item;
fn abort(self) -> JobResult<Self::Item>
where
Self: Sized;
fn retry(self) -> JobResult<Self::Item>
where
Self: Sized;
}
impl<T, E> JobContext for Result<T, E>
where
E: Into<Error>,
{
type Item = T;
fn abort(self) -> JobResult<Self::Item>
where
Self: Sized,
{
self.map_err(Into::into).map_err(JobError::Abort)
}
fn retry(self) -> JobResult<Self::Item>
where
Self: Sized,
{
self.map_err(Into::into).map_err(JobError::Retry)
}
}
fn job_result(result: &Result<JobResult, JoinError>) -> crate::repo::JobResult {
match result {
Ok(Ok(())) => crate::repo::JobResult::Success,
Ok(Err(JobError::Retry(_))) => crate::repo::JobResult::Failure,
Ok(Err(JobError::Abort(_))) => crate::repo::JobResult::Aborted,
Err(_) => crate::repo::JobResult::Aborted,
}
}
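A minimal usage sketch (not part of this changeset; the helper name resolve_hash is assumed for illustration): transient repository errors are tagged with .retry() so the job is requeued and its retry count decremented, while conditions that can never succeed are tagged with .abort() so the job is dropped immediately.
async fn resolve_hash(repo: &ArcRepo, alias: &Alias) -> JobResult<Hash> {
    // Hypothetical example assuming the JobError/JobContext items above.
    repo.hash(alias)
        .await
        .retry()? // a database hiccup is worth retrying
        .ok_or(UploadError::MissingAlias)
        .abort() // a missing alias will never reappear, so give up
}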
async fn process_jobs<S, F>(state: State<S>, queue: &'static str, callback: F)
where
S: Store,
for<'a> F: Fn(&'a State<S>, serde_json::Value) -> LocalBoxFuture<'a, Result<(), Error>> + Copy,
S: Store + 'static,
for<'a> F: Fn(&'a State<S>, serde_json::Value) -> JobFuture<'a> + Copy + 'static,
{
let worker_id = uuid::Uuid::new_v4();
let state = Rc::new(state);
loop {
tracing::trace!("process_jobs: looping");
tokio::task::yield_now().await;
crate::sync::cooperate().await;
let res = job_loop(&state, worker_id, queue, callback).await;
// add a panic boundary by spawning a task
let res = crate::sync::spawn(
"job-loop",
job_loop(state.clone(), worker_id, queue, callback),
)
.await;
if let Err(e) = res {
tracing::warn!("Error processing jobs: {}", format!("{e}"));
tracing::warn!("{}", format!("{e:?}"));
match res {
// clean exit
Ok(Ok(())) => break,
if e.is_disconnected() {
tokio::time::sleep(Duration::from_secs(10)).await;
// job error
Ok(Err(e)) => {
tracing::warn!("Error processing jobs: {}", format!("{e}"));
tracing::warn!("{}", format!("{e:?}"));
if e.is_disconnected() {
tokio::time::sleep(Duration::from_secs(10)).await;
}
}
continue;
// job panic
Err(_) => {
tracing::warn!("Panic while processing jobs");
}
}
break;
}
}
async fn job_loop<S, F>(
state: &State<S>,
state: Rc<State<S>>,
worker_id: uuid::Uuid,
queue: &'static str,
callback: F,
) -> Result<(), Error>
where
S: Store,
for<'a> F: Fn(&'a State<S>, serde_json::Value) -> LocalBoxFuture<'a, Result<(), Error>> + Copy,
S: Store + 'static,
for<'a> F: Fn(&'a State<S>, serde_json::Value) -> JobFuture<'a> + Copy + 'static,
{
loop {
tracing::trace!("job_loop: looping");
tokio::task::yield_now().await;
crate::sync::cooperate().with_poll_timer("cooperate").await;
async {
let (job_id, job) = state.repo.pop(queue, worker_id).await?;
let (job_id, job) = state
.repo
.pop(queue, worker_id)
.with_poll_timer("pop-job")
.await?;
let guard = MetricsGuard::guard(worker_id, queue);
let res = heartbeat(
&state.repo,
queue,
worker_id,
job_id,
(callback)(state, job),
)
let state2 = state.clone();
let res = crate::sync::spawn("job-and-heartbeat", async move {
let state = state2;
heartbeat(
&state.repo,
queue,
worker_id,
job_id,
(callback)(&state, job),
)
.await
})
.await;
state.repo.complete_job(queue, worker_id, job_id).await?;
state
.repo
.complete_job(queue, worker_id, job_id, job_result(&res))
.with_poll_timer("job-complete")
.await?;
res?;
guard.disarm();
Ok(()) as Result<(), Error>
}
.instrument(tracing::info_span!("tick", %queue, %worker_id))
.await?;
}
}
async fn process_image_jobs<S, F>(
state: State<S>,
process_map: ProcessMap,
queue: &'static str,
callback: F,
) where
S: Store,
for<'a> F: Fn(&'a State<S>, &'a ProcessMap, serde_json::Value) -> LocalBoxFuture<'a, Result<(), Error>>
+ Copy,
{
let worker_id = uuid::Uuid::new_v4();
loop {
tracing::trace!("process_image_jobs: looping");
tokio::task::yield_now().await;
let res = image_job_loop(&state, &process_map, worker_id, queue, callback).await;
if let Err(e) = res {
tracing::warn!("Error processing jobs: {}", format!("{e}"));
tracing::warn!("{}", format!("{e:?}"));
if e.is_disconnected() {
tokio::time::sleep(Duration::from_secs(10)).await;
}
continue;
}
break;
}
}
async fn image_job_loop<S, F>(
state: &State<S>,
process_map: &ProcessMap,
worker_id: uuid::Uuid,
queue: &'static str,
callback: F,
) -> Result<(), Error>
where
S: Store,
for<'a> F: Fn(&'a State<S>, &'a ProcessMap, serde_json::Value) -> LocalBoxFuture<'a, Result<(), Error>>
+ Copy,
{
loop {
tracing::trace!("image_job_loop: looping");
tokio::task::yield_now().await;
async {
let (job_id, job) = state.repo.pop(queue, worker_id).await?;
let guard = MetricsGuard::guard(worker_id, queue);
let res = heartbeat(
&state.repo,
queue,
worker_id,
job_id,
(callback)(state, process_map, job),
)
.await;
state.repo.complete_job(queue, worker_id, job_id).await?;
res?;
res.map_err(|_| UploadError::Canceled)??;
guard.disarm();
@ -385,7 +416,9 @@ async fn heartbeat<Fut>(
where
Fut: std::future::Future,
{
let mut fut = std::pin::pin!(fut.instrument(tracing::info_span!("job-future")));
let mut fut = std::pin::pin!(fut
.with_poll_timer("job-future")
.instrument(tracing::info_span!("job-future")));
let mut interval = tokio::time::interval(Duration::from_secs(5));
@ -394,7 +427,7 @@ where
loop {
tracing::trace!("heartbeat: looping");
tokio::task::yield_now().await;
crate::sync::cooperate().await;
tokio::select! {
biased;


@ -6,7 +6,7 @@ use tracing::{Instrument, Span};
use crate::{
config::Configuration,
error::{Error, UploadError},
future::LocalBoxFuture,
future::WithPollTimer,
queue::Cleanup,
repo::{Alias, ArcRepo, DeleteToken, Hash},
serde_str::Serde,
@ -14,41 +14,76 @@ use crate::{
store::Store,
};
pub(super) fn perform<S>(
state: &State<S>,
job: serde_json::Value,
) -> LocalBoxFuture<'_, Result<(), Error>>
use super::{JobContext, JobFuture, JobResult};
pub(super) fn perform<S>(state: &State<S>, job: serde_json::Value) -> JobFuture<'_>
where
S: Store + 'static,
{
Box::pin(async move {
match serde_json::from_value(job) {
Ok(job) => match job {
Cleanup::Hash { hash: in_hash } => hash(&state.repo, in_hash).await?,
Cleanup::Identifier {
identifier: in_identifier,
} => identifier(&state.repo, &state.store, Arc::from(in_identifier)).await?,
Cleanup::Alias {
alias: stored_alias,
token,
} => {
alias(
&state.repo,
Serde::into_inner(stored_alias),
Serde::into_inner(token),
)
let job_text = format!("{job}");
#[cfg(feature = "random-errors")]
{
use nanorand::Rng;
if nanorand::tls_rng().generate_range(0..25) < 1 {
return Err(crate::error::UploadError::RandomError).retry();
}
}
let job = serde_json::from_value(job)
.map_err(|e| UploadError::InvalidJob(e, job_text))
.abort()?;
match job {
Cleanup::Hash { hash: in_hash } => {
hash(&state.repo, in_hash)
.with_poll_timer("cleanup-hash")
.await?
}
Cleanup::Identifier {
identifier: in_identifier,
} => {
identifier(&state.repo, &state.store, Arc::from(in_identifier))
.with_poll_timer("cleanup-identifier")
.await?
}
Cleanup::Alias {
alias: stored_alias,
token,
} => {
alias(
&state.repo,
Serde::into_inner(stored_alias),
Serde::into_inner(token),
)
.await?
}
Cleanup::Variant { hash, variant } => {
hash_variant(&state.repo, hash, variant)
.with_poll_timer("cleanup-hash-variant")
.await?
}
Cleanup::AllVariants => {
all_variants(&state.repo)
.with_poll_timer("cleanup-all-variants")
.await?
}
Cleanup::OutdatedVariants => {
outdated_variants(&state.repo, &state.config)
.with_poll_timer("cleanup-outdated-variants")
.await?
}
Cleanup::OutdatedProxies => {
outdated_proxies(&state.repo, &state.config)
.with_poll_timer("cleanup-outdated-proxies")
.await?
}
Cleanup::Prune => {
prune(&state.repo, &state.store)
.with_poll_timer("cleanup-prune")
.await?
}
Cleanup::Variant { hash, variant } => {
hash_variant(&state.repo, hash, variant).await?
}
Cleanup::AllVariants => all_variants(&state.repo).await?,
Cleanup::OutdatedVariants => outdated_variants(&state.repo, &state.config).await?,
Cleanup::OutdatedProxies => outdated_proxies(&state.repo, &state.config).await?,
Cleanup::Prune => prune(&state.repo, &state.store).await?,
},
Err(e) => {
tracing::warn!("Invalid job: {}", format!("{e}"));
}
}
@ -57,36 +92,30 @@ where
}
#[tracing::instrument(skip_all)]
async fn identifier<S>(repo: &ArcRepo, store: &S, identifier: Arc<str>) -> Result<(), Error>
async fn identifier<S>(repo: &ArcRepo, store: &S, identifier: Arc<str>) -> JobResult
where
S: Store,
{
let mut errors = Vec::new();
if let Err(e) = store.remove(&identifier).await {
errors.push(UploadError::from(e));
match store.remove(&identifier).await {
Ok(_) => {}
Err(e) if e.is_not_found() => {}
Err(e) => return Err(e).retry(),
}
if let Err(e) = repo.cleanup_details(&identifier).await {
errors.push(UploadError::from(e));
}
for error in errors {
tracing::error!("{}", format!("{error:?}"));
}
repo.cleanup_details(&identifier).await.retry()?;
Ok(())
}
#[tracing::instrument(skip_all)]
async fn hash(repo: &ArcRepo, hash: Hash) -> Result<(), Error> {
let aliases = repo.aliases_for_hash(hash.clone()).await?;
async fn hash(repo: &ArcRepo, hash: Hash) -> JobResult {
let aliases = repo.aliases_for_hash(hash.clone()).await.retry()?;
if !aliases.is_empty() {
for alias in aliases {
// TODO: decide if it is okay to skip aliases without tokens
if let Some(token) = repo.delete_token(&alias).await? {
super::cleanup_alias(repo, alias, token).await?;
if let Some(token) = repo.delete_token(&alias).await.retry()? {
super::cleanup_alias(repo, alias, token).await.retry()?;
} else {
tracing::warn!("Not cleaning alias!");
}
@ -97,145 +126,161 @@ async fn hash(repo: &ArcRepo, hash: Hash) -> Result<(), Error> {
let mut idents = repo
.variants(hash.clone())
.await?
.await
.retry()?
.into_iter()
.map(|(_, v)| v)
.collect::<Vec<_>>();
idents.extend(repo.identifier(hash.clone()).await?);
idents.extend(repo.motion_identifier(hash.clone()).await?);
idents.extend(repo.identifier(hash.clone()).await.retry()?);
idents.extend(repo.motion_identifier(hash.clone()).await.retry()?);
for identifier in idents {
let _ = super::cleanup_identifier(repo, &identifier).await;
super::cleanup_identifier(repo, &identifier).await.retry()?;
}
repo.cleanup_hash(hash).await?;
repo.cleanup_hash(hash).await.retry()?;
Ok(())
}
#[tracing::instrument(skip_all)]
pub(crate) async fn alias(repo: &ArcRepo, alias: Alias, token: DeleteToken) -> Result<(), Error> {
let saved_delete_token = repo.delete_token(&alias).await?;
pub(crate) async fn alias(repo: &ArcRepo, alias: Alias, token: DeleteToken) -> JobResult {
let saved_delete_token = repo.delete_token(&alias).await.retry()?;
if saved_delete_token.is_none() {
let hash = repo.hash(&alias).await.retry()?;
// already deleted
if hash.is_none() {
return Ok(());
}
}
if !saved_delete_token.is_some_and(|t| t.ct_eq(&token)) {
return Err(UploadError::InvalidToken.into());
return Err(UploadError::InvalidToken).abort();
}
let hash = repo.hash(&alias).await?;
let hash = repo.hash(&alias).await.retry()?;
repo.cleanup_alias(&alias).await?;
repo.remove_relation(alias.clone()).await?;
repo.remove_alias_access(alias.clone()).await?;
repo.remove_relation(alias.clone()).await.retry()?;
repo.remove_alias_access(alias.clone()).await.retry()?;
repo.cleanup_alias(&alias).await.retry()?;
let Some(hash) = hash else {
// hash doesn't exist, nothing to do
return Ok(());
};
let hash = hash.ok_or(UploadError::MissingAlias).abort()?;
if repo.aliases_for_hash(hash.clone()).await?.is_empty() {
super::cleanup_hash(repo, hash).await?;
if repo
.aliases_for_hash(hash.clone())
.await
.retry()?
.is_empty()
{
super::cleanup_hash(repo, hash).await.retry()?;
}
Ok(())
}
#[tracing::instrument(skip_all)]
async fn all_variants(repo: &ArcRepo) -> Result<(), Error> {
async fn all_variants(repo: &ArcRepo) -> JobResult {
let hash_stream = std::pin::pin!(repo.hashes());
let mut hash_stream = hash_stream.into_streamer();
while let Some(res) = hash_stream.next().await {
tracing::trace!("all_variants: looping");
let hash = res?;
super::cleanup_variants(repo, hash, None).await?;
let hash = res.retry()?;
super::cleanup_variants(repo, hash, None).await.retry()?;
}
Ok(())
}
#[tracing::instrument(skip_all)]
async fn outdated_variants(repo: &ArcRepo, config: &Configuration) -> Result<(), Error> {
async fn outdated_variants(repo: &ArcRepo, config: &Configuration) -> JobResult {
let now = time::OffsetDateTime::now_utc();
let since = now.saturating_sub(config.media.retention.variants.to_duration());
let variant_stream = repo.older_variants(since).await?;
let variant_stream = repo.older_variants(since).await.retry()?;
let variant_stream = std::pin::pin!(crate::stream::take(variant_stream, 2048));
let mut variant_stream = variant_stream.into_streamer();
let mut count = 0;
while let Some(res) = variant_stream.next().await {
while let Some((hash, variant)) = variant_stream.try_next().await.retry()? {
metrics::counter!(crate::init_metrics::CLEANUP_OUTDATED_VARIANT).increment(1);
tracing::trace!("outdated_variants: looping");
let (hash, variant) = res?;
super::cleanup_variants(repo, hash, Some(variant)).await?;
super::cleanup_variants(repo, hash, Some(variant))
.await
.retry()?;
count += 1;
}
tracing::debug!("Queued {count} variant cleanup jobs");
let queue_length = repo.queue_length().await?;
let queue_length = repo.queue_length().await.abort()?;
tracing::debug!("Total queue length: {queue_length}");
Ok(())
}
#[tracing::instrument(skip_all)]
async fn outdated_proxies(repo: &ArcRepo, config: &Configuration) -> Result<(), Error> {
async fn outdated_proxies(repo: &ArcRepo, config: &Configuration) -> JobResult {
let now = time::OffsetDateTime::now_utc();
let since = now.saturating_sub(config.media.retention.proxy.to_duration());
let alias_stream = repo.older_aliases(since).await?;
let alias_stream = repo.older_aliases(since).await.retry()?;
let alias_stream = std::pin::pin!(crate::stream::take(alias_stream, 2048));
let mut alias_stream = alias_stream.into_streamer();
let mut count = 0;
while let Some(res) = alias_stream.next().await {
while let Some(alias) = alias_stream.try_next().await.retry()? {
metrics::counter!(crate::init_metrics::CLEANUP_OUTDATED_PROXY).increment(1);
tracing::trace!("outdated_proxies: looping");
let alias = res?;
if let Some(token) = repo.delete_token(&alias).await? {
super::cleanup_alias(repo, alias, token).await?;
if let Some(token) = repo.delete_token(&alias).await.retry()? {
super::cleanup_alias(repo, alias, token).await.retry()?;
count += 1;
} else {
tracing::warn!("Skipping alias cleanup - no delete token");
repo.remove_relation(alias.clone()).await?;
repo.remove_alias_access(alias).await?;
repo.remove_relation(alias.clone()).await.retry()?;
repo.remove_alias_access(alias).await.retry()?;
}
}
tracing::debug!("Queued {count} alias cleanup jobs");
let queue_length = repo.queue_length().await?;
let queue_length = repo.queue_length().await.abort()?;
tracing::debug!("Total queue length: {queue_length}");
Ok(())
}
#[tracing::instrument(skip_all)]
async fn hash_variant(
repo: &ArcRepo,
hash: Hash,
target_variant: Option<String>,
) -> Result<(), Error> {
async fn hash_variant(repo: &ArcRepo, hash: Hash, target_variant: Option<String>) -> JobResult {
if let Some(target_variant) = target_variant {
if let Some(identifier) = repo
.variant_identifier(hash.clone(), target_variant.clone())
.await?
.await
.retry()?
{
super::cleanup_identifier(repo, &identifier).await?;
super::cleanup_identifier(repo, &identifier).await.retry()?;
}
repo.remove_variant(hash.clone(), target_variant.clone())
.await?;
repo.remove_variant_access(hash, target_variant).await?;
.await
.retry()?;
repo.remove_variant_access(hash, target_variant)
.await
.retry()?;
} else {
for (variant, identifier) in repo.variants(hash.clone()).await? {
repo.remove_variant(hash.clone(), variant.clone()).await?;
repo.remove_variant_access(hash.clone(), variant).await?;
super::cleanup_identifier(repo, &identifier).await?;
for (variant, identifier) in repo.variants(hash.clone()).await.retry()? {
repo.remove_variant(hash.clone(), variant.clone())
.await
.retry()?;
repo.remove_variant_access(hash.clone(), variant)
.await
.retry()?;
super::cleanup_identifier(repo, &identifier).await.retry()?;
}
}
@ -243,19 +288,20 @@ async fn hash_variant(
}
#[tracing::instrument(skip_all)]
async fn prune<S>(repo: &ArcRepo, store: &S) -> Result<(), Error>
async fn prune<S>(repo: &ArcRepo, store: &S) -> JobResult
where
S: Store + 'static,
{
repo.set("prune-missing-started", b"1".to_vec().into())
.await?;
.await
.retry()?;
let hash_stream = std::pin::pin!(repo.hashes());
let mut hash_stream = hash_stream.into_streamer();
let mut count: u64 = 0;
while let Some(hash) = hash_stream.try_next().await? {
while let Some(hash) = hash_stream.try_next().await.retry()? {
tracing::trace!("prune: looping");
let repo = repo.clone();
@ -307,7 +353,8 @@ where
}
repo.set("prune-missing-complete", b"1".to_vec().into())
.await?;
.await
.retry()?;
Ok(())
}


@ -1,63 +1,64 @@
use time::Instant;
use tracing::{Instrument, Span};
use crate::{
concurrent_processor::ProcessMap,
error::{Error, UploadError},
formats::InputProcessableFormat,
future::LocalBoxFuture,
future::WithPollTimer,
ingest::Session,
queue::Process,
repo::{Alias, UploadId, UploadResult},
serde_str::Serde,
state::State,
store::Store,
UploadQuery,
};
use std::{path::PathBuf, sync::Arc};
use std::{sync::Arc, time::Instant};
pub(super) fn perform<'a, S>(
state: &'a State<S>,
process_map: &'a ProcessMap,
job: serde_json::Value,
) -> LocalBoxFuture<'a, Result<(), Error>>
use super::{JobContext, JobFuture, JobResult};
pub(super) fn perform<S>(state: &State<S>, job: serde_json::Value) -> JobFuture<'_>
where
S: Store + 'static,
{
Box::pin(async move {
match serde_json::from_value(job) {
Ok(job) => match job {
Process::Ingest {
identifier,
upload_id,
declared_alias,
} => {
process_ingest(
state,
Arc::from(identifier),
Serde::into_inner(upload_id),
declared_alias.map(Serde::into_inner),
)
.await?
}
Process::Generate {
let job_text = format!("{job}");
let job = serde_json::from_value(job)
.map_err(|e| UploadError::InvalidJob(e, job_text))
.abort()?;
match job {
Process::Ingest {
identifier,
upload_id,
declared_alias,
upload_query,
} => {
process_ingest(
state,
Arc::from(identifier),
Serde::into_inner(upload_id),
declared_alias.map(Serde::into_inner),
upload_query,
)
.with_poll_timer("process-ingest")
.await?
}
Process::Generate {
target_format,
source,
process_path,
process_args,
} => {
generate(
state,
target_format,
source,
Serde::into_inner(source),
process_path,
process_args,
} => {
generate(
state,
process_map,
target_format,
Serde::into_inner(source),
process_path,
process_args,
)
.await?
}
},
Err(e) => {
tracing::warn!("Invalid job: {}", format!("{e}"));
)
.with_poll_timer("process-generate")
.await?
}
}
@ -88,7 +89,7 @@ impl UploadGuard {
impl Drop for UploadGuard {
fn drop(&mut self) {
metrics::counter!(crate::init_metrics::BACKGROUND_UPLOAD_INGEST, "completed" => (!self.armed).to_string()).increment(1);
metrics::histogram!(crate::init_metrics::BACKGROUND_UPLOAD_INGEST_DURATION, "completed" => (!self.armed).to_string()).record(self.start.elapsed().as_seconds_f64());
metrics::histogram!(crate::init_metrics::BACKGROUND_UPLOAD_INGEST_DURATION, "completed" => (!self.armed).to_string()).record(self.start.elapsed().as_secs_f64());
if self.armed {
tracing::warn!(
@ -105,13 +106,14 @@ async fn process_ingest<S>(
unprocessed_identifier: Arc<str>,
upload_id: UploadId,
declared_alias: Option<Alias>,
) -> Result<(), Error>
upload_query: UploadQuery,
) -> JobResult
where
S: Store + 'static,
{
let guard = UploadGuard::guard(upload_id);
let fut = async {
let res = async {
let ident = unprocessed_identifier.clone();
let state2 = state.clone();
@ -124,7 +126,8 @@ where
let stream =
crate::stream::from_err(state2.store.to_stream(&ident, None, None).await?);
let session = crate::ingest::ingest(&state2, stream, declared_alias).await?;
let session =
crate::ingest::ingest(&state2, stream, declared_alias, &upload_query).await?;
Ok(session) as Result<Session, Error>
}
@ -135,67 +138,78 @@ where
state.store.remove(&unprocessed_identifier).await?;
error_boundary.map_err(|_| UploadError::Canceled)?
};
}
.await;
let result = match fut.await {
let (result, err) = match res {
Ok(session) => {
let alias = session.alias().take().expect("Alias should exist").clone();
let token = session.disarm();
UploadResult::Success { alias, token }
(UploadResult::Success { alias, token }, None)
}
Err(e) => {
tracing::warn!("Failed to ingest\n{}\n{}", format!("{e}"), format!("{e:?}"));
Err(e) => (
UploadResult::Failure {
message: e.root_cause().to_string(),
code: e.error_code().into_owned(),
}
}
},
Some(e),
),
};
state.repo.complete_upload(upload_id, result).await?;
state
.repo
.complete_upload(upload_id, result)
.await
.retry()?;
if let Some(e) = err {
return Err(e).abort();
}
guard.disarm();
Ok(())
}
#[tracing::instrument(skip(state, process_map, process_path, process_args))]
#[tracing::instrument(skip(state, variant, process_args))]
async fn generate<S: Store + 'static>(
state: &State<S>,
process_map: &ProcessMap,
target_format: InputProcessableFormat,
source: Alias,
process_path: PathBuf,
variant: String,
process_args: Vec<String>,
) -> Result<(), Error> {
let Some(hash) = state.repo.hash(&source).await? else {
// Nothing to do
return Ok(());
};
) -> JobResult {
let hash = state
.repo
.hash(&source)
.await
.retry()?
.ok_or(UploadError::MissingAlias)
.abort()?;
let path_string = process_path.to_string_lossy().to_string();
let identifier_opt = state
.repo
.variant_identifier(hash.clone(), path_string)
.await?;
.variant_identifier(hash.clone(), variant.clone())
.await
.retry()?;
if identifier_opt.is_some() {
// don't generate already-generated variant
return Ok(());
}
let original_details = crate::ensure_details(state, &source).await?;
let original_details = crate::ensure_details(state, &source).await.retry()?;
crate::generate::generate(
state,
process_map,
target_format,
process_path,
variant,
process_args,
&original_details,
hash,
)
.await?;
.await
.abort()?;
Ok(())
}


@ -3,6 +3,7 @@ mod delete_token;
mod hash;
mod metrics;
mod migrate;
mod notification_map;
use crate::{
config,
@ -23,6 +24,7 @@ pub(crate) use alias::Alias;
pub(crate) use delete_token::DeleteToken;
pub(crate) use hash::Hash;
pub(crate) use migrate::{migrate_04, migrate_repo};
pub(crate) use notification_map::NotificationEntry;
pub(crate) type ArcRepo = Arc<dyn FullRepo>;
@ -103,6 +105,7 @@ pub(crate) trait FullRepo:
+ AliasRepo
+ QueueRepo
+ HashRepo
+ VariantRepo
+ StoreMigrationRepo
+ AliasAccessRepo
+ VariantAccessRepo
@ -337,6 +340,13 @@ where
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub(crate) struct JobId(Uuid);
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub(crate) enum JobResult {
Success,
Failure,
Aborted,
}
impl JobId {
pub(crate) fn gen() -> Self {
Self(Uuid::now_v7())
@ -380,6 +390,7 @@ pub(crate) trait QueueRepo: BaseRepo {
queue: &'static str,
worker_id: Uuid,
job_id: JobId,
job_status: JobResult,
) -> Result<(), RepoError>;
}
@ -423,8 +434,9 @@ where
queue: &'static str,
worker_id: Uuid,
job_id: JobId,
job_status: JobResult,
) -> Result<(), RepoError> {
T::complete_job(self, queue, worker_id, job_id).await
T::complete_job(self, queue, worker_id, job_id, job_status).await
}
}
@ -432,7 +444,6 @@ where
pub(crate) trait SettingsRepo: BaseRepo {
async fn set(&self, key: &'static str, value: Arc<[u8]>) -> Result<(), RepoError>;
async fn get(&self, key: &'static str) -> Result<Option<Arc<[u8]>>, RepoError>;
async fn remove(&self, key: &'static str) -> Result<(), RepoError>;
}
#[async_trait::async_trait(?Send)]
@ -447,10 +458,6 @@ where
async fn get(&self, key: &'static str) -> Result<Option<Arc<[u8]>>, RepoError> {
T::get(self, key).await
}
async fn remove(&self, key: &'static str) -> Result<(), RepoError> {
T::remove(self, key).await
}
}
#[async_trait::async_trait(?Send)]
@ -644,20 +651,6 @@ pub(crate) trait HashRepo: BaseRepo {
async fn identifier(&self, hash: Hash) -> Result<Option<Arc<str>>, RepoError>;
async fn relate_variant_identifier(
&self,
hash: Hash,
variant: String,
identifier: &Arc<str>,
) -> Result<Result<(), VariantAlreadyExists>, RepoError>;
async fn variant_identifier(
&self,
hash: Hash,
variant: String,
) -> Result<Option<Arc<str>>, RepoError>;
async fn variants(&self, hash: Hash) -> Result<Vec<(String, Arc<str>)>, RepoError>;
async fn remove_variant(&self, hash: Hash, variant: String) -> Result<(), RepoError>;
async fn relate_blurhash(&self, hash: Hash, blurhash: Arc<str>) -> Result<(), RepoError>;
async fn blurhash(&self, hash: Hash) -> Result<Option<Arc<str>>, RepoError>;
@ -717,6 +710,96 @@ where
T::identifier(self, hash).await
}
async fn relate_blurhash(&self, hash: Hash, blurhash: Arc<str>) -> Result<(), RepoError> {
T::relate_blurhash(self, hash, blurhash).await
}
async fn blurhash(&self, hash: Hash) -> Result<Option<Arc<str>>, RepoError> {
T::blurhash(self, hash).await
}
async fn relate_motion_identifier(
&self,
hash: Hash,
identifier: &Arc<str>,
) -> Result<(), RepoError> {
T::relate_motion_identifier(self, hash, identifier).await
}
async fn motion_identifier(&self, hash: Hash) -> Result<Option<Arc<str>>, RepoError> {
T::motion_identifier(self, hash).await
}
async fn cleanup_hash(&self, hash: Hash) -> Result<(), RepoError> {
T::cleanup_hash(self, hash).await
}
}
#[async_trait::async_trait(?Send)]
pub(crate) trait VariantRepo: BaseRepo {
async fn claim_variant_processing_rights(
&self,
hash: Hash,
variant: String,
) -> Result<Result<(), NotificationEntry>, RepoError>;
async fn variant_waiter(
&self,
hash: Hash,
variant: String,
) -> Result<NotificationEntry, RepoError>;
async fn variant_heartbeat(&self, hash: Hash, variant: String) -> Result<(), RepoError>;
async fn notify_variant(&self, hash: Hash, variant: String) -> Result<(), RepoError>;
async fn relate_variant_identifier(
&self,
hash: Hash,
variant: String,
identifier: &Arc<str>,
) -> Result<Result<(), VariantAlreadyExists>, RepoError>;
async fn variant_identifier(
&self,
hash: Hash,
variant: String,
) -> Result<Option<Arc<str>>, RepoError>;
async fn variants(&self, hash: Hash) -> Result<Vec<(String, Arc<str>)>, RepoError>;
async fn remove_variant(&self, hash: Hash, variant: String) -> Result<(), RepoError>;
}
#[async_trait::async_trait(?Send)]
impl<T> VariantRepo for Arc<T>
where
T: VariantRepo,
{
async fn claim_variant_processing_rights(
&self,
hash: Hash,
variant: String,
) -> Result<Result<(), NotificationEntry>, RepoError> {
T::claim_variant_processing_rights(self, hash, variant).await
}
async fn variant_waiter(
&self,
hash: Hash,
variant: String,
) -> Result<NotificationEntry, RepoError> {
T::variant_waiter(self, hash, variant).await
}
async fn variant_heartbeat(&self, hash: Hash, variant: String) -> Result<(), RepoError> {
T::variant_heartbeat(self, hash, variant).await
}
async fn notify_variant(&self, hash: Hash, variant: String) -> Result<(), RepoError> {
T::notify_variant(self, hash, variant).await
}
async fn relate_variant_identifier(
&self,
hash: Hash,
@ -741,30 +824,6 @@ where
async fn remove_variant(&self, hash: Hash, variant: String) -> Result<(), RepoError> {
T::remove_variant(self, hash, variant).await
}
async fn relate_blurhash(&self, hash: Hash, blurhash: Arc<str>) -> Result<(), RepoError> {
T::relate_blurhash(self, hash, blurhash).await
}
async fn blurhash(&self, hash: Hash) -> Result<Option<Arc<str>>, RepoError> {
T::blurhash(self, hash).await
}
async fn relate_motion_identifier(
&self,
hash: Hash,
identifier: &Arc<str>,
) -> Result<(), RepoError> {
T::relate_motion_identifier(self, hash, identifier).await
}
async fn motion_identifier(&self, hash: Hash) -> Result<Option<Arc<str>>, RepoError> {
T::motion_identifier(self, hash).await
}
async fn cleanup_hash(&self, hash: Hash) -> Result<(), RepoError> {
T::cleanup_hash(self, hash).await
}
}
#[async_trait::async_trait(?Send)]


@ -0,0 +1,92 @@
use dashmap::{mapref::entry::Entry, DashMap};
use std::{
future::Future,
sync::{Arc, Weak},
time::Duration,
};
use tokio::sync::Notify;
use crate::future::WithTimeout;
type Map = Arc<DashMap<Arc<str>, Weak<NotificationEntryInner>>>;
#[derive(Clone)]
pub(super) struct NotificationMap {
map: Map,
}
pub(crate) struct NotificationEntry {
inner: Arc<NotificationEntryInner>,
}
struct NotificationEntryInner {
key: Arc<str>,
map: Map,
notify: Notify,
}
impl NotificationMap {
pub(super) fn new() -> Self {
Self {
map: Arc::new(DashMap::new()),
}
}
pub(super) fn register_interest(&self, key: Arc<str>) -> NotificationEntry {
match self.map.entry(key.clone()) {
Entry::Occupied(mut occupied) => {
if let Some(inner) = occupied.get().upgrade() {
NotificationEntry { inner }
} else {
let inner = Arc::new(NotificationEntryInner {
key,
map: self.map.clone(),
notify: crate::sync::bare_notify(),
});
occupied.insert(Arc::downgrade(&inner));
NotificationEntry { inner }
}
}
Entry::Vacant(vacant) => {
let inner = Arc::new(NotificationEntryInner {
key,
map: self.map.clone(),
notify: crate::sync::bare_notify(),
});
vacant.insert(Arc::downgrade(&inner));
NotificationEntry { inner }
}
}
}
pub(super) fn notify(&self, key: &str) {
if let Some(notifier) = self.map.get(key).and_then(|v| v.upgrade()) {
notifier.notify.notify_waiters();
}
}
}
impl NotificationEntry {
pub(crate) fn notified_timeout(
&mut self,
duration: Duration,
) -> impl Future<Output = Result<(), tokio::time::error::Elapsed>> + '_ {
self.inner.notify.notified().with_timeout(duration)
}
}
impl Default for NotificationMap {
fn default() -> Self {
Self::new()
}
}
impl Drop for NotificationEntryInner {
fn drop(&mut self) {
self.map.remove(&self.key);
}
}
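A usage sketch for the map above (assumed to run inside the repo module, since NotificationMap and its methods are pub(super)): one task registers interest in a key and waits with a timeout, while the task doing the work calls notify with the same key once it finishes.
async fn wait_for(map: &NotificationMap, key: Arc<str>) -> bool {
    // Hypothetical example; the peer is expected to call map.notify(&key).
    let mut entry = map.register_interest(Arc::clone(&key));
    entry
        .notified_timeout(Duration::from_secs(5))
        .await
        .is_ok() // true if notified in time, false if the timeout elapsed
}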


@ -4,6 +4,7 @@ mod schema;
use std::{
collections::{BTreeSet, VecDeque},
future::Future,
path::PathBuf,
sync::{
atomic::{AtomicU64, Ordering},
@ -12,19 +13,20 @@ use std::{
time::{Duration, Instant},
};
use bb8::CustomizeConnection;
use dashmap::DashMap;
use diesel::prelude::*;
use diesel_async::{
pooled_connection::{
deadpool::{BuildError, Hook, Object, Pool, PoolError},
AsyncDieselConnectionManager, ManagerConfig,
bb8::{Pool, PooledConnection, RunError},
AsyncDieselConnectionManager, ManagerConfig, PoolError,
},
AsyncConnection, AsyncPgConnection, RunQueryDsl,
};
use futures_core::Stream;
use tokio::sync::Notify;
use tokio_postgres::{AsyncMessage, Connection, NoTls, Notification, Socket};
use tokio_postgres_rustls::MakeRustlsConnect;
use tokio_postgres_generic_rustls::{AwsLcRsDigest, MakeRustlsConnect};
use tracing::Instrument;
use url::Url;
use uuid::Uuid;
@ -32,7 +34,7 @@ use uuid::Uuid;
use crate::{
details::Details,
error_code::{ErrorCode, OwnedErrorCode},
future::{WithMetrics, WithTimeout},
future::{WithMetrics, WithPollTimer, WithTimeout},
serde_str::Serde,
stream::LocalBoxStream,
sync::DropHandle,
@ -42,10 +44,11 @@ use self::job_status::JobStatus;
use super::{
metrics::{PopMetricsGuard, PushMetricsGuard, WaitMetricsGuard},
notification_map::{NotificationEntry, NotificationMap},
Alias, AliasAccessRepo, AliasAlreadyExists, AliasRepo, BaseRepo, DeleteToken, DetailsRepo,
FullRepo, Hash, HashAlreadyExists, HashPage, HashRepo, JobId, OrderedHash, ProxyRepo,
QueueRepo, RepoError, SettingsRepo, StoreMigrationRepo, UploadId, UploadRepo, UploadResult,
VariantAccessRepo, VariantAlreadyExists,
FullRepo, Hash, HashAlreadyExists, HashPage, HashRepo, JobId, JobResult, OrderedHash,
ProxyRepo, QueueRepo, RepoError, SettingsRepo, StoreMigrationRepo, UploadId, UploadRepo,
UploadResult, VariantAccessRepo, VariantAlreadyExists, VariantRepo,
};
#[derive(Clone)]
@ -61,6 +64,7 @@ struct Inner {
notifier_pool: Pool<AsyncPgConnection>,
queue_notifications: DashMap<String, Arc<Notify>>,
upload_notifications: DashMap<UploadId, Weak<Notify>>,
keyed_notifications: NotificationMap,
}
struct UploadInterest {
@ -80,6 +84,10 @@ struct UploadNotifierState<'a> {
inner: &'a Inner,
}
struct KeyedNotifierState<'a> {
inner: &'a Inner,
}
#[derive(Debug, thiserror::Error)]
pub(crate) enum ConnectPostgresError {
#[error("Failed to connect to postgres for migrations")]
@ -92,16 +100,16 @@ pub(crate) enum ConnectPostgresError {
Migration(#[source] Box<refinery::Error>),
#[error("Failed to build postgres connection pool")]
BuildPool(#[source] BuildError),
BuildPool(#[source] PoolError),
}
#[derive(Debug, thiserror::Error)]
pub(crate) enum PostgresError {
#[error("Error in db pool")]
Pool(#[source] PoolError),
Pool(#[source] RunError),
#[error("Error in database")]
Diesel(#[source] diesel::result::Error),
Diesel(#[from] diesel::result::Error),
#[error("Error deserializing hex value")]
Hex(#[source] hex::FromHexError),
@ -154,22 +162,18 @@ impl PostgresError {
pub(super) const fn is_disconnected(&self) -> bool {
matches!(
self,
Self::Pool(
PoolError::Closed
| PoolError::Backend(
diesel_async::pooled_connection::PoolError::ConnectionError(_)
),
) | Self::Diesel(diesel::result::Error::DatabaseError(
diesel::result::DatabaseErrorKind::ClosedConnection,
_,
))
Self::Pool(RunError::User(PoolError::ConnectionError(_)))
| Self::Diesel(diesel::result::Error::DatabaseError(
diesel::result::DatabaseErrorKind::ClosedConnection,
_,
))
)
}
}
async fn build_tls_connector(
certificate_file: Option<PathBuf>,
) -> Result<MakeRustlsConnect, TlsError> {
) -> Result<MakeRustlsConnect<AwsLcRsDigest>, TlsError> {
let mut cert_store = rustls::RootCertStore {
roots: Vec::from(webpki_roots::TLS_SERVER_ROOTS),
};
@ -195,14 +199,14 @@ async fn build_tls_connector(
.with_root_certificates(cert_store)
.with_no_client_auth();
let tls = MakeRustlsConnect::new(config);
let tls = MakeRustlsConnect::new(config, AwsLcRsDigest);
Ok(tls)
}
async fn connect_for_migrations(
postgres_url: &Url,
tls_connector: Option<MakeRustlsConnect>,
tls_connector: Option<MakeRustlsConnect<AwsLcRsDigest>>,
) -> Result<
(
tokio_postgres::Client,
@ -233,11 +237,37 @@ async fn connect_for_migrations(
Ok(tup)
}
fn build_pool(
#[derive(Debug)]
struct OnConnect;
impl<C, E> CustomizeConnection<C, E> for OnConnect
where
C: Send + 'static,
E: 'static,
{
fn on_acquire<'life0, 'life1, 'async_trait>(
&'life0 self,
_connection: &'life1 mut C,
) -> core::pin::Pin<
Box<dyn core::future::Future<Output = Result<(), E>> + core::marker::Send + 'async_trait>,
>
where
'life0: 'async_trait,
'life1: 'async_trait,
Self: 'async_trait,
{
Box::pin(async {
metrics::counter!(crate::init_metrics::POSTGRES_POOL_CONNECTION_CREATE).increment(1);
Ok(())
})
}
}
async fn build_pool(
postgres_url: &Url,
tx: flume::Sender<Notification>,
connector: Option<MakeRustlsConnect>,
max_size: usize,
tx: tokio::sync::mpsc::Sender<Notification>,
connector: Option<MakeRustlsConnect<AwsLcRsDigest>>,
max_size: u32,
) -> Result<Pool<AsyncPgConnection>, ConnectPostgresError> {
let mut config = ManagerConfig::default();
config.custom_setup = build_handler(tx, connector);
@ -247,21 +277,12 @@ fn build_pool(
config,
);
let pool = Pool::builder(mgr)
.runtime(deadpool::Runtime::Tokio1)
.wait_timeout(Some(Duration::from_secs(10)))
.create_timeout(Some(Duration::from_secs(2)))
.recycle_timeout(Some(Duration::from_secs(2)))
.post_create(Hook::sync_fn(|_, _| {
metrics::counter!(crate::init_metrics::POSTGRES_POOL_CONNECTION_CREATE).increment(1);
Ok(())
}))
.post_recycle(Hook::sync_fn(|_, _| {
metrics::counter!(crate::init_metrics::POSTGRES_POOL_CONNECTION_RECYCLE).increment(1);
Ok(())
}))
let pool = Pool::builder()
.max_size(max_size)
.build()
.connection_timeout(Duration::from_secs(10))
.connection_customizer(Box::new(OnConnect))
.build(mgr)
.await
.map_err(ConnectPostgresError::BuildPool)?;
Ok(pool)
@ -298,22 +319,29 @@ impl PostgresRepo {
.map(|u| u.into())
.unwrap_or(1_usize);
let (tx, rx) = flume::bounded(10);
let (tx, rx) = crate::sync::channel(10);
let pool = build_pool(
&postgres_url,
tx.clone(),
connector.clone(),
parallelism as u32 * 8,
)
.await?;
let notifier_pool =
build_pool(&postgres_url, tx, connector, parallelism.min(4) as u32).await?;
let inner = Arc::new(Inner {
health_count: AtomicU64::new(0),
pool: build_pool(
&postgres_url,
tx.clone(),
connector.clone(),
parallelism * 8,
)?,
notifier_pool: build_pool(&postgres_url, tx, connector, parallelism.min(4))?,
pool,
notifier_pool,
queue_notifications: DashMap::new(),
upload_notifications: DashMap::new(),
keyed_notifications: NotificationMap::new(),
});
let handle = crate::sync::abort_on_drop(crate::sync::spawn(
let handle = crate::sync::abort_on_drop(crate::sync::spawn_sendable(
"postgres-delegate-notifications",
delegate_notifications(rx, inner.clone(), parallelism * 8),
));
@ -326,15 +354,114 @@ impl PostgresRepo {
})
}
async fn get_connection(&self) -> Result<Object<AsyncPgConnection>, PostgresError> {
self.inner.get_connection().await
async fn get_connection(
&self,
) -> Result<PooledConnection<'_, AsyncPgConnection>, PostgresError> {
self.inner
.get_connection()
.with_poll_timer("postgres-get-connection")
.await
}
async fn get_notifier_connection(&self) -> Result<Object<AsyncPgConnection>, PostgresError> {
self.inner.get_notifier_connection().await
async fn get_notifier_connection(
&self,
) -> Result<PooledConnection<'_, AsyncPgConnection>, PostgresError> {
self.inner
.get_notifier_connection()
.with_poll_timer("postgres-get-notifier-connection")
.await
}
async fn insert_keyed_notifier(
&self,
input_key: &str,
) -> Result<Result<(), AlreadyInserted>, PostgresError> {
use schema::keyed_notifications::dsl::*;
let mut conn = self.get_connection().await?;
let timestamp = to_primitive(time::OffsetDateTime::now_utc());
diesel::delete(keyed_notifications)
.filter(heartbeat.le(timestamp.saturating_sub(time::Duration::minutes(2))))
.execute(&mut conn)
.with_timeout(Duration::from_secs(5))
.await
.map_err(|_| PostgresError::DbTimeout)?
.map_err(PostgresError::Diesel)?;
let res = diesel::insert_into(keyed_notifications)
.values(key.eq(input_key))
.execute(&mut conn)
.with_timeout(Duration::from_secs(5))
.await
.map_err(|_| PostgresError::DbTimeout)?;
match res {
Ok(_) => Ok(Ok(())),
Err(diesel::result::Error::DatabaseError(
diesel::result::DatabaseErrorKind::UniqueViolation,
_,
)) => Ok(Err(AlreadyInserted)),
Err(e) => Err(PostgresError::Diesel(e)),
}
}
async fn keyed_notifier_heartbeat(&self, input_key: &str) -> Result<(), PostgresError> {
use schema::keyed_notifications::dsl::*;
let mut conn = self.get_connection().await?;
let timestamp = to_primitive(time::OffsetDateTime::now_utc());
diesel::update(keyed_notifications)
.filter(key.eq(input_key))
.set(heartbeat.eq(timestamp))
.execute(&mut conn)
.with_timeout(Duration::from_secs(5))
.await
.map_err(|_| PostgresError::DbTimeout)?
.map_err(PostgresError::Diesel)?;
Ok(())
}
fn listen_on_key(&self, key: Arc<str>) -> NotificationEntry {
self.inner.keyed_notifications.register_interest(key)
}
async fn register_interest(&self) -> Result<(), PostgresError> {
let mut notifier_conn = self.get_notifier_connection().await?;
diesel::sql_query("LISTEN keyed_notification_channel;")
.execute(&mut notifier_conn)
.with_timeout(Duration::from_secs(5))
.await
.map_err(|_| PostgresError::DbTimeout)?
.map_err(PostgresError::Diesel)?;
Ok(())
}
async fn clear_keyed_notifier(&self, input_key: String) -> Result<(), PostgresError> {
use schema::keyed_notifications::dsl::*;
let mut conn = self.get_connection().await?;
diesel::delete(keyed_notifications)
.filter(key.eq(input_key))
.execute(&mut conn)
.with_timeout(Duration::from_secs(5))
.await
.map_err(|_| PostgresError::DbTimeout)?
.map_err(PostgresError::Diesel)?;
Ok(())
}
}
struct AlreadyInserted;
struct GetConnectionMetricsGuard {
start: Instant,
armed: bool,
@ -363,7 +490,9 @@ impl Drop for GetConnectionMetricsGuard {
impl Inner {
#[tracing::instrument(level = "trace", skip(self))]
async fn get_connection(&self) -> Result<Object<AsyncPgConnection>, PostgresError> {
async fn get_connection(
&self,
) -> Result<PooledConnection<'_, AsyncPgConnection>, PostgresError> {
let guard = GetConnectionMetricsGuard::guard();
let obj = self.pool.get().await.map_err(PostgresError::Pool)?;
@ -374,7 +503,9 @@ impl Inner {
}
#[tracing::instrument(level = "trace", skip(self))]
async fn get_notifier_connection(&self) -> Result<Object<AsyncPgConnection>, PostgresError> {
async fn get_notifier_connection(
&self,
) -> Result<PooledConnection<'_, AsyncPgConnection>, PostgresError> {
let guard = GetConnectionMetricsGuard::guard();
let obj = self
@ -403,13 +534,15 @@ impl Inner {
}
impl UploadInterest {
async fn notified_timeout(&self, timeout: Duration) -> Result<(), tokio::time::error::Elapsed> {
fn notified_timeout(
&self,
timeout: Duration,
) -> impl Future<Output = Result<(), tokio::time::error::Elapsed>> + '_ {
self.interest
.as_ref()
.expect("interest exists")
.notified()
.with_timeout(timeout)
.await
}
}
@ -477,12 +610,18 @@ impl<'a> UploadNotifierState<'a> {
}
}
impl<'a> KeyedNotifierState<'a> {
fn handle(&self, key: &str) {
self.inner.keyed_notifications.notify(key);
}
}
type BoxFuture<'a, T> = std::pin::Pin<Box<dyn std::future::Future<Output = T> + Send + 'a>>;
type ConfigFn =
Box<dyn Fn(&str) -> BoxFuture<'_, ConnectionResult<AsyncPgConnection>> + Send + Sync + 'static>;
async fn delegate_notifications(
receiver: flume::Receiver<Notification>,
mut receiver: tokio::sync::mpsc::Receiver<Notification>,
inner: Arc<Inner>,
capacity: usize,
) {
@ -495,7 +634,9 @@ async fn delegate_notifications(
let upload_notifier_state = UploadNotifierState { inner: &inner };
while let Ok(notification) = receiver.recv_async().await {
let keyed_notifier_state = KeyedNotifierState { inner: &inner };
while let Some(notification) = receiver.recv().await {
tracing::trace!("delegate_notifications: looping");
metrics::counter!(crate::init_metrics::POSTGRES_NOTIFICATION).increment(1);
@ -508,6 +649,10 @@ async fn delegate_notifications(
// new upload finished
upload_notifier_state.handle(notification.payload());
}
"keyed_notification_channel" => {
// new keyed notification
keyed_notifier_state.handle(notification.payload());
}
channel => {
tracing::info!(
"Unhandled postgres notification: {channel}: {}",
@ -521,8 +666,8 @@ async fn delegate_notifications(
}
fn build_handler(
sender: flume::Sender<Notification>,
connector: Option<MakeRustlsConnect>,
sender: tokio::sync::mpsc::Sender<Notification>,
connector: Option<MakeRustlsConnect<AwsLcRsDigest>>,
) -> ConfigFn {
Box::new(
move |config: &str| -> BoxFuture<'_, ConnectionResult<AsyncPgConnection>> {
@ -563,13 +708,16 @@ fn build_handler(
}
fn spawn_db_notification_task<S>(
sender: flume::Sender<Notification>,
sender: tokio::sync::mpsc::Sender<Notification>,
mut conn: Connection<Socket, S>,
) where
S: tokio_postgres::tls::TlsStream + Unpin + 'static,
S: tokio_postgres::tls::TlsStream + Send + Unpin + 'static,
{
crate::sync::spawn("postgres-notifications", async move {
while let Some(res) = std::future::poll_fn(|cx| conn.poll_message(cx)).await {
crate::sync::spawn_sendable("postgres-notifications", async move {
while let Some(res) = std::future::poll_fn(|cx| conn.poll_message(cx))
.with_poll_timer("poll-message")
.await
{
tracing::trace!("db_notification_task: looping");
match res {
@ -581,7 +729,7 @@ fn spawn_db_notification_task<S>(
tracing::warn!("Database Notice {e:?}");
}
Ok(AsyncMessage::Notification(notification)) => {
if sender.send_async(notification).await.is_err() {
if sender.send(notification).await.is_err() {
tracing::warn!("Missed notification. Are we shutting down?");
}
}
@ -826,110 +974,6 @@ impl HashRepo for PostgresRepo {
Ok(opt.map(Arc::from))
}
#[tracing::instrument(level = "debug", skip(self))]
async fn relate_variant_identifier(
&self,
input_hash: Hash,
input_variant: String,
input_identifier: &Arc<str>,
) -> Result<Result<(), VariantAlreadyExists>, RepoError> {
use schema::variants::dsl::*;
let mut conn = self.get_connection().await?;
let res = diesel::insert_into(variants)
.values((
hash.eq(&input_hash),
variant.eq(&input_variant),
identifier.eq(input_identifier.as_ref()),
))
.execute(&mut conn)
.with_metrics(crate::init_metrics::POSTGRES_VARIANTS_RELATE_VARIANT_IDENTIFIER)
.with_timeout(Duration::from_secs(5))
.await
.map_err(|_| PostgresError::DbTimeout)?;
match res {
Ok(_) => Ok(Ok(())),
Err(diesel::result::Error::DatabaseError(
diesel::result::DatabaseErrorKind::UniqueViolation,
_,
)) => Ok(Err(VariantAlreadyExists)),
Err(e) => Err(PostgresError::Diesel(e).into()),
}
}
#[tracing::instrument(level = "debug", skip(self))]
async fn variant_identifier(
&self,
input_hash: Hash,
input_variant: String,
) -> Result<Option<Arc<str>>, RepoError> {
use schema::variants::dsl::*;
let mut conn = self.get_connection().await?;
let opt = variants
.select(identifier)
.filter(hash.eq(&input_hash))
.filter(variant.eq(&input_variant))
.get_result::<String>(&mut conn)
.with_metrics(crate::init_metrics::POSTGRES_VARIANTS_IDENTIFIER)
.with_timeout(Duration::from_secs(5))
.await
.map_err(|_| PostgresError::DbTimeout)?
.optional()
.map_err(PostgresError::Diesel)?
.map(Arc::from);
Ok(opt)
}
#[tracing::instrument(level = "debug", skip(self))]
async fn variants(&self, input_hash: Hash) -> Result<Vec<(String, Arc<str>)>, RepoError> {
use schema::variants::dsl::*;
let mut conn = self.get_connection().await?;
let vec = variants
.select((variant, identifier))
.filter(hash.eq(&input_hash))
.get_results::<(String, String)>(&mut conn)
.with_metrics(crate::init_metrics::POSTGRES_VARIANTS_FOR_HASH)
.with_timeout(Duration::from_secs(5))
.await
.map_err(|_| PostgresError::DbTimeout)?
.map_err(PostgresError::Diesel)?
.into_iter()
.map(|(s, i)| (s, Arc::from(i)))
.collect();
Ok(vec)
}
#[tracing::instrument(level = "debug", skip(self))]
async fn remove_variant(
&self,
input_hash: Hash,
input_variant: String,
) -> Result<(), RepoError> {
use schema::variants::dsl::*;
let mut conn = self.get_connection().await?;
diesel::delete(variants)
.filter(hash.eq(&input_hash))
.filter(variant.eq(&input_variant))
.execute(&mut conn)
.with_metrics(crate::init_metrics::POSTGRES_VARIANTS_REMOVE)
.with_timeout(Duration::from_secs(5))
.await
.map_err(|_| PostgresError::DbTimeout)?
.map_err(PostgresError::Diesel)?;
Ok(())
}
#[tracing::instrument(level = "debug", skip(self))]
async fn relate_blurhash(
&self,
@ -1046,6 +1090,167 @@ impl HashRepo for PostgresRepo {
}
}
#[async_trait::async_trait(?Send)]
impl VariantRepo for PostgresRepo {
#[tracing::instrument(level = "debug", skip(self))]
async fn claim_variant_processing_rights(
&self,
hash: Hash,
variant: String,
) -> Result<Result<(), NotificationEntry>, RepoError> {
let key = Arc::from(format!("{}{variant}", hash.to_base64()));
let entry = self.listen_on_key(Arc::clone(&key));
self.register_interest().await?;
if self
.variant_identifier(hash.clone(), variant.clone())
.await?
.is_some()
{
return Ok(Err(entry));
}
match self.insert_keyed_notifier(&key).await? {
Ok(()) => Ok(Ok(())),
Err(AlreadyInserted) => Ok(Err(entry)),
}
}
async fn variant_waiter(
&self,
hash: Hash,
variant: String,
) -> Result<NotificationEntry, RepoError> {
let key = Arc::from(format!("{}{variant}", hash.to_base64()));
let entry = self.listen_on_key(key);
self.register_interest().await?;
Ok(entry)
}
#[tracing::instrument(level = "debug", skip(self))]
async fn variant_heartbeat(&self, hash: Hash, variant: String) -> Result<(), RepoError> {
let key = format!("{}{variant}", hash.to_base64());
self.keyed_notifier_heartbeat(&key)
.await
.map_err(Into::into)
}
#[tracing::instrument(level = "trace", skip(self))]
async fn notify_variant(&self, hash: Hash, variant: String) -> Result<(), RepoError> {
let key = format!("{}{variant}", hash.to_base64());
self.clear_keyed_notifier(key).await.map_err(Into::into)
}
#[tracing::instrument(level = "debug", skip(self))]
async fn relate_variant_identifier(
&self,
input_hash: Hash,
input_variant: String,
input_identifier: &Arc<str>,
) -> Result<Result<(), VariantAlreadyExists>, RepoError> {
use schema::variants::dsl::*;
let mut conn = self.get_connection().await?;
let res = diesel::insert_into(variants)
.values((
hash.eq(&input_hash),
variant.eq(&input_variant),
identifier.eq(input_identifier.to_string()),
))
.execute(&mut conn)
.with_metrics(crate::init_metrics::POSTGRES_VARIANTS_RELATE_VARIANT_IDENTIFIER)
.with_timeout(Duration::from_secs(5))
.await
.map_err(|_| PostgresError::DbTimeout)?;
match res {
Ok(_) => Ok(Ok(())),
Err(diesel::result::Error::DatabaseError(
diesel::result::DatabaseErrorKind::UniqueViolation,
_,
)) => Ok(Err(VariantAlreadyExists)),
Err(e) => Err(PostgresError::Diesel(e).into()),
}
}
#[tracing::instrument(level = "debug", skip(self))]
async fn variant_identifier(
&self,
input_hash: Hash,
input_variant: String,
) -> Result<Option<Arc<str>>, RepoError> {
use schema::variants::dsl::*;
let mut conn = self.get_connection().await?;
let opt = variants
.select(identifier)
.filter(hash.eq(&input_hash))
.filter(variant.eq(&input_variant))
.get_result::<String>(&mut conn)
.with_metrics(crate::init_metrics::POSTGRES_VARIANTS_IDENTIFIER)
.with_timeout(Duration::from_secs(5))
.await
.map_err(|_| PostgresError::DbTimeout)?
.optional()
.map_err(PostgresError::Diesel)?
.map(Arc::from);
Ok(opt)
}
#[tracing::instrument(level = "debug", skip(self))]
async fn variants(&self, input_hash: Hash) -> Result<Vec<(String, Arc<str>)>, RepoError> {
use schema::variants::dsl::*;
let mut conn = self.get_connection().await?;
let vec = variants
.select((variant, identifier))
.filter(hash.eq(&input_hash))
.get_results::<(String, String)>(&mut conn)
.with_metrics(crate::init_metrics::POSTGRES_VARIANTS_FOR_HASH)
.with_timeout(Duration::from_secs(5))
.await
.map_err(|_| PostgresError::DbTimeout)?
.map_err(PostgresError::Diesel)?
.into_iter()
.map(|(s, i)| (s, Arc::from(i)))
.collect();
Ok(vec)
}
#[tracing::instrument(level = "debug", skip(self))]
async fn remove_variant(
&self,
input_hash: Hash,
input_variant: String,
) -> Result<(), RepoError> {
use schema::variants::dsl::*;
let mut conn = self.get_connection().await?;
diesel::delete(variants)
.filter(hash.eq(&input_hash))
.filter(variant.eq(&input_variant))
.execute(&mut conn)
.with_metrics(crate::init_metrics::POSTGRES_VARIANTS_REMOVE)
.with_timeout(Duration::from_secs(5))
.await
.map_err(|_| PostgresError::DbTimeout)?
.map_err(PostgresError::Diesel)?;
Ok(())
}
}
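A hypothetical caller sketch (the generate-side wiring is not part of these diffs, so the shape of claim_or_wait is assumed): a worker either claims the right to produce a variant, or receives a NotificationEntry and waits for the claimant's notify_variant call, which deletes the keyed_notifications row and, via the trigger in the migration below, notifies keyed_notification_channel.
async fn claim_or_wait(repo: &ArcRepo, hash: Hash, variant: String) -> Result<bool, RepoError> {
    match repo
        .claim_variant_processing_rights(hash.clone(), variant.clone())
        .await?
    {
        Ok(()) => {
            // this worker owns the work; after storing the result it wakes waiters
            repo.notify_variant(hash, variant).await?;
            Ok(true)
        }
        Err(mut entry) => {
            // another worker already claimed it; wait for its notification
            let _ = entry.notified_timeout(Duration::from_secs(20)).await;
            Ok(false)
        }
    }
}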
#[async_trait::async_trait(?Send)]
impl AliasRepo for PostgresRepo {
#[tracing::instrument(level = "debug", skip(self))]
@ -1207,24 +1412,6 @@ impl SettingsRepo for PostgresRepo {
Ok(opt)
}
#[tracing::instrument(level = "debug", skip(self))]
async fn remove(&self, input_key: &'static str) -> Result<(), RepoError> {
use schema::settings::dsl::*;
let mut conn = self.get_connection().await?;
diesel::delete(settings)
.filter(key.eq(input_key))
.execute(&mut conn)
.with_metrics(crate::init_metrics::POSTGRES_SETTINGS_REMOVE)
.with_timeout(Duration::from_secs(5))
.await
.map_err(|_| PostgresError::DbTimeout)?
.map_err(PostgresError::Diesel)?;
Ok(())
}
}
#[async_trait::async_trait(?Send)]
@ -1242,16 +1429,22 @@ impl DetailsRepo for PostgresRepo {
let value =
serde_json::to_value(&input_details.inner).map_err(PostgresError::SerializeDetails)?;
diesel::insert_into(details)
let res = diesel::insert_into(details)
.values((identifier.eq(input_identifier.as_ref()), json.eq(&value)))
.execute(&mut conn)
.with_metrics(crate::init_metrics::POSTGRES_DETAILS_RELATE)
.with_timeout(Duration::from_secs(5))
.await
.map_err(|_| PostgresError::DbTimeout)?
.map_err(PostgresError::Diesel)?;
.map_err(|_| PostgresError::DbTimeout)?;
Ok(())
match res {
Ok(_)
| Err(diesel::result::Error::DatabaseError(
diesel::result::DatabaseErrorKind::UniqueViolation,
_,
)) => Ok(()),
Err(e) => Err(PostgresError::Diesel(e).into()),
}
}
#[tracing::instrument(level = "debug", skip(self))]
@ -1358,7 +1551,7 @@ impl QueueRepo for PostgresRepo {
}
}
#[tracing::instrument(level = "debug", skip(self))]
#[tracing::instrument(level = "debug", skip_all, fields(job_id))]
async fn pop(
&self,
queue_name: &'static str,
@ -1384,6 +1577,7 @@ impl QueueRepo for PostgresRepo {
.execute(&mut notifier_conn)
.with_metrics(crate::init_metrics::POSTGRES_QUEUE_LISTEN)
.with_timeout(Duration::from_secs(5))
.with_poll_timer("pop-listen")
.await
.map_err(|_| PostgresError::DbTimeout)?
.map_err(PostgresError::Diesel)?;
@ -1404,6 +1598,7 @@ impl QueueRepo for PostgresRepo {
.execute(&mut conn)
.with_metrics(crate::init_metrics::POSTGRES_QUEUE_REQUEUE)
.with_timeout(Duration::from_secs(5))
.with_poll_timer("pop-reset-jobs")
.await
.map_err(|_| PostgresError::DbTimeout)?
.map_err(PostgresError::Diesel)?;
@ -1420,7 +1615,8 @@ impl QueueRepo for PostgresRepo {
queue_alias
.field(status)
.eq(JobStatus::New)
.and(queue_alias.field(queue).eq(queue_name)),
.and(queue_alias.field(queue).eq(queue_name))
.and(queue_alias.field(retry).ge(1)),
)
.order(queue_alias.field(queue_time))
.for_update()
@ -1439,26 +1635,29 @@ impl QueueRepo for PostgresRepo {
.get_result(&mut conn)
.with_metrics(crate::init_metrics::POSTGRES_QUEUE_CLAIM)
.with_timeout(Duration::from_secs(5))
.with_poll_timer("pop-claim-job")
.await
.map_err(|_| PostgresError::DbTimeout)?
.optional()
.map_err(PostgresError::Diesel)?;
if let Some((job_id, job_json)) = opt {
tracing::Span::current().record("job_id", &format!("{job_id}"));
guard.disarm();
tracing::debug!("{job_json}");
return Ok((JobId(job_id), job_json));
}
drop(conn);
if notifier
match notifier
.notified()
.with_timeout(Duration::from_secs(5))
.with_poll_timer("pop-wait-notify")
.await
.is_ok()
{
tracing::debug!("Notified");
} else {
tracing::debug!("Timed out");
Ok(()) => tracing::debug!("Notified"),
Err(_) => tracing::trace!("Timed out"),
}
}
}
@ -1499,23 +1698,62 @@ impl QueueRepo for PostgresRepo {
queue_name: &'static str,
worker_id: Uuid,
job_id: JobId,
job_status: JobResult,
) -> Result<(), RepoError> {
use schema::job_queue::dsl::*;
let mut conn = self.get_connection().await?;
diesel::delete(job_queue)
.filter(
id.eq(job_id.0)
.and(queue.eq(queue_name))
.and(worker.eq(worker_id)),
)
.execute(&mut conn)
.with_metrics(crate::init_metrics::POSTGRES_QUEUE_COMPLETE)
.with_timeout(Duration::from_secs(5))
.await
.map_err(|_| PostgresError::DbTimeout)?
.map_err(PostgresError::Diesel)?;
let count = if matches!(job_status, JobResult::Failure) {
diesel::update(job_queue)
.filter(
id.eq(job_id.0)
.and(queue.eq(queue_name))
.and(worker.eq(worker_id)),
)
.set((retry.eq(retry - 1), worker.eq(Option::<Uuid>::None)))
.execute(&mut conn)
.with_metrics(crate::init_metrics::POSTGRES_QUEUE_RETRY)
.with_timeout(Duration::from_secs(5))
.await
.map_err(|_| PostgresError::DbTimeout)?
.map_err(PostgresError::Diesel)?;
diesel::delete(job_queue)
.filter(id.eq(job_id.0).and(retry.le(0)))
.execute(&mut conn)
.with_metrics(crate::init_metrics::POSTGRES_QUEUE_CLEANUP)
.with_timeout(Duration::from_secs(5))
.await
.map_err(|_| PostgresError::DbTimeout)?
.map_err(PostgresError::Diesel)?
} else {
diesel::delete(job_queue)
.filter(
id.eq(job_id.0)
.and(queue.eq(queue_name))
.and(worker.eq(worker_id)),
)
.execute(&mut conn)
.with_metrics(crate::init_metrics::POSTGRES_QUEUE_COMPLETE)
.with_timeout(Duration::from_secs(5))
.await
.map_err(|_| PostgresError::DbTimeout)?
.map_err(PostgresError::Diesel)?
};
match job_status {
JobResult::Success => tracing::debug!("completed {job_id:?}"),
JobResult::Failure if count == 0 => {
tracing::info!("{job_id:?} failed, marked for retry")
}
JobResult::Failure => tracing::warn!("{job_id:?} failed permanently"),
JobResult::Aborted => tracing::warn!("{job_id:?} dead"),
}
if count > 0 {
tracing::debug!("Deleted {count} jobs");
}
Ok(())
}

View file

@ -0,0 +1,12 @@
use barrel::backend::Pg;
use barrel::{types, Migration};
pub(crate) fn migration() -> String {
let mut m = Migration::new();
m.change_table("job_queue", |t| {
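// new retry column: every existing and future job starts with 5 attempts,
// matching the initial retry count the sled backend stores per job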
t.add_column("retry", types::integer().nullable(false).default(5));
});
m.make::<Pg>().to_string()
}

View file

@ -0,0 +1,50 @@
use barrel::backend::Pg;
use barrel::functions::AutogenFunction;
use barrel::{types, Migration};
pub(crate) fn migration() -> String {
let mut m = Migration::new();
m.create_table("keyed_notifications", |t| {
t.add_column(
"key",
types::text().primary(true).unique(true).nullable(false),
);
t.add_column(
"heartbeat",
types::datetime()
.nullable(false)
.default(AutogenFunction::CurrentTimestamp),
);
t.add_index(
"keyed_notifications_heartbeat_index",
types::index(["heartbeat"]),
);
});
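// notify listeners on keyed_notification_channel whenever a row is deleted,
// so tasks waiting on a given key wake up promptly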
m.inject_custom(
r#"
CREATE OR REPLACE FUNCTION keyed_notify()
RETURNS trigger AS
$$
BEGIN
PERFORM pg_notify('keyed_notification_channel', OLD.key);
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
"#
.trim(),
);
m.inject_custom(
r#"
CREATE TRIGGER keyed_notification_removed
AFTER DELETE
ON keyed_notifications
FOR EACH ROW
EXECUTE PROCEDURE keyed_notify();
"#,
);
m.make::<Pg>().to_string()
}

View file

@ -44,6 +44,14 @@ diesel::table! {
queue_time -> Timestamp,
heartbeat -> Nullable<Timestamp>,
unique_key -> Nullable<Text>,
retry -> Int4,
}
}
diesel::table! {
keyed_notifications (key) {
key -> Text,
heartbeat -> Timestamp,
}
}
@ -108,6 +116,7 @@ diesel::allow_tables_to_appear_in_same_query!(
details,
hashes,
job_queue,
keyed_notifications,
proxies,
refinery_schema_history,
settings,

View file

@ -1,9 +1,11 @@
use crate::{
details::HumanDate,
error_code::{ErrorCode, OwnedErrorCode},
future::{WithPollTimer, WithTimeout},
serde_str::Serde,
stream::{from_iterator, LocalBoxStream},
};
use dashmap::DashMap;
use sled::{transaction::TransactionError, Db, IVec, Transactional, Tree};
use std::{
collections::HashMap,
@ -12,6 +14,7 @@ use std::{
atomic::{AtomicU64, Ordering},
Arc, RwLock,
},
time::Duration,
};
use tokio::sync::Notify;
use url::Url;
@ -20,10 +23,11 @@ use uuid::Uuid;
use super::{
hash::Hash,
metrics::{PopMetricsGuard, PushMetricsGuard, WaitMetricsGuard},
notification_map::{NotificationEntry, NotificationMap},
Alias, AliasAccessRepo, AliasAlreadyExists, AliasRepo, BaseRepo, DeleteToken, Details,
DetailsRepo, FullRepo, HashAlreadyExists, HashPage, HashRepo, JobId, OrderedHash, ProxyRepo,
QueueRepo, RepoError, SettingsRepo, StoreMigrationRepo, UploadId, UploadRepo, UploadResult,
VariantAccessRepo, VariantAlreadyExists,
DetailsRepo, FullRepo, HashAlreadyExists, HashPage, HashRepo, JobId, JobResult, OrderedHash,
ProxyRepo, QueueRepo, RepoError, SettingsRepo, StoreMigrationRepo, UploadId, UploadRepo,
UploadResult, VariantAccessRepo, VariantAlreadyExists, VariantRepo,
};
macro_rules! b {
@ -99,6 +103,7 @@ pub(crate) struct SledRepo {
unique_jobs: Tree,
unique_jobs_inverse: Tree,
job_state: Tree,
job_retries: Tree,
alias_access: Tree,
inverse_alias_access: Tree,
variant_access: Tree,
@ -110,6 +115,8 @@ pub(crate) struct SledRepo {
migration_identifiers: Tree,
cache_capacity: u64,
export_path: PathBuf,
variant_process_map: DashMap<(Hash, String), time::OffsetDateTime>,
notifications: NotificationMap,
db: Db,
}
@ -141,6 +148,7 @@ impl SledRepo {
unique_jobs: db.open_tree("pict-rs-unique-jobs-tree")?,
unique_jobs_inverse: db.open_tree("pict-rs-unique-jobs-inverse-tree")?,
job_state: db.open_tree("pict-rs-job-state-tree")?,
job_retries: db.open_tree("pict-rs-job-retries-tree")?,
alias_access: db.open_tree("pict-rs-alias-access-tree")?,
inverse_alias_access: db.open_tree("pict-rs-inverse-alias-access-tree")?,
variant_access: db.open_tree("pict-rs-variant-access-tree")?,
@ -152,6 +160,8 @@ impl SledRepo {
migration_identifiers: db.open_tree("pict-rs-migration-identifiers-tree")?,
cache_capacity,
export_path,
variant_process_map: DashMap::new(),
notifications: NotificationMap::new(),
db,
})
}
@ -653,28 +663,37 @@ impl QueueRepo for SledRepo {
let unique_jobs = self.unique_jobs.clone();
let unique_jobs_inverse = self.unique_jobs_inverse.clone();
let job_state = self.job_state.clone();
let job_retries = self.job_retries.clone();
let res = crate::sync::spawn_blocking("sled-io", move || {
(&queue, &unique_jobs, &unique_jobs_inverse, &job_state).transaction(
|(queue, unique_jobs, unique_jobs_inverse, job_state)| {
let state = JobState::pending();
queue.insert(&key[..], &job[..])?;
if let Some(unique_key) = unique_key {
if unique_jobs
.insert(unique_key.as_bytes(), &key[..])?
.is_some()
{
return sled::transaction::abort(());
}
unique_jobs_inverse.insert(&key[..], unique_key.as_bytes())?;
}
job_state.insert(&key[..], state.as_bytes())?;
Ok(())
},
(
&queue,
&unique_jobs,
&unique_jobs_inverse,
&job_state,
&job_retries,
)
.transaction(
|(queue, unique_jobs, unique_jobs_inverse, job_state, job_retries)| {
let state = JobState::pending();
queue.insert(&key[..], &job[..])?;
if let Some(unique_key) = unique_key {
if unique_jobs
.insert(unique_key.as_bytes(), &key[..])?
.is_some()
{
return sled::transaction::abort(());
}
unique_jobs_inverse.insert(&key[..], unique_key.as_bytes())?;
}
job_state.insert(&key[..], state.as_bytes())?;
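// seed the retry counter with 5 attempts, mirroring the postgres default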
job_retries.insert(&key[..], &(5_u64.to_be_bytes())[..])?;
Ok(())
},
)
})
.await
.map_err(|_| RepoError::Canceled)?;
@ -703,7 +722,7 @@ impl QueueRepo for SledRepo {
Ok(Some(id))
}
#[tracing::instrument(skip(self, worker_id), fields(job_id))]
#[tracing::instrument(skip_all, fields(job_id))]
async fn pop(
&self,
queue_name: &'static str,
@ -719,7 +738,6 @@ impl QueueRepo for SledRepo {
let queue = self.queue.clone();
let job_state = self.job_state.clone();
let span = tracing::Span::current();
let opt = crate::sync::spawn_blocking("sled-io", move || {
// Job IDs are generated with Uuid version 7 - defining their first bits as a
// timestamp. Scanning a prefix should give us jobs in the order they were queued.
@ -760,8 +778,6 @@ impl QueueRepo for SledRepo {
let job_id = JobId::from_bytes(id_bytes);
span.record("job_id", &format!("{job_id:?}"));
let opt = queue
.get(&key)?
.map(|ivec| serde_json::from_slice(&ivec[..]))
@ -774,12 +790,16 @@ impl QueueRepo for SledRepo {
Ok(None)
})
.with_poll_timer("sled-pop-spawn-blocking")
.await
.map_err(|_| RepoError::Canceled)??;
if let Some(tup) = opt {
if let Some((job_id, job_json)) = opt {
tracing::Span::current().record("job_id", &format!("{}", job_id.0));
metrics_guard.disarm();
return Ok(tup);
tracing::debug!("{job_json}");
return Ok((job_id, job_json));
}
let opt = self
@ -787,7 +807,7 @@ impl QueueRepo for SledRepo {
.read()
.unwrap()
.get(&queue_name)
.map(Arc::clone);
.cloned();
let notify = if let Some(notify) = opt {
notify
@ -797,7 +817,15 @@ impl QueueRepo for SledRepo {
Arc::clone(entry)
};
notify.notified().await
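// wait for a push notification, waking at least every 30 seconds in case a
// notification was missed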
match notify
.notified()
.with_timeout(Duration::from_secs(30))
.with_poll_timer("sled-pop-notify")
.await
{
Ok(()) => tracing::debug!("Notified"),
Err(_) => tracing::trace!("Timed out"),
}
}
}
@ -836,31 +864,66 @@ impl QueueRepo for SledRepo {
queue_name: &'static str,
_worker_id: Uuid,
job_id: JobId,
job_status: JobResult,
) -> Result<(), RepoError> {
let retry = matches!(job_status, JobResult::Failure);
let key = job_key(queue_name, job_id);
let queue = self.queue.clone();
let unique_jobs = self.unique_jobs.clone();
let unique_jobs_inverse = self.unique_jobs_inverse.clone();
let job_state = self.job_state.clone();
let job_retries = self.job_retries.clone();
let res = crate::sync::spawn_blocking("sled-io", move || {
(&queue, &unique_jobs, &unique_jobs_inverse, &job_state).transaction(
|(queue, unique_jobs, unique_jobs_inverse, job_state)| {
queue.remove(&key[..])?;
if let Some(unique_key) = unique_jobs_inverse.remove(&key[..])? {
unique_jobs.remove(unique_key)?;
}
job_state.remove(&key[..])?;
Ok(())
},
(
&queue,
&unique_jobs,
&unique_jobs_inverse,
&job_state,
&job_retries,
)
.transaction(
|(queue, unique_jobs, unique_jobs_inverse, job_state, job_retries)| {
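// decode the stored big-endian retry counter (defaulting to 5 if missing)
// and decrement it for this attempt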
let retries = job_retries.get(&key[..])?;
let retry_count = retries
.and_then(|ivec| ivec[0..8].try_into().ok())
.map(u64::from_be_bytes)
.unwrap_or(5_u64)
.saturating_sub(1);
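// a failed job with retries remaining stays queued; otherwise remove the
// job and all of its bookkeeping entries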
if retry_count > 0 && retry {
job_retries.insert(&key[..], &(retry_count.to_be_bytes())[..])?;
} else {
queue.remove(&key[..])?;
if let Some(unique_key) = unique_jobs_inverse.remove(&key[..])? {
unique_jobs.remove(unique_key)?;
}
job_state.remove(&key[..])?;
job_retries.remove(&key[..])?;
}
Ok(retry_count > 0 && retry)
},
)
})
.await
.map_err(|_| RepoError::Canceled)?;
if let Err(TransactionError::Abort(e) | TransactionError::Storage(e)) = res {
return Err(RepoError::from(SledError::from(e)));
match res {
Err(TransactionError::Abort(e) | TransactionError::Storage(e)) => {
return Err(RepoError::from(SledError::from(e)));
}
Ok(retried) => match job_status {
JobResult::Success => tracing::debug!("completed {job_id:?}"),
JobResult::Failure if retried => {
tracing::info!("{job_id:?} failed, marked for retry")
}
JobResult::Failure => tracing::warn!("{job_id:?} failed permanently"),
JobResult::Aborted => tracing::warn!("{job_id:?} dead"),
},
}
Ok(())
@ -882,13 +945,6 @@ impl SettingsRepo for SledRepo {
Ok(opt.map(|ivec| Arc::from(ivec.to_vec())))
}
#[tracing::instrument(level = "trace", skip(self))]
async fn remove(&self, key: &'static str) -> Result<(), RepoError> {
b!(self.settings, settings.remove(key));
Ok(())
}
}
fn variant_access_key(hash: &[u8], variant: &str) -> Vec<u8> {
@ -1274,88 +1330,6 @@ impl HashRepo for SledRepo {
Ok(opt.map(try_into_arc_str).transpose()?)
}
#[tracing::instrument(level = "trace", skip(self))]
async fn relate_variant_identifier(
&self,
hash: Hash,
variant: String,
identifier: &Arc<str>,
) -> Result<Result<(), VariantAlreadyExists>, RepoError> {
let hash = hash.to_bytes();
let key = variant_key(&hash, &variant);
let value = identifier.clone();
let hash_variant_identifiers = self.hash_variant_identifiers.clone();
crate::sync::spawn_blocking("sled-io", move || {
hash_variant_identifiers
.compare_and_swap(key, Option::<&[u8]>::None, Some(value.as_bytes()))
.map(|res| res.map_err(|_| VariantAlreadyExists))
})
.await
.map_err(|_| RepoError::Canceled)?
.map_err(SledError::from)
.map_err(RepoError::from)
}
#[tracing::instrument(level = "trace", skip(self))]
async fn variant_identifier(
&self,
hash: Hash,
variant: String,
) -> Result<Option<Arc<str>>, RepoError> {
let hash = hash.to_bytes();
let key = variant_key(&hash, &variant);
let opt = b!(
self.hash_variant_identifiers,
hash_variant_identifiers.get(key)
);
Ok(opt.map(try_into_arc_str).transpose()?)
}
#[tracing::instrument(level = "debug", skip(self))]
async fn variants(&self, hash: Hash) -> Result<Vec<(String, Arc<str>)>, RepoError> {
let hash = hash.to_ivec();
let vec = b!(
self.hash_variant_identifiers,
Ok(hash_variant_identifiers
.scan_prefix(hash.clone())
.filter_map(|res| res.ok())
.filter_map(|(key, ivec)| {
let identifier = try_into_arc_str(ivec).ok();
let variant = variant_from_key(&hash, &key);
if variant.is_none() {
tracing::warn!("Skipping a variant: {}", String::from_utf8_lossy(&key));
}
Some((variant?, identifier?))
})
.collect::<Vec<_>>()) as Result<Vec<_>, SledError>
);
Ok(vec)
}
#[tracing::instrument(level = "trace", skip(self))]
async fn remove_variant(&self, hash: Hash, variant: String) -> Result<(), RepoError> {
let hash = hash.to_bytes();
let key = variant_key(&hash, &variant);
b!(
self.hash_variant_identifiers,
hash_variant_identifiers.remove(key)
);
Ok(())
}
#[tracing::instrument(level = "trace", skip(self))]
async fn relate_blurhash(&self, hash: Hash, blurhash: Arc<str>) -> Result<(), RepoError> {
b!(
@ -1471,6 +1445,167 @@ impl HashRepo for SledRepo {
}
}
#[async_trait::async_trait(?Send)]
impl VariantRepo for SledRepo {
#[tracing::instrument(level = "trace", skip(self))]
async fn claim_variant_processing_rights(
&self,
hash: Hash,
variant: String,
) -> Result<Result<(), NotificationEntry>, RepoError> {
let key = (hash.clone(), variant.clone());
let now = time::OffsetDateTime::now_utc();
let entry = self
.notifications
.register_interest(Arc::from(format!("{}{variant}", hash.to_base64())));
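// a previous claim is honored only while its heartbeat is under two minutes
// old; stale claims are taken over by the current caller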
match self.variant_process_map.entry(key.clone()) {
dashmap::mapref::entry::Entry::Occupied(mut occupied_entry) => {
if occupied_entry
.get()
.saturating_add(time::Duration::minutes(2))
> now
{
return Ok(Err(entry));
}
occupied_entry.insert(now);
}
dashmap::mapref::entry::Entry::Vacant(vacant_entry) => {
vacant_entry.insert(now);
}
}
if self.variant_identifier(hash, variant).await?.is_some() {
self.variant_process_map.remove(&key);
return Ok(Err(entry));
}
Ok(Ok(()))
}
async fn variant_waiter(
&self,
hash: Hash,
variant: String,
) -> Result<NotificationEntry, RepoError> {
let entry = self
.notifications
.register_interest(Arc::from(format!("{}{variant}", hash.to_base64())));
Ok(entry)
}
#[tracing::instrument(level = "trace", skip(self))]
async fn variant_heartbeat(&self, hash: Hash, variant: String) -> Result<(), RepoError> {
let key = (hash, variant);
let now = time::OffsetDateTime::now_utc();
if let dashmap::mapref::entry::Entry::Occupied(mut occupied_entry) =
self.variant_process_map.entry(key)
{
occupied_entry.insert(now);
}
Ok(())
}
#[tracing::instrument(level = "trace", skip(self))]
async fn notify_variant(&self, hash: Hash, variant: String) -> Result<(), RepoError> {
let key = (hash.clone(), variant.clone());
self.variant_process_map.remove(&key);
let key = format!("{}{variant}", hash.to_base64());
self.notifications.notify(&key);
Ok(())
}
#[tracing::instrument(level = "trace", skip(self))]
async fn relate_variant_identifier(
&self,
hash: Hash,
variant: String,
identifier: &Arc<str>,
) -> Result<Result<(), VariantAlreadyExists>, RepoError> {
let hash = hash.to_bytes();
let key = variant_key(&hash, &variant);
let value = identifier.clone();
let hash_variant_identifiers = self.hash_variant_identifiers.clone();
let out = crate::sync::spawn_blocking("sled-io", move || {
hash_variant_identifiers
.compare_and_swap(key, Option::<&[u8]>::None, Some(value.as_bytes()))
.map(|res| res.map_err(|_| VariantAlreadyExists))
})
.await
.map_err(|_| RepoError::Canceled)?
.map_err(SledError::from)
.map_err(RepoError::from)?;
Ok(out)
}
#[tracing::instrument(level = "trace", skip(self))]
async fn variant_identifier(
&self,
hash: Hash,
variant: String,
) -> Result<Option<Arc<str>>, RepoError> {
let hash = hash.to_bytes();
let key = variant_key(&hash, &variant);
let opt = b!(
self.hash_variant_identifiers,
hash_variant_identifiers.get(key)
);
Ok(opt.map(try_into_arc_str).transpose()?)
}
#[tracing::instrument(level = "debug", skip(self))]
async fn variants(&self, hash: Hash) -> Result<Vec<(String, Arc<str>)>, RepoError> {
let hash = hash.to_ivec();
let vec = b!(
self.hash_variant_identifiers,
Ok(hash_variant_identifiers
.scan_prefix(hash.clone())
.filter_map(|res| res.ok())
.filter_map(|(key, ivec)| {
let identifier = try_into_arc_str(ivec).ok();
let variant = variant_from_key(&hash, &key);
if variant.is_none() {
tracing::warn!("Skipping a variant: {}", String::from_utf8_lossy(&key));
}
Some((variant?, identifier?))
})
.collect::<Vec<_>>()) as Result<Vec<_>, SledError>
);
Ok(vec)
}
#[tracing::instrument(level = "trace", skip(self))]
async fn remove_variant(&self, hash: Hash, variant: String) -> Result<(), RepoError> {
let hash = hash.to_bytes();
let key = variant_key(&hash, &variant);
b!(
self.hash_variant_identifiers,
hash_variant_identifiers.remove(key)
);
Ok(())
}
}
fn hash_alias_key(hash: &IVec, alias: &IVec) -> Vec<u8> {
let mut v = hash.to_vec();
v.extend_from_slice(alias);

View file

@ -3,7 +3,7 @@ use std::{
str::FromStr,
};
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub(crate) struct Serde<T> {
inner: T,
}
@ -44,6 +44,17 @@ impl<T> DerefMut for Serde<T> {
}
}
impl<T> Default for Serde<T>
where
T: Default,
{
fn default() -> Self {
Serde {
inner: T::default(),
}
}
}
impl<T> FromStr for Serde<T>
where
T: FromStr,

View file

@ -1,7 +1,6 @@
use actix_web::web::Bytes;
use futures_core::Stream;
use std::{fmt::Debug, sync::Arc};
use tokio::io::{AsyncRead, AsyncWrite};
use crate::{bytes_stream::BytesStream, error_code::ErrorCode, stream::LocalBoxStream};
@ -73,7 +72,7 @@ impl From<crate::store::object_store::ObjectError> for StoreError {
fn from(value: crate::store::object_store::ObjectError) -> Self {
match value {
e @ crate::store::object_store::ObjectError::Status(
actix_web::http::StatusCode::NOT_FOUND,
reqwest::StatusCode::NOT_FOUND,
_,
_,
) => Self::ObjectNotFound(e),
@ -85,15 +84,6 @@ impl From<crate::store::object_store::ObjectError> for StoreError {
pub(crate) trait Store: Clone + Debug {
async fn health_check(&self) -> Result<(), StoreError>;
async fn save_async_read<Reader>(
&self,
reader: Reader,
content_type: mime::Mime,
extension: Option<&str>,
) -> Result<Arc<str>, StoreError>
where
Reader: AsyncRead;
async fn save_stream<S>(
&self,
stream: S,
@ -125,14 +115,6 @@ pub(crate) trait Store: Clone + Debug {
.map_err(StoreError::ReadStream)
}
async fn read_into<Writer>(
&self,
identifier: &Arc<str>,
writer: &mut Writer,
) -> Result<(), std::io::Error>
where
Writer: AsyncWrite + Unpin;
async fn len(&self, identifier: &Arc<str>) -> Result<u64, StoreError>;
async fn remove(&self, identifier: &Arc<str>) -> Result<(), StoreError>;
@ -146,18 +128,6 @@ where
T::health_check(self).await
}
async fn save_async_read<Reader>(
&self,
reader: Reader,
content_type: mime::Mime,
extension: Option<&str>,
) -> Result<Arc<str>, StoreError>
where
Reader: AsyncRead,
{
T::save_async_read(self, reader, content_type, extension).await
}
async fn save_stream<S>(
&self,
stream: S,
@ -183,17 +153,6 @@ where
T::to_stream(self, identifier, from_start, len).await
}
async fn read_into<Writer>(
&self,
identifier: &Arc<str>,
writer: &mut Writer,
) -> Result<(), std::io::Error>
where
Writer: AsyncWrite + Unpin,
{
T::read_into(self, identifier, writer).await
}
async fn len(&self, identifier: &Arc<str>) -> Result<u64, StoreError> {
T::len(self, identifier).await
}
@ -211,18 +170,6 @@ where
T::health_check(self).await
}
async fn save_async_read<Reader>(
&self,
reader: Reader,
content_type: mime::Mime,
extension: Option<&str>,
) -> Result<Arc<str>, StoreError>
where
Reader: AsyncRead,
{
T::save_async_read(self, reader, content_type, extension).await
}
async fn save_stream<S>(
&self,
stream: S,
@ -248,17 +195,6 @@ where
T::to_stream(self, identifier, from_start, len).await
}
async fn read_into<Writer>(
&self,
identifier: &Arc<str>,
writer: &mut Writer,
) -> Result<(), std::io::Error>
where
Writer: AsyncWrite + Unpin,
{
T::read_into(self, identifier, writer).await
}
async fn len(&self, identifier: &Arc<str>) -> Result<u64, StoreError> {
T::len(self, identifier).await
}
@ -276,18 +212,6 @@ where
T::health_check(self).await
}
async fn save_async_read<Reader>(
&self,
reader: Reader,
content_type: mime::Mime,
extension: Option<&str>,
) -> Result<Arc<str>, StoreError>
where
Reader: AsyncRead,
{
T::save_async_read(self, reader, content_type, extension).await
}
async fn save_stream<S>(
&self,
stream: S,
@ -313,17 +237,6 @@ where
T::to_stream(self, identifier, from_start, len).await
}
async fn read_into<Writer>(
&self,
identifier: &Arc<str>,
writer: &mut Writer,
) -> Result<(), std::io::Error>
where
Writer: AsyncWrite + Unpin,
{
T::read_into(self, identifier, writer).await
}
async fn len(&self, identifier: &Arc<str>) -> Result<u64, StoreError> {
T::len(self, identifier).await
}

View file

@ -1,12 +1,12 @@
use crate::{error_code::ErrorCode, file::File, store::Store, stream::LocalBoxStream};
use crate::{
error_code::ErrorCode, file::File, future::WithPollTimer, store::Store, stream::LocalBoxStream,
};
use actix_web::web::Bytes;
use futures_core::Stream;
use std::{
path::{Path, PathBuf},
sync::Arc,
};
use tokio::io::{AsyncRead, AsyncWrite};
use tokio_util::io::StreamReader;
use tracing::Instrument;
use super::StoreError;
@ -51,39 +51,26 @@ impl Store for FileStore {
Ok(())
}
#[tracing::instrument(skip(self, reader))]
async fn save_async_read<Reader>(
&self,
reader: Reader,
_content_type: mime::Mime,
extension: Option<&str>,
) -> Result<Arc<str>, StoreError>
where
Reader: AsyncRead,
{
let mut reader = std::pin::pin!(reader);
let path = self.next_file(extension);
if let Err(e) = self.safe_save_reader(&path, &mut reader).await {
self.safe_remove_file(&path).await?;
return Err(e.into());
}
Ok(self.file_id_from_path(path)?)
}
async fn save_stream<S>(
&self,
stream: S,
content_type: mime::Mime,
_content_type: mime::Mime,
extension: Option<&str>,
) -> Result<Arc<str>, StoreError>
where
S: Stream<Item = std::io::Result<Bytes>>,
{
self.save_async_read(StreamReader::new(stream), content_type, extension)
let path = self.next_file(extension);
if let Err(e) = self
.safe_save_stream(&path, crate::stream::error_injector(stream))
.await
{
self.safe_remove_file(&path).await?;
return Err(e.into());
}
Ok(self.file_id_from_path(path)?)
}
fn public_url(&self, _identifier: &Arc<str>) -> Option<url::Url> {
@ -111,23 +98,7 @@ impl Store for FileStore {
.instrument(file_span)
.await?;
Ok(Box::pin(stream))
}
#[tracing::instrument(skip(self, writer))]
async fn read_into<Writer>(
&self,
identifier: &Arc<str>,
writer: &mut Writer,
) -> Result<(), std::io::Error>
where
Writer: AsyncWrite + Unpin,
{
let path = self.path_from_file_id(identifier);
File::open(&path).await?.read_to_async_write(writer).await?;
Ok(())
Ok(Box::pin(crate::stream::error_injector(stream)))
}
#[tracing::instrument(skip(self))]
@ -198,10 +169,10 @@ impl FileStore {
}
}
async fn safe_save_reader<P: AsRef<Path>>(
async fn safe_save_stream<P: AsRef<Path>>(
&self,
to: P,
input: &mut (impl AsyncRead + Unpin + ?Sized),
input: impl Stream<Item = std::io::Result<Bytes>>,
) -> Result<(), FileError> {
safe_create_parent(&to).await?;
@ -215,7 +186,11 @@ impl FileStore {
let mut file = File::create(to).await?;
file.write_from_async_read(input).await?;
file.write_from_stream(input)
.with_poll_timer("write-from-stream")
.await?;
file.close().await?;
Ok(())
}

View file

@ -4,16 +4,16 @@ use crate::{
};
use actix_web::{
error::BlockingError,
http::{
header::{ByteRangeSpec, Range, CONTENT_LENGTH},
StatusCode,
},
http::header::{ByteRangeSpec, Range},
rt::task::JoinError,
web::Bytes,
};
use base64::{prelude::BASE64_STANDARD, Engine};
use futures_core::Stream;
use reqwest::{header::RANGE, Body, Response};
use reqwest::{
header::{CONTENT_LENGTH, RANGE},
Body, Response, StatusCode,
};
use reqwest_middleware::{ClientWithMiddleware, RequestBuilder};
use rusty_s3::{
actions::{CreateMultipartUpload, S3Action},
@ -21,8 +21,6 @@ use rusty_s3::{
};
use std::{string::FromUtf8Error, sync::Arc, time::Duration};
use streem::IntoStreamer;
use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt};
use tokio_util::io::ReaderStream;
use tracing::Instrument;
use url::Url;
@ -207,23 +205,6 @@ impl Store for ObjectStore {
Ok(())
}
async fn save_async_read<Reader>(
&self,
reader: Reader,
content_type: mime::Mime,
extension: Option<&str>,
) -> Result<Arc<str>, StoreError>
where
Reader: AsyncRead,
{
self.save_stream(
ReaderStream::with_capacity(reader, 1024 * 64),
content_type,
extension,
)
.await
}
#[tracing::instrument(skip_all)]
async fn save_stream<S>(
&self,
@ -235,7 +216,11 @@ impl Store for ObjectStore {
S: Stream<Item = std::io::Result<Bytes>>,
{
match self
.start_upload(stream, content_type.clone(), extension)
.start_upload(
crate::stream::error_injector(stream),
content_type.clone(),
extension,
)
.await?
{
UploadState::Single(first_chunk) => {
@ -244,7 +229,7 @@ impl Store for ObjectStore {
.await?;
let response = req
.body(Body::wrap_stream(first_chunk))
.body(Body::wrap_stream(first_chunk.into_io_stream()))
.send()
.with_metrics(crate::init_metrics::OBJECT_STORAGE_PUT_OBJECT_REQUEST)
.await
@ -325,52 +310,14 @@ impl Store for ObjectStore {
return Err(status_error(response, Some(identifier.clone())).await);
}
Ok(Box::pin(crate::stream::metrics(
crate::init_metrics::OBJECT_STORAGE_GET_OBJECT_REQUEST_STREAM,
crate::stream::map_err(response.bytes_stream(), payload_to_io_error),
Ok(Box::pin(crate::stream::error_injector(
crate::stream::metrics(
crate::init_metrics::OBJECT_STORAGE_GET_OBJECT_REQUEST_STREAM,
crate::stream::map_err(response.bytes_stream(), payload_to_io_error),
),
)))
}
#[tracing::instrument(skip(self, writer))]
async fn read_into<Writer>(
&self,
identifier: &Arc<str>,
writer: &mut Writer,
) -> Result<(), std::io::Error>
where
Writer: AsyncWrite + Unpin,
{
let response = self
.get_object_request(identifier, None, None)
.send()
.with_metrics(crate::init_metrics::OBJECT_STORAGE_GET_OBJECT_REQUEST)
.await
.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, ObjectError::from(e)))?;
if !response.status().is_success() {
return Err(std::io::Error::new(
std::io::ErrorKind::Other,
status_error(response, Some(identifier.clone())).await,
));
}
let stream = std::pin::pin!(crate::stream::metrics(
crate::init_metrics::OBJECT_STORAGE_GET_OBJECT_REQUEST_STREAM,
response.bytes_stream()
));
let mut stream = stream.into_streamer();
while let Some(res) = stream.next().await {
tracing::trace!("read_into: looping");
let mut bytes = res.map_err(payload_to_io_error)?;
writer.write_all_buf(&mut bytes).await?;
}
writer.flush().await?;
Ok(())
}
#[tracing::instrument(skip(self))]
async fn len(&self, identifier: &Arc<str>) -> Result<u64, StoreError> {
let response = self
@ -524,7 +471,7 @@ impl ObjectStore {
&upload_id2,
)
.await?
.body(Body::wrap_stream(buf))
.body(Body::wrap_stream(buf.into_io_stream()))
.send()
.with_metrics(
crate::init_metrics::OBJECT_STORAGE_CREATE_UPLOAD_PART_REQUEST,

View file

@ -5,6 +5,38 @@ use streem::IntoStreamer;
use crate::future::WithMetrics;
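// with the random-errors feature disabled, error_injector is a no-op passthrough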
#[cfg(not(feature = "random-errors"))]
pub(crate) fn error_injector(
stream: impl Stream<Item = std::io::Result<Bytes>>,
) -> impl Stream<Item = std::io::Result<Bytes>> {
stream
}
#[cfg(feature = "random-errors")]
pub(crate) fn error_injector(
stream: impl Stream<Item = std::io::Result<Bytes>>,
) -> impl Stream<Item = std::io::Result<Bytes>> {
streem::try_from_fn(|yielder| async move {
let stream = std::pin::pin!(stream);
let mut streamer = stream.into_streamer();
while let Some(item) = streamer.try_next().await? {
yielder.yield_ok(item).await;
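// roughly 0.1% of yielded chunks are followed by an injected io::Error,
// exercising downstream error-handling paths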
use nanorand::Rng;
if nanorand::tls_rng().generate_range(0..1000) < 1 {
return Err(std::io::Error::new(
std::io::ErrorKind::Other,
crate::error::UploadError::RandomError,
));
}
}
Ok(())
})
}
pub(crate) fn take<S>(stream: S, amount: usize) -> impl Stream<Item = S::Item>
where
S: Stream,
@ -59,7 +91,7 @@ where
S: Stream + 'static,
S::Item: Send + Sync,
{
let (tx, rx) = crate::sync::channel(1);
let (tx, mut rx) = crate::sync::channel(1);
let handle = crate::sync::abort_on_drop(crate::sync::spawn("send-stream", async move {
let stream = std::pin::pin!(stream);
@ -68,16 +100,14 @@ where
while let Some(res) = streamer.next().await {
tracing::trace!("make send tx: looping");
if tx.send_async(res).await.is_err() {
if tx.send(res).await.is_err() {
break;
}
}
}));
streem::from_fn(|yielder| async move {
let mut stream = rx.into_stream().into_streamer();
while let Some(res) = stream.next().await {
while let Some(res) = rx.recv().await {
tracing::trace!("make send rx: looping");
yielder.yield_(res).await;
@ -92,35 +122,23 @@ where
I: IntoIterator + Send + 'static,
I::Item: Send + Sync,
{
let (tx, rx) = crate::sync::channel(buffer);
let (tx, mut rx) = crate::sync::channel(buffer);
let handle = crate::sync::spawn_blocking("blocking-iterator", move || {
for value in iterator {
if tx.send(value).is_err() {
if tx.blocking_send(value).is_err() {
break;
}
}
});
streem::from_fn(|yielder| async move {
let mut stream = rx.into_stream().into_streamer();
let yield_count = buffer.max(8);
let mut count = 0;
while let Some(res) = stream.next().await {
while let Some(res) = rx.recv().await {
tracing::trace!("from_iterator: looping");
count += 1;
count %= yield_count;
yielder.yield_(res).await;
// every 8 (or buffer-size) items, yield to executor before looping
// improves cooperation
if count == 0 {
tokio::task::yield_now().await;
}
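// yield to the executor between items; cooperate() consumes tokio task budget
// when available and falls back to yield_now otherwise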
crate::sync::cooperate().await;
}
let _ = handle.await;

View file

@ -5,6 +5,8 @@ use tokio::{
task::JoinHandle,
};
use crate::future::WithPollTimer;
pub(crate) struct DropHandle<T> {
handle: JoinHandle<T>,
}
@ -37,11 +39,13 @@ impl<T> std::future::Future for DropHandle<T> {
}
#[track_caller]
pub(crate) fn channel<T>(bound: usize) -> (flume::Sender<T>, flume::Receiver<T>) {
pub(crate) fn channel<T>(
bound: usize,
) -> (tokio::sync::mpsc::Sender<T>, tokio::sync::mpsc::Receiver<T>) {
let span = tracing::trace_span!(parent: None, "make channel");
let guard = span.enter();
let channel = flume::bounded(bound);
let channel = tokio::sync::mpsc::channel(bound);
drop(guard);
channel
@ -74,14 +78,22 @@ pub(crate) fn bare_semaphore(permits: usize) -> Semaphore {
semaphore
}
// best effort cooperation mechanism
pub(crate) async fn cooperate() {
#[cfg(tokio_unstable)]
tokio::task::consume_budget().await;
#[cfg(not(tokio_unstable))]
tokio::task::yield_now().await;
}
#[track_caller]
pub(crate) fn spawn<F>(name: &str, future: F) -> tokio::task::JoinHandle<F::Output>
pub(crate) fn spawn<F>(name: &'static str, future: F) -> tokio::task::JoinHandle<F::Output>
where
F: std::future::Future + 'static,
F::Output: 'static,
{
#[cfg(not(tokio_unstable))]
let _ = name;
let future = future.with_poll_timer(name);
let span = tracing::trace_span!(parent: None, "spawn task");
let guard = span.enter();
@ -98,6 +110,29 @@ where
handle
}
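// variant of spawn for futures that are Send + 'static; the Send bound lets
// the runtime move the task between worker threads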
#[track_caller]
pub(crate) fn spawn_sendable<F>(name: &'static str, future: F) -> tokio::task::JoinHandle<F::Output>
where
F: std::future::Future + Send + 'static,
F::Output: Send + 'static,
{
let future = future.with_poll_timer(name);
let span = tracing::trace_span!(parent: None, "spawn task");
let guard = span.enter();
#[cfg(tokio_unstable)]
let handle = tokio::task::Builder::new()
.name(name)
.spawn(future)
.expect("Failed to spawn");
#[cfg(not(tokio_unstable))]
let handle = tokio::task::spawn(future);
drop(guard);
handle
}
#[track_caller]
pub(crate) fn spawn_blocking<F, Out>(name: &str, function: F) -> tokio::task::JoinHandle<Out>
where

View file

@ -1,6 +1,6 @@
use std::path::PathBuf;
use rustls::{crypto::ring::sign::any_supported_type, sign::CertifiedKey, Error};
use rustls::{crypto::aws_lc_rs::sign::any_supported_type, sign::CertifiedKey, Error};
pub(super) struct Tls {
certificate: PathBuf,

View file

@ -16,9 +16,17 @@ pub(crate) struct TmpDir {
}
impl TmpDir {
pub(crate) async fn init<P: AsRef<Path>>(path: P) -> std::io::Result<Arc<Self>> {
let path = path.as_ref().join(Uuid::now_v7().to_string());
tokio::fs::create_dir(&path).await?;
pub(crate) async fn init<P: AsRef<Path>>(path: P, cleanup: bool) -> std::io::Result<Arc<Self>> {
let base_path = path.as_ref().join("pict-rs");
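// when cleanup is requested, remove temporary files left over from a previous
// run before creating a fresh per-process directory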
if cleanup && tokio::fs::metadata(&base_path).await.is_ok() {
tokio::fs::remove_dir_all(&base_path).await?;
}
let path = base_path.join(Uuid::now_v7().to_string());
tokio::fs::create_dir_all(&path).await?;
Ok(Arc::new(TmpDir { path: Some(path) }))
}
@ -47,8 +55,13 @@ impl TmpDir {
}
pub(crate) async fn cleanup(self: Arc<Self>) -> std::io::Result<()> {
if let Some(path) = Arc::into_inner(self).and_then(|mut this| this.path.take()) {
tokio::fs::remove_dir_all(path).await?;
if let Some(mut path) = Arc::into_inner(self).and_then(|mut this| this.path.take()) {
tokio::fs::remove_dir_all(&path).await?;
if path.pop() {
// attempt to remove parent directory if it is empty
let _ = tokio::fs::remove_dir(path).await;
}
}
Ok(())
@ -57,9 +70,13 @@ impl TmpDir {
impl Drop for TmpDir {
fn drop(&mut self) {
if let Some(path) = self.path.take() {
if let Some(mut path) = self.path.take() {
tracing::warn!("TmpDir - Blocking remove of {path:?}");
std::fs::remove_dir_all(path).expect("Removed directory");
std::fs::remove_dir_all(&path).expect("Removed directory");
if path.pop() {
// attempt to remove parent directory if it is empty
let _ = std::fs::remove_dir(path);
}
}
}
}

View file

@ -1,11 +0,0 @@
use crate::{exiftool::ExifError, process::Process};
#[tracing::instrument(level = "trace", skip_all)]
pub(super) fn clear_metadata_command(timeout: u64) -> Result<Process, ExifError> {
Ok(Process::run(
"exiftool",
&["-all=", "-", "-out", "-"],
&[],
timeout,
)?)
}