mirror of
https://git.asonix.dog/asonix/pict-rs.git
synced 2024-11-28 04:21:12 +00:00
Implement retries for jobs, start warning on long polls
This commit is contained in:
parent
40bb58d603
commit
9fe586b9dd
16 changed files with 558 additions and 229 deletions
|
@ -108,6 +108,9 @@ pub(crate) enum UploadError {
|
|||
#[error("Error in request response")]
|
||||
Request(#[from] reqwest::Error),
|
||||
|
||||
#[error("Invalid job popped from job queue: {1}")]
|
||||
InvalidJob(#[source] serde_json::Error, String),
|
||||
|
||||
#[error("pict-rs is in read-only mode")]
|
||||
ReadOnly,
|
||||
|
||||
|
@ -201,6 +204,7 @@ impl UploadError {
|
|||
Self::Timeout(_) | Self::AggregateTimeout => ErrorCode::STREAM_TOO_SLOW,
|
||||
Self::ProcessTimeout => ErrorCode::COMMAND_TIMEOUT,
|
||||
Self::FailedExternalValidation => ErrorCode::FAILED_EXTERNAL_VALIDATION,
|
||||
Self::InvalidJob(_, _) => ErrorCode::INVALID_JOB,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -144,4 +144,7 @@ impl ErrorCode {
|
|||
pub(crate) const FAILED_EXTERNAL_VALIDATION: ErrorCode = ErrorCode {
|
||||
code: "failed-external-validation",
|
||||
};
|
||||
pub(crate) const INVALID_JOB: ErrorCode = ErrorCode {
|
||||
code: "invalid-job",
|
||||
};
|
||||
}
|
||||
|
|
|
@ -59,9 +59,19 @@ pub(crate) trait WithMetrics: Future {
|
|||
}
|
||||
}
|
||||
|
||||
pub(crate) trait WithPollTimer: Future {
|
||||
fn with_poll_timer(self, name: &'static str) -> PollTimer<Self>
|
||||
where
|
||||
Self: Sized,
|
||||
{
|
||||
PollTimer { name, inner: self }
|
||||
}
|
||||
}
|
||||
|
||||
impl<F> NowOrNever for F where F: Future {}
|
||||
impl<F> WithMetrics for F where F: Future {}
|
||||
impl<F> WithTimeout for F where F: Future {}
|
||||
impl<F> WithPollTimer for F where F: Future {}
|
||||
|
||||
pin_project_lite::pin_project! {
|
||||
pub(crate) struct MetricsFuture<F> {
|
||||
|
@ -104,3 +114,59 @@ impl Drop for Metrics {
|
|||
.record(self.start.elapsed().as_secs_f64());
|
||||
}
|
||||
}
|
||||
|
||||
pin_project_lite::pin_project! {
|
||||
pub(crate) struct PollTimer<F> {
|
||||
name: &'static str,
|
||||
|
||||
#[pin]
|
||||
inner: F,
|
||||
}
|
||||
}
|
||||
|
||||
impl<F> Future for PollTimer<F>
|
||||
where
|
||||
F: Future,
|
||||
{
|
||||
type Output = F::Output;
|
||||
|
||||
fn poll(
|
||||
self: std::pin::Pin<&mut Self>,
|
||||
cx: &mut std::task::Context<'_>,
|
||||
) -> std::task::Poll<Self::Output> {
|
||||
let start = Instant::now();
|
||||
|
||||
let this = self.project();
|
||||
|
||||
let out = this.inner.poll(cx);
|
||||
|
||||
let elapsed = start.elapsed();
|
||||
if elapsed > Duration::from_micros(10) {
|
||||
metrics::counter!(crate::init_metrics::FUTURE_POLL_TIMER_EXCEEDED, "timer" => this.name.to_string());
|
||||
}
|
||||
|
||||
if elapsed > Duration::from_secs(1) {
|
||||
tracing::warn!(
|
||||
"Future {} polled for {} seconds",
|
||||
this.name,
|
||||
elapsed.as_secs()
|
||||
);
|
||||
} else if elapsed > Duration::from_millis(1) {
|
||||
tracing::warn!("Future {} polled for {} ms", this.name, elapsed.as_millis());
|
||||
} else if elapsed > Duration::from_micros(200) {
|
||||
tracing::debug!(
|
||||
"Future {} polled for {} microseconds",
|
||||
this.name,
|
||||
elapsed.as_micros(),
|
||||
);
|
||||
} else if elapsed > Duration::from_micros(1) {
|
||||
tracing::trace!(
|
||||
"Future {} polled for {} microseconds",
|
||||
this.name,
|
||||
elapsed.as_micros()
|
||||
);
|
||||
}
|
||||
|
||||
out
|
||||
}
|
||||
}
|
||||
|
|
|
@ -6,7 +6,7 @@ use crate::{
|
|||
details::Details,
|
||||
error::{Error, UploadError},
|
||||
formats::{ImageFormat, InputProcessableFormat, InternalVideoFormat, ProcessableFormat},
|
||||
future::{WithMetrics, WithTimeout},
|
||||
future::{WithMetrics, WithPollTimer, WithTimeout},
|
||||
repo::{Hash, VariantAlreadyExists},
|
||||
state::State,
|
||||
store::Store,
|
||||
|
@ -48,7 +48,7 @@ impl Drop for MetricsGuard {
|
|||
}
|
||||
}
|
||||
|
||||
#[tracing::instrument(skip(state, process_map, hash))]
|
||||
#[tracing::instrument(skip(state, process_map, original_details, hash))]
|
||||
pub(crate) async fn generate<S: Store + 'static>(
|
||||
state: &State<S>,
|
||||
process_map: &ProcessMap,
|
||||
|
@ -78,6 +78,7 @@ pub(crate) async fn generate<S: Store + 'static>(
|
|||
|
||||
let (details, identifier) = process_map
|
||||
.process(hash, thumbnail_path, process_fut)
|
||||
.with_poll_timer("process-future")
|
||||
.with_timeout(Duration::from_secs(state.config.media.process_timeout * 4))
|
||||
.with_metrics(crate::init_metrics::GENERATE_PROCESS)
|
||||
.await
|
||||
|
|
|
@ -5,7 +5,7 @@ use crate::{
|
|||
details::Details,
|
||||
error::{Error, UploadError},
|
||||
formats::InternalFormat,
|
||||
future::WithMetrics,
|
||||
future::{WithMetrics, WithPollTimer},
|
||||
repo::{Alias, ArcRepo, DeleteToken, Hash},
|
||||
state::State,
|
||||
store::Store,
|
||||
|
@ -159,7 +159,9 @@ where
|
|||
let (input_type, identifier, details, hash_state) = if state.config.server.danger_dummy_mode {
|
||||
dummy_ingest(state, stream).await?
|
||||
} else {
|
||||
process_ingest(state, stream).await?
|
||||
process_ingest(state, stream)
|
||||
.with_poll_timer("ingest-future")
|
||||
.await?
|
||||
};
|
||||
|
||||
let mut session = Session {
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
pub(super) fn init_metrics() {
|
||||
describe_toplevel();
|
||||
describe_future();
|
||||
describe_queue_cleanup();
|
||||
describe_payload();
|
||||
describe_job();
|
||||
|
@ -26,6 +27,15 @@ fn describe_toplevel() {
|
|||
pub(crate) const FILES: &str = "pict-rs.files";
|
||||
pub(crate) const BACKGROUND_UPLOAD_CLAIM: &str = "pict-rs.background.upload.claim";
|
||||
|
||||
fn describe_future() {
|
||||
metrics::describe_counter!(
|
||||
FUTURE_POLL_TIMER_EXCEEDED,
|
||||
"How many times a given poll operation has lasted longer than 10 microseconds"
|
||||
);
|
||||
}
|
||||
|
||||
pub(crate) const FUTURE_POLL_TIMER_EXCEEDED: &str = "pict-rs.future.poll-timer.exceeded";
|
||||
|
||||
fn describe_queue_cleanup() {
|
||||
metrics::describe_counter!(
|
||||
CLEANUP_OUTDATED_PROXY,
|
||||
|
@ -344,6 +354,14 @@ fn describe_postgres() {
|
|||
POSTGRES_QUEUE_HEARTBEAT,
|
||||
"Timings for updating the provided job's keepalive heartbeat"
|
||||
);
|
||||
metrics::describe_histogram!(
|
||||
POSTGRES_QUEUE_RETRY,
|
||||
"Timings for updating retry count for a job"
|
||||
);
|
||||
metrics::describe_histogram!(
|
||||
POSTGRES_QUEUE_CLEANUP,
|
||||
"Timings for removing jobs with no more retries"
|
||||
);
|
||||
metrics::describe_histogram!(
|
||||
POSTGRES_QUEUE_COMPLETE,
|
||||
"Timings for removing a completed job from the queue"
|
||||
|
@ -471,6 +489,8 @@ pub(crate) const POSTGRES_QUEUE_LISTEN: &str = "pict-rs.postgres.queue.listen";
|
|||
pub(crate) const POSTGRES_QUEUE_REQUEUE: &str = "pict-rs.postgres.queue.requeue";
|
||||
pub(crate) const POSTGRES_QUEUE_CLAIM: &str = "pict-rs.postgres.queue.claim";
|
||||
pub(crate) const POSTGRES_QUEUE_HEARTBEAT: &str = "pict-rs.postgres.queue.heartbeat";
|
||||
pub(crate) const POSTGRES_QUEUE_RETRY: &str = "pict-rs.postgres.queue.retry";
|
||||
pub(crate) const POSTGRES_QUEUE_CLEANUP: &str = "pict-rs.postgres.queue.cleanup";
|
||||
pub(crate) const POSTGRES_QUEUE_COMPLETE: &str = "pict-rs.postgres.queue.complete";
|
||||
pub(crate) const POSTGRES_STORE_MIGRATION_COUNT: &str = "pict-rs.postgres.store-migration.count";
|
||||
pub(crate) const POSTGRES_STORE_MIGRATION_MARK_MIGRATED: &str =
|
||||
|
|
|
@ -44,7 +44,7 @@ use actix_web::{
|
|||
web, App, HttpRequest, HttpResponse, HttpResponseBuilder, HttpServer,
|
||||
};
|
||||
use details::{ApiDetails, HumanDate};
|
||||
use future::WithTimeout;
|
||||
use future::{WithPollTimer, WithTimeout};
|
||||
use futures_core::Stream;
|
||||
use magick::ArcPolicyDir;
|
||||
use metrics_exporter_prometheus::PrometheusBuilder;
|
||||
|
@ -186,6 +186,7 @@ impl<S: Store + 'static> FormData for Upload<S> {
|
|||
|
||||
ingest::ingest(&state, stream, None).await
|
||||
}
|
||||
.with_poll_timer("file-upload")
|
||||
.instrument(span),
|
||||
)
|
||||
})),
|
||||
|
@ -237,6 +238,7 @@ impl<S: Store + 'static> FormData for Import<S> {
|
|||
ingest::ingest(&state, stream, Some(Alias::from_existing(&filename)))
|
||||
.await
|
||||
}
|
||||
.with_poll_timer("file-import")
|
||||
.instrument(span),
|
||||
)
|
||||
})),
|
||||
|
@ -351,6 +353,7 @@ impl<S: Store + 'static> FormData for BackgroundedUpload<S> {
|
|||
|
||||
Backgrounded::proxy(&state, stream).await
|
||||
}
|
||||
.with_poll_timer("file-proxy")
|
||||
.instrument(span),
|
||||
)
|
||||
})),
|
||||
|
|
97
src/queue.rs
97
src/queue.rs
|
@ -10,6 +10,7 @@ use crate::{
|
|||
};
|
||||
|
||||
use std::{
|
||||
ops::Deref,
|
||||
path::PathBuf,
|
||||
sync::Arc,
|
||||
time::{Duration, Instant},
|
||||
|
@ -225,10 +226,86 @@ impl Drop for MetricsGuard {
|
|||
}
|
||||
}
|
||||
|
||||
pub(super) enum JobError {
|
||||
Abort(Error),
|
||||
Retry(Error),
|
||||
}
|
||||
|
||||
impl AsRef<Error> for JobError {
|
||||
fn as_ref(&self) -> &Error {
|
||||
match self {
|
||||
Self::Abort(e) | Self::Retry(e) => e,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Deref for JobError {
|
||||
type Target = Error;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
match self {
|
||||
Self::Abort(e) | Self::Retry(e) => e,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<JobError> for Error {
|
||||
fn from(value: JobError) -> Self {
|
||||
match value {
|
||||
JobError::Abort(e) | JobError::Retry(e) => e,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type JobResult<T = ()> = Result<T, JobError>;
|
||||
|
||||
type JobFuture<'a> = LocalBoxFuture<'a, JobResult>;
|
||||
|
||||
trait JobContext {
|
||||
type Item;
|
||||
|
||||
fn abort(self) -> JobResult<Self::Item>
|
||||
where
|
||||
Self: Sized;
|
||||
|
||||
fn retry(self) -> JobResult<Self::Item>
|
||||
where
|
||||
Self: Sized;
|
||||
}
|
||||
|
||||
impl<T, E> JobContext for Result<T, E>
|
||||
where
|
||||
E: Into<Error>,
|
||||
{
|
||||
type Item = T;
|
||||
|
||||
fn abort(self) -> JobResult<Self::Item>
|
||||
where
|
||||
Self: Sized,
|
||||
{
|
||||
self.map_err(Into::into).map_err(JobError::Abort)
|
||||
}
|
||||
|
||||
fn retry(self) -> JobResult<Self::Item>
|
||||
where
|
||||
Self: Sized,
|
||||
{
|
||||
self.map_err(Into::into).map_err(JobError::Retry)
|
||||
}
|
||||
}
|
||||
|
||||
fn job_result(result: &JobResult) -> crate::repo::JobResult {
|
||||
match result {
|
||||
Ok(()) => crate::repo::JobResult::Success,
|
||||
Err(JobError::Retry(_)) => crate::repo::JobResult::Failure,
|
||||
Err(JobError::Abort(_)) => crate::repo::JobResult::Aborted,
|
||||
}
|
||||
}
|
||||
|
||||
async fn process_jobs<S, F>(state: State<S>, queue: &'static str, callback: F)
|
||||
where
|
||||
S: Store,
|
||||
for<'a> F: Fn(&'a State<S>, serde_json::Value) -> LocalBoxFuture<'a, Result<(), Error>> + Copy,
|
||||
for<'a> F: Fn(&'a State<S>, serde_json::Value) -> JobFuture<'a> + Copy,
|
||||
{
|
||||
let worker_id = uuid::Uuid::new_v4();
|
||||
|
||||
|
@ -262,7 +339,7 @@ async fn job_loop<S, F>(
|
|||
) -> Result<(), Error>
|
||||
where
|
||||
S: Store,
|
||||
for<'a> F: Fn(&'a State<S>, serde_json::Value) -> LocalBoxFuture<'a, Result<(), Error>> + Copy,
|
||||
for<'a> F: Fn(&'a State<S>, serde_json::Value) -> JobFuture<'a> + Copy,
|
||||
{
|
||||
loop {
|
||||
tracing::trace!("job_loop: looping");
|
||||
|
@ -283,7 +360,10 @@ where
|
|||
)
|
||||
.await;
|
||||
|
||||
state.repo.complete_job(queue, worker_id, job_id).await?;
|
||||
state
|
||||
.repo
|
||||
.complete_job(queue, worker_id, job_id, job_result(&res))
|
||||
.await?;
|
||||
|
||||
res?;
|
||||
|
||||
|
@ -303,8 +383,7 @@ async fn process_image_jobs<S, F>(
|
|||
callback: F,
|
||||
) where
|
||||
S: Store,
|
||||
for<'a> F: Fn(&'a State<S>, &'a ProcessMap, serde_json::Value) -> LocalBoxFuture<'a, Result<(), Error>>
|
||||
+ Copy,
|
||||
for<'a> F: Fn(&'a State<S>, &'a ProcessMap, serde_json::Value) -> JobFuture<'a> + Copy,
|
||||
{
|
||||
let worker_id = uuid::Uuid::new_v4();
|
||||
|
||||
|
@ -339,8 +418,7 @@ async fn image_job_loop<S, F>(
|
|||
) -> Result<(), Error>
|
||||
where
|
||||
S: Store,
|
||||
for<'a> F: Fn(&'a State<S>, &'a ProcessMap, serde_json::Value) -> LocalBoxFuture<'a, Result<(), Error>>
|
||||
+ Copy,
|
||||
for<'a> F: Fn(&'a State<S>, &'a ProcessMap, serde_json::Value) -> JobFuture<'a> + Copy,
|
||||
{
|
||||
loop {
|
||||
tracing::trace!("image_job_loop: looping");
|
||||
|
@ -361,7 +439,10 @@ where
|
|||
)
|
||||
.await;
|
||||
|
||||
state.repo.complete_job(queue, worker_id, job_id).await?;
|
||||
state
|
||||
.repo
|
||||
.complete_job(queue, worker_id, job_id, job_result(&res))
|
||||
.await?;
|
||||
|
||||
res?;
|
||||
|
||||
|
|
|
@ -6,7 +6,7 @@ use tracing::{Instrument, Span};
|
|||
use crate::{
|
||||
config::Configuration,
|
||||
error::{Error, UploadError},
|
||||
future::LocalBoxFuture,
|
||||
future::WithPollTimer,
|
||||
queue::Cleanup,
|
||||
repo::{Alias, ArcRepo, DeleteToken, Hash},
|
||||
serde_str::Serde,
|
||||
|
@ -14,41 +14,67 @@ use crate::{
|
|||
store::Store,
|
||||
};
|
||||
|
||||
pub(super) fn perform<S>(
|
||||
state: &State<S>,
|
||||
job: serde_json::Value,
|
||||
) -> LocalBoxFuture<'_, Result<(), Error>>
|
||||
use super::{JobContext, JobFuture, JobResult};
|
||||
|
||||
pub(super) fn perform<S>(state: &State<S>, job: serde_json::Value) -> JobFuture<'_>
|
||||
where
|
||||
S: Store + 'static,
|
||||
{
|
||||
Box::pin(async move {
|
||||
match serde_json::from_value(job) {
|
||||
Ok(job) => match job {
|
||||
Cleanup::Hash { hash: in_hash } => hash(&state.repo, in_hash).await?,
|
||||
Cleanup::Identifier {
|
||||
identifier: in_identifier,
|
||||
} => identifier(&state.repo, &state.store, Arc::from(in_identifier)).await?,
|
||||
Cleanup::Alias {
|
||||
alias: stored_alias,
|
||||
token,
|
||||
} => {
|
||||
alias(
|
||||
&state.repo,
|
||||
Serde::into_inner(stored_alias),
|
||||
Serde::into_inner(token),
|
||||
)
|
||||
let job_text = format!("{job}");
|
||||
|
||||
let job = serde_json::from_value(job)
|
||||
.map_err(|e| UploadError::InvalidJob(e, job_text))
|
||||
.abort()?;
|
||||
|
||||
match job {
|
||||
Cleanup::Hash { hash: in_hash } => {
|
||||
hash(&state.repo, in_hash)
|
||||
.with_poll_timer("cleanup-hash")
|
||||
.await?
|
||||
}
|
||||
Cleanup::Identifier {
|
||||
identifier: in_identifier,
|
||||
} => {
|
||||
identifier(&state.repo, &state.store, Arc::from(in_identifier))
|
||||
.with_poll_timer("cleanup-identifier")
|
||||
.await?
|
||||
}
|
||||
Cleanup::Alias {
|
||||
alias: stored_alias,
|
||||
token,
|
||||
} => {
|
||||
alias(
|
||||
&state.repo,
|
||||
Serde::into_inner(stored_alias),
|
||||
Serde::into_inner(token),
|
||||
)
|
||||
.await?
|
||||
}
|
||||
Cleanup::Variant { hash, variant } => {
|
||||
hash_variant(&state.repo, hash, variant)
|
||||
.with_poll_timer("cleanup-hash-variant")
|
||||
.await?
|
||||
}
|
||||
Cleanup::AllVariants => {
|
||||
all_variants(&state.repo)
|
||||
.with_poll_timer("cleanup-all-variants")
|
||||
.await?
|
||||
}
|
||||
Cleanup::OutdatedVariants => {
|
||||
outdated_variants(&state.repo, &state.config)
|
||||
.with_poll_timer("cleanup-outdated-variants")
|
||||
.await?
|
||||
}
|
||||
Cleanup::OutdatedProxies => {
|
||||
outdated_proxies(&state.repo, &state.config)
|
||||
.with_poll_timer("cleanup-outdated-proxies")
|
||||
.await?
|
||||
}
|
||||
Cleanup::Prune => {
|
||||
prune(&state.repo, &state.store)
|
||||
.with_poll_timer("cleanup-prune")
|
||||
.await?
|
||||
}
|
||||
Cleanup::Variant { hash, variant } => {
|
||||
hash_variant(&state.repo, hash, variant).await?
|
||||
}
|
||||
Cleanup::AllVariants => all_variants(&state.repo).await?,
|
||||
Cleanup::OutdatedVariants => outdated_variants(&state.repo, &state.config).await?,
|
||||
Cleanup::OutdatedProxies => outdated_proxies(&state.repo, &state.config).await?,
|
||||
Cleanup::Prune => prune(&state.repo, &state.store).await?,
|
||||
},
|
||||
Err(e) => {
|
||||
tracing::warn!("Invalid job: {}", format!("{e}"));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -57,36 +83,30 @@ where
|
|||
}
|
||||
|
||||
#[tracing::instrument(skip_all)]
|
||||
async fn identifier<S>(repo: &ArcRepo, store: &S, identifier: Arc<str>) -> Result<(), Error>
|
||||
async fn identifier<S>(repo: &ArcRepo, store: &S, identifier: Arc<str>) -> JobResult
|
||||
where
|
||||
S: Store,
|
||||
{
|
||||
let mut errors = Vec::new();
|
||||
|
||||
if let Err(e) = store.remove(&identifier).await {
|
||||
errors.push(UploadError::from(e));
|
||||
match store.remove(&identifier).await {
|
||||
Ok(_) => {}
|
||||
Err(e) if e.is_not_found() => {}
|
||||
Err(e) => return Err(e).retry(),
|
||||
}
|
||||
|
||||
if let Err(e) = repo.cleanup_details(&identifier).await {
|
||||
errors.push(UploadError::from(e));
|
||||
}
|
||||
|
||||
for error in errors {
|
||||
tracing::error!("{}", format!("{error:?}"));
|
||||
}
|
||||
repo.cleanup_details(&identifier).await.retry()?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tracing::instrument(skip_all)]
|
||||
async fn hash(repo: &ArcRepo, hash: Hash) -> Result<(), Error> {
|
||||
let aliases = repo.aliases_for_hash(hash.clone()).await?;
|
||||
async fn hash(repo: &ArcRepo, hash: Hash) -> JobResult {
|
||||
let aliases = repo.aliases_for_hash(hash.clone()).await.retry()?;
|
||||
|
||||
if !aliases.is_empty() {
|
||||
for alias in aliases {
|
||||
// TODO: decide if it is okay to skip aliases without tokens
|
||||
if let Some(token) = repo.delete_token(&alias).await? {
|
||||
super::cleanup_alias(repo, alias, token).await?;
|
||||
if let Some(token) = repo.delete_token(&alias).await.retry()? {
|
||||
super::cleanup_alias(repo, alias, token).await.retry()?;
|
||||
} else {
|
||||
tracing::warn!("Not cleaning alias!");
|
||||
}
|
||||
|
@ -97,145 +117,152 @@ async fn hash(repo: &ArcRepo, hash: Hash) -> Result<(), Error> {
|
|||
|
||||
let mut idents = repo
|
||||
.variants(hash.clone())
|
||||
.await?
|
||||
.await
|
||||
.retry()?
|
||||
.into_iter()
|
||||
.map(|(_, v)| v)
|
||||
.collect::<Vec<_>>();
|
||||
idents.extend(repo.identifier(hash.clone()).await?);
|
||||
idents.extend(repo.motion_identifier(hash.clone()).await?);
|
||||
idents.extend(repo.identifier(hash.clone()).await.retry()?);
|
||||
idents.extend(repo.motion_identifier(hash.clone()).await.retry()?);
|
||||
|
||||
for identifier in idents {
|
||||
let _ = super::cleanup_identifier(repo, &identifier).await;
|
||||
super::cleanup_identifier(repo, &identifier).await.retry()?;
|
||||
}
|
||||
|
||||
repo.cleanup_hash(hash).await?;
|
||||
repo.cleanup_hash(hash).await.retry()?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tracing::instrument(skip_all)]
|
||||
pub(crate) async fn alias(repo: &ArcRepo, alias: Alias, token: DeleteToken) -> Result<(), Error> {
|
||||
let saved_delete_token = repo.delete_token(&alias).await?;
|
||||
pub(crate) async fn alias(repo: &ArcRepo, alias: Alias, token: DeleteToken) -> JobResult {
|
||||
let saved_delete_token = repo.delete_token(&alias).await.retry()?;
|
||||
|
||||
if !saved_delete_token.is_some_and(|t| t.ct_eq(&token)) {
|
||||
return Err(UploadError::InvalidToken.into());
|
||||
return Err(UploadError::InvalidToken).abort();
|
||||
}
|
||||
|
||||
let hash = repo.hash(&alias).await?;
|
||||
let hash = repo.hash(&alias).await.retry()?;
|
||||
|
||||
repo.cleanup_alias(&alias).await?;
|
||||
repo.remove_relation(alias.clone()).await?;
|
||||
repo.remove_alias_access(alias.clone()).await?;
|
||||
repo.cleanup_alias(&alias).await.retry()?;
|
||||
repo.remove_relation(alias.clone()).await.retry()?;
|
||||
repo.remove_alias_access(alias.clone()).await.retry()?;
|
||||
|
||||
let Some(hash) = hash else {
|
||||
// hash doesn't exist, nothing to do
|
||||
return Ok(());
|
||||
};
|
||||
let hash = hash.ok_or(UploadError::MissingAlias).abort()?;
|
||||
|
||||
if repo.aliases_for_hash(hash.clone()).await?.is_empty() {
|
||||
super::cleanup_hash(repo, hash).await?;
|
||||
if repo
|
||||
.aliases_for_hash(hash.clone())
|
||||
.await
|
||||
.retry()?
|
||||
.is_empty()
|
||||
{
|
||||
super::cleanup_hash(repo, hash).await.retry()?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tracing::instrument(skip_all)]
|
||||
async fn all_variants(repo: &ArcRepo) -> Result<(), Error> {
|
||||
async fn all_variants(repo: &ArcRepo) -> JobResult {
|
||||
let hash_stream = std::pin::pin!(repo.hashes());
|
||||
let mut hash_stream = hash_stream.into_streamer();
|
||||
|
||||
while let Some(res) = hash_stream.next().await {
|
||||
tracing::trace!("all_variants: looping");
|
||||
|
||||
let hash = res?;
|
||||
super::cleanup_variants(repo, hash, None).await?;
|
||||
let hash = res.retry()?;
|
||||
super::cleanup_variants(repo, hash, None).await.retry()?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tracing::instrument(skip_all)]
|
||||
async fn outdated_variants(repo: &ArcRepo, config: &Configuration) -> Result<(), Error> {
|
||||
async fn outdated_variants(repo: &ArcRepo, config: &Configuration) -> JobResult {
|
||||
let now = time::OffsetDateTime::now_utc();
|
||||
let since = now.saturating_sub(config.media.retention.variants.to_duration());
|
||||
|
||||
let variant_stream = repo.older_variants(since).await?;
|
||||
let variant_stream = repo.older_variants(since).await.retry()?;
|
||||
let variant_stream = std::pin::pin!(crate::stream::take(variant_stream, 2048));
|
||||
let mut variant_stream = variant_stream.into_streamer();
|
||||
|
||||
let mut count = 0;
|
||||
|
||||
while let Some(res) = variant_stream.next().await {
|
||||
while let Some((hash, variant)) = variant_stream.try_next().await.retry()? {
|
||||
metrics::counter!(crate::init_metrics::CLEANUP_OUTDATED_VARIANT).increment(1);
|
||||
tracing::trace!("outdated_variants: looping");
|
||||
|
||||
let (hash, variant) = res?;
|
||||
super::cleanup_variants(repo, hash, Some(variant)).await?;
|
||||
super::cleanup_variants(repo, hash, Some(variant))
|
||||
.await
|
||||
.retry()?;
|
||||
count += 1;
|
||||
}
|
||||
|
||||
tracing::debug!("Queued {count} variant cleanup jobs");
|
||||
let queue_length = repo.queue_length().await?;
|
||||
let queue_length = repo.queue_length().await.abort()?;
|
||||
tracing::debug!("Total queue length: {queue_length}");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tracing::instrument(skip_all)]
|
||||
async fn outdated_proxies(repo: &ArcRepo, config: &Configuration) -> Result<(), Error> {
|
||||
async fn outdated_proxies(repo: &ArcRepo, config: &Configuration) -> JobResult {
|
||||
let now = time::OffsetDateTime::now_utc();
|
||||
let since = now.saturating_sub(config.media.retention.proxy.to_duration());
|
||||
|
||||
let alias_stream = repo.older_aliases(since).await?;
|
||||
let alias_stream = repo.older_aliases(since).await.retry()?;
|
||||
let alias_stream = std::pin::pin!(crate::stream::take(alias_stream, 2048));
|
||||
let mut alias_stream = alias_stream.into_streamer();
|
||||
|
||||
let mut count = 0;
|
||||
|
||||
while let Some(res) = alias_stream.next().await {
|
||||
while let Some(alias) = alias_stream.try_next().await.retry()? {
|
||||
metrics::counter!(crate::init_metrics::CLEANUP_OUTDATED_PROXY).increment(1);
|
||||
tracing::trace!("outdated_proxies: looping");
|
||||
|
||||
let alias = res?;
|
||||
if let Some(token) = repo.delete_token(&alias).await? {
|
||||
super::cleanup_alias(repo, alias, token).await?;
|
||||
if let Some(token) = repo.delete_token(&alias).await.retry()? {
|
||||
super::cleanup_alias(repo, alias, token).await.retry()?;
|
||||
count += 1;
|
||||
} else {
|
||||
tracing::warn!("Skipping alias cleanup - no delete token");
|
||||
repo.remove_relation(alias.clone()).await?;
|
||||
repo.remove_alias_access(alias).await?;
|
||||
repo.remove_relation(alias.clone()).await.retry()?;
|
||||
repo.remove_alias_access(alias).await.retry()?;
|
||||
}
|
||||
}
|
||||
|
||||
tracing::debug!("Queued {count} alias cleanup jobs");
|
||||
let queue_length = repo.queue_length().await?;
|
||||
let queue_length = repo.queue_length().await.abort()?;
|
||||
tracing::debug!("Total queue length: {queue_length}");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tracing::instrument(skip_all)]
|
||||
async fn hash_variant(
|
||||
repo: &ArcRepo,
|
||||
hash: Hash,
|
||||
target_variant: Option<String>,
|
||||
) -> Result<(), Error> {
|
||||
async fn hash_variant(repo: &ArcRepo, hash: Hash, target_variant: Option<String>) -> JobResult {
|
||||
if let Some(target_variant) = target_variant {
|
||||
if let Some(identifier) = repo
|
||||
.variant_identifier(hash.clone(), target_variant.clone())
|
||||
.await?
|
||||
.await
|
||||
.retry()?
|
||||
{
|
||||
super::cleanup_identifier(repo, &identifier).await?;
|
||||
super::cleanup_identifier(repo, &identifier).await.retry()?;
|
||||
}
|
||||
|
||||
repo.remove_variant(hash.clone(), target_variant.clone())
|
||||
.await?;
|
||||
repo.remove_variant_access(hash, target_variant).await?;
|
||||
.await
|
||||
.retry()?;
|
||||
repo.remove_variant_access(hash, target_variant)
|
||||
.await
|
||||
.retry()?;
|
||||
} else {
|
||||
for (variant, identifier) in repo.variants(hash.clone()).await? {
|
||||
repo.remove_variant(hash.clone(), variant.clone()).await?;
|
||||
repo.remove_variant_access(hash.clone(), variant).await?;
|
||||
super::cleanup_identifier(repo, &identifier).await?;
|
||||
for (variant, identifier) in repo.variants(hash.clone()).await.retry()? {
|
||||
repo.remove_variant(hash.clone(), variant.clone())
|
||||
.await
|
||||
.retry()?;
|
||||
repo.remove_variant_access(hash.clone(), variant)
|
||||
.await
|
||||
.retry()?;
|
||||
super::cleanup_identifier(repo, &identifier).await.retry()?;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -243,19 +270,20 @@ async fn hash_variant(
|
|||
}
|
||||
|
||||
#[tracing::instrument(skip_all)]
|
||||
async fn prune<S>(repo: &ArcRepo, store: &S) -> Result<(), Error>
|
||||
async fn prune<S>(repo: &ArcRepo, store: &S) -> JobResult
|
||||
where
|
||||
S: Store + 'static,
|
||||
{
|
||||
repo.set("prune-missing-started", b"1".to_vec().into())
|
||||
.await?;
|
||||
.await
|
||||
.retry()?;
|
||||
|
||||
let hash_stream = std::pin::pin!(repo.hashes());
|
||||
let mut hash_stream = hash_stream.into_streamer();
|
||||
|
||||
let mut count: u64 = 0;
|
||||
|
||||
while let Some(hash) = hash_stream.try_next().await? {
|
||||
while let Some(hash) = hash_stream.try_next().await.retry()? {
|
||||
tracing::trace!("prune: looping");
|
||||
|
||||
let repo = repo.clone();
|
||||
|
@ -307,7 +335,8 @@ where
|
|||
}
|
||||
|
||||
repo.set("prune-missing-complete", b"1".to_vec().into())
|
||||
.await?;
|
||||
.await
|
||||
.retry()?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
|
|
@ -5,7 +5,7 @@ use crate::{
|
|||
concurrent_processor::ProcessMap,
|
||||
error::{Error, UploadError},
|
||||
formats::InputProcessableFormat,
|
||||
future::LocalBoxFuture,
|
||||
future::WithPollTimer,
|
||||
ingest::Session,
|
||||
queue::Process,
|
||||
repo::{Alias, UploadId, UploadResult},
|
||||
|
@ -15,49 +15,54 @@ use crate::{
|
|||
};
|
||||
use std::{path::PathBuf, sync::Arc};
|
||||
|
||||
use super::{JobContext, JobFuture, JobResult};
|
||||
|
||||
pub(super) fn perform<'a, S>(
|
||||
state: &'a State<S>,
|
||||
process_map: &'a ProcessMap,
|
||||
job: serde_json::Value,
|
||||
) -> LocalBoxFuture<'a, Result<(), Error>>
|
||||
) -> JobFuture<'a>
|
||||
where
|
||||
S: Store + 'static,
|
||||
{
|
||||
Box::pin(async move {
|
||||
match serde_json::from_value(job) {
|
||||
Ok(job) => match job {
|
||||
Process::Ingest {
|
||||
identifier,
|
||||
upload_id,
|
||||
declared_alias,
|
||||
} => {
|
||||
process_ingest(
|
||||
state,
|
||||
Arc::from(identifier),
|
||||
Serde::into_inner(upload_id),
|
||||
declared_alias.map(Serde::into_inner),
|
||||
)
|
||||
.await?
|
||||
}
|
||||
Process::Generate {
|
||||
let job_text = format!("{job}");
|
||||
|
||||
let job = serde_json::from_value(job)
|
||||
.map_err(|e| UploadError::InvalidJob(e, job_text))
|
||||
.abort()?;
|
||||
|
||||
match job {
|
||||
Process::Ingest {
|
||||
identifier,
|
||||
upload_id,
|
||||
declared_alias,
|
||||
} => {
|
||||
process_ingest(
|
||||
state,
|
||||
Arc::from(identifier),
|
||||
Serde::into_inner(upload_id),
|
||||
declared_alias.map(Serde::into_inner),
|
||||
)
|
||||
.with_poll_timer("process-ingest")
|
||||
.await?
|
||||
}
|
||||
Process::Generate {
|
||||
target_format,
|
||||
source,
|
||||
process_path,
|
||||
process_args,
|
||||
} => {
|
||||
generate(
|
||||
state,
|
||||
process_map,
|
||||
target_format,
|
||||
source,
|
||||
Serde::into_inner(source),
|
||||
process_path,
|
||||
process_args,
|
||||
} => {
|
||||
generate(
|
||||
state,
|
||||
process_map,
|
||||
target_format,
|
||||
Serde::into_inner(source),
|
||||
process_path,
|
||||
process_args,
|
||||
)
|
||||
.await?
|
||||
}
|
||||
},
|
||||
Err(e) => {
|
||||
tracing::warn!("Invalid job: {}", format!("{e}"));
|
||||
)
|
||||
.with_poll_timer("process-generate")
|
||||
.await?
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -105,13 +110,13 @@ async fn process_ingest<S>(
|
|||
unprocessed_identifier: Arc<str>,
|
||||
upload_id: UploadId,
|
||||
declared_alias: Option<Alias>,
|
||||
) -> Result<(), Error>
|
||||
) -> JobResult
|
||||
where
|
||||
S: Store + 'static,
|
||||
{
|
||||
let guard = UploadGuard::guard(upload_id);
|
||||
|
||||
let fut = async {
|
||||
let res = async {
|
||||
let ident = unprocessed_identifier.clone();
|
||||
let state2 = state.clone();
|
||||
|
||||
|
@ -135,25 +140,33 @@ where
|
|||
state.store.remove(&unprocessed_identifier).await?;
|
||||
|
||||
error_boundary.map_err(|_| UploadError::Canceled)?
|
||||
};
|
||||
}
|
||||
.await;
|
||||
|
||||
let result = match fut.await {
|
||||
let (result, err) = match res {
|
||||
Ok(session) => {
|
||||
let alias = session.alias().take().expect("Alias should exist").clone();
|
||||
let token = session.disarm();
|
||||
UploadResult::Success { alias, token }
|
||||
(UploadResult::Success { alias, token }, None)
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::warn!("Failed to ingest\n{}\n{}", format!("{e}"), format!("{e:?}"));
|
||||
|
||||
Err(e) => (
|
||||
UploadResult::Failure {
|
||||
message: e.root_cause().to_string(),
|
||||
code: e.error_code().into_owned(),
|
||||
}
|
||||
}
|
||||
},
|
||||
Some(e),
|
||||
),
|
||||
};
|
||||
|
||||
state.repo.complete_upload(upload_id, result).await?;
|
||||
state
|
||||
.repo
|
||||
.complete_upload(upload_id, result)
|
||||
.await
|
||||
.retry()?;
|
||||
|
||||
if let Some(e) = err {
|
||||
return Err(e).abort();
|
||||
}
|
||||
|
||||
guard.disarm();
|
||||
|
||||
|
@ -168,23 +181,28 @@ async fn generate<S: Store + 'static>(
|
|||
source: Alias,
|
||||
process_path: PathBuf,
|
||||
process_args: Vec<String>,
|
||||
) -> Result<(), Error> {
|
||||
let Some(hash) = state.repo.hash(&source).await? else {
|
||||
// Nothing to do
|
||||
return Ok(());
|
||||
};
|
||||
) -> JobResult {
|
||||
let hash = state
|
||||
.repo
|
||||
.hash(&source)
|
||||
.await
|
||||
.retry()?
|
||||
.ok_or(UploadError::MissingAlias)
|
||||
.abort()?;
|
||||
|
||||
let path_string = process_path.to_string_lossy().to_string();
|
||||
let identifier_opt = state
|
||||
.repo
|
||||
.variant_identifier(hash.clone(), path_string)
|
||||
.await?;
|
||||
.await
|
||||
.retry()?;
|
||||
|
||||
if identifier_opt.is_some() {
|
||||
// don't generate already-generated variant
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let original_details = crate::ensure_details(state, &source).await?;
|
||||
let original_details = crate::ensure_details(state, &source).await.retry()?;
|
||||
|
||||
crate::generate::generate(
|
||||
state,
|
||||
|
@ -195,7 +213,8 @@ async fn generate<S: Store + 'static>(
|
|||
&original_details,
|
||||
hash,
|
||||
)
|
||||
.await?;
|
||||
.await
|
||||
.abort()?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
|
11
src/repo.rs
11
src/repo.rs
|
@ -337,6 +337,13 @@ where
|
|||
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub(crate) struct JobId(Uuid);
|
||||
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub(crate) enum JobResult {
|
||||
Success,
|
||||
Failure,
|
||||
Aborted,
|
||||
}
|
||||
|
||||
impl JobId {
|
||||
pub(crate) fn gen() -> Self {
|
||||
Self(Uuid::now_v7())
|
||||
|
@ -380,6 +387,7 @@ pub(crate) trait QueueRepo: BaseRepo {
|
|||
queue: &'static str,
|
||||
worker_id: Uuid,
|
||||
job_id: JobId,
|
||||
job_status: JobResult,
|
||||
) -> Result<(), RepoError>;
|
||||
}
|
||||
|
||||
|
@ -423,8 +431,9 @@ where
|
|||
queue: &'static str,
|
||||
worker_id: Uuid,
|
||||
job_id: JobId,
|
||||
job_status: JobResult,
|
||||
) -> Result<(), RepoError> {
|
||||
T::complete_job(self, queue, worker_id, job_id).await
|
||||
T::complete_job(self, queue, worker_id, job_id, job_status).await
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -43,9 +43,9 @@ use self::job_status::JobStatus;
|
|||
use super::{
|
||||
metrics::{PopMetricsGuard, PushMetricsGuard, WaitMetricsGuard},
|
||||
Alias, AliasAccessRepo, AliasAlreadyExists, AliasRepo, BaseRepo, DeleteToken, DetailsRepo,
|
||||
FullRepo, Hash, HashAlreadyExists, HashPage, HashRepo, JobId, OrderedHash, ProxyRepo,
|
||||
QueueRepo, RepoError, SettingsRepo, StoreMigrationRepo, UploadId, UploadRepo, UploadResult,
|
||||
VariantAccessRepo, VariantAlreadyExists,
|
||||
FullRepo, Hash, HashAlreadyExists, HashPage, HashRepo, JobId, JobResult, OrderedHash,
|
||||
ProxyRepo, QueueRepo, RepoError, SettingsRepo, StoreMigrationRepo, UploadId, UploadRepo,
|
||||
UploadResult, VariantAccessRepo, VariantAlreadyExists,
|
||||
};
|
||||
|
||||
#[derive(Clone)]
|
||||
|
@ -1358,7 +1358,7 @@ impl QueueRepo for PostgresRepo {
|
|||
}
|
||||
}
|
||||
|
||||
#[tracing::instrument(level = "debug", skip(self))]
|
||||
#[tracing::instrument(level = "debug", skip_all, fields(job_id))]
|
||||
async fn pop(
|
||||
&self,
|
||||
queue_name: &'static str,
|
||||
|
@ -1420,7 +1420,8 @@ impl QueueRepo for PostgresRepo {
|
|||
queue_alias
|
||||
.field(status)
|
||||
.eq(JobStatus::New)
|
||||
.and(queue_alias.field(queue).eq(queue_name)),
|
||||
.and(queue_alias.field(queue).eq(queue_name))
|
||||
.and(queue_alias.field(retry).ge(1)),
|
||||
)
|
||||
.order(queue_alias.field(queue_time))
|
||||
.for_update()
|
||||
|
@ -1445,20 +1446,21 @@ impl QueueRepo for PostgresRepo {
|
|||
.map_err(PostgresError::Diesel)?;
|
||||
|
||||
if let Some((job_id, job_json)) = opt {
|
||||
tracing::Span::current().record("job_id", &format!("{job_id}"));
|
||||
|
||||
guard.disarm();
|
||||
tracing::debug!("{job_json}");
|
||||
return Ok((JobId(job_id), job_json));
|
||||
}
|
||||
|
||||
drop(conn);
|
||||
if notifier
|
||||
match notifier
|
||||
.notified()
|
||||
.with_timeout(Duration::from_secs(5))
|
||||
.await
|
||||
.is_ok()
|
||||
{
|
||||
tracing::debug!("Notified");
|
||||
} else {
|
||||
tracing::debug!("Timed out");
|
||||
Ok(()) => tracing::debug!("Notified"),
|
||||
Err(_) => tracing::trace!("Timed out"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1499,23 +1501,54 @@ impl QueueRepo for PostgresRepo {
|
|||
queue_name: &'static str,
|
||||
worker_id: Uuid,
|
||||
job_id: JobId,
|
||||
job_status: JobResult,
|
||||
) -> Result<(), RepoError> {
|
||||
use schema::job_queue::dsl::*;
|
||||
|
||||
let mut conn = self.get_connection().await?;
|
||||
|
||||
diesel::delete(job_queue)
|
||||
.filter(
|
||||
id.eq(job_id.0)
|
||||
.and(queue.eq(queue_name))
|
||||
.and(worker.eq(worker_id)),
|
||||
)
|
||||
.execute(&mut conn)
|
||||
.with_metrics(crate::init_metrics::POSTGRES_QUEUE_COMPLETE)
|
||||
.with_timeout(Duration::from_secs(5))
|
||||
.await
|
||||
.map_err(|_| PostgresError::DbTimeout)?
|
||||
.map_err(PostgresError::Diesel)?;
|
||||
if matches!(job_status, JobResult::Failure) {
|
||||
diesel::update(job_queue)
|
||||
.filter(
|
||||
id.eq(job_id.0)
|
||||
.and(queue.eq(queue_name))
|
||||
.and(worker.eq(worker_id)),
|
||||
)
|
||||
.set(retry.eq(retry - 1))
|
||||
.execute(&mut conn)
|
||||
.with_metrics(crate::init_metrics::POSTGRES_QUEUE_RETRY)
|
||||
.with_timeout(Duration::from_secs(5))
|
||||
.await
|
||||
.map_err(|_| PostgresError::DbTimeout)?
|
||||
.map_err(PostgresError::Diesel)?;
|
||||
|
||||
diesel::delete(job_queue)
|
||||
.filter(
|
||||
id.eq(job_id.0)
|
||||
.and(queue.eq(queue_name))
|
||||
.and(worker.eq(worker_id))
|
||||
.and(retry.le(0)),
|
||||
)
|
||||
.execute(&mut conn)
|
||||
.with_metrics(crate::init_metrics::POSTGRES_QUEUE_CLEANUP)
|
||||
.with_timeout(Duration::from_secs(5))
|
||||
.await
|
||||
.map_err(|_| PostgresError::DbTimeout)?
|
||||
.map_err(PostgresError::Diesel)?;
|
||||
} else {
|
||||
diesel::delete(job_queue)
|
||||
.filter(
|
||||
id.eq(job_id.0)
|
||||
.and(queue.eq(queue_name))
|
||||
.and(worker.eq(worker_id)),
|
||||
)
|
||||
.execute(&mut conn)
|
||||
.with_metrics(crate::init_metrics::POSTGRES_QUEUE_COMPLETE)
|
||||
.with_timeout(Duration::from_secs(5))
|
||||
.await
|
||||
.map_err(|_| PostgresError::DbTimeout)?
|
||||
.map_err(PostgresError::Diesel)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
|
|
@ -0,0 +1,12 @@
|
|||
use barrel::backend::Pg;
|
||||
use barrel::{types, Migration};
|
||||
|
||||
pub(crate) fn migration() -> String {
|
||||
let mut m = Migration::new();
|
||||
|
||||
m.change_table("job_queue", |t| {
|
||||
t.add_column("retry", types::integer().nullable(false).default(5));
|
||||
});
|
||||
|
||||
m.make::<Pg>().to_string()
|
||||
}
|
|
@ -44,6 +44,7 @@ diesel::table! {
|
|||
queue_time -> Timestamp,
|
||||
heartbeat -> Nullable<Timestamp>,
|
||||
unique_key -> Nullable<Text>,
|
||||
retry -> Int4,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
121
src/repo/sled.rs
121
src/repo/sled.rs
|
@ -1,6 +1,7 @@
|
|||
use crate::{
|
||||
details::HumanDate,
|
||||
error_code::{ErrorCode, OwnedErrorCode},
|
||||
future::WithTimeout,
|
||||
serde_str::Serde,
|
||||
stream::{from_iterator, LocalBoxStream},
|
||||
};
|
||||
|
@ -12,6 +13,7 @@ use std::{
|
|||
atomic::{AtomicU64, Ordering},
|
||||
Arc, RwLock,
|
||||
},
|
||||
time::Duration,
|
||||
};
|
||||
use tokio::sync::Notify;
|
||||
use url::Url;
|
||||
|
@ -21,9 +23,9 @@ use super::{
|
|||
hash::Hash,
|
||||
metrics::{PopMetricsGuard, PushMetricsGuard, WaitMetricsGuard},
|
||||
Alias, AliasAccessRepo, AliasAlreadyExists, AliasRepo, BaseRepo, DeleteToken, Details,
|
||||
DetailsRepo, FullRepo, HashAlreadyExists, HashPage, HashRepo, JobId, OrderedHash, ProxyRepo,
|
||||
QueueRepo, RepoError, SettingsRepo, StoreMigrationRepo, UploadId, UploadRepo, UploadResult,
|
||||
VariantAccessRepo, VariantAlreadyExists,
|
||||
DetailsRepo, FullRepo, HashAlreadyExists, HashPage, HashRepo, JobId, JobResult, OrderedHash,
|
||||
ProxyRepo, QueueRepo, RepoError, SettingsRepo, StoreMigrationRepo, UploadId, UploadRepo,
|
||||
UploadResult, VariantAccessRepo, VariantAlreadyExists,
|
||||
};
|
||||
|
||||
macro_rules! b {
|
||||
|
@ -99,6 +101,7 @@ pub(crate) struct SledRepo {
|
|||
unique_jobs: Tree,
|
||||
unique_jobs_inverse: Tree,
|
||||
job_state: Tree,
|
||||
job_retries: Tree,
|
||||
alias_access: Tree,
|
||||
inverse_alias_access: Tree,
|
||||
variant_access: Tree,
|
||||
|
@ -141,6 +144,7 @@ impl SledRepo {
|
|||
unique_jobs: db.open_tree("pict-rs-unique-jobs-tree")?,
|
||||
unique_jobs_inverse: db.open_tree("pict-rs-unique-jobs-inverse-tree")?,
|
||||
job_state: db.open_tree("pict-rs-job-state-tree")?,
|
||||
job_retries: db.open_tree("pict-rs-job-retries-tree")?,
|
||||
alias_access: db.open_tree("pict-rs-alias-access-tree")?,
|
||||
inverse_alias_access: db.open_tree("pict-rs-inverse-alias-access-tree")?,
|
||||
variant_access: db.open_tree("pict-rs-variant-access-tree")?,
|
||||
|
@ -653,28 +657,37 @@ impl QueueRepo for SledRepo {
|
|||
let unique_jobs = self.unique_jobs.clone();
|
||||
let unique_jobs_inverse = self.unique_jobs_inverse.clone();
|
||||
let job_state = self.job_state.clone();
|
||||
let job_retries = self.job_retries.clone();
|
||||
|
||||
let res = crate::sync::spawn_blocking("sled-io", move || {
|
||||
(&queue, &unique_jobs, &unique_jobs_inverse, &job_state).transaction(
|
||||
|(queue, unique_jobs, unique_jobs_inverse, job_state)| {
|
||||
let state = JobState::pending();
|
||||
|
||||
queue.insert(&key[..], &job[..])?;
|
||||
if let Some(unique_key) = unique_key {
|
||||
if unique_jobs
|
||||
.insert(unique_key.as_bytes(), &key[..])?
|
||||
.is_some()
|
||||
{
|
||||
return sled::transaction::abort(());
|
||||
}
|
||||
|
||||
unique_jobs_inverse.insert(&key[..], unique_key.as_bytes())?;
|
||||
}
|
||||
job_state.insert(&key[..], state.as_bytes())?;
|
||||
|
||||
Ok(())
|
||||
},
|
||||
(
|
||||
&queue,
|
||||
&unique_jobs,
|
||||
&unique_jobs_inverse,
|
||||
&job_state,
|
||||
&job_retries,
|
||||
)
|
||||
.transaction(
|
||||
|(queue, unique_jobs, unique_jobs_inverse, job_state, job_retries)| {
|
||||
let state = JobState::pending();
|
||||
|
||||
queue.insert(&key[..], &job[..])?;
|
||||
if let Some(unique_key) = unique_key {
|
||||
if unique_jobs
|
||||
.insert(unique_key.as_bytes(), &key[..])?
|
||||
.is_some()
|
||||
{
|
||||
return sled::transaction::abort(());
|
||||
}
|
||||
|
||||
unique_jobs_inverse.insert(&key[..], unique_key.as_bytes())?;
|
||||
}
|
||||
job_state.insert(&key[..], state.as_bytes())?;
|
||||
job_retries.insert(&key[..], &(5_u64.to_be_bytes())[..])?;
|
||||
|
||||
Ok(())
|
||||
},
|
||||
)
|
||||
})
|
||||
.await
|
||||
.map_err(|_| RepoError::Canceled)?;
|
||||
|
@ -703,7 +716,7 @@ impl QueueRepo for SledRepo {
|
|||
Ok(Some(id))
|
||||
}
|
||||
|
||||
#[tracing::instrument(skip(self, worker_id), fields(job_id))]
|
||||
#[tracing::instrument(skip_all, fields(job_id))]
|
||||
async fn pop(
|
||||
&self,
|
||||
queue_name: &'static str,
|
||||
|
@ -719,7 +732,6 @@ impl QueueRepo for SledRepo {
|
|||
let queue = self.queue.clone();
|
||||
let job_state = self.job_state.clone();
|
||||
|
||||
let span = tracing::Span::current();
|
||||
let opt = crate::sync::spawn_blocking("sled-io", move || {
|
||||
// Job IDs are generated with Uuid version 7 - defining their first bits as a
|
||||
// timestamp. Scanning a prefix should give us jobs in the order they were queued.
|
||||
|
@ -760,8 +772,6 @@ impl QueueRepo for SledRepo {
|
|||
|
||||
let job_id = JobId::from_bytes(id_bytes);
|
||||
|
||||
span.record("job_id", &format!("{job_id:?}"));
|
||||
|
||||
let opt = queue
|
||||
.get(&key)?
|
||||
.map(|ivec| serde_json::from_slice(&ivec[..]))
|
||||
|
@ -777,9 +787,12 @@ impl QueueRepo for SledRepo {
|
|||
.await
|
||||
.map_err(|_| RepoError::Canceled)??;
|
||||
|
||||
if let Some(tup) = opt {
|
||||
if let Some((job_id, job_json)) = opt {
|
||||
tracing::Span::current().record("job_id", &format!("{}", job_id.0));
|
||||
|
||||
metrics_guard.disarm();
|
||||
return Ok(tup);
|
||||
tracing::debug!("{job_json}");
|
||||
return Ok((job_id, job_json));
|
||||
}
|
||||
|
||||
let opt = self
|
||||
|
@ -797,7 +810,14 @@ impl QueueRepo for SledRepo {
|
|||
Arc::clone(entry)
|
||||
};
|
||||
|
||||
notify.notified().await
|
||||
match notify
|
||||
.notified()
|
||||
.with_timeout(Duration::from_secs(30))
|
||||
.await
|
||||
{
|
||||
Ok(()) => tracing::debug!("Notified"),
|
||||
Err(_) => tracing::trace!("Timed out"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -836,25 +856,50 @@ impl QueueRepo for SledRepo {
|
|||
queue_name: &'static str,
|
||||
_worker_id: Uuid,
|
||||
job_id: JobId,
|
||||
job_status: JobResult,
|
||||
) -> Result<(), RepoError> {
|
||||
let retry = matches!(job_status, JobResult::Failure);
|
||||
|
||||
let key = job_key(queue_name, job_id);
|
||||
|
||||
let queue = self.queue.clone();
|
||||
let unique_jobs = self.unique_jobs.clone();
|
||||
let unique_jobs_inverse = self.unique_jobs_inverse.clone();
|
||||
let job_state = self.job_state.clone();
|
||||
let job_retries = self.job_retries.clone();
|
||||
|
||||
let res = crate::sync::spawn_blocking("sled-io", move || {
|
||||
(&queue, &unique_jobs, &unique_jobs_inverse, &job_state).transaction(
|
||||
|(queue, unique_jobs, unique_jobs_inverse, job_state)| {
|
||||
queue.remove(&key[..])?;
|
||||
if let Some(unique_key) = unique_jobs_inverse.remove(&key[..])? {
|
||||
unique_jobs.remove(unique_key)?;
|
||||
}
|
||||
job_state.remove(&key[..])?;
|
||||
Ok(())
|
||||
},
|
||||
(
|
||||
&queue,
|
||||
&unique_jobs,
|
||||
&unique_jobs_inverse,
|
||||
&job_state,
|
||||
&job_retries,
|
||||
)
|
||||
.transaction(
|
||||
|(queue, unique_jobs, unique_jobs_inverse, job_state, job_retries)| {
|
||||
let retries = job_retries.get(&key[..])?;
|
||||
|
||||
let retry_count = retries
|
||||
.and_then(|ivec| ivec[0..8].try_into().ok())
|
||||
.map(u64::from_be_bytes)
|
||||
.unwrap_or(5_u64)
|
||||
.saturating_sub(1);
|
||||
|
||||
if retry_count > 0 && retry {
|
||||
job_retries.insert(&key[..], &(retry_count.to_be_bytes())[..])?;
|
||||
} else {
|
||||
queue.remove(&key[..])?;
|
||||
if let Some(unique_key) = unique_jobs_inverse.remove(&key[..])? {
|
||||
unique_jobs.remove(unique_key)?;
|
||||
}
|
||||
job_state.remove(&key[..])?;
|
||||
job_retries.remove(&key[..])?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
},
|
||||
)
|
||||
})
|
||||
.await
|
||||
.map_err(|_| RepoError::Canceled)?;
|
||||
|
|
|
@ -5,6 +5,8 @@ use tokio::{
|
|||
task::JoinHandle,
|
||||
};
|
||||
|
||||
use crate::future::WithPollTimer;
|
||||
|
||||
pub(crate) struct DropHandle<T> {
|
||||
handle: JoinHandle<T>,
|
||||
}
|
||||
|
@ -75,13 +77,12 @@ pub(crate) fn bare_semaphore(permits: usize) -> Semaphore {
|
|||
}
|
||||
|
||||
#[track_caller]
|
||||
pub(crate) fn spawn<F>(name: &str, future: F) -> tokio::task::JoinHandle<F::Output>
|
||||
pub(crate) fn spawn<F>(name: &'static str, future: F) -> tokio::task::JoinHandle<F::Output>
|
||||
where
|
||||
F: std::future::Future + 'static,
|
||||
F::Output: 'static,
|
||||
{
|
||||
#[cfg(not(tokio_unstable))]
|
||||
let _ = name;
|
||||
let future = future.with_poll_timer(name);
|
||||
|
||||
let span = tracing::trace_span!(parent: None, "spawn task");
|
||||
let guard = span.enter();
|
||||
|
|
Loading…
Reference in a new issue