mirror of
https://git.asonix.dog/asonix/pict-rs.git
synced 2025-01-01 07:08:42 +00:00
Add new feature for testing with errors, test & fix job retries
This commit is contained in:
parent
286279cdf5
commit
6f95c72070
13 changed files with 114 additions and 21 deletions
1
Cargo.lock
generated
1
Cargo.lock
generated
|
@ -1837,6 +1837,7 @@ dependencies = [
|
||||||
"metrics",
|
"metrics",
|
||||||
"metrics-exporter-prometheus",
|
"metrics-exporter-prometheus",
|
||||||
"mime",
|
"mime",
|
||||||
|
"nanorand",
|
||||||
"opentelemetry",
|
"opentelemetry",
|
||||||
"opentelemetry-otlp",
|
"opentelemetry-otlp",
|
||||||
"opentelemetry_sdk",
|
"opentelemetry_sdk",
|
||||||
|
|
|
@ -16,6 +16,7 @@ strip = true
|
||||||
default = []
|
default = []
|
||||||
io-uring = ["dep:tokio-uring", "sled/io_uring", "actix-web/experimental-io-uring"]
|
io-uring = ["dep:tokio-uring", "sled/io_uring", "actix-web/experimental-io-uring"]
|
||||||
poll-timer-warnings = []
|
poll-timer-warnings = []
|
||||||
|
random-errors = ["dep:nanorand"]
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
actix-form-data = "0.7.0-beta.6"
|
actix-form-data = "0.7.0-beta.6"
|
||||||
|
@ -40,6 +41,7 @@ md-5 = "0.10.5"
|
||||||
metrics = "0.22.0"
|
metrics = "0.22.0"
|
||||||
metrics-exporter-prometheus = { version = "0.13.0", default-features = false, features = ["http-listener"] }
|
metrics-exporter-prometheus = { version = "0.13.0", default-features = false, features = ["http-listener"] }
|
||||||
mime = "0.3.1"
|
mime = "0.3.1"
|
||||||
|
nanorand = { version = "0.7", optional = true }
|
||||||
opentelemetry_sdk = { version = "0.22", features = ["rt-tokio"] }
|
opentelemetry_sdk = { version = "0.22", features = ["rt-tokio"] }
|
||||||
opentelemetry = "0.22"
|
opentelemetry = "0.22"
|
||||||
opentelemetry-otlp = "0.15"
|
opentelemetry-otlp = "0.15"
|
||||||
|
|
|
@ -59,12 +59,12 @@ impl BytesStream {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn into_io_stream(self) -> impl Stream<Item = std::io::Result<Bytes>> {
|
pub(crate) fn into_io_stream(self) -> impl Stream<Item = std::io::Result<Bytes>> {
|
||||||
streem::from_fn(move |yielder| async move {
|
crate::stream::error_injector(streem::from_fn(move |yielder| async move {
|
||||||
for bytes in self {
|
for bytes in self {
|
||||||
crate::sync::cooperate().await;
|
crate::sync::cooperate().await;
|
||||||
yielder.yield_ok(bytes).await;
|
yielder.yield_ok(bytes).await;
|
||||||
}
|
}
|
||||||
})
|
}))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -167,6 +167,10 @@ pub(crate) enum UploadError {
|
||||||
|
|
||||||
#[error("Failed external validation")]
|
#[error("Failed external validation")]
|
||||||
FailedExternalValidation,
|
FailedExternalValidation,
|
||||||
|
|
||||||
|
#[cfg(feature = "random-errors")]
|
||||||
|
#[error("Randomly generated error for testing purposes")]
|
||||||
|
RandomError,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl UploadError {
|
impl UploadError {
|
||||||
|
@ -205,6 +209,8 @@ impl UploadError {
|
||||||
Self::ProcessTimeout => ErrorCode::COMMAND_TIMEOUT,
|
Self::ProcessTimeout => ErrorCode::COMMAND_TIMEOUT,
|
||||||
Self::FailedExternalValidation => ErrorCode::FAILED_EXTERNAL_VALIDATION,
|
Self::FailedExternalValidation => ErrorCode::FAILED_EXTERNAL_VALIDATION,
|
||||||
Self::InvalidJob(_, _) => ErrorCode::INVALID_JOB,
|
Self::InvalidJob(_, _) => ErrorCode::INVALID_JOB,
|
||||||
|
#[cfg(feature = "random-errors")]
|
||||||
|
Self::RandomError => ErrorCode::RANDOM_ERROR,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -147,4 +147,8 @@ impl ErrorCode {
|
||||||
pub(crate) const INVALID_JOB: ErrorCode = ErrorCode {
|
pub(crate) const INVALID_JOB: ErrorCode = ErrorCode {
|
||||||
code: "invalid-job",
|
code: "invalid-job",
|
||||||
};
|
};
|
||||||
|
#[cfg(feature = "random-errors")]
|
||||||
|
pub(crate) const RANDOM_ERROR: ErrorCode = ErrorCode {
|
||||||
|
code: "random-error",
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
|
@ -167,11 +167,18 @@ where
|
||||||
#[cfg(not(feature = "poll-timer-warnings"))]
|
#[cfg(not(feature = "poll-timer-warnings"))]
|
||||||
tracing::debug!("Future {} polled for {} ms", this.name, elapsed.as_millis());
|
tracing::debug!("Future {} polled for {} ms", this.name, elapsed.as_millis());
|
||||||
} else if elapsed > Duration::from_micros(200) {
|
} else if elapsed > Duration::from_micros(200) {
|
||||||
|
#[cfg(feature = "poll-timer-warnings")]
|
||||||
tracing::debug!(
|
tracing::debug!(
|
||||||
"Future {} polled for {} microseconds",
|
"Future {} polled for {} microseconds",
|
||||||
this.name,
|
this.name,
|
||||||
elapsed.as_micros(),
|
elapsed.as_micros(),
|
||||||
);
|
);
|
||||||
|
#[cfg(not(feature = "poll-timer-warnings"))]
|
||||||
|
tracing::trace!(
|
||||||
|
"Future {} polled for {} microseconds",
|
||||||
|
this.name,
|
||||||
|
elapsed.as_micros(),
|
||||||
|
);
|
||||||
} else if elapsed > Duration::from_micros(1) {
|
} else if elapsed > Duration::from_micros(1) {
|
||||||
tracing::trace!(
|
tracing::trace!(
|
||||||
"Future {} polled for {} microseconds",
|
"Future {} polled for {} microseconds",
|
||||||
|
|
|
@ -1914,6 +1914,11 @@ impl PictRsConfiguration {
|
||||||
/// }
|
/// }
|
||||||
/// ```
|
/// ```
|
||||||
pub async fn run(self) -> color_eyre::Result<()> {
|
pub async fn run(self) -> color_eyre::Result<()> {
|
||||||
|
#[cfg(feature = "random-errors")]
|
||||||
|
tracing::error!("pict-rs has been compiled with with the 'random-errors' feature enabled.");
|
||||||
|
#[cfg(feature = "random-errors")]
|
||||||
|
tracing::error!("This is not suitable for production environments");
|
||||||
|
|
||||||
let PictRsConfiguration { config, operation } = self;
|
let PictRsConfiguration { config, operation } = self;
|
||||||
|
|
||||||
// describe all the metrics pict-rs produces
|
// describe all the metrics pict-rs produces
|
||||||
|
|
|
@ -23,6 +23,15 @@ where
|
||||||
Box::pin(async move {
|
Box::pin(async move {
|
||||||
let job_text = format!("{job}");
|
let job_text = format!("{job}");
|
||||||
|
|
||||||
|
#[cfg(feature = "random-errors")]
|
||||||
|
{
|
||||||
|
use nanorand::Rng;
|
||||||
|
|
||||||
|
if nanorand::tls_rng().generate_range(0..25) < 1 {
|
||||||
|
return Err(crate::error::UploadError::RandomError).retry();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
let job = serde_json::from_value(job)
|
let job = serde_json::from_value(job)
|
||||||
.map_err(|e| UploadError::InvalidJob(e, job_text))
|
.map_err(|e| UploadError::InvalidJob(e, job_text))
|
||||||
.abort()?;
|
.abort()?;
|
||||||
|
|
|
@ -1548,14 +1548,14 @@ impl QueueRepo for PostgresRepo {
|
||||||
|
|
||||||
let mut conn = self.get_connection().await?;
|
let mut conn = self.get_connection().await?;
|
||||||
|
|
||||||
if matches!(job_status, JobResult::Failure) {
|
let count = if matches!(job_status, JobResult::Failure) {
|
||||||
diesel::update(job_queue)
|
diesel::update(job_queue)
|
||||||
.filter(
|
.filter(
|
||||||
id.eq(job_id.0)
|
id.eq(job_id.0)
|
||||||
.and(queue.eq(queue_name))
|
.and(queue.eq(queue_name))
|
||||||
.and(worker.eq(worker_id)),
|
.and(worker.eq(worker_id)),
|
||||||
)
|
)
|
||||||
.set(retry.eq(retry - 1))
|
.set((retry.eq(retry - 1), worker.eq(Option::<Uuid>::None)))
|
||||||
.execute(&mut conn)
|
.execute(&mut conn)
|
||||||
.with_metrics(crate::init_metrics::POSTGRES_QUEUE_RETRY)
|
.with_metrics(crate::init_metrics::POSTGRES_QUEUE_RETRY)
|
||||||
.with_timeout(Duration::from_secs(5))
|
.with_timeout(Duration::from_secs(5))
|
||||||
|
@ -1564,18 +1564,13 @@ impl QueueRepo for PostgresRepo {
|
||||||
.map_err(PostgresError::Diesel)?;
|
.map_err(PostgresError::Diesel)?;
|
||||||
|
|
||||||
diesel::delete(job_queue)
|
diesel::delete(job_queue)
|
||||||
.filter(
|
.filter(id.eq(job_id.0).and(retry.le(0)))
|
||||||
id.eq(job_id.0)
|
|
||||||
.and(queue.eq(queue_name))
|
|
||||||
.and(worker.eq(worker_id))
|
|
||||||
.and(retry.le(0)),
|
|
||||||
)
|
|
||||||
.execute(&mut conn)
|
.execute(&mut conn)
|
||||||
.with_metrics(crate::init_metrics::POSTGRES_QUEUE_CLEANUP)
|
.with_metrics(crate::init_metrics::POSTGRES_QUEUE_CLEANUP)
|
||||||
.with_timeout(Duration::from_secs(5))
|
.with_timeout(Duration::from_secs(5))
|
||||||
.await
|
.await
|
||||||
.map_err(|_| PostgresError::DbTimeout)?
|
.map_err(|_| PostgresError::DbTimeout)?
|
||||||
.map_err(PostgresError::Diesel)?;
|
.map_err(PostgresError::Diesel)?
|
||||||
} else {
|
} else {
|
||||||
diesel::delete(job_queue)
|
diesel::delete(job_queue)
|
||||||
.filter(
|
.filter(
|
||||||
|
@ -1588,7 +1583,20 @@ impl QueueRepo for PostgresRepo {
|
||||||
.with_timeout(Duration::from_secs(5))
|
.with_timeout(Duration::from_secs(5))
|
||||||
.await
|
.await
|
||||||
.map_err(|_| PostgresError::DbTimeout)?
|
.map_err(|_| PostgresError::DbTimeout)?
|
||||||
.map_err(PostgresError::Diesel)?;
|
.map_err(PostgresError::Diesel)?
|
||||||
|
};
|
||||||
|
|
||||||
|
match job_status {
|
||||||
|
JobResult::Success => tracing::debug!("completed {job_id:?}"),
|
||||||
|
JobResult::Failure if count == 0 => {
|
||||||
|
tracing::info!("{job_id:?} failed, marked for retry")
|
||||||
|
}
|
||||||
|
JobResult::Failure => tracing::warn!("{job_id:?} failed permantently"),
|
||||||
|
JobResult::Aborted => tracing::warn!("{job_id:?} dead"),
|
||||||
|
}
|
||||||
|
|
||||||
|
if count > 0 {
|
||||||
|
tracing::debug!("Deleted {count} jobs");
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
|
|
|
@ -899,15 +899,25 @@ impl QueueRepo for SledRepo {
|
||||||
job_retries.remove(&key[..])?;
|
job_retries.remove(&key[..])?;
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(retry_count > 0 && retry)
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
})
|
})
|
||||||
.await
|
.await
|
||||||
.map_err(|_| RepoError::Canceled)?;
|
.map_err(|_| RepoError::Canceled)?;
|
||||||
|
|
||||||
if let Err(TransactionError::Abort(e) | TransactionError::Storage(e)) = res {
|
match res {
|
||||||
return Err(RepoError::from(SledError::from(e)));
|
Err(TransactionError::Abort(e) | TransactionError::Storage(e)) => {
|
||||||
|
return Err(RepoError::from(SledError::from(e)));
|
||||||
|
}
|
||||||
|
Ok(retried) => match job_status {
|
||||||
|
JobResult::Success => tracing::debug!("completed {job_id:?}"),
|
||||||
|
JobResult::Failure if retried => {
|
||||||
|
tracing::info!("{job_id:?} failed, marked for retry")
|
||||||
|
}
|
||||||
|
JobResult::Failure => tracing::warn!("{job_id:?} failed permantently"),
|
||||||
|
JobResult::Aborted => tracing::warn!("{job_id:?} dead"),
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
|
|
|
@ -62,7 +62,10 @@ impl Store for FileStore {
|
||||||
{
|
{
|
||||||
let path = self.next_file(extension);
|
let path = self.next_file(extension);
|
||||||
|
|
||||||
if let Err(e) = self.safe_save_stream(&path, stream).await {
|
if let Err(e) = self
|
||||||
|
.safe_save_stream(&path, crate::stream::error_injector(stream))
|
||||||
|
.await
|
||||||
|
{
|
||||||
self.safe_remove_file(&path).await?;
|
self.safe_remove_file(&path).await?;
|
||||||
return Err(e.into());
|
return Err(e.into());
|
||||||
}
|
}
|
||||||
|
@ -95,7 +98,7 @@ impl Store for FileStore {
|
||||||
.instrument(file_span)
|
.instrument(file_span)
|
||||||
.await?;
|
.await?;
|
||||||
|
|
||||||
Ok(Box::pin(stream))
|
Ok(Box::pin(crate::stream::error_injector(stream)))
|
||||||
}
|
}
|
||||||
|
|
||||||
#[tracing::instrument(skip(self))]
|
#[tracing::instrument(skip(self))]
|
||||||
|
|
|
@ -216,7 +216,11 @@ impl Store for ObjectStore {
|
||||||
S: Stream<Item = std::io::Result<Bytes>>,
|
S: Stream<Item = std::io::Result<Bytes>>,
|
||||||
{
|
{
|
||||||
match self
|
match self
|
||||||
.start_upload(stream, content_type.clone(), extension)
|
.start_upload(
|
||||||
|
crate::stream::error_injector(stream),
|
||||||
|
content_type.clone(),
|
||||||
|
extension,
|
||||||
|
)
|
||||||
.await?
|
.await?
|
||||||
{
|
{
|
||||||
UploadState::Single(first_chunk) => {
|
UploadState::Single(first_chunk) => {
|
||||||
|
@ -306,9 +310,11 @@ impl Store for ObjectStore {
|
||||||
return Err(status_error(response, Some(identifier.clone())).await);
|
return Err(status_error(response, Some(identifier.clone())).await);
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(Box::pin(crate::stream::metrics(
|
Ok(Box::pin(crate::stream::error_injector(
|
||||||
crate::init_metrics::OBJECT_STORAGE_GET_OBJECT_REQUEST_STREAM,
|
crate::stream::metrics(
|
||||||
crate::stream::map_err(response.bytes_stream(), payload_to_io_error),
|
crate::init_metrics::OBJECT_STORAGE_GET_OBJECT_REQUEST_STREAM,
|
||||||
|
crate::stream::map_err(response.bytes_stream(), payload_to_io_error),
|
||||||
|
),
|
||||||
)))
|
)))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -5,6 +5,38 @@ use streem::IntoStreamer;
|
||||||
|
|
||||||
use crate::future::WithMetrics;
|
use crate::future::WithMetrics;
|
||||||
|
|
||||||
|
/// Pass-through variant of `error_injector`, compiled when the
/// `random-errors` feature is NOT enabled.
///
/// Takes a stream of `std::io::Result<Bytes>` items and returns it unchanged,
/// so call sites can wrap their streams unconditionally.
#[cfg(not(feature = "random-errors"))]
|
||||||
|
pub(crate) fn error_injector(
|
||||||
|
stream: impl Stream<Item = std::io::Result<Bytes>>,
|
||||||
|
) -> impl Stream<Item = std::io::Result<Bytes>> {
|
||||||
|
// No wrapping at all: the input stream itself is the return value.
stream
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Fault-injecting variant of `error_injector`, compiled only when the
/// `random-errors` feature is enabled.
///
/// Re-yields every item of the wrapped stream and, after each yielded item,
/// may end the stream early with a `std::io::Error` of kind `Other` that
/// wraps `UploadError::RandomError`. Errors produced by the wrapped stream
/// itself are propagated as-is.
#[cfg(feature = "random-errors")]
|
||||||
|
pub(crate) fn error_injector(
|
||||||
|
stream: impl Stream<Item = std::io::Result<Bytes>>,
|
||||||
|
) -> impl Stream<Item = std::io::Result<Bytes>> {
|
||||||
|
streem::try_from_fn(|yielder| async move {
|
||||||
|
// Pin the input locally before turning it into a streamer.
let stream = std::pin::pin!(stream);
|
||||||
|
let mut streamer = stream.into_streamer();
|
||||||
|
|
||||||
|
// `?` forwards an upstream error out of the closure immediately.
while let Some(item) = streamer.try_next().await? {
|
||||||
|
// The item is handed downstream first; any injected failure comes after it.
yielder.yield_ok(item).await;
|
||||||
|
|
||||||
|
use nanorand::Rng;
|
||||||
|
|
||||||
|
// Only a draw of 0 out of 0..1000 triggers the failure
// (about 1 in 1000 per item, assuming a uniform draw).
if nanorand::tls_rng().generate_range(0..1000) < 1 {
|
||||||
|
return Err(std::io::Error::new(
|
||||||
|
std::io::ErrorKind::Other,
|
||||||
|
crate::error::UploadError::RandomError,
|
||||||
|
));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Wrapped stream ended without an upstream or injected error.
Ok(())
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
pub(crate) fn take<S>(stream: S, amount: usize) -> impl Stream<Item = S::Item>
|
pub(crate) fn take<S>(stream: S, amount: usize) -> impl Stream<Item = S::Item>
|
||||||
where
|
where
|
||||||
S: Stream,
|
S: Stream,
|
||||||
|
|
Loading…
Reference in a new issue