Introduce job timeout

asonix 2020-03-20 22:04:23 -05:00
parent 74ac3a9b61
commit b40dc7dc93
7 changed files with 49 additions and 10 deletions


@@ -11,7 +11,7 @@ actix = "0.10.0-alpha.2"
 actix-rt = "1.0.0"
 anyhow = "1.0"
 async-trait = "0.1.24"
-background-jobs = { version = "0.7.0", path = "../.." }
+background-jobs = { version = "0.8.0-alpha.0", path = "../.." }
 env_logger = "0.7"
 sled-extensions = "0.2.0"
 serde = { version = "1.0", features = ["derive"] }


@@ -44,4 +44,13 @@ pub trait Job: Serialize + DeserializeOwned + 'static {
     fn backoff_strategy(&self) -> Option<Backoff> {
         None
     }
+
+    /// Define the maximum number of milliseconds this job should be allowed to run before being
+    /// considered dead.
+    ///
+    /// This is important for allowing the job server to reap processes that were started but never
+    /// completed.
+    fn timeout(&self) -> Option<i64> {
+        None
+    }
 }
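The new hook reports the allowance in milliseconds as an i64; returning None defers to the processor-level default introduced later in this commit. A minimal sketch of choosing a value (the numbers and the helper variable are illustrative, not part of the crate):

    use std::time::Duration;

    fn main() {
        // A job that can legitimately run for two minutes gets some headroom
        // so slow-but-healthy runs are not reaped prematurely.
        let allowance = Duration::from_secs(150);

        // A value a timeout() override might return; None would mean
        // "fall back to the processor's default".
        let timeout_ms: Option<i64> = Some(allowance.as_millis() as i64);

        assert_eq!(timeout_ms, Some(150_000));
    }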


@@ -1,5 +1,5 @@
 use crate::{Backoff, JobResult, JobStatus, MaxRetries, ShouldStop};
-use chrono::{offset::Utc, DateTime, Duration as OldDuration};
+use chrono::{offset::Utc, DateTime, Duration};
 use log::trace;
 use serde_json::Value;
@@ -53,6 +53,11 @@ pub struct NewJobInfo {

     /// The time this job should be dequeued
     next_queue: Option<DateTime<Utc>>,
+
+    /// Milliseconds from execution until the job is considered dead
+    ///
+    /// This is important for storage implementations to reap unfinished jobs
+    timeout: i64,
 }

 impl NewJobInfo {
@@ -66,6 +71,7 @@ impl NewJobInfo {
         args: Value,
         max_retries: MaxRetries,
         backoff_strategy: Backoff,
+        timeout: i64,
     ) -> Self {
         NewJobInfo {
             processor,
@@ -74,6 +80,7 @@ impl NewJobInfo {
             max_retries,
             next_queue: None,
             backoff_strategy,
+            timeout,
         }
     }
@@ -99,6 +106,7 @@ impl NewJobInfo {
             next_queue: self.next_queue,
             backoff_strategy: self.backoff_strategy,
             updated_at: Utc::now(),
+            timeout: self.timeout,
         }
     }
 }
@@ -140,6 +148,11 @@ pub struct JobInfo {

     /// The time this job was last updated
     updated_at: DateTime<Utc>,
+
+    /// Milliseconds from execution until the job is considered dead
+    ///
+    /// This is important for storage implementations to reap unfinished jobs
+    timeout: i64,
 }

 impl JobInfo {
@@ -183,10 +196,10 @@ impl JobInfo {
         let now = Utc::now();

         let next_queue = match self.backoff_strategy {
-            Backoff::Linear(secs) => now + OldDuration::seconds(secs as i64),
+            Backoff::Linear(secs) => now + Duration::seconds(secs as i64),
             Backoff::Exponential(base) => {
                 let secs = base.pow(self.retry_count);
-                now + OldDuration::seconds(secs as i64)
+                now + Duration::seconds(secs as i64)
             }
         };
@@ -220,8 +233,10 @@ impl JobInfo {
     }

     /// Whether this job is pending execution
-    pub fn is_pending(&self) -> bool {
+    pub fn is_pending(&self, now: DateTime<Utc>) -> bool {
         self.status == JobStatus::Pending
+            || (self.status == JobStatus::Running
+                && (self.updated_at + Duration::milliseconds(self.timeout)) < now)
     }

     pub(crate) fn is_in_queue(&self, queue: &str) -> bool {
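The reworked is_pending is what lets storage backends reap stalled work: a job still marked Running counts as pending again once its last update is more than timeout milliseconds in the past. A self-contained sketch of that condition, with hypothetical types and values standing in for JobInfo's fields:

    use chrono::{DateTime, Duration, Utc};

    #[derive(PartialEq)]
    enum JobStatus {
        Pending,
        Running,
    }

    // Mirrors the updated check: Pending jobs are always eligible, and Running
    // jobs become eligible again once their timeout window has elapsed.
    fn is_pending(
        status: &JobStatus,
        updated_at: DateTime<Utc>,
        timeout_ms: i64,
        now: DateTime<Utc>,
    ) -> bool {
        *status == JobStatus::Pending
            || (*status == JobStatus::Running
                && (updated_at + Duration::milliseconds(timeout_ms)) < now)
    }

    fn main() {
        let now = Utc::now();

        // A runner picked this job up 20 seconds ago and never reported back;
        // with a 15_000 ms timeout it is considered dead and can be re-queued.
        let stalled_since = now - Duration::seconds(20);
        assert!(is_pending(&JobStatus::Running, stalled_since, 15_000, now));

        // A job updated moments ago is still inside its window.
        assert!(!is_pending(&JobStatus::Running, now, 15_000, now));
    }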


@@ -80,13 +80,25 @@ pub trait Processor: Clone {

     /// Define the default number of retries for a given processor
     ///
+    /// Defaults to Count(5)
     /// Jobs can override
-    const MAX_RETRIES: MaxRetries;
+    const MAX_RETRIES: MaxRetries = MaxRetries::Count(5);

     /// Define the default backoff strategy for a given processor
     ///
+    /// Defaults to Exponential(2)
     /// Jobs can override
-    const BACKOFF_STRATEGY: Backoff;
+    const BACKOFF_STRATEGY: Backoff = Backoff::Exponential(2);

+    /// Define the maximum number of milliseconds a job should be allowed to run before being
+    /// considered dead.
+    ///
+    /// This is important for allowing the job server to reap processes that were started but never
+    /// completed.
+    ///
+    /// Defaults to 15 seconds
+    /// Jobs can override
+    const TIMEOUT: i64 = 15_000;
+
     /// A provided method to create a new JobInfo from provided arguments
     ///
@@ -96,6 +108,7 @@ pub trait Processor: Clone {
         let queue = job.queue().unwrap_or(Self::QUEUE).to_owned();
         let max_retries = job.max_retries().unwrap_or(Self::MAX_RETRIES);
         let backoff_strategy = job.backoff_strategy().unwrap_or(Self::BACKOFF_STRATEGY);
+        let timeout = job.timeout().unwrap_or(Self::TIMEOUT);

         let job = NewJobInfo::new(
             Self::NAME.to_owned(),
@@ -103,6 +116,7 @@ pub trait Processor: Clone {
             serde_json::to_value(job).map_err(|_| ToJson)?,
             max_retries,
             backoff_strategy,
+            timeout,
         );

         Ok(job)
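As the hunk above shows, new_job resolves the effective timeout the same way it already resolves retries and backoff: the job's timeout() override wins, otherwise the processor's TIMEOUT constant (15_000 ms by default) applies. A small sketch of that precedence with made-up values; the helper function is not part of the crate:

    // Stand-in for the unwrap_or chain in Processor::new_job.
    fn effective_timeout(job_override_ms: Option<i64>, processor_default_ms: i64) -> i64 {
        job_override_ms.unwrap_or(processor_default_ms)
    }

    fn main() {
        // Job did not override timeout(): the processor default applies.
        assert_eq!(effective_timeout(None, 15_000), 15_000);

        // Job asked for a longer window, e.g. a slow export task.
        assert_eq!(effective_timeout(Some(120_000), 15_000), 120_000);
    }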


@@ -76,7 +76,8 @@ pub trait Storage: Clone + Send {
     ) -> Result<Option<JobInfo>, Self::Error> {
         match self.fetch_job_from_queue(queue).await? {
             Some(mut job) => {
-                if job.is_pending() && job.is_ready(Utc::now()) && job.is_in_queue(queue) {
+                let now = Utc::now();
+                if job.is_pending(now) && job.is_ready(now) && job.is_in_queue(queue) {
                     job.run();
                     self.run_job(job.id(), runner_id).await?;
                     self.save_job(job.clone()).await?;


@@ -1,7 +1,7 @@
 [package]
 name = "background-jobs-sled-storage"
 description = "Sled storage backend for background-jobs"
-version = "0.3.0"
+version = "0.4.0-alpha.0"
 license-file = "../LICENSE"
 authors = ["asonix <asonix@asonix.dog>"]
 repository = "https://git.asonix.dog/Aardwolf/background-jobs"


@@ -98,7 +98,7 @@ impl Storage for SledStorage {
             )
             .filter_map(|id| job_tree.get(id).ok())
             .filter_map(|opt| opt)
-            .filter(|job| job.is_ready(now))
+            .filter(|job| job.is_ready(now) && job.is_pending(now))
             .next();

         if let Some(ref job) = job {