Enable cleanup of variants

Still TODO: do a first pass on launch to mark existing variants as accessed
This commit is contained in:
asonix 2023-07-22 19:41:50 -05:00
parent 47e13ec04e
commit fe1f7c869f
6 changed files with 151 additions and 30 deletions

View file

@ -43,7 +43,7 @@ impl Drop for MetricsGuard {
} }
#[allow(clippy::too_many_arguments)] #[allow(clippy::too_many_arguments)]
#[tracing::instrument(skip(repo, store, hash))] #[tracing::instrument(skip(repo, store, hash, process_map, media))]
pub(crate) async fn generate<R: FullRepo, S: Store + 'static>( pub(crate) async fn generate<R: FullRepo, S: Store + 'static>(
repo: &R, repo: &R,
store: &S, store: &S,
@ -78,7 +78,7 @@ pub(crate) async fn generate<R: FullRepo, S: Store + 'static>(
} }
#[allow(clippy::too_many_arguments)] #[allow(clippy::too_many_arguments)]
#[tracing::instrument(skip(repo, store, hash))] #[tracing::instrument(skip(repo, store, hash, media))]
async fn process<R: FullRepo, S: Store + 'static>( async fn process<R: FullRepo, S: Store + 'static>(
repo: &R, repo: &R,
store: &S, store: &S,

View file

@ -1344,16 +1344,42 @@ fn configure_endpoints<
); );
} }
fn spawn_workers<R, S>(repo: R, store: S, config: &Configuration, process_map: ProcessMap) fn spawn_cleanup<R>(repo: R)
where
R: FullRepo + 'static,
{
tracing::trace_span!(parent: None, "Spawn task").in_scope(|| {
actix_rt::spawn(async move {
let mut interval = actix_rt::time::interval(Duration::from_secs(30));
loop {
interval.tick().await;
if let Err(e) = queue::cleanup_outdated_variants(&repo).await {
tracing::warn!(
"Failed to spawn cleanup for outdated variants:{}",
format!("\n{e}\n{e:?}")
);
}
}
});
})
}
fn spawn_workers<R, S>(repo: R, store: S, config: Configuration, process_map: ProcessMap)
where where
R: FullRepo + 'static, R: FullRepo + 'static,
S: Store + 'static, S: Store + 'static,
{ {
let worker_id_1 = next_worker_id(&config);
let worker_id_2 = next_worker_id(&config);
tracing::trace_span!(parent: None, "Spawn task").in_scope(|| { tracing::trace_span!(parent: None, "Spawn task").in_scope(|| {
actix_rt::spawn(queue::process_cleanup( actix_rt::spawn(queue::process_cleanup(
repo.clone(), repo.clone(),
store.clone(), store.clone(),
next_worker_id(config), config.clone(),
worker_id_1,
)) ))
}); });
tracing::trace_span!(parent: None, "Spawn task").in_scope(|| { tracing::trace_span!(parent: None, "Spawn task").in_scope(|| {
@ -1361,8 +1387,8 @@ where
repo, repo,
store, store,
process_map, process_map,
config.clone(), config,
next_worker_id(config), worker_id_2,
)) ))
}); });
} }
@ -1378,6 +1404,8 @@ async fn launch_file_store<R: FullRepo + 'static, F: Fn(&mut web::ServiceConfig)
let address = config.server.address; let address = config.server.address;
spawn_cleanup(repo.clone());
HttpServer::new(move || { HttpServer::new(move || {
let client = client.clone(); let client = client.clone();
let store = store.clone(); let store = store.clone();
@ -1385,7 +1413,12 @@ async fn launch_file_store<R: FullRepo + 'static, F: Fn(&mut web::ServiceConfig)
let config = config.clone(); let config = config.clone();
let extra_config = extra_config.clone(); let extra_config = extra_config.clone();
spawn_workers(repo.clone(), store.clone(), &config, process_map.clone()); spawn_workers(
repo.clone(),
store.clone(),
config.clone(),
process_map.clone(),
);
App::new() App::new()
.wrap(TracingLogger::default()) .wrap(TracingLogger::default())
@ -1413,6 +1446,8 @@ async fn launch_object_store<
let address = config.server.address; let address = config.server.address;
spawn_cleanup(repo.clone());
HttpServer::new(move || { HttpServer::new(move || {
let client = client.clone(); let client = client.clone();
let store = store_config.clone().build(client.clone()); let store = store_config.clone().build(client.clone());
@ -1420,7 +1455,12 @@ async fn launch_object_store<
let config = config.clone(); let config = config.clone();
let extra_config = extra_config.clone(); let extra_config = extra_config.clone();
spawn_workers(repo.clone(), store.clone(), &config, process_map.clone()); spawn_workers(
repo.clone(),
store.clone(),
config.clone(),
process_map.clone(),
);
App::new() App::new()
.wrap(TracingLogger::default()) .wrap(TracingLogger::default())

View file

@ -59,8 +59,11 @@ enum Cleanup {
}, },
Variant { Variant {
hash: Base64Bytes, hash: Base64Bytes,
#[serde(skip_serializing_if = "Option::is_none")]
variant: Option<String>,
}, },
AllVariants, AllVariants,
OutdatedVariants,
} }
#[derive(Debug, serde::Deserialize, serde::Serialize)] #[derive(Debug, serde::Deserialize, serde::Serialize)]
@ -110,14 +113,25 @@ pub(crate) async fn cleanup_identifier<R: QueueRepo, I: Identifier>(
Ok(()) Ok(())
} }
async fn cleanup_variants<R: QueueRepo>(repo: &R, hash: R::Bytes) -> Result<(), Error> { async fn cleanup_variants<R: QueueRepo>(
repo: &R,
hash: R::Bytes,
variant: Option<String>,
) -> Result<(), Error> {
let job = serde_json::to_vec(&Cleanup::Variant { let job = serde_json::to_vec(&Cleanup::Variant {
hash: Base64Bytes(hash.as_ref().to_vec()), hash: Base64Bytes(hash.as_ref().to_vec()),
variant,
})?; })?;
repo.push(CLEANUP_QUEUE, job.into()).await?; repo.push(CLEANUP_QUEUE, job.into()).await?;
Ok(()) Ok(())
} }
/// Queue a [`Cleanup::OutdatedVariants`] job on the cleanup queue.
///
/// This only enqueues the request: the job is serialized to JSON and pushed
/// onto `CLEANUP_QUEUE` through the repo.
///
/// # Errors
///
/// Returns an error if the job cannot be serialized or if the repo rejects
/// the push.
pub(crate) async fn cleanup_outdated_variants<R: QueueRepo>(repo: &R) -> Result<(), Error> {
    let payload: Vec<u8> = serde_json::to_vec(&Cleanup::OutdatedVariants)?;

    repo.push(CLEANUP_QUEUE, payload.into()).await?;

    Ok(())
}
pub(crate) async fn cleanup_all_variants<R: QueueRepo>(repo: &R) -> Result<(), Error> { pub(crate) async fn cleanup_all_variants<R: QueueRepo>(repo: &R) -> Result<(), Error> {
let job = serde_json::to_vec(&Cleanup::AllVariants)?; let job = serde_json::to_vec(&Cleanup::AllVariants)?;
repo.push(CLEANUP_QUEUE, job.into()).await?; repo.push(CLEANUP_QUEUE, job.into()).await?;
@ -156,8 +170,21 @@ pub(crate) async fn queue_generate<R: QueueRepo>(
Ok(()) Ok(())
} }
pub(crate) async fn process_cleanup<R: FullRepo, S: Store>(repo: R, store: S, worker_id: String) { pub(crate) async fn process_cleanup<R: FullRepo, S: Store>(
process_jobs(&repo, &store, worker_id, CLEANUP_QUEUE, cleanup::perform).await repo: R,
store: S,
config: Configuration,
worker_id: String,
) {
process_jobs(
&repo,
&store,
&config,
worker_id,
CLEANUP_QUEUE,
cleanup::perform,
)
.await
} }
pub(crate) async fn process_images<R: FullRepo + 'static, S: Store + 'static>( pub(crate) async fn process_images<R: FullRepo + 'static, S: Store + 'static>(
@ -184,6 +211,7 @@ type LocalBoxFuture<'a, T> = Pin<Box<dyn Future<Output = T> + 'a>>;
async fn process_jobs<R, S, F>( async fn process_jobs<R, S, F>(
repo: &R, repo: &R,
store: &S, store: &S,
config: &Configuration,
worker_id: String, worker_id: String,
queue: &'static str, queue: &'static str,
callback: F, callback: F,
@ -191,10 +219,11 @@ async fn process_jobs<R, S, F>(
R: QueueRepo + HashRepo + IdentifierRepo + AliasRepo, R: QueueRepo + HashRepo + IdentifierRepo + AliasRepo,
R::Bytes: Clone, R::Bytes: Clone,
S: Store, S: Store,
for<'a> F: Fn(&'a R, &'a S, &'a [u8]) -> LocalBoxFuture<'a, Result<(), Error>> + Copy, for<'a> F: Fn(&'a R, &'a S, &'a Configuration, &'a [u8]) -> LocalBoxFuture<'a, Result<(), Error>>
+ Copy,
{ {
loop { loop {
let res = job_loop(repo, store, worker_id.clone(), queue, callback).await; let res = job_loop(repo, store, config, worker_id.clone(), queue, callback).await;
if let Err(e) = res { if let Err(e) = res {
tracing::warn!("Error processing jobs: {}", format!("{e}")); tracing::warn!("Error processing jobs: {}", format!("{e}"));
@ -209,6 +238,7 @@ async fn process_jobs<R, S, F>(
async fn job_loop<R, S, F>( async fn job_loop<R, S, F>(
repo: &R, repo: &R,
store: &S, store: &S,
config: &Configuration,
worker_id: String, worker_id: String,
queue: &'static str, queue: &'static str,
callback: F, callback: F,
@ -217,14 +247,15 @@ where
R: QueueRepo + HashRepo + IdentifierRepo + AliasRepo, R: QueueRepo + HashRepo + IdentifierRepo + AliasRepo,
R::Bytes: Clone, R::Bytes: Clone,
S: Store, S: Store,
for<'a> F: Fn(&'a R, &'a S, &'a [u8]) -> LocalBoxFuture<'a, Result<(), Error>> + Copy, for<'a> F: Fn(&'a R, &'a S, &'a Configuration, &'a [u8]) -> LocalBoxFuture<'a, Result<(), Error>>
+ Copy,
{ {
loop { loop {
let bytes = repo.pop(queue, worker_id.as_bytes().to_vec()).await?; let bytes = repo.pop(queue, worker_id.as_bytes().to_vec()).await?;
let span = tracing::info_span!("Running Job", worker_id = ?worker_id); let span = tracing::info_span!("Running Job", worker_id = ?worker_id);
span.in_scope(|| (callback)(repo, store, bytes.as_ref())) span.in_scope(|| (callback)(repo, store, config, bytes.as_ref()))
.instrument(span) .instrument(span)
.await?; .await?;
} }

View file

@ -1,7 +1,8 @@
use crate::{ use crate::{
config::Configuration,
error::{Error, UploadError}, error::{Error, UploadError},
queue::{Base64Bytes, Cleanup, LocalBoxFuture}, queue::{Base64Bytes, Cleanup, LocalBoxFuture},
repo::{Alias, AliasRepo, DeleteToken, FullRepo, HashRepo, IdentifierRepo}, repo::{Alias, AliasRepo, DeleteToken, FullRepo, HashRepo, IdentifierRepo, VariantAccessRepo},
serde_str::Serde, serde_str::Serde,
store::{Identifier, Store}, store::{Identifier, Store},
}; };
@ -10,6 +11,7 @@ use futures_util::StreamExt;
pub(super) fn perform<'a, R, S>( pub(super) fn perform<'a, R, S>(
repo: &'a R, repo: &'a R,
store: &'a S, store: &'a S,
configuration: &'a Configuration,
job: &'a [u8], job: &'a [u8],
) -> LocalBoxFuture<'a, Result<(), Error>> ) -> LocalBoxFuture<'a, Result<(), Error>>
where where
@ -38,8 +40,10 @@ where
} }
Cleanup::Variant { Cleanup::Variant {
hash: Base64Bytes(hash), hash: Base64Bytes(hash),
} => variant::<R, S>(repo, hash).await?, variant,
} => hash_variant::<R, S>(repo, hash, variant).await?,
Cleanup::AllVariants => all_variants::<R, S>(repo).await?, Cleanup::AllVariants => all_variants::<R, S>(repo).await?,
Cleanup::OutdatedVariants => outdated_variants::<R, S>(repo, configuration).await?,
}, },
Err(e) => { Err(e) => {
tracing::warn!("Invalid job: {}", format!("{e}")); tracing::warn!("Invalid job: {}", format!("{e}"));
@ -150,6 +154,7 @@ where
Ok(()) Ok(())
} }
#[tracing::instrument(skip_all)]
async fn all_variants<R, S>(repo: &R) -> Result<(), Error> async fn all_variants<R, S>(repo: &R) -> Result<(), Error>
where where
R: FullRepo, R: FullRepo,
@ -159,22 +164,60 @@ where
while let Some(res) = hash_stream.next().await { while let Some(res) = hash_stream.next().await {
let hash = res?; let hash = res?;
super::cleanup_variants(repo, hash).await?; super::cleanup_variants(repo, hash, None).await?;
} }
Ok(()) Ok(())
} }
async fn variant<R, S>(repo: &R, hash: Vec<u8>) -> Result<(), Error> #[tracing::instrument(skip_all)]
/// Queue a per-variant cleanup job for every variant the repo reports as
/// older than the configured retention window.
///
/// The cutoff is "now (UTC) minus `config.media.retention.variants`", using a
/// saturating subtraction. Each `(hash, variant)` pair yielded by
/// `repo.older_variants(cutoff)` is handed to `super::cleanup_variants`, which
/// enqueues the removal rather than performing it here.
///
/// # Errors
///
/// Stops at, and returns, the first error from opening the stream, reading an
/// entry from it, or queueing a cleanup job.
async fn outdated_variants<R, S>(repo: &R, config: &Configuration) -> Result<(), Error>
where
    R: FullRepo,
    S: Store,
{
    let cutoff = time::OffsetDateTime::now_utc()
        .saturating_sub(config.media.retention.variants.to_duration());

    // Pin the stream on the heap so `.next()` can be called on it.
    let mut stale_variants = Box::pin(repo.older_variants(cutoff).await?);

    while let Some(entry) = stale_variants.next().await {
        let (hash, variant) = entry?;
        super::cleanup_variants(repo, hash, Some(variant)).await?;
    }

    Ok(())
}
#[tracing::instrument(skip_all)]
async fn hash_variant<R, S>(
repo: &R,
hash: Vec<u8>,
target_variant: Option<String>,
) -> Result<(), Error>
where where
R: FullRepo, R: FullRepo,
S: Store, S: Store,
{ {
let hash: R::Bytes = hash.into(); let hash: R::Bytes = hash.into();
for (variant, identifier) in repo.variants::<S::Identifier>(hash.clone()).await? { if let Some(target_variant) = target_variant {
repo.remove_variant(hash.clone(), variant).await?; if let Some(identifier) = repo
super::cleanup_identifier(repo, identifier).await?; .variant_identifier::<S::Identifier>(hash.clone(), target_variant.clone())
.await?
{
super::cleanup_identifier(repo, identifier).await?;
}
repo.remove_variant(hash.clone(), target_variant.clone())
.await?;
VariantAccessRepo::remove_access(repo, hash, target_variant).await?;
} else {
for (variant, identifier) in repo.variants::<S::Identifier>(hash.clone()).await? {
repo.remove_variant(hash.clone(), variant.clone()).await?;
VariantAccessRepo::remove_access(repo, hash.clone(), variant).await?;
super::cleanup_identifier(repo, identifier).await?;
}
} }
Ok(()) Ok(())

View file

@ -156,7 +156,7 @@ pub(crate) trait AliasAccessRepo: BaseRepo {
timestamp: time::OffsetDateTime, timestamp: time::OffsetDateTime,
) -> Result<Self::AliasAccessStream, RepoError>; ) -> Result<Self::AliasAccessStream, RepoError>;
async fn remove(&self, alias: Alias) -> Result<(), RepoError>; async fn remove_access(&self, alias: Alias) -> Result<(), RepoError>;
} }
#[async_trait::async_trait(?Send)] #[async_trait::async_trait(?Send)]
@ -177,8 +177,8 @@ where
T::older_aliases(self, timestamp).await T::older_aliases(self, timestamp).await
} }
async fn remove(&self, alias: Alias) -> Result<(), RepoError> { async fn remove_access(&self, alias: Alias) -> Result<(), RepoError> {
T::remove(self, alias).await T::remove_access(self, alias).await
} }
} }
@ -196,7 +196,7 @@ pub(crate) trait VariantAccessRepo: BaseRepo {
timestamp: time::OffsetDateTime, timestamp: time::OffsetDateTime,
) -> Result<Self::VariantAccessStream, RepoError>; ) -> Result<Self::VariantAccessStream, RepoError>;
async fn remove(&self, hash: Self::Bytes, variant: String) -> Result<(), RepoError>; async fn remove_access(&self, hash: Self::Bytes, variant: String) -> Result<(), RepoError>;
} }
#[async_trait::async_trait(?Send)] #[async_trait::async_trait(?Send)]
@ -225,8 +225,8 @@ where
T::older_variants(self, timestamp).await T::older_variants(self, timestamp).await
} }
async fn remove(&self, hash: Self::Bytes, variant: String) -> Result<(), RepoError> { async fn remove_access(&self, hash: Self::Bytes, variant: String) -> Result<(), RepoError> {
T::remove(self, hash, variant).await T::remove_access(self, hash, variant).await
} }
} }

View file

@ -269,6 +269,7 @@ impl futures_util::Stream for VariantAccessStream {
impl AliasAccessRepo for SledRepo { impl AliasAccessRepo for SledRepo {
type AliasAccessStream = AliasAccessStream; type AliasAccessStream = AliasAccessStream;
#[tracing::instrument(level = "debug", skip(self))]
async fn accessed(&self, alias: Alias) -> Result<(), RepoError> { async fn accessed(&self, alias: Alias) -> Result<(), RepoError> {
let now_string = time::OffsetDateTime::now_utc() let now_string = time::OffsetDateTime::now_utc()
.format(&time::format_description::well_known::Rfc3339) .format(&time::format_description::well_known::Rfc3339)
@ -289,6 +290,7 @@ impl AliasAccessRepo for SledRepo {
.map_err(RepoError::from) .map_err(RepoError::from)
} }
#[tracing::instrument(level = "debug", skip(self))]
async fn older_aliases( async fn older_aliases(
&self, &self,
timestamp: time::OffsetDateTime, timestamp: time::OffsetDateTime,
@ -312,7 +314,8 @@ impl AliasAccessRepo for SledRepo {
}) })
} }
async fn remove(&self, alias: Alias) -> Result<(), RepoError> { #[tracing::instrument(level = "debug", skip(self))]
async fn remove_access(&self, alias: Alias) -> Result<(), RepoError> {
let alias_access = self.alias_access.clone(); let alias_access = self.alias_access.clone();
let inverse_alias_access = self.inverse_alias_access.clone(); let inverse_alias_access = self.inverse_alias_access.clone();
@ -332,6 +335,7 @@ impl AliasAccessRepo for SledRepo {
impl VariantAccessRepo for SledRepo { impl VariantAccessRepo for SledRepo {
type VariantAccessStream = VariantAccessStream; type VariantAccessStream = VariantAccessStream;
#[tracing::instrument(level = "debug", skip_all, fields(hash = %hex::encode(&hash), variant = %variant))]
async fn accessed(&self, hash: Self::Bytes, variant: String) -> Result<(), RepoError> { async fn accessed(&self, hash: Self::Bytes, variant: String) -> Result<(), RepoError> {
let key = variant_access_key(&hash, &variant); let key = variant_access_key(&hash, &variant);
@ -354,6 +358,7 @@ impl VariantAccessRepo for SledRepo {
.map_err(RepoError::from) .map_err(RepoError::from)
} }
#[tracing::instrument(level = "debug", skip_all, fields(hash = %hex::encode(&hash), variant = %variant))]
async fn contains_variant( async fn contains_variant(
&self, &self,
hash: Self::Bytes, hash: Self::Bytes,
@ -366,6 +371,7 @@ impl VariantAccessRepo for SledRepo {
Ok(timestamp.is_some()) Ok(timestamp.is_some())
} }
#[tracing::instrument(level = "debug", skip(self))]
async fn older_variants( async fn older_variants(
&self, &self,
timestamp: time::OffsetDateTime, timestamp: time::OffsetDateTime,
@ -389,7 +395,8 @@ impl VariantAccessRepo for SledRepo {
}) })
} }
async fn remove(&self, hash: Self::Bytes, variant: String) -> Result<(), RepoError> { #[tracing::instrument(level = "debug", skip_all, fields(hash = %hex::encode(&hash), variant = %variant))]
async fn remove_access(&self, hash: Self::Bytes, variant: String) -> Result<(), RepoError> {
let key = variant_access_key(&hash, &variant); let key = variant_access_key(&hash, &variant);
let variant_access = self.variant_access.clone(); let variant_access = self.variant_access.clone();