use instance id instead of domain for stats channel

This commit is contained in:
Felix Ableitner 2024-05-24 10:37:22 +02:00
parent 95b99fe4bc
commit df1be61bb4
3 changed files with 51 additions and 19 deletions

View file

@ -38,7 +38,7 @@ pub struct SendManager {
opts: Opts, opts: Opts,
workers: HashMap<InstanceId, CancellableTask>, workers: HashMap<InstanceId, CancellableTask>,
context: FederationConfig<LemmyContext>, context: FederationConfig<LemmyContext>,
stats_sender: UnboundedSender<(String, FederationQueueState)>, stats_sender: UnboundedSender<(InstanceId, FederationQueueState)>,
exit_print: JoinHandle<()>, exit_print: JoinHandle<()>,
} }

View file

@ -1,19 +1,24 @@
use crate::util::get_latest_activity_id; use crate::util::get_latest_activity_id;
use chrono::Local; use chrono::Local;
use diesel::result::Error::NotFound;
use lemmy_api_common::federate_retry_sleep_duration; use lemmy_api_common::federate_retry_sleep_duration;
use lemmy_db_schema::{ use lemmy_db_schema::{
source::federation_queue_state::FederationQueueState, newtypes::InstanceId,
source::{federation_queue_state::FederationQueueState, instance::Instance},
utils::{ActualDbPool, DbPool}, utils::{ActualDbPool, DbPool},
}; };
use lemmy_utils::{error::LemmyResult, CACHE_DURATION_FEDERATION};
use moka::future::Cache;
use once_cell::sync::Lazy;
use std::{collections::HashMap, time::Duration}; use std::{collections::HashMap, time::Duration};
use tokio::{sync::mpsc::UnboundedReceiver, time::interval}; use tokio::{sync::mpsc::UnboundedReceiver, time::interval};
use tracing::{debug, error, info}; use tracing::{debug, info, warn};
/// every 60s, print the state for every instance. exits if the receiver is done (all senders /// every 60s, print the state for every instance. exits if the receiver is done (all senders
/// dropped) /// dropped)
pub(crate) async fn receive_print_stats( pub(crate) async fn receive_print_stats(
pool: ActualDbPool, pool: ActualDbPool,
mut receiver: UnboundedReceiver<(String, FederationQueueState)>, mut receiver: UnboundedReceiver<(InstanceId, FederationQueueState)>,
) { ) {
let pool = &mut DbPool::Pool(&pool); let pool = &mut DbPool::Pool(&pool);
let mut printerval = interval(Duration::from_secs(60)); let mut printerval = interval(Duration::from_secs(60));
@ -21,11 +26,15 @@ pub(crate) async fn receive_print_stats(
loop { loop {
tokio::select! { tokio::select! {
ele = receiver.recv() => { ele = receiver.recv() => {
let Some((domain, ele)) = ele else { match ele {
print_stats(pool, &stats).await; // update stats for instance
return; Some((instance_id, ele)) => {stats.insert(instance_id, ele);},
}; // receiver closed, print stats and exit
stats.insert(domain, ele); None => {
print_stats(pool, &stats).await;
return;
}
}
}, },
_ = printerval.tick() => { _ = printerval.tick() => {
print_stats(pool, &stats).await; print_stats(pool, &stats).await;
@ -34,19 +43,41 @@ pub(crate) async fn receive_print_stats(
} }
} }
async fn print_stats(pool: &mut DbPool<'_>, stats: &HashMap<String, FederationQueueState>) { async fn print_stats(pool: &mut DbPool<'_>, stats: &HashMap<InstanceId, FederationQueueState>) {
let last_id = get_latest_activity_id(pool).await; let res = print_stats_with_error(pool, stats).await;
let Ok(last_id) = last_id else { if let Err(e) = res {
error!("could not get last id"); warn!("Failed to print stats: {e}");
return; }
}; }
async fn print_stats_with_error(
pool: &mut DbPool<'_>,
stats: &HashMap<InstanceId, FederationQueueState>,
) -> LemmyResult<()> {
static INSTANCE_CACHE: Lazy<Cache<(), Vec<Instance>>> = Lazy::new(|| {
Cache::builder()
.max_capacity(1)
.time_to_live(CACHE_DURATION_FEDERATION)
.build()
});
let instances = INSTANCE_CACHE
.try_get_with((), async { Instance::read_all(pool).await })
.await?;
let last_id = get_latest_activity_id(pool).await?;
// it's expected that the values are a bit out of date, everything < SAVE_STATE_EVERY should be // it's expected that the values are a bit out of date, everything < SAVE_STATE_EVERY should be
// considered up to date // considered up to date
info!("Federation state as of {}:", Local::now().to_rfc3339()); info!("Federation state as of {}:", Local::now().to_rfc3339());
// todo: more stats (act/sec, avg http req duration) // todo: more stats (act/sec, avg http req duration)
let mut ok_count = 0; let mut ok_count = 0;
let mut behind_count = 0; let mut behind_count = 0;
for (domain, stat) in stats { for (instance_id, stat) in stats {
let domain = &instances
.iter()
.find(|i| &i.id == instance_id)
.ok_or(NotFound)?
.domain;
let behind = last_id.0 - stat.last_successful_id.map(|e| e.0).unwrap_or(0); let behind = last_id.0 - stat.last_successful_id.map(|e| e.0).unwrap_or(0);
if stat.fail_count > 0 { if stat.fail_count > 0 {
info!( info!(
@ -62,4 +93,5 @@ async fn print_stats(pool: &mut DbPool<'_>, stats: &HashMap<String, FederationQu
} }
} }
info!("{ok_count} others up to date. {behind_count} instances behind."); info!("{ok_count} others up to date. {behind_count} instances behind.");
Ok(())
} }

View file

@ -75,7 +75,7 @@ pub(crate) struct InstanceWorker {
followed_communities: HashMap<CommunityId, HashSet<Url>>, followed_communities: HashMap<CommunityId, HashSet<Url>>,
stop: CancellationToken, stop: CancellationToken,
context: Data<LemmyContext>, context: Data<LemmyContext>,
stats_sender: UnboundedSender<(String, FederationQueueState)>, stats_sender: UnboundedSender<(InstanceId, FederationQueueState)>,
last_full_communities_fetch: DateTime<Utc>, last_full_communities_fetch: DateTime<Utc>,
last_incremental_communities_fetch: DateTime<Utc>, last_incremental_communities_fetch: DateTime<Utc>,
state: FederationQueueState, state: FederationQueueState,
@ -87,7 +87,7 @@ impl InstanceWorker {
instance: Instance, instance: Instance,
context: Data<LemmyContext>, context: Data<LemmyContext>,
stop: CancellationToken, stop: CancellationToken,
stats_sender: UnboundedSender<(String, FederationQueueState)>, stats_sender: UnboundedSender<(InstanceId, FederationQueueState)>,
) -> Result<(), anyhow::Error> { ) -> Result<(), anyhow::Error> {
let mut pool = context.pool(); let mut pool = context.pool();
let state = FederationQueueState::load(&mut pool, instance.id).await?; let state = FederationQueueState::load(&mut pool, instance.id).await?;
@ -350,7 +350,7 @@ impl InstanceWorker {
FederationQueueState::upsert(&mut self.context.pool(), &self.state).await?; FederationQueueState::upsert(&mut self.context.pool(), &self.state).await?;
self self
.stats_sender .stats_sender
.send((self.instance.domain.clone(), self.state.clone()))?; .send((self.instance.id, self.state.clone()))?;
Ok(()) Ok(())
} }
} }