BROKEN! Start collecting more metrics about various sizes

asonix 2024-04-07 11:04:03 -05:00
parent 76a0c79369
commit 21883c168b
6 changed files with 153 additions and 41 deletions

@@ -15,6 +15,10 @@ const MINUTES: u64 = 60 * SECONDS;
 const HOURS: u64 = 60 * MINUTES;
 const DAYS: u64 = 24 * HOURS;
 
+pub(crate) fn recordable(len: usize) -> u32 {
+    ((len as u64) % u64::from(u32::MAX)) as u32
+}
+
 type DistributionMap = BTreeMap<Vec<(String, String)>, Summary>;
 
 #[derive(Clone)]
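
The `recordable` helper added here exists because `metrics` gauges take values that convert losslessly to `f64`, and `usize` has no such conversion on 64-bit targets. A standalone sketch of its behavior (no relay code assumed); note the `%` wraps rather than saturates at the `u32` boundary:

```rust
/// Fold a usize into u32 range so it can feed a gauge.
fn recordable(len: usize) -> u32 {
    ((len as u64) % u64::from(u32::MAX)) as u32
}

fn main() {
    assert_eq!(recordable(42), 42);
    // Wrapping, not clamping: a collection of exactly u32::MAX entries reports 0.
    assert_eq!(recordable(u32::MAX as usize), 0);
}
```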
@@ -295,11 +299,20 @@ impl Inner {
                     .entry(labels)
                     .or_insert_with(Summary::with_defaults);
 
-                histogram.get_inner().clear_with(|samples| {
+                let h = histogram.get_inner().clear_with(|samples| {
                     for sample in samples {
                         entry.add(*sample);
                     }
-                })
+                });
+
+                let mut total_len = 0;
+                for dist_map in d.values() {
+                    total_len += dist_map.len();
+                }
+
+                metrics::gauge!("relay.collector.distributions.size").set(recordable(total_len));
+
+                h
             }
 
         let d = self.distributions.read().unwrap().clone();
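
The new gauge reports the total number of labeled `Summary` entries across the whole nested map, not the number of metric names. A minimal sketch of that counting step, using stand-in types for the relay's `DistributionMap` (the `()` payload is a placeholder for `Summary`):

```rust
use std::collections::BTreeMap;

type Labels = Vec<(String, String)>;
// Stand-in shape: metric name -> (label set -> summary payload).
type Distributions = BTreeMap<String, BTreeMap<Labels, ()>>;

// Equivalent to the hunk's total_len loop: one count per (name, labels) pair.
fn total_len(d: &Distributions) -> usize {
    d.values().map(BTreeMap::len).sum()
}

fn main() {
    let mut d = Distributions::new();
    d.entry("latency".to_string()).or_default().insert(Vec::new(), ());
    assert_eq!(total_len(&d), 1);
}
```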
@@ -358,6 +371,7 @@ impl MemoryCollector {
     ) {
         let mut d = self.inner.descriptions.write().unwrap();
         d.entry(key.as_str().to_owned()).or_insert(description);
+        metrics::gauge!("relay.collector.descriptions.size").set(recordable(d.len()));
     }
 
     pub(crate) fn install(&self) -> Result<(), SetRecorderError<Self>> {

@@ -9,10 +9,10 @@ pub(crate) struct LastOnline {
 impl LastOnline {
     pub(crate) fn mark_seen(&self, iri: &IriStr) {
         if let Some(authority) = iri.authority_str() {
-            self.domains
-                .lock()
-                .unwrap()
-                .insert(authority.to_string(), OffsetDateTime::now_utc());
+            let mut guard = self.domains.lock().unwrap();
+            guard.insert(authority.to_string(), OffsetDateTime::now_utc());
+            metrics::gauge!("relay.last-online.size",)
+                .set(crate::collector::recordable(guard.len()));
         }
     }
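
The rewrite binds the `MutexGuard` to a local so a single lock acquisition covers both the insert and the `len()` read that feeds the gauge. A minimal sketch of the pattern, assuming a `Mutex<HashMap>` like the `domains` field here (timestamps simplified to `u64`):

```rust
use std::collections::HashMap;
use std::sync::Mutex;

struct LastOnline {
    domains: Mutex<HashMap<String, u64>>,
}

impl LastOnline {
    fn mark_seen(&self, authority: &str, now: u64) {
        // One lock: insert, then measure, under the same guard,
        // instead of locking once per operation.
        let mut guard = self.domains.lock().unwrap();
        guard.insert(authority.to_string(), now);
        let _size_for_gauge = guard.len();
    }
}

fn main() {
    let last_online = LastOnline { domains: Mutex::new(HashMap::new()) };
    last_online.mark_seen("example.com", 1);
}
```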

@@ -73,7 +73,9 @@ impl State {
     }
 
     pub(crate) fn cache(&self, object_id: IriString, actor_id: IriString) {
-        self.object_cache.write().unwrap().put(object_id, actor_id);
+        let mut guard = self.object_cache.write().unwrap();
+        guard.put(object_id, actor_id);
+        metrics::gauge!("relay.object-cache.size").set(crate::collector::recordable(guard.len()));
     }
 
     pub(crate) fn is_connected(&self, iri: &IriString) -> bool {

src/db.rs

@@ -7,7 +7,7 @@ use rsa::{
     pkcs8::{DecodePrivateKey, EncodePrivateKey},
     RsaPrivateKey,
 };
-use sled::{Batch, Tree};
+use sled::{transaction::TransactionError, Batch, Transactional, Tree};
 use std::{
     collections::{BTreeMap, HashMap},
     sync::{
@@ -283,10 +283,15 @@ impl Db {
     pub(crate) async fn check_health(&self) -> Result<(), Error> {
         let next = self.inner.healthz_counter.fetch_add(1, Ordering::Relaxed);
         self.unblock(move |inner| {
-            inner
+            let res = inner
                 .healthz
                 .insert("healthz", &next.to_be_bytes()[..])
-                .map_err(Error::from)
+                .map_err(Error::from);
+
+            metrics::gauge!("relay.db.healthz.size")
+                .set(crate::collector::recordable(inner.healthz.len()));
+
+            res
         })
         .await?;
         self.inner.healthz.flush_async().await?;
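
One caveat that applies to every gauge in this file: `sled::Tree::len` is documented to perform a full scan rather than read a cached counter, so measuring tree sizes on each write is not cheap, which may be part of why the commit is flagged BROKEN. A minimal sketch of this hunk's capture-then-measure shape against a temporary sled database:

```rust
// Insert, then read the size for the gauge, then return the captured result.
fn main() -> sled::Result<()> {
    let db = sled::Config::new().temporary(true).open()?;
    let healthz = db.open_tree("healthz")?;

    let next: u64 = 1;
    // Capture the fallible insert result first, mirroring the hunk's `res`.
    let res = healthz.insert("healthz", &next.to_be_bytes()[..]).map(|_| ());

    // The gauge's input; note Tree::len is an O(n) scan in sled.
    let _size_for_gauge = healthz.len();

    res
}
```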
@@ -349,6 +354,9 @@ impl Db {
                 .actor_id_info
                 .insert(actor_id.as_str().as_bytes(), vec)?;
 
+            metrics::gauge!("relay.db.actor-id-info.size")
+                .set(crate::collector::recordable(inner.actor_id_info.len()));
+
             Ok(())
         })
         .await
@@ -383,6 +391,9 @@ impl Db {
                 .actor_id_instance
                 .insert(actor_id.as_str().as_bytes(), vec)?;
 
+            metrics::gauge!("relay.db.actor-id-instance.size")
+                .set(crate::collector::recordable(inner.actor_id_instance.len()));
+
             Ok(())
         })
         .await
@@ -417,6 +428,9 @@ impl Db {
                 .actor_id_contact
                 .insert(actor_id.as_str().as_bytes(), vec)?;
 
+            metrics::gauge!("relay.db.actor-id-contact.size")
+                .set(crate::collector::recordable(inner.actor_id_contact.len()));
+
             Ok(())
         })
         .await
@@ -447,6 +461,12 @@ impl Db {
             inner
                 .media_url_media_id
                 .insert(url.as_str().as_bytes(), id.as_bytes())?;
 
+            metrics::gauge!("relay.db.media-id-media-url.size")
+                .set(crate::collector::recordable(inner.media_id_media_url.len()));
+            metrics::gauge!("relay.db.media-url-media-id.size")
+                .set(crate::collector::recordable(inner.media_url_media_id.len()));
+
             Ok(())
         })
         .await
@@ -538,6 +558,14 @@ impl Db {
             inner
                 .actor_id_actor
                 .insert(actor.id.as_str().as_bytes(), vec)?;
 
+            metrics::gauge!("relay.db.public-key-actor-id.size").set(crate::collector::recordable(
+                inner.public_key_id_actor_id.len(),
+            ));
+            metrics::gauge!("relay.db.actor-id-actor.size").set(crate::collector::recordable(
+                inner.public_key_id_actor_id.len(),
+            ));
+
             Ok(())
         })
         .await
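
Note that both gauges in the previous hunk read `inner.public_key_id_actor_id.len()`; the second one, `relay.db.actor-id-actor.size`, presumably meant to measure the tree it is named after. The presumed intent, as a fragment (hypothetical, not what the commit contains):

```rust
metrics::gauge!("relay.db.actor-id-actor.size").set(crate::collector::recordable(
    inner.actor_id_actor.len(),
));
```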
@@ -550,6 +578,10 @@ impl Db {
                 .connected_actor_ids
                 .remove(actor_id.as_str().as_bytes())?;
 
+            metrics::gauge!("relay.db.connected-actor-ids.size").set(crate::collector::recordable(
+                inner.connected_actor_ids.len(),
+            ));
+
             Ok(())
         })
         .await
@@ -562,6 +594,10 @@ impl Db {
                 .connected_actor_ids
                 .insert(actor_id.as_str().as_bytes(), actor_id.as_str().as_bytes())?;
 
+            metrics::gauge!("relay.db.connected-actor-ids.size").set(crate::collector::recordable(
+                inner.connected_actor_ids.len(),
+            ));
+
             Ok(())
         })
         .await
@@ -569,30 +605,60 @@ impl Db {
     pub(crate) async fn add_blocks(&self, domains: Vec<String>) -> Result<(), Error> {
         self.unblock(move |inner| {
+            let mut connected_batch = Batch::default();
+            let mut blocked_batch = Batch::default();
+            let mut allowed_batch = Batch::default();
+
             for connected in inner.connected_by_domain(&domains) {
-                inner
-                    .connected_actor_ids
-                    .remove(connected.as_str().as_bytes())?;
+                connected_batch.remove(connected.as_str().as_bytes());
             }
 
             for authority in &domains {
-                inner
-                    .blocked_domains
-                    .insert(domain_key(authority), authority.as_bytes())?;
-                inner.allowed_domains.remove(domain_key(authority))?;
+                blocked_batch.insert(domain_key(authority), authority.as_bytes());
+                allowed_batch.remove(domain_key(authority));
             }
 
+            let res = (
+                &inner.connected_actor_ids,
+                &inner.blocked_domains,
+                &inner.allowed_domains,
+            )
+                .transaction(|(connected, blocked, allowed)| {
+                    inner.connected_actor_ids.apply_batch(&connected_batch)?;
+                    inner.blocked_domains.apply_batch(&blocked_batch)?;
+                    inner.allowed_domains.apply_batch(&allowed_batch)?;
+
                     Ok(())
+                });
+
+            metrics::gauge!("relay.db.connected-actor-ids.size").set(crate::collector::recordable(
+                inner.connected_actor_ids.len(),
+            ));
+            metrics::gauge!("relay.db.blocked-domains.size")
+                .set(crate::collector::recordable(inner.blocked_domains.len()));
+            metrics::gauge!("relay.db.allowed-domains.size")
+                .set(crate::collector::recordable(inner.allowed_domains.len()));
+
+            match res {
+                Ok(()) => Ok(()),
+                Err(TransactionError::Abort(e) | TransactionError::Storage(e)) => Err(e.into()),
+            }
         })
         .await
     }
 
     pub(crate) async fn remove_blocks(&self, domains: Vec<String>) -> Result<(), Error> {
         self.unblock(move |inner| {
+            let mut blocked_batch = Batch::default();
+
             for authority in &domains {
-                inner.blocked_domains.remove(domain_key(authority))?;
+                blocked_batch.remove(domain_key(authority));
             }
 
+            inner.blocked_domains.apply_batch(blocked_batch)?;
+
+            metrics::gauge!("relay.db.blocked-domains.size")
+                .set(crate::collector::recordable(inner.blocked_domains.len()));
+
             Ok(())
         })
         .await
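
The `add_blocks` hunk swaps per-key writes for `Batch`es applied inside a multi-tree `Transactional` transaction. As written, though, the closure applies the batches through the plain `Tree` handles (`inner.*`) rather than the transactional handles it is passed (`connected`, `blocked`, `allowed`), so the writes would bypass the transaction; that mismatch is presumably part of the BROKEN label. A standalone sketch of how sled's multi-tree transaction API is meant to be used, with a temporary database and illustrative keys:

```rust
use sled::transaction::TransactionError;
use sled::Transactional;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let db = sled::Config::new().temporary(true).open()?;
    let blocked = db.open_tree("blocked")?;
    let allowed = db.open_tree("allowed")?;

    // The closure receives TransactionalTree handles; writes made through
    // them commit or abort together across both trees.
    let res: Result<(), TransactionError> =
        (&blocked, &allowed).transaction(|(blocked, allowed)| {
            blocked.insert("example.com", "example.com")?;
            allowed.remove("example.com")?;
            Ok(())
        });

    // Unpack the transaction result the same way the hunk's match does.
    match res {
        Ok(()) => Ok(()),
        Err(TransactionError::Abort(())) => Ok(()), // no explicit aborts above
        Err(TransactionError::Storage(e)) => Err(e.into()),
    }
}
```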
@@ -600,12 +666,17 @@ impl Db {
     pub(crate) async fn add_allows(&self, domains: Vec<String>) -> Result<(), Error> {
         self.unblock(move |inner| {
+            let mut allowed_batch = Batch::default();
+
             for authority in &domains {
-                inner
-                    .allowed_domains
-                    .insert(domain_key(authority), authority.as_bytes())?;
+                allowed_batch.insert(domain_key(authority), authority.as_bytes());
             }
 
+            inner.allowed_domains.apply_batch(allowed_batch)?;
+
+            metrics::gauge!("relay.db.allowed-domains.size")
+                .set(crate::collector::recordable(inner.allowed_domains.len()));
+
             Ok(())
         })
         .await
@@ -614,17 +685,30 @@ impl Db {
     pub(crate) async fn remove_allows(&self, domains: Vec<String>) -> Result<(), Error> {
         self.unblock(move |inner| {
             if inner.restricted_mode {
+                let mut connected_batch = Batch::default();
+
                 for connected in inner.connected_by_domain(&domains) {
-                    inner
-                        .connected_actor_ids
-                        .remove(connected.as_str().as_bytes())?;
+                    connected_batch.remove(connected.as_str().as_bytes());
                 }
-            }
 
-            for authority in &domains {
-                inner.allowed_domains.remove(domain_key(authority))?;
+                inner.connected_actor_ids.apply_batch(connected_batch)?;
+
+                metrics::gauge!("relay.db.connected-actor-ids.size").set(
+                    crate::collector::recordable(inner.connected_actor_ids.len()),
+                );
             }
 
+            let mut allowed_batch = Batch::default();
+
+            for authority in &domains {
+                allowed_batch.remove(domain_key(authority));
+            }
+
+            inner.allowed_domains.apply_batch(allowed_batch)?;
+
+            metrics::gauge!("relay.db.allowed-domains.size")
+                .set(crate::collector::recordable(inner.allowed_domains.len()));
+
             Ok(())
         })
         .await
@@ -665,6 +749,10 @@ impl Db {
             inner
                 .settings
                 .insert("private-key".as_bytes(), pem_pkcs8.as_bytes())?;
 
+            metrics::gauge!("relay.db.settings.size")
+                .set(crate::collector::recordable(inner.settings.len()));
+
             Ok(())
         })
         .await

@@ -40,7 +40,12 @@ fn debug_object(activity: &serde_json::Value) -> &serde_json::Value {
     object
 }
 
+pub(crate) fn build_storage() -> MetricsStorage<Storage<TokioTimer>> {
+    MetricsStorage::wrap(Storage::new(TokioTimer))
+}
+
 pub(crate) fn create_workers(
+    storage: MetricsStorage<Storage<TokioTimer>>,
     state: State,
     actors: ActorCache,
     media: MediaCache,
@@ -48,9 +53,7 @@ pub(crate) fn create_workers(
 ) -> std::io::Result<JobServer> {
     let deliver_concurrency = config.deliver_concurrency();
 
-    let queue_handle = WorkerConfig::new(
-        MetricsStorage::wrap(Storage::new(TokioTimer)),
-        move |queue_handle| {
+    let queue_handle = WorkerConfig::new(storage, move |queue_handle| {
         JobState::new(
             state.clone(),
             actors.clone(),
@@ -58,8 +61,7 @@ pub(crate) fn create_workers(
             media.clone(),
             config.clone(),
         )
-        },
-    )
+    })
     .register::<Deliver>()
     .register::<DeliverMany>()
     .register::<QueryNodeinfo>()

@@ -321,9 +321,15 @@ async fn server_main(
     let sign_spawner2 = sign_spawner.clone();
     let verify_spawner2 = verify_spawner.clone();
     let config2 = config.clone();
+    let job_store = jobs::build_storage();
     let server = HttpServer::new(move || {
-        let job_server =
-            create_workers(state.clone(), actors.clone(), media.clone(), config.clone())
+        let job_server = create_workers(
+            job_store.clone(),
+            state.clone(),
+            actors.clone(),
+            media.clone(),
+            config.clone(),
+        )
         .expect("Failed to create job server");
 
         let app = App::new()
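
Together with the `build_storage` hunk above, this change constructs the job storage once in `server_main` and hands each Actix worker a clone, instead of letting every invocation of the `HttpServer::new` factory build its own. A minimal sketch of that pattern, with a hypothetical `JobStorage` type standing in for `MetricsStorage<Storage<TokioTimer>>`:

```rust
use actix_web::{App, HttpServer};

// Hypothetical stand-in; cloning shares one underlying instance.
#[derive(Clone)]
struct JobStorage;

fn build_storage() -> JobStorage {
    JobStorage
}

#[actix_web::main]
async fn main() -> std::io::Result<()> {
    // Built once, before the server spawns its workers.
    let job_store = build_storage();

    HttpServer::new(move || {
        // The factory runs once per worker thread; each worker receives
        // a clone of the same storage rather than a fresh one.
        let _per_worker = job_store.clone();
        App::new()
    })
    .bind(("127.0.0.1", 8080))?
    .run()
    .await
}
```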