Merge pull request 'Implemented system metrics' (#472) from jpds/garage:system-metrics into main

Reviewed-on: https://git.deuxfleurs.fr/Deuxfleurs/garage/pulls/472
Reviewed-by: Alex <alex@adnab.me>
This commit is contained in:
Alex 2023-01-11 16:00:31 +00:00
commit be6b8f419d
5 changed files with 67 additions and 3 deletions

View file

@ -129,8 +129,12 @@ impl BlockManager {
.netapp
.endpoint("garage_block/manager.rs/Rpc".to_string());
let metrics =
BlockManagerMetrics::new(rc.rc.clone(), resync.queue.clone(), resync.errors.clone());
let metrics = BlockManagerMetrics::new(
compression_level,
rc.rc.clone(),
resync.queue.clone(),
resync.errors.clone(),
);
let scrub_persister = PersisterShared::new(&system.metadata_dir, "scrub_info");

View file

@ -5,6 +5,7 @@ use garage_db::counted_tree_hack::CountedTree;
/// TableMetrics reference all counter used for metrics
pub struct BlockManagerMetrics {
pub(crate) _compression_level: ValueObserver<u64>,
pub(crate) _rc_size: ValueObserver<u64>,
pub(crate) _resync_queue_len: ValueObserver<u64>,
pub(crate) _resync_errored_blocks: ValueObserver<u64>,
@ -25,9 +26,23 @@ pub struct BlockManagerMetrics {
}
impl BlockManagerMetrics {
pub fn new(rc_tree: db::Tree, resync_queue: CountedTree, resync_errors: CountedTree) -> Self {
pub fn new(
compression_level: Option<i32>,
rc_tree: db::Tree,
resync_queue: CountedTree,
resync_errors: CountedTree,
) -> Self {
let meter = global::meter("garage_model/block");
Self {
_compression_level: meter
.u64_value_observer("block.compression_level", move |observer| {
match compression_level {
Some(v) => observer.observe(v as u64, &[]),
None => observer.observe(0 as u64, &[]),
}
})
.with_description("Garage compression level for node")
.init(),
_rc_size: meter
.u64_value_observer("block.rc_size", move |observer| {
if let Ok(Some(v)) = rc_tree.fast_len() {

View file

@ -17,3 +17,5 @@ mod metrics;
pub mod rpc_helper;
pub use rpc_helper::*;
pub mod system_metrics;

View file

@ -38,6 +38,9 @@ use crate::replication_mode::*;
use crate::ring::*;
use crate::rpc_helper::*;
#[cfg(feature = "metrics")]
use crate::system_metrics::*;
const DISCOVERY_INTERVAL: Duration = Duration::from_secs(60);
const STATUS_EXCHANGE_INTERVAL: Duration = Duration::from_secs(10);
@ -103,6 +106,8 @@ pub struct System {
consul_discovery: Option<ConsulDiscovery>,
#[cfg(feature = "kubernetes-discovery")]
kubernetes_discovery: Option<KubernetesDiscoveryConfig>,
#[cfg(feature = "metrics")]
metrics: SystemMetrics,
replication_mode: ReplicationMode,
replication_factor: usize,
@ -275,6 +280,9 @@ impl System {
cluster_layout_staging_hash: cluster_layout.staging_hash,
};
#[cfg(feature = "metrics")]
let metrics = SystemMetrics::new(replication_factor);
let ring = Ring::new(cluster_layout, replication_factor);
let (update_ring, ring) = watch::channel(Arc::new(ring));
@ -365,6 +373,8 @@ impl System {
consul_discovery,
#[cfg(feature = "kubernetes-discovery")]
kubernetes_discovery: config.kubernetes_discovery.clone(),
#[cfg(feature = "metrics")]
metrics,
ring,
update_ring: Mutex::new(update_ring),

33
src/rpc/system_metrics.rs Normal file
View file

@ -0,0 +1,33 @@
use opentelemetry::{global, metrics::*, KeyValue};
/// SystemMetrics holds the value observers used for system-level metrics
/// (build information and the configured replication factor).
///
/// NOTE(review): the fields are underscore-prefixed and never read in this
/// file; presumably they are stored only to keep the observers registered —
/// confirm against the opentelemetry version in use.
pub struct SystemMetrics {
// Observer registered as "garage_build_info"; reports the constant 1 with a
// "version" attribute.
pub(crate) _garage_build_info: ValueObserver<u64>,
// Observer registered as "garage_replication_factor"; reports the value
// passed to `SystemMetrics::new`.
pub(crate) _replication_factor: ValueObserver<u64>,
}
impl SystemMetrics {
    /// Builds the system metrics on the global `garage_system` meter.
    ///
    /// Two `u64` value observers are registered:
    /// - `garage_build_info`: always observes `1`, with a `version` attribute
    ///   taken from `garage_util::version::garage_version()`.
    /// - `garage_replication_factor`: observes the `replication_factor`
    ///   given here, converted to `u64`.
    pub fn new(replication_factor: usize) -> Self {
        let meter = global::meter("garage_system");

        // Build info is exposed as a constant sample; the useful data is
        // carried by the "version" attribute.
        let build_info = meter
            .u64_value_observer("garage_build_info", move |observer| {
                let attributes = [KeyValue::new(
                    "version",
                    garage_util::version::garage_version(),
                )];
                observer.observe(1, &attributes)
            })
            .with_description("Garage build info")
            .init();

        // The replication factor is captured by value when the observer is
        // created.
        let replication = meter
            .u64_value_observer("garage_replication_factor", move |observer| {
                let value = replication_factor as u64;
                observer.observe(value, &[])
            })
            .with_description("Garage replication factor setting")
            .init();

        Self {
            _garage_build_info: build_info,
            _replication_factor: replication,
        }
    }
}