Update dependencies and add admin module with metrics

- Global dependencies updated in Cargo.lock
- New module created in src/admin to host:
  - the (future) admin REST API
  - the metric collection
- add configuration block

No metrics implemented yet
This commit is contained in:
mricher 2021-09-28 08:57:20 +02:00 committed by Alex Auvolat
parent 9d44127245
commit e349af13a7
No known key found for this signature in database
GPG key ID: 0E496D15096376BE
9 changed files with 284 additions and 2 deletions

81
Cargo.lock generated
View file

@ -403,6 +403,16 @@ dependencies = [
"cfg-if", "cfg-if",
] ]
[[package]]
name = "crossbeam-channel"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e54ea8bc3fb1ee042f5aace6e3c6e025d3874866da222930f70ce62aceba0bfa"
dependencies = [
"cfg-if",
"crossbeam-utils",
]
[[package]] [[package]]
name = "crossbeam-epoch" name = "crossbeam-epoch"
version = "0.9.7" version = "0.9.7"
@ -766,6 +776,7 @@ dependencies = [
"bytes 1.1.0", "bytes 1.1.0",
"futures", "futures",
"futures-util", "futures-util",
"garage_admin",
"garage_api", "garage_api",
"garage_model 0.6.0", "garage_model 0.6.0",
"garage_rpc 0.6.0", "garage_rpc 0.6.0",
@ -791,6 +802,23 @@ dependencies = [
"toml", "toml",
] ]
[[package]]
name = "garage_admin"
version = "0.6.0"
dependencies = [
"futures",
"futures-util",
"garage_model 0.6.0",
"garage_util 0.6.0",
"http",
"hyper",
"lazy_static",
"log",
"opentelemetry",
"opentelemetry-prometheus",
"prometheus",
]
[[package]] [[package]]
name = "garage_api" name = "garage_api"
version = "0.6.0" version = "0.6.0"
@ -1824,6 +1852,38 @@ dependencies = [
"vcpkg", "vcpkg",
] ]
[[package]]
name = "opentelemetry"
version = "0.17.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6105e89802af13fdf48c49d7646d3b533a70e536d818aae7e78ba0433d01acb8"
dependencies = [
"async-trait",
"crossbeam-channel",
"dashmap",
"fnv",
"futures-channel",
"futures-executor",
"futures-util",
"js-sys",
"lazy_static",
"percent-encoding",
"pin-project 1.0.10",
"rand",
"thiserror",
]
[[package]]
name = "opentelemetry-prometheus"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9328977e479cebe12ce0d3fcecdaea4721d234895a9440c5b5dfd113f0594ac6"
dependencies = [
"opentelemetry",
"prometheus",
"protobuf",
]
[[package]] [[package]]
name = "ordered-float" name = "ordered-float"
version = "2.10.0" version = "2.10.0"
@ -2076,6 +2136,27 @@ dependencies = [
"unicode-xid", "unicode-xid",
] ]
[[package]]
name = "prometheus"
version = "0.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b7f64969ffd5dd8f39bd57a68ac53c163a095ed9d0fb707146da1b27025a3504"
dependencies = [
"cfg-if",
"fnv",
"lazy_static",
"memchr",
"parking_lot",
"protobuf",
"thiserror",
]
[[package]]
name = "protobuf"
version = "2.27.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf7e6d18738ecd0902d30d1ad232c9125985a3422929b16c65517b38adc14f96"
[[package]] [[package]]
name = "quick-error" name = "quick-error"
version = "1.2.3" version = "1.2.3"

View file

@ -4,9 +4,10 @@ members = [
"src/rpc", "src/rpc",
"src/table", "src/table",
"src/model", "src/model",
"src/admin",
"src/api", "src/api",
"src/web", "src/web",
"src/garage", "src/garage"
] ]
[profile.dev] [profile.dev]

View file

@ -44,6 +44,9 @@ root_domain = ".s3.garage.localhost"
bind_addr = "0.0.0.0:$((3920+$count))" bind_addr = "0.0.0.0:$((3920+$count))"
root_domain = ".web.garage.localhost" root_domain = ".web.garage.localhost"
index = "index.html" index = "index.html"
[admin_api]
bind_addr = "0.0.0.0:$((9900+$count))"
EOF EOF
echo -en "$LABEL configuration written to $CONF_PATH\n" echo -en "$LABEL configuration written to $CONF_PATH\n"

28
src/admin/Cargo.toml Normal file
View file

@ -0,0 +1,28 @@
[package]
name = "garage_admin"
version = "0.6.0"
authors = ["Maximilien Richer <code@mricher.fr>"]
edition = "2018"
license = "AGPL-3.0"
description = "Administration and metrics REST HTTP server for Garage"
repository = "https://git.deuxfleurs.fr/Deuxfleurs/garage"
[lib]
path = "lib.rs"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
garage_model = { version = "0.6.0", path = "../model" }
garage_util = { version = "0.6.0", path = "../util" }
futures = "0.3"
futures-util = "0.3"
http = "0.2"
hyper = "0.14"
log = "0.4"
opentelemetry = "0.17"
opentelemetry-prometheus = "0.10"
prometheus = "0.13"
lazy_static = "1.4"

6
src/admin/lib.rs Normal file
View file

@ -0,0 +1,6 @@
//! Crate for handling the admin and metric HTTP APIs
#[macro_use]
extern crate log;
extern crate lazy_static;
pub mod metrics;

141
src/admin/metrics.rs Normal file
View file

@ -0,0 +1,141 @@
use hyper::{
header::CONTENT_TYPE,
service::{make_service_fn, service_fn},
Body, Method, Request, Response, Server,
};
use lazy_static::lazy_static;
use opentelemetry::{
global,
metrics::{BoundCounter, BoundValueRecorder},
KeyValue,
};
use opentelemetry_prometheus::PrometheusExporter;
use prometheus::{Encoder, TextEncoder};
use std::convert::Infallible;
use std::sync::Arc;
use std::time::SystemTime;
use futures::future::*;
use garage_model::garage::Garage;
use garage_util::error::Error as GarageError;
lazy_static! {
// This defines the differennt tags that will be referenced by the object
static ref HANDLER_ALL: [KeyValue; 1] = [KeyValue::new("handler", "all")];
}
// serve_req on metric endpoint
async fn serve_req(
req: Request<Body>,
admin_server: Arc<AdminServer>,
) -> Result<Response<Body>, hyper::Error> {
println!("Receiving request at path {}", req.uri());
let request_start = SystemTime::now();
admin_server.metrics.http_counter.add(1);
let response = match (req.method(), req.uri().path()) {
(&Method::GET, "/metrics") => {
let mut buffer = vec![];
let encoder = TextEncoder::new();
let metric_families = admin_server.exporter.registry().gather();
encoder.encode(&metric_families, &mut buffer).unwrap();
admin_server
.metrics
.http_body_gauge
.record(buffer.len() as u64);
Response::builder()
.status(200)
.header(CONTENT_TYPE, encoder.format_type())
.body(Body::from(buffer))
.unwrap()
}
_ => Response::builder()
.status(404)
.body(Body::from("Not implemented"))
.unwrap(),
};
admin_server
.metrics
.http_req_histogram
.record(request_start.elapsed().map_or(0.0, |d| d.as_secs_f64()));
Ok(response)
}
// AdminServer hold the admin server internal admin_server and the metric exporter
pub struct AdminServer {
exporter: PrometheusExporter,
metrics: AdminServerMetrics,
}
// GarageMetricadmin_server holds the metrics counter definition for Garage
// FIXME: we would rather have that split up among the different libraries?
struct AdminServerMetrics {
http_counter: BoundCounter<u64>,
http_body_gauge: BoundValueRecorder<u64>,
http_req_histogram: BoundValueRecorder<f64>,
bucket_v2_merkle_updater_todo_queue_length: BoundValueRecorder<f64>,
}
impl AdminServer {
/// init initilialize the AdminServer and background metric server
pub fn init() -> AdminServer {
let exporter = opentelemetry_prometheus::exporter().init();
let meter = global::meter("garage/admin_server");
AdminServer {
exporter,
metrics: AdminServerMetrics {
http_counter: meter
.u64_counter("router.http_requests_total")
.with_description("Total number of HTTP requests made.")
.init()
.bind(HANDLER_ALL.as_ref()),
http_body_gauge: meter
.u64_value_recorder("example.http_response_size_bytes")
.with_description("The metrics HTTP response sizes in bytes.")
.init()
.bind(HANDLER_ALL.as_ref()),
http_req_histogram: meter
.f64_value_recorder("example.http_request_duration_seconds")
.with_description("The HTTP request latencies in seconds.")
.init()
.bind(HANDLER_ALL.as_ref()),
bucket_v2_merkle_updater_todo_queue_length: meter
.f64_value_recorder("bucket_v2.merkle_updater.todo_queue_length")
.with_description("Bucket merkle updater TODO queue length.")
.init()
.bind(HANDLER_ALL.as_ref()),
},
}
}
/// run execute the admin server on the designated HTTP port and listen for requests
pub async fn run(
self,
garage: Arc<Garage>,
shutdown_signal: impl Future<Output = ()>,
) -> Result<(), GarageError> {
let admin_server = Arc::new(self);
// For every connection, we must make a `Service` to handle all
// incoming HTTP requests on said connection.
let make_svc = make_service_fn(move |_conn| {
let admin_server = admin_server.clone();
// This is the `Service` that will handle the connection.
// `service_fn` is a helper to convert a function that
// returns a Response into a `Service`.
async move {
Ok::<_, Infallible>(service_fn(move |req| serve_req(req, admin_server.clone())))
}
});
let addr = &garage.config.admin_api.bind_addr;
let server = Server::bind(&addr).serve(make_svc);
let graceful = server.with_graceful_shutdown(shutdown_signal);
info!("Admin server listening on http://{}", addr);
graceful.await?;
Ok(())
}
}

View file

@ -27,6 +27,7 @@ garage_rpc = { version = "0.6.0", path = "../rpc" }
garage_table = { version = "0.6.0", path = "../table" } garage_table = { version = "0.6.0", path = "../table" }
garage_util = { version = "0.6.0", path = "../util" } garage_util = { version = "0.6.0", path = "../util" }
garage_web = { version = "0.6.0", path = "../web" } garage_web = { version = "0.6.0", path = "../web" }
garage_admin = { version = "0.6.0", path = "../admin" }
bytes = "1.0" bytes = "1.0"
git-version = "0.3.4" git-version = "0.3.4"

View file

@ -6,6 +6,7 @@ use garage_util::background::*;
use garage_util::config::*; use garage_util::config::*;
use garage_util::error::Error; use garage_util::error::Error;
use garage_admin::metrics::*;
use garage_api::run_api_server; use garage_api::run_api_server;
use garage_model::garage::Garage; use garage_model::garage::Garage;
use garage_web::run_web_server; use garage_web::run_web_server;
@ -34,6 +35,9 @@ pub async fn run_server(config_file: PathBuf) -> Result<(), Error> {
.open() .open()
.expect("Unable to open sled DB"); .expect("Unable to open sled DB");
info!("Configure and run admin web server...");
let admin_server_init = AdminServer::init();
info!("Initializing background runner..."); info!("Initializing background runner...");
let watch_cancel = netapp::util::watch_ctrl_c(); let watch_cancel = netapp::util::watch_ctrl_c();
let (background, await_background_done) = BackgroundRunner::new(16, watch_cancel.clone()); let (background, await_background_done) = BackgroundRunner::new(16, watch_cancel.clone());
@ -43,7 +47,7 @@ pub async fn run_server(config_file: PathBuf) -> Result<(), Error> {
let run_system = tokio::spawn(garage.system.clone().run(watch_cancel.clone())); let run_system = tokio::spawn(garage.system.clone().run(watch_cancel.clone()));
info!("Crate admin RPC handler..."); info!("Create admin RPC handler...");
AdminRpcHandler::new(garage.clone()); AdminRpcHandler::new(garage.clone());
info!("Initializing API server..."); info!("Initializing API server...");
@ -58,6 +62,10 @@ pub async fn run_server(config_file: PathBuf) -> Result<(), Error> {
wait_from(watch_cancel.clone()), wait_from(watch_cancel.clone()),
)); ));
info!("Configure and run admin web server...");
let admin_server =
tokio::spawn(admin_server_init.run(garage.clone(), wait_from(watch_cancel.clone())));
// Stuff runs // Stuff runs
// When a cancel signal is sent, stuff stops // When a cancel signal is sent, stuff stops
@ -67,6 +75,9 @@ pub async fn run_server(config_file: PathBuf) -> Result<(), Error> {
if let Err(e) = web_server.await? { if let Err(e) = web_server.await? {
warn!("Web server exited with error: {}", e); warn!("Web server exited with error: {}", e);
} }
if let Err(e) = admin_server.await? {
warn!("Admin web server exited with error: {}", e);
}
// Remove RPC handlers for system to break reference cycles // Remove RPC handlers for system to break reference cycles
garage.system.netapp.drop_all_handlers(); garage.system.netapp.drop_all_handlers();

View file

@ -73,6 +73,9 @@ pub struct Config {
/// Configuration for serving files as normal web server /// Configuration for serving files as normal web server
pub s3_web: WebConfig, pub s3_web: WebConfig,
/// Configuration for the admin API endpoint
pub admin_api: AdminConfig,
} }
/// Configuration for S3 api /// Configuration for S3 api
@ -96,6 +99,13 @@ pub struct WebConfig {
pub root_domain: String, pub root_domain: String,
} }
/// Configuration for the admin and monitoring HTTP API
#[derive(Deserialize, Debug, Clone)]
pub struct AdminConfig {
/// Address and port to bind for admin API serving
pub bind_addr: SocketAddr,
}
fn default_sled_cache_capacity() -> u64 { fn default_sled_cache_capacity() -> u64 {
128 * 1024 * 1024 128 * 1024 * 1024
} }