Refactoring: rename config files, make modifications less invasive

This commit is contained in:
Alex Auvolat 2022-02-22 15:25:13 +01:00
parent d9a35359bf
commit dc8d0496cc
No known key found for this signature in database
GPG key ID: 0E496D15096376BE
5 changed files with 155 additions and 141 deletions

View file

@ -45,8 +45,8 @@ bind_addr = "0.0.0.0:$((3920+$count))"
root_domain = ".web.garage.localhost" root_domain = ".web.garage.localhost"
index = "index.html" index = "index.html"
[admin_api] [admin]
bind_addr = "0.0.0.0:$((9900+$count))" api_bind_addr = "0.0.0.0:$((9900+$count))"
EOF EOF
echo -en "$LABEL configuration written to $CONF_PATH\n" echo -en "$LABEL configuration written to $CONF_PATH\n"

View file

@ -47,7 +47,7 @@ pub async fn run_server(config_file: PathBuf) -> Result<(), Error> {
let garage = Garage::new(config.clone(), db, background); let garage = Garage::new(config.clone(), db, background);
info!("Initialize tracing..."); info!("Initialize tracing...");
if let Some(export_to) = config.admin_api.otlp_export_traces_to { if let Some(export_to) = config.admin.trace_sink {
init_tracing(&export_to, garage.system.id)?; init_tracing(&export_to, garage.system.id)?;
} }
@ -70,7 +70,7 @@ pub async fn run_server(config_file: PathBuf) -> Result<(), Error> {
info!("Configure and run admin web server..."); info!("Configure and run admin web server...");
let admin_server = tokio::spawn( let admin_server = tokio::spawn(
admin_server_init.run(config.admin_api.bind_addr, wait_from(watch_cancel.clone())), admin_server_init.run(config.admin.api_bind_addr, wait_from(watch_cancel.clone())),
); );
// Stuff runs // Stuff runs

View file

@ -66,8 +66,8 @@ bind_addr = "127.0.0.1:{web_port}"
root_domain = ".web.garage" root_domain = ".web.garage"
index = "index.html" index = "index.html"
[admin_api] [admin]
bind_addr = "127.0.0.1:{admin_port}" api_bind_addr = "127.0.0.1:{admin_port}"
"#, "#,
path = path.display(), path = path.display(),
secret = GARAGE_TEST_SECRET, secret = GARAGE_TEST_SECRET,

View file

@ -238,154 +238,168 @@ impl RpcHelper {
span.set_attribute(KeyValue::new("to", format!("{:?}", to))); span.set_attribute(KeyValue::new("to", format!("{:?}", to)));
span.set_attribute(KeyValue::new("quorum", quorum as i64)); span.set_attribute(KeyValue::new("quorum", quorum as i64));
async { self.try_call_many_internal(endpoint, to, msg, strategy, quorum)
let msg = Arc::new(msg); .with_context(Context::current_with_span(span))
.await
}
// Build future for each request async fn try_call_many_internal<M, H, S>(
// They are not started now: they are added below in a FuturesUnordered &self,
// object that will take care of polling them (see below) endpoint: &Arc<Endpoint<M, H>>,
let requests = to.iter().cloned().map(|to| { to: &[Uuid],
let self2 = self.clone(); msg: M,
let msg = msg.clone(); strategy: RequestStrategy,
let endpoint2 = endpoint.clone(); quorum: usize,
(to, async move { ) -> Result<Vec<S>, Error>
self2.call_arc(&endpoint2, to, msg, strategy).await where
M: Rpc<Response = Result<S, Error>> + 'static,
H: EndpointHandler<M> + 'static,
S: Send + 'static,
{
let msg = Arc::new(msg);
// Build future for each request
// They are not started now: they are added below in a FuturesUnordered
// object that will take care of polling them (see below)
let requests = to.iter().cloned().map(|to| {
let self2 = self.clone();
let msg = msg.clone();
let endpoint2 = endpoint.clone();
(to, async move {
self2.call_arc(&endpoint2, to, msg, strategy).await
})
});
// Vectors in which success results and errors will be collected
let mut successes = vec![];
let mut errors = vec![];
if strategy.rs_interrupt_after_quorum {
// Case 1: once quorum is reached, other requests don't matter.
// What we do here is only send the required number of requests
// to reach a quorum, priorizing nodes with the lowest latency.
// When there are errors, we start new requests to compensate.
// Retrieve some status variables that we will use to sort requests
let peer_list = self.0.fullmesh.get_peer_list();
let ring: Arc<Ring> = self.0.ring.borrow().clone();
let our_zone = match ring.layout.node_role(&self.0.our_node_id) {
Some(pc) => &pc.zone,
None => "",
};
// Augment requests with some information used to sort them.
// The tuples are as follows:
// (is another node?, is another zone?, latency, node ID, request future)
// We store all of these tuples in a vec that we can sort.
// By sorting this vec, we priorize ourself, then nodes in the same zone,
// and within a same zone we priorize nodes with the lowest latency.
let mut requests = requests
.map(|(to, fut)| {
let peer_zone = match ring.layout.node_role(&to) {
Some(pc) => &pc.zone,
None => "",
};
let peer_avg_ping = peer_list
.iter()
.find(|x| x.id.as_ref() == to.as_slice())
.map(|pi| pi.avg_ping)
.flatten()
.unwrap_or_else(|| Duration::from_secs(1));
(
to != self.0.our_node_id,
peer_zone != our_zone,
peer_avg_ping,
to,
fut,
)
}) })
}); .collect::<Vec<_>>();
// Vectors in which success results and errors will be collected // Sort requests by (priorize ourself, priorize same zone, priorize low latency)
let mut successes = vec![]; requests
let mut errors = vec![]; .sort_by_key(|(diffnode, diffzone, ping, _to, _fut)| (*diffnode, *diffzone, *ping));
if strategy.rs_interrupt_after_quorum { // Make an iterator to take requests in their sorted order
// Case 1: once quorum is reached, other requests don't matter. let mut requests = requests.into_iter();
// What we do here is only send the required number of requests
// to reach a quorum, priorizing nodes with the lowest latency.
// When there are errors, we start new requests to compensate.
// Retrieve some status variables that we will use to sort requests // resp_stream will contain all of the requests that are currently in flight.
let peer_list = self.0.fullmesh.get_peer_list(); // (for the moment none, they will be added in the loop below)
let ring: Arc<Ring> = self.0.ring.borrow().clone(); let mut resp_stream = FuturesUnordered::new();
let our_zone = match ring.layout.node_role(&self.0.our_node_id) {
Some(pc) => &pc.zone,
None => "",
};
// Augment requests with some information used to sort them. // Do some requests and collect results
// The tuples are as follows: 'request_loop: while successes.len() < quorum {
// (is another node?, is another zone?, latency, node ID, request future) // If the current set of requests that are running is not enough to possibly
// We store all of these tuples in a vec that we can sort. // reach quorum, start some new requests.
// By sorting this vec, we priorize ourself, then nodes in the same zone, while successes.len() + resp_stream.len() < quorum {
// and within a same zone we priorize nodes with the lowest latency. if let Some((_, _, _, req_to, fut)) = requests.next() {
let mut requests = requests let tracer = opentelemetry::global::tracer("garage");
.map(|(to, fut)| { let span = tracer.start(format!("RPC to {:?}", req_to));
let peer_zone = match ring.layout.node_role(&to) { resp_stream.push(tokio::spawn(
Some(pc) => &pc.zone, fut.with_context(Context::current_with_span(span)),
None => "", ));
}; } else {
let peer_avg_ping = peer_list // If we have no request to add, we know that we won't ever
.iter() // reach quorum: bail out now.
.find(|x| x.id.as_ref() == to.as_slice()) break 'request_loop;
.map(|pi| pi.avg_ping)
.flatten()
.unwrap_or_else(|| Duration::from_secs(1));
(
to != self.0.our_node_id,
peer_zone != our_zone,
peer_avg_ping,
to,
fut,
)
})
.collect::<Vec<_>>();
// Sort requests by (priorize ourself, priorize same zone, priorize low latency)
requests.sort_by_key(|(diffnode, diffzone, ping, _to, _fut)| {
(*diffnode, *diffzone, *ping)
});
// Make an iterator to take requests in their sorted order
let mut requests = requests.into_iter();
// resp_stream will contain all of the requests that are currently in flight.
// (for the moment none, they will be added in the loop below)
let mut resp_stream = FuturesUnordered::new();
// Do some requests and collect results
'request_loop: while successes.len() < quorum {
// If the current set of requests that are running is not enough to possibly
// reach quorum, start some new requests.
while successes.len() + resp_stream.len() < quorum {
if let Some((_, _, _, req_to, fut)) = requests.next() {
let span = tracer.start(format!("RPC to {:?}", req_to));
resp_stream.push(tokio::spawn(
fut.with_context(Context::current_with_span(span)),
));
} else {
// If we have no request to add, we know that we won't ever
// reach quorum: bail out now.
break 'request_loop;
}
}
assert!(!resp_stream.is_empty()); // because of loop invariants
// Wait for one request to terminate
match resp_stream.next().await.unwrap().unwrap() {
Ok(msg) => {
successes.push(msg);
}
Err(e) => {
errors.push(e);
}
} }
} }
} else { assert!(!resp_stream.is_empty()); // because of loop invariants
// Case 2: all of the requests need to be sent in all cases,
// and need to terminate. (this is the case for writes that
// must be spread to n nodes)
// Just start all the requests in parallel and return as soon
// as the quorum is reached.
let mut resp_stream = requests
.map(|(_, fut)| fut)
.collect::<FuturesUnordered<_>>();
while let Some(resp) = resp_stream.next().await { // Wait for one request to terminate
match resp { match resp_stream.next().await.unwrap().unwrap() {
Ok(msg) => { Ok(msg) => {
successes.push(msg); successes.push(msg);
if successes.len() >= quorum { }
break; Err(e) => {
} errors.push(e);
}
Err(e) => {
errors.push(e);
}
} }
} }
}
} else {
// Case 2: all of the requests need to be sent in all cases,
// and need to terminate. (this is the case for writes that
// must be spread to n nodes)
// Just start all the requests in parallel and return as soon
// as the quorum is reached.
let mut resp_stream = requests
.map(|(_, fut)| fut)
.collect::<FuturesUnordered<_>>();
if !resp_stream.is_empty() { while let Some(resp) = resp_stream.next().await {
// Continue remaining requests in background. match resp {
// Continue the remaining requests immediately using tokio::spawn Ok(msg) => {
// but enqueue a task in the background runner successes.push(msg);
// to ensure that the process won't exit until the requests are done if successes.len() >= quorum {
// (if we had just enqueued the resp_stream.collect directly in the background runner, break;
// the requests might have been put on hold in the background runner's queue, }
// in which case they might timeout or otherwise fail) }
let wait_finished_fut = tokio::spawn(async move { Err(e) => {
resp_stream.collect::<Vec<Result<_, _>>>().await; errors.push(e);
}); }
self.0.background.spawn(wait_finished_fut.map(|_| Ok(())));
} }
} }
if successes.len() >= quorum { if !resp_stream.is_empty() {
Ok(successes) // Continue remaining requests in background.
} else { // Continue the remaining requests immediately using tokio::spawn
let errors = errors.iter().map(|e| format!("{}", e)).collect::<Vec<_>>(); // but enqueue a task in the background runner
Err(Error::Quorum(quorum, successes.len(), to.len(), errors)) // to ensure that the process won't exit until the requests are done
// (if we had just enqueued the resp_stream.collect directly in the background runner,
// the requests might have been put on hold in the background runner's queue,
// in which case they might timeout or otherwise fail)
let wait_finished_fut = tokio::spawn(async move {
resp_stream.collect::<Vec<Result<_, _>>>().await;
});
self.0.background.spawn(wait_finished_fut.map(|_| Ok(())));
} }
} }
.with_context(Context::current_with_span(span))
.await if successes.len() >= quorum {
Ok(successes)
} else {
let errors = errors.iter().map(|e| format!("{}", e)).collect::<Vec<_>>();
Err(Error::Quorum(quorum, successes.len(), to.len(), errors))
}
} }
} }

View file

@ -75,7 +75,7 @@ pub struct Config {
pub s3_web: WebConfig, pub s3_web: WebConfig,
/// Configuration for the admin API endpoint /// Configuration for the admin API endpoint
pub admin_api: AdminConfig, pub admin: AdminConfig,
} }
/// Configuration for S3 api /// Configuration for S3 api
@ -103,9 +103,9 @@ pub struct WebConfig {
#[derive(Deserialize, Debug, Clone)] #[derive(Deserialize, Debug, Clone)]
pub struct AdminConfig { pub struct AdminConfig {
/// Address and port to bind for admin API serving /// Address and port to bind for admin API serving
pub bind_addr: SocketAddr, pub api_bind_addr: SocketAddr,
/// OTLP server to where to export traces /// OTLP server to where to export traces
pub otlp_export_traces_to: Option<String>, pub trace_sink: Option<String>,
} }
fn default_sled_cache_capacity() -> u64 { fn default_sled_cache_capacity() -> u64 {