Performance improvements:

- reduce contention on mutation_lock by splitting it into an array of 256 locks, selected by the first byte of the block hash
- better LMDB defaults (max_readers set to 500; MdbNoSync and MdbNoMetaSync flags enabled)
This commit is contained in:
Alex Auvolat 2022-07-18 18:40:57 +02:00
parent 1b2e1296eb
commit 2f111e6b3d
No known key found for this signature in database
GPG key ID: 0E496D15096376BE
4 changed files with 59 additions and 28 deletions

View file

@ -8,6 +8,11 @@ use hyper::{Request, Response};
use md5::{digest::generic_array::*, Digest as Md5Digest, Md5};
use sha2::Sha256;
use opentelemetry::{
trace::{FutureExt as OtelFutureExt, TraceContextExt, Tracer},
Context,
};
use garage_table::*;
use garage_util::async_hash::*;
use garage_util::data::*;
@ -279,12 +284,21 @@ async fn read_and_put_blocks<S: Stream<Item = Result<Bytes, Error>> + Unpin>(
first_block_hash: Hash,
chunker: &mut StreamChunker<S>,
) -> Result<(u64, GenericArray<u8, typenum::U16>, Hash), Error> {
let tracer = opentelemetry::global::tracer("garage");
let first_block = Bytes::from(first_block);
let md5hasher = AsyncHasher::<Md5>::new();
let sha256hasher = AsyncHasher::<Sha256>::new();
md5hasher.update(first_block.clone());
sha256hasher.update(first_block.clone());
futures::future::join(
md5hasher.update(first_block.clone()),
sha256hasher.update(first_block.clone()),
)
.with_context(Context::current_with_span(
tracer.start("Hash first block (md5, sha256)"),
))
.await;
let mut next_offset = first_block.len();
let mut put_curr_version_block = put_block_meta(
@ -307,9 +321,15 @@ async fn read_and_put_blocks<S: Stream<Item = Result<Bytes, Error>> + Unpin>(
)?;
if let Some(block) = next_block {
let block = Bytes::from(block);
md5hasher.update(block.clone());
sha256hasher.update(block.clone());
let block_hash = async_blake2sum(block.clone()).await;
let (_, _, block_hash) = futures::future::join3(
md5hasher.update(block.clone()),
sha256hasher.update(block.clone()),
async_blake2sum(block.clone()),
)
.with_context(Context::current_with_span(
tracer.start("Hash block (md5, sha256, blake2)"),
))
.await;
let block_len = block.len();
put_curr_version_block = put_block_meta(
garage,

View file

@ -93,7 +93,7 @@ pub struct BlockManager {
compression_level: Option<i32>,
background_tranquility: u32,
mutation_lock: Mutex<BlockManagerLocked>,
mutation_lock: [Mutex<BlockManagerLocked>; 256],
pub(crate) rc: BlockRc,
@ -150,8 +150,6 @@ impl BlockManager {
.netapp
.endpoint("garage_block/manager.rs/Rpc".to_string());
let manager_locked = BlockManagerLocked();
let metrics = BlockManagerMetrics::new(resync_queue.clone(), resync_errors.clone());
let block_manager = Arc::new(Self {
@ -159,7 +157,7 @@ impl BlockManager {
data_dir,
compression_level,
background_tranquility,
mutation_lock: Mutex::new(manager_locked),
mutation_lock: [(); 256].map(|_| Mutex::new(BlockManagerLocked())),
rc,
resync_queue,
resync_notify: Notify::new(),
@ -313,14 +311,21 @@ impl BlockManager {
/// Write a block to disk
async fn write_block(&self, hash: &Hash, data: &DataBlock) -> Result<BlockRpc, Error> {
let tracer = opentelemetry::global::tracer("garage");
let write_size = data.inner_buffer().len() as u64;
let res = self
.mutation_lock
let res = self.mutation_lock[hash.as_slice()[0] as usize]
.lock()
.with_context(Context::current_with_span(
tracer.start("Acquire mutation_lock"),
))
.await
.write_block(hash, data, self)
.bound_record_duration(&self.metrics.block_write_duration)
.with_context(Context::current_with_span(
tracer.start("BlockManagerLocked::write_block"),
))
.await?;
self.metrics.bytes_written.add(write_size);
@ -370,7 +375,7 @@ impl BlockManager {
if data.verify(*hash).is_err() {
self.metrics.corruption_counter.add(1);
self.mutation_lock
self.mutation_lock[hash.as_slice()[0] as usize]
.lock()
.await
.move_block_to_corrupted(hash, self)
@ -384,8 +389,7 @@ impl BlockManager {
/// Check if this node should have a block, but don't actually have it
async fn need_block(&self, hash: &Hash) -> Result<bool, Error> {
let BlockStatus { exists, needed } = self
.mutation_lock
let BlockStatus { exists, needed } = self.mutation_lock[hash.as_slice()[0] as usize]
.lock()
.await
.check_block_status(hash, self)
@ -608,8 +612,7 @@ impl BlockManager {
}
async fn resync_block(&self, hash: &Hash) -> Result<(), Error> {
let BlockStatus { exists, needed } = self
.mutation_lock
let BlockStatus { exists, needed } = self.mutation_lock[hash.as_slice()[0] as usize]
.lock()
.await
.check_block_status(hash, self)
@ -694,7 +697,7 @@ impl BlockManager {
who.len()
);
self.mutation_lock
self.mutation_lock[hash.as_slice()[0] as usize]
.lock()
.await
.delete_if_unneeded(hash, self)

View file

@ -104,11 +104,16 @@ impl Garage {
std::fs::create_dir_all(&db_path).expect("Unable to create LMDB data directory");
let map_size = garage_db::lmdb_adapter::recommended_map_size();
let db = db::lmdb_adapter::heed::EnvOpenOptions::new()
.max_dbs(100)
.map_size(map_size)
.open(&db_path)
.expect("Unable to open LMDB DB");
use db::lmdb_adapter::heed;
let mut env_builder = heed::EnvOpenOptions::new();
env_builder.max_dbs(100);
env_builder.max_readers(500);
env_builder.map_size(map_size);
unsafe {
env_builder.flag(heed::flags::Flags::MdbNoSync);
env_builder.flag(heed::flags::Flags::MdbNoMetaSync);
}
let db = env_builder.open(&db_path).expect("Unable to open LMDB DB");
db::lmdb_adapter::LmdbDb::init(db)
}
e => {

View file

@ -1,7 +1,7 @@
use bytes::Bytes;
use digest::Digest;
use tokio::sync::mpsc;
use tokio::sync::{mpsc, oneshot};
use tokio::task::JoinHandle;
use crate::data::*;
@ -27,25 +27,28 @@ pub async fn async_blake2sum(data: Bytes) -> Hash {
// ----
/// Incremental hasher for a `Digest` type `D` whose hashing work runs on a
/// separate blocking task (see `new`). Blocks are handed to that task over an
/// unbounded channel.
///
/// NOTE(review): this hunk lists both the previous and the new declaration of
/// `sendblk`; only one of them exists in the actual source file.
pub struct AsyncHasher<D: Digest> {
// Sending half of the channel feeding the hashing task. In the second form
// each block is paired with a oneshot sender that the task fires once the
// block has been fed into the digest.
sendblk: mpsc::UnboundedSender<Bytes>,
sendblk: mpsc::UnboundedSender<(Bytes, oneshot::Sender<()>)>,
// Handle of the spawned hashing task; it resolves to the finalized digest.
task: JoinHandle<digest::Output<D>>,
}
impl<D: Digest> AsyncHasher<D> {
/// Create a hasher: opens an unbounded channel and spawns a blocking task
/// that feeds every received block into a fresh `D` digest, then returns the
/// finalized digest once `blocking_recv` stops yielding items.
///
/// NOTE(review): old and new variants of the channel creation and of the
/// receive loop header appear side by side in this hunk.
pub fn new() -> Self {
let (sendblk, mut recvblk) = mpsc::unbounded_channel::<Bytes>();
let (sendblk, mut recvblk) = mpsc::unbounded_channel::<(Bytes, oneshot::Sender<()>)>();
// Hashing is done inside `spawn_blocking`, so the receive below is the
// blocking variant rather than an `.await`.
let task = tokio::task::spawn_blocking(move || {
let mut digest = D::new();
while let Some(blk) = recvblk.blocking_recv() {
while let Some((blk, ch)) = recvblk.blocking_recv() {
digest.update(&blk[..]);
// Notify the submitter that this block has been hashed. The send result
// is deliberately discarded, so a dropped receiver is not treated as an error.
let _ = ch.send(());
}
digest.finalize()
});
Self { sendblk, task }
}
/// Submit a block `b` to the hashing task.
///
/// The first (non-async) form only enqueues the block. The async form also
/// creates a oneshot channel, sends its sender along with the block, and
/// waits on the receiver, which the hashing task signals after updating the
/// digest with that block.
///
/// Both forms `unwrap` the channel send, so they panic if that send fails.
///
/// NOTE(review): both signatures are shown because this is a diff hunk; the
/// closing brace below is shared by the two variants.
pub fn update(&self, b: Bytes) {
self.sendblk.send(b).unwrap()
pub async fn update(&self, b: Bytes) {
let (tx, rx) = oneshot::channel();
self.sendblk.send((b, tx)).unwrap();
// The outcome of the wait is discarded: an error from `rx` (sender dropped
// without signalling) is silently ignored.
let _ = rx.await;
}
pub async fn finalize(self) -> digest::Output<D> {