mirror of
https://git.deuxfleurs.fr/Deuxfleurs/garage.git
synced 2024-11-21 23:51:00 +00:00
Checkpoint: add merkle tree in data table
This commit is contained in:
parent
3214dd52dd
commit
8d63738cb0
11 changed files with 450 additions and 82 deletions
|
@ -12,7 +12,7 @@ steps:
|
|||
image: alpine/git
|
||||
commands:
|
||||
- mkdir -p cargo
|
||||
- git clone https://git.deuxfleurs.fr/Deuxfleurs/garage.git
|
||||
- git clone $DRONE_GIT_HTTP_URL
|
||||
- cd garage
|
||||
- git checkout $DRONE_COMMIT
|
||||
|
||||
|
@ -34,8 +34,8 @@ steps:
|
|||
- 'garage/target'
|
||||
- 'cargo/registry/index'
|
||||
- 'cargo/registry/cache'
|
||||
- 'cargo/git/db'
|
||||
- 'cargo/bin'
|
||||
- 'cargo/git/db'
|
||||
path_style: true
|
||||
endpoint: https://garage.deuxfleurs.fr
|
||||
|
||||
|
|
|
@ -350,8 +350,7 @@ impl AdminRpcHandler {
|
|||
.background
|
||||
.spawn_worker("Repair worker".into(), move |must_exit| async move {
|
||||
repair.repair_worker(opt, must_exit).await
|
||||
})
|
||||
.await;
|
||||
});
|
||||
Ok(AdminRPC::Ok(format!(
|
||||
"Repair launched on {:?}",
|
||||
self.garage.system.id
|
||||
|
|
|
@ -49,7 +49,7 @@ pub async fn run_server(config_file: PathBuf) -> Result<(), Error> {
|
|||
let (send_cancel, watch_cancel) = watch::channel(false);
|
||||
let background = BackgroundRunner::new(16, watch_cancel.clone());
|
||||
|
||||
let garage = Garage::new(config, db, background.clone(), &mut rpc_server).await;
|
||||
let garage = Garage::new(config, db, background.clone(), &mut rpc_server);
|
||||
|
||||
info!("Crate admin RPC handler...");
|
||||
AdminRpcHandler::new(garage.clone()).register_handler(&mut rpc_server);
|
||||
|
|
|
@ -127,18 +127,16 @@ impl BlockManager {
|
|||
}
|
||||
}
|
||||
|
||||
pub async fn spawn_background_worker(self: Arc<Self>) {
|
||||
pub fn spawn_background_worker(self: Arc<Self>) {
|
||||
// Launch 2 simultaneous workers for background resync loop preprocessing
|
||||
for i in 0..2usize {
|
||||
for i in 0..2u64 {
|
||||
let bm2 = self.clone();
|
||||
let background = self.system.background.clone();
|
||||
tokio::spawn(async move {
|
||||
tokio::time::delay_for(Duration::from_secs(10)).await;
|
||||
background
|
||||
.spawn_worker(format!("block resync worker {}", i), move |must_exit| {
|
||||
bm2.resync_loop(must_exit)
|
||||
})
|
||||
.await;
|
||||
tokio::time::delay_for(Duration::from_secs(10 * (i + 1))).await;
|
||||
background.spawn_worker(format!("block resync worker {}", i), move |must_exit| {
|
||||
bm2.resync_loop(must_exit)
|
||||
});
|
||||
});
|
||||
}
|
||||
}
|
||||
|
|
|
@ -35,7 +35,7 @@ pub struct Garage {
|
|||
}
|
||||
|
||||
impl Garage {
|
||||
pub async fn new(
|
||||
pub fn new(
|
||||
config: Config,
|
||||
db: sled::Db,
|
||||
background: Arc<BackgroundRunner>,
|
||||
|
@ -86,8 +86,7 @@ impl Garage {
|
|||
&db,
|
||||
"block_ref".to_string(),
|
||||
rpc_server,
|
||||
)
|
||||
.await;
|
||||
);
|
||||
|
||||
info!("Initialize version_table...");
|
||||
let version_table = Table::new(
|
||||
|
@ -100,8 +99,7 @@ impl Garage {
|
|||
&db,
|
||||
"version".to_string(),
|
||||
rpc_server,
|
||||
)
|
||||
.await;
|
||||
);
|
||||
|
||||
info!("Initialize object_table...");
|
||||
let object_table = Table::new(
|
||||
|
@ -114,8 +112,7 @@ impl Garage {
|
|||
&db,
|
||||
"object".to_string(),
|
||||
rpc_server,
|
||||
)
|
||||
.await;
|
||||
);
|
||||
|
||||
info!("Initialize bucket_table...");
|
||||
let bucket_table = Table::new(
|
||||
|
@ -125,8 +122,7 @@ impl Garage {
|
|||
&db,
|
||||
"bucket".to_string(),
|
||||
rpc_server,
|
||||
)
|
||||
.await;
|
||||
);
|
||||
|
||||
info!("Initialize key_table_table...");
|
||||
let key_table = Table::new(
|
||||
|
@ -136,8 +132,7 @@ impl Garage {
|
|||
&db,
|
||||
"key".to_string(),
|
||||
rpc_server,
|
||||
)
|
||||
.await;
|
||||
);
|
||||
|
||||
info!("Initialize Garage...");
|
||||
let garage = Arc::new(Self {
|
||||
|
@ -155,7 +150,7 @@ impl Garage {
|
|||
|
||||
info!("Start block manager background thread...");
|
||||
garage.block_manager.garage.swap(Some(garage.clone()));
|
||||
garage.block_manager.clone().spawn_background_worker().await;
|
||||
garage.block_manager.clone().spawn_background_worker();
|
||||
|
||||
garage
|
||||
}
|
||||
|
|
|
@ -319,8 +319,7 @@ impl System {
|
|||
.background
|
||||
.spawn_worker(format!("ping loop"), |stop_signal| {
|
||||
self2.ping_loop(stop_signal).map(Ok)
|
||||
})
|
||||
.await;
|
||||
});
|
||||
|
||||
if let (Some(consul_host), Some(consul_service_name)) = (consul_host, consul_service_name) {
|
||||
let self2 = self.clone();
|
||||
|
@ -330,8 +329,7 @@ impl System {
|
|||
self2
|
||||
.consul_loop(stop_signal, consul_host, consul_service_name)
|
||||
.map(Ok)
|
||||
})
|
||||
.await;
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -7,6 +7,7 @@ pub mod crdt;
|
|||
pub mod schema;
|
||||
pub mod util;
|
||||
|
||||
pub mod merkle;
|
||||
pub mod table;
|
||||
pub mod table_fullcopy;
|
||||
pub mod table_sharded;
|
||||
|
|
352
src/table/merkle.rs
Normal file
352
src/table/merkle.rs
Normal file
|
@ -0,0 +1,352 @@
|
|||
use std::convert::TryInto;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use futures::select;
|
||||
use futures_util::future::*;
|
||||
use log::{info, warn};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use sled::transaction::{
|
||||
ConflictableTransactionError, ConflictableTransactionResult, TransactionalTree,
|
||||
};
|
||||
use tokio::sync::{watch, Notify};
|
||||
|
||||
use garage_util::background::BackgroundRunner;
|
||||
use garage_util::data::*;
|
||||
use garage_util::error::Error;
|
||||
|
||||
// This modules partitions the data in 2**16 partitions, based on the top
|
||||
// 16 bits (two bytes) of item's partition keys' hashes.
|
||||
// It builds one Merkle tree for each of these 2**16 partitions.
|
||||
|
||||
pub(crate) struct MerkleUpdater {
|
||||
table_name: String,
|
||||
background: Arc<BackgroundRunner>,
|
||||
|
||||
// Content of the todo tree: items where
|
||||
// - key = the key of an item in the main table, ie hash(partition_key)+sort_key
|
||||
// - value = the hash of the full serialized item, if present,
|
||||
// or an empty vec if item is absent (deleted)
|
||||
pub(crate) todo: sled::Tree,
|
||||
pub(crate) todo_notify: Notify,
|
||||
|
||||
// Content of the merkle tree: items where
|
||||
// - key = .bytes() for MerkleNodeKey
|
||||
// - value = serialization of a MerkleNode, assumed to be MerkleNode::empty if not found
|
||||
pub(crate) merkle_tree: sled::Tree,
|
||||
empty_node_hash: Hash,
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct MerkleNodeKey {
|
||||
// partition: first 16 bits (two bytes) of the partition_key's hash
|
||||
pub partition: [u8; 2],
|
||||
|
||||
// prefix: a prefix for the hash of full keys, i.e. hash(hash(partition_key)+sort_key)
|
||||
pub prefix: Vec<u8>,
|
||||
}
|
||||
|
||||
#[derive(PartialEq, Eq, Debug, Serialize, Deserialize)]
|
||||
pub enum MerkleNode {
|
||||
// The empty Merkle node
|
||||
Empty,
|
||||
|
||||
// An intermediate Merkle tree node for a prefix
|
||||
// Contains the hashes of the 256 possible next prefixes
|
||||
Intermediate(Vec<(u8, Hash)>),
|
||||
|
||||
// A final node for an item
|
||||
// Contains the full key of the item and the hash of the value
|
||||
Leaf(Vec<u8>, Hash),
|
||||
}
|
||||
|
||||
impl MerkleUpdater {
|
||||
pub(crate) fn new(
|
||||
table_name: String,
|
||||
background: Arc<BackgroundRunner>,
|
||||
todo: sled::Tree,
|
||||
merkle_tree: sled::Tree,
|
||||
) -> Arc<Self> {
|
||||
let empty_node_hash = blake2sum(&rmp_to_vec_all_named(&MerkleNode::Empty).unwrap()[..]);
|
||||
|
||||
Arc::new(Self {
|
||||
table_name,
|
||||
background,
|
||||
todo,
|
||||
todo_notify: Notify::new(),
|
||||
merkle_tree,
|
||||
empty_node_hash,
|
||||
})
|
||||
}
|
||||
|
||||
pub(crate) fn launch(self: &Arc<Self>) {
|
||||
let self2 = self.clone();
|
||||
self.background.spawn_worker(
|
||||
format!("Merkle tree updater for {}", self.table_name),
|
||||
|must_exit: watch::Receiver<bool>| self2.updater_loop(must_exit),
|
||||
);
|
||||
}
|
||||
|
||||
async fn updater_loop(
|
||||
self: Arc<Self>,
|
||||
mut must_exit: watch::Receiver<bool>,
|
||||
) -> Result<(), Error> {
|
||||
while !*must_exit.borrow() {
|
||||
if let Some(x) = self.todo.iter().next() {
|
||||
match x {
|
||||
Ok((key, valhash)) => {
|
||||
if let Err(e) = self.update_item(&key[..], &valhash[..]) {
|
||||
warn!("Error while updating Merkle tree item: {}", e);
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
warn!("Error while iterating on Merkle todo tree: {}", e);
|
||||
tokio::time::delay_for(Duration::from_secs(10)).await;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
select! {
|
||||
_ = self.todo_notify.notified().fuse() => (),
|
||||
_ = must_exit.recv().fuse() => (),
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn update_item(&self, k: &[u8], vhash_by: &[u8]) -> Result<(), Error> {
|
||||
let khash = blake2sum(k);
|
||||
|
||||
let new_vhash = if vhash_by.len() == 0 {
|
||||
None
|
||||
} else {
|
||||
let vhash_by: [u8; 32] = vhash_by
|
||||
.try_into()
|
||||
.map_err(|_| Error::Message(format!("Invalid value in Merkle todo table")))?;
|
||||
Some(Hash::from(vhash_by))
|
||||
};
|
||||
|
||||
let key = MerkleNodeKey {
|
||||
partition: k[0..2].try_into().unwrap(),
|
||||
prefix: vec![],
|
||||
};
|
||||
self.merkle_tree
|
||||
.transaction(|tx| self.update_item_rec(tx, k, khash, &key, new_vhash))?;
|
||||
|
||||
let deleted = self
|
||||
.todo
|
||||
.compare_and_swap::<_, _, Vec<u8>>(k, Some(vhash_by), None)?
|
||||
.is_ok();
|
||||
|
||||
if !deleted {
|
||||
info!(
|
||||
"Item not deleted from Merkle todo because it changed: {:?}",
|
||||
k
|
||||
);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn update_item_rec(
|
||||
&self,
|
||||
tx: &TransactionalTree,
|
||||
k: &[u8],
|
||||
khash: Hash,
|
||||
key: &MerkleNodeKey,
|
||||
new_vhash: Option<Hash>,
|
||||
) -> ConflictableTransactionResult<Option<Hash>, Error> {
|
||||
let i = key.prefix.len();
|
||||
let mutate = match self.read_node_txn(tx, &key)? {
|
||||
MerkleNode::Empty => {
|
||||
if let Some(vhv) = new_vhash {
|
||||
Some(MerkleNode::Leaf(k.to_vec(), vhv))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
MerkleNode::Intermediate(mut children) => {
|
||||
let key2 = key.next_key(khash);
|
||||
if let Some(subhash) = self.update_item_rec(tx, k, khash, &key2, new_vhash)? {
|
||||
if subhash == self.empty_node_hash {
|
||||
intermediate_rm_child(&mut children, key2.prefix[i]);
|
||||
} else {
|
||||
intermediate_set_child(&mut children, key2.prefix[i], subhash);
|
||||
}
|
||||
if children.len() == 0 {
|
||||
// should not happen
|
||||
warn!("Replacing intermediate node with empty node, should not happen.");
|
||||
Some(MerkleNode::Empty)
|
||||
} else if children.len() == 1 {
|
||||
// move node down to this level
|
||||
let key_sub = key.add_byte(children[0].0);
|
||||
let subnode = self.read_node_txn(tx, &key_sub)?;
|
||||
tx.remove(key_sub.encode())?;
|
||||
Some(subnode)
|
||||
} else {
|
||||
Some(MerkleNode::Intermediate(children))
|
||||
}
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
MerkleNode::Leaf(exlf_key, exlf_hash) => {
|
||||
if exlf_key == k {
|
||||
match new_vhash {
|
||||
Some(vhv) if vhv == exlf_hash => None,
|
||||
Some(vhv) => Some(MerkleNode::Leaf(k.to_vec(), vhv)),
|
||||
None => Some(MerkleNode::Empty),
|
||||
}
|
||||
} else {
|
||||
if let Some(vhv) = new_vhash {
|
||||
// Create two sub-nodes and replace by intermediary node
|
||||
let (pos1, h1) = {
|
||||
let key2 = key.next_key(blake2sum(&exlf_key[..]));
|
||||
let subhash =
|
||||
self.put_node_txn(tx, &key2, &MerkleNode::Leaf(exlf_key, exlf_hash))?;
|
||||
(key2.prefix[i], subhash)
|
||||
};
|
||||
let (pos2, h2) = {
|
||||
let key2 = key.next_key(khash);
|
||||
let subhash =
|
||||
self.put_node_txn(tx, &key2, &MerkleNode::Leaf(k.to_vec(), vhv))?;
|
||||
(key2.prefix[i], subhash)
|
||||
};
|
||||
let mut int = vec![];
|
||||
intermediate_set_child(&mut int, pos1, h1);
|
||||
intermediate_set_child(&mut int, pos2, h2);
|
||||
Some(MerkleNode::Intermediate(int))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
if let Some(new_node) = mutate {
|
||||
let hash = self.put_node_txn(tx, &key, &new_node)?;
|
||||
Ok(Some(hash))
|
||||
} else {
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
|
||||
// Merkle tree node manipulation
|
||||
|
||||
fn read_node_txn(
|
||||
&self,
|
||||
tx: &TransactionalTree,
|
||||
k: &MerkleNodeKey,
|
||||
) -> ConflictableTransactionResult<MerkleNode, Error> {
|
||||
let ent = tx.get(k.encode())?;
|
||||
match ent {
|
||||
None => Ok(MerkleNode::Empty),
|
||||
Some(v) => Ok(rmp_serde::decode::from_read_ref::<_, MerkleNode>(&v[..])
|
||||
.map_err(|e| ConflictableTransactionError::Abort(e.into()))?),
|
||||
}
|
||||
}
|
||||
|
||||
fn put_node_txn(
|
||||
&self,
|
||||
tx: &TransactionalTree,
|
||||
k: &MerkleNodeKey,
|
||||
v: &MerkleNode,
|
||||
) -> ConflictableTransactionResult<Hash, Error> {
|
||||
if *v == MerkleNode::Empty {
|
||||
tx.remove(k.encode())?;
|
||||
Ok(self.empty_node_hash)
|
||||
} else {
|
||||
let vby = rmp_to_vec_all_named(v)
|
||||
.map_err(|e| ConflictableTransactionError::Abort(e.into()))?;
|
||||
let rethash = blake2sum(&vby[..]);
|
||||
tx.insert(k.encode(), vby)?;
|
||||
Ok(rethash)
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn read_node(
|
||||
&self,
|
||||
k: &MerkleNodeKey,
|
||||
) -> Result<MerkleNode, Error> {
|
||||
let ent = self.merkle_tree.get(k.encode())?;
|
||||
match ent {
|
||||
None => Ok(MerkleNode::Empty),
|
||||
Some(v) => Ok(rmp_serde::decode::from_read_ref::<_, MerkleNode>(&v[..])?)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl MerkleNodeKey {
|
||||
fn encode(&self) -> Vec<u8> {
|
||||
let mut ret = Vec::with_capacity(2 + self.prefix.len());
|
||||
ret.extend(&self.partition[..]);
|
||||
ret.extend(&self.prefix[..]);
|
||||
ret
|
||||
}
|
||||
|
||||
pub fn next_key(&self, h: Hash) -> Self {
|
||||
assert!(&h.as_slice()[0..self.prefix.len()] == &self.prefix[..]);
|
||||
let mut s2 = self.clone();
|
||||
s2.prefix.push(h.as_slice()[self.prefix.len()]);
|
||||
s2
|
||||
}
|
||||
|
||||
pub fn add_byte(&self, b: u8) -> Self {
|
||||
let mut s2 = self.clone();
|
||||
s2.prefix.push(b);
|
||||
s2
|
||||
}
|
||||
}
|
||||
|
||||
fn intermediate_set_child(ch: &mut Vec<(u8, Hash)>, pos: u8, v: Hash) {
|
||||
for i in 0..ch.len() {
|
||||
if ch[i].0 == pos {
|
||||
ch[i].1 = v;
|
||||
return;
|
||||
} else if ch[i].0 > pos {
|
||||
ch.insert(i, (pos, v));
|
||||
return;
|
||||
}
|
||||
}
|
||||
ch.insert(ch.len(), (pos, v));
|
||||
}
|
||||
|
||||
fn intermediate_rm_child(ch: &mut Vec<(u8, Hash)>, pos: u8) {
|
||||
for i in 0..ch.len() {
|
||||
if ch[i].0 == pos {
|
||||
ch.remove(i);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_intermediate_aux() {
|
||||
let mut v = vec![];
|
||||
|
||||
intermediate_set_child(&mut v, 12u8, [12u8; 32].into());
|
||||
assert!(v == vec![(12u8, [12u8; 32].into())]);
|
||||
|
||||
intermediate_set_child(&mut v, 42u8, [42u8; 32].into());
|
||||
assert!(v == vec![(12u8, [12u8; 32].into()), (42u8, [42u8; 32].into())]);
|
||||
|
||||
intermediate_set_child(&mut v, 4u8, [4u8; 32].into());
|
||||
assert!(v == vec![(4u8, [4u8; 32].into()), (12u8, [12u8; 32].into()), (42u8, [42u8; 32].into())]);
|
||||
|
||||
intermediate_set_child(&mut v, 12u8, [8u8; 32].into());
|
||||
assert!(v == vec![(4u8, [4u8; 32].into()), (12u8, [8u8; 32].into()), (42u8, [42u8; 32].into())]);
|
||||
|
||||
intermediate_set_child(&mut v, 6u8, [6u8; 32].into());
|
||||
assert!(v == vec![(4u8, [4u8; 32].into()), (6u8, [6u8; 32].into()), (12u8, [8u8; 32].into()), (42u8, [42u8; 32].into())]);
|
||||
|
||||
intermediate_rm_child(&mut v, 42u8);
|
||||
assert!(v == vec![(4u8, [4u8; 32].into()), (6u8, [6u8; 32].into()), (12u8, [8u8; 32].into())]);
|
||||
|
||||
intermediate_rm_child(&mut v, 11u8);
|
||||
assert!(v == vec![(4u8, [4u8; 32].into()), (6u8, [6u8; 32].into()), (12u8, [8u8; 32].into())]);
|
||||
|
||||
intermediate_rm_child(&mut v, 6u8);
|
||||
assert!(v == vec![(4u8, [4u8; 32].into()), (12u8, [8u8; 32].into())]);
|
||||
|
||||
intermediate_set_child(&mut v, 6u8, [7u8; 32].into());
|
||||
assert!(v == vec![(4u8, [4u8; 32].into()), (6u8, [7u8; 32].into()), (12u8, [8u8; 32].into())]);
|
||||
}
|
|
@ -8,6 +8,7 @@ use arc_swap::ArcSwapOption;
|
|||
use futures::stream::*;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_bytes::ByteBuf;
|
||||
use sled::Transactional;
|
||||
|
||||
use garage_util::data::*;
|
||||
use garage_util::error::Error;
|
||||
|
@ -18,6 +19,7 @@ use garage_rpc::rpc_client::*;
|
|||
use garage_rpc::rpc_server::*;
|
||||
|
||||
use crate::crdt::CRDT;
|
||||
use crate::merkle::*;
|
||||
use crate::schema::*;
|
||||
use crate::table_sync::*;
|
||||
|
||||
|
@ -33,6 +35,7 @@ pub struct Table<F: TableSchema, R: TableReplication> {
|
|||
pub system: Arc<System>,
|
||||
pub store: sled::Tree,
|
||||
pub syncer: ArcSwapOption<TableSyncer<F, R>>,
|
||||
merkle_updater: Arc<MerkleUpdater>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize)]
|
||||
|
@ -77,7 +80,7 @@ where
|
|||
{
|
||||
// =============== PUBLIC INTERFACE FUNCTIONS (new, insert, get, etc) ===============
|
||||
|
||||
pub async fn new(
|
||||
pub fn new(
|
||||
instance: F,
|
||||
replication: R,
|
||||
system: Arc<System>,
|
||||
|
@ -85,11 +88,27 @@ where
|
|||
name: String,
|
||||
rpc_server: &mut RpcServer,
|
||||
) -> Arc<Self> {
|
||||
let store = db.open_tree(&name).expect("Unable to open DB tree");
|
||||
let store = db
|
||||
.open_tree(&format!("{}:table", name))
|
||||
.expect("Unable to open DB tree");
|
||||
|
||||
let merkle_todo_store = db
|
||||
.open_tree(&format!("{}:merkle_todo", name))
|
||||
.expect("Unable to open DB Merkle TODO tree");
|
||||
let merkle_tree_store = db
|
||||
.open_tree(&format!("{}:merkle_tree", name))
|
||||
.expect("Unable to open DB Merkle tree tree");
|
||||
|
||||
let rpc_path = format!("table_{}", name);
|
||||
let rpc_client = system.rpc_client::<TableRPC<F>>(&rpc_path);
|
||||
|
||||
let merkle_updater = MerkleUpdater::new(
|
||||
name.clone(),
|
||||
system.background.clone(),
|
||||
merkle_todo_store,
|
||||
merkle_tree_store,
|
||||
);
|
||||
|
||||
let table = Arc::new(Self {
|
||||
instance,
|
||||
replication,
|
||||
|
@ -98,12 +117,15 @@ where
|
|||
system,
|
||||
store,
|
||||
syncer: ArcSwapOption::from(None),
|
||||
merkle_updater,
|
||||
});
|
||||
table.clone().register_handler(rpc_server, rpc_path);
|
||||
|
||||
let syncer = TableSyncer::launch(table.clone()).await;
|
||||
let syncer = TableSyncer::launch(table.clone());
|
||||
table.syncer.swap(Some(syncer));
|
||||
|
||||
table.merkle_updater.launch();
|
||||
|
||||
table
|
||||
}
|
||||
|
||||
|
@ -322,7 +344,7 @@ where
|
|||
Ok(TableRPC::Update(values))
|
||||
}
|
||||
TableRPC::Update(pairs) => {
|
||||
self.handle_update(pairs).await?;
|
||||
self.handle_update(pairs)?;
|
||||
Ok(TableRPC::Ok)
|
||||
}
|
||||
TableRPC::SyncRPC(rpc) => {
|
||||
|
@ -380,53 +402,64 @@ where
|
|||
Ok(ret)
|
||||
}
|
||||
|
||||
pub async fn handle_update(self: &Arc<Self>, entries: &[Arc<ByteBuf>]) -> Result<(), Error> {
|
||||
let syncer = self.syncer.load_full().unwrap();
|
||||
// ========== CODE THAT ACTUALLY MODIFIES THE TREE ================
|
||||
|
||||
pub fn handle_update(self: &Arc<Self>, entries: &[Arc<ByteBuf>]) -> Result<(), Error> {
|
||||
for update_bytes in entries.iter() {
|
||||
let update = self.decode_entry(update_bytes.as_slice())?;
|
||||
self.update_entry(update_bytes.as_slice())?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
let tree_key = self.tree_key(update.partition_key(), update.sort_key());
|
||||
pub(crate) fn update_entry(self: &Arc<Self>, update_bytes: &[u8]) -> Result<(), Error> {
|
||||
let update = self.decode_entry(update_bytes)?;
|
||||
let tree_key = self.tree_key(update.partition_key(), update.sort_key());
|
||||
|
||||
let (old_entry, new_entry) = self.store.transaction(|db| {
|
||||
let (old_entry, new_entry) = match db.get(&tree_key)? {
|
||||
Some(prev_bytes) => {
|
||||
let old_entry = self
|
||||
.decode_entry(&prev_bytes)
|
||||
.map_err(sled::transaction::ConflictableTransactionError::Abort)?;
|
||||
let mut new_entry = old_entry.clone();
|
||||
new_entry.merge(&update);
|
||||
(Some(old_entry), new_entry)
|
||||
}
|
||||
None => (None, update.clone()),
|
||||
};
|
||||
let changed = (&self.store, &self.merkle_updater.todo).transaction(|(db, mkl_todo)| {
|
||||
let (old_entry, new_entry) = match db.get(&tree_key)? {
|
||||
Some(prev_bytes) => {
|
||||
let old_entry = self
|
||||
.decode_entry(&prev_bytes)
|
||||
.map_err(sled::transaction::ConflictableTransactionError::Abort)?;
|
||||
let mut new_entry = old_entry.clone();
|
||||
new_entry.merge(&update);
|
||||
(Some(old_entry), new_entry)
|
||||
}
|
||||
None => (None, update.clone()),
|
||||
};
|
||||
|
||||
if Some(&new_entry) != old_entry.as_ref() {
|
||||
let new_bytes = rmp_to_vec_all_named(&new_entry)
|
||||
.map_err(Error::RMPEncode)
|
||||
.map_err(sled::transaction::ConflictableTransactionError::Abort)?;
|
||||
mkl_todo.insert(tree_key.clone(), blake2sum(&new_bytes[..]).to_vec())?;
|
||||
db.insert(tree_key.clone(), new_bytes)?;
|
||||
Ok((old_entry, new_entry))
|
||||
})?;
|
||||
|
||||
if old_entry.as_ref() != Some(&new_entry) {
|
||||
self.instance.updated(old_entry, Some(new_entry));
|
||||
syncer.invalidate(&tree_key[..]);
|
||||
Ok(Some((old_entry, new_entry)))
|
||||
} else {
|
||||
Ok(None)
|
||||
}
|
||||
})?;
|
||||
|
||||
if let Some((old_entry, new_entry)) = changed {
|
||||
self.instance.updated(old_entry, Some(new_entry));
|
||||
self.syncer.load_full().unwrap().invalidate(&tree_key[..]);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn delete_if_equal(self: &Arc<Self>, k: &[u8], v: &[u8]) -> Result<bool, Error> {
|
||||
let removed = self.store.transaction(|txn| {
|
||||
let removed = (&self.store, &self.merkle_updater.todo).transaction(|(txn, mkl_todo)| {
|
||||
if let Some(cur_v) = txn.get(k)? {
|
||||
if cur_v == v {
|
||||
txn.remove(k)?;
|
||||
mkl_todo.insert(k, vec![])?;
|
||||
return Ok(true);
|
||||
}
|
||||
}
|
||||
Ok(false)
|
||||
})?;
|
||||
|
||||
if removed {
|
||||
let old_entry = self.decode_entry(v)?;
|
||||
self.instance.updated(Some(old_entry), None);
|
||||
|
|
|
@ -106,7 +106,7 @@ where
|
|||
F: TableSchema + 'static,
|
||||
R: TableReplication + 'static,
|
||||
{
|
||||
pub(crate) async fn launch(table: Arc<Table<F, R>>) -> Arc<Self> {
|
||||
pub(crate) fn launch(table: Arc<Table<F, R>>) -> Arc<Self> {
|
||||
let todo = SyncTodo { todo: Vec::new() };
|
||||
let syncer = Arc::new(TableSyncer {
|
||||
table: table.clone(),
|
||||
|
@ -119,24 +119,16 @@ where
|
|||
let (busy_tx, busy_rx) = mpsc::unbounded_channel();
|
||||
|
||||
let s1 = syncer.clone();
|
||||
table
|
||||
.system
|
||||
.background
|
||||
.spawn_worker(
|
||||
format!("table sync watcher for {}", table.name),
|
||||
move |must_exit: watch::Receiver<bool>| s1.watcher_task(must_exit, busy_rx),
|
||||
)
|
||||
.await;
|
||||
table.system.background.spawn_worker(
|
||||
format!("table sync watcher for {}", table.name),
|
||||
move |must_exit: watch::Receiver<bool>| s1.watcher_task(must_exit, busy_rx),
|
||||
);
|
||||
|
||||
let s2 = syncer.clone();
|
||||
table
|
||||
.system
|
||||
.background
|
||||
.spawn_worker(
|
||||
format!("table syncer for {}", table.name),
|
||||
move |must_exit: watch::Receiver<bool>| s2.syncer_task(must_exit, busy_tx),
|
||||
)
|
||||
.await;
|
||||
table.system.background.spawn_worker(
|
||||
format!("table syncer for {}", table.name),
|
||||
move |must_exit: watch::Receiver<bool>| s2.syncer_task(must_exit, busy_tx),
|
||||
);
|
||||
|
||||
let s3 = syncer.clone();
|
||||
tokio::spawn(async move {
|
||||
|
@ -630,7 +622,7 @@ where
|
|||
}
|
||||
}
|
||||
if diff_items.len() > 0 {
|
||||
self.table.handle_update(&diff_items[..]).await?;
|
||||
self.table.handle_update(&diff_items[..])?;
|
||||
}
|
||||
if items_to_send.len() > 0 {
|
||||
self.send_items(who, items_to_send).await?;
|
||||
|
|
|
@ -1,11 +1,11 @@
|
|||
use core::future::Future;
|
||||
use std::pin::Pin;
|
||||
use std::sync::Mutex;
|
||||
|
||||
use futures::future::join_all;
|
||||
use futures::select;
|
||||
use futures_util::future::*;
|
||||
use std::sync::Arc;
|
||||
use tokio::sync::Mutex;
|
||||
use tokio::sync::{mpsc, watch, Notify};
|
||||
|
||||
use crate::error::Error;
|
||||
|
@ -38,7 +38,7 @@ impl BackgroundRunner {
|
|||
}
|
||||
|
||||
pub async fn run(self: Arc<Self>) {
|
||||
let mut workers = self.workers.lock().await;
|
||||
let mut workers = self.workers.lock().unwrap();
|
||||
for i in 0..self.n_runners {
|
||||
workers.push(tokio::spawn(self.clone().runner(i)));
|
||||
}
|
||||
|
@ -47,7 +47,7 @@ impl BackgroundRunner {
|
|||
let mut stop_signal = self.stop_signal.clone();
|
||||
while let Some(exit_now) = stop_signal.recv().await {
|
||||
if exit_now {
|
||||
let mut workers = self.workers.lock().await;
|
||||
let mut workers = self.workers.lock().unwrap();
|
||||
let workers_vec = workers.drain(..).collect::<Vec<_>>();
|
||||
join_all(workers_vec).await;
|
||||
return;
|
||||
|
@ -73,12 +73,12 @@ impl BackgroundRunner {
|
|||
self.job_notify.notify();
|
||||
}
|
||||
|
||||
pub async fn spawn_worker<F, T>(&self, name: String, worker: F)
|
||||
pub fn spawn_worker<F, T>(&self, name: String, worker: F)
|
||||
where
|
||||
F: FnOnce(watch::Receiver<bool>) -> T + Send + 'static,
|
||||
T: Future<Output = JobOutput> + Send + 'static,
|
||||
{
|
||||
let mut workers = self.workers.lock().await;
|
||||
let mut workers = self.workers.lock().unwrap();
|
||||
let stop_signal = self.stop_signal.clone();
|
||||
workers.push(tokio::spawn(async move {
|
||||
if let Err(e) = worker(stop_signal).await {
|
||||
|
@ -93,7 +93,7 @@ impl BackgroundRunner {
|
|||
let mut stop_signal = self.stop_signal.clone();
|
||||
loop {
|
||||
let must_exit: bool = *stop_signal.borrow();
|
||||
if let Some(job) = self.dequeue_job(must_exit).await {
|
||||
if let Some(job) = self.dequeue_job(must_exit) {
|
||||
if let Err(e) = job.await {
|
||||
error!("Job failed: {}", e)
|
||||
}
|
||||
|
@ -110,8 +110,8 @@ impl BackgroundRunner {
|
|||
}
|
||||
}
|
||||
|
||||
async fn dequeue_job(&self, must_exit: bool) -> Option<Job> {
|
||||
let mut queue = self.queue_out.lock().await;
|
||||
fn dequeue_job(&self, must_exit: bool) -> Option<Job> {
|
||||
let mut queue = self.queue_out.lock().unwrap();
|
||||
while let Ok((job, cancellable)) = queue.try_recv() {
|
||||
if cancellable && must_exit {
|
||||
continue;
|
||||
|
|
Loading…
Reference in a new issue