WIP: try to fix #93, and improve S3 ListObjects (v1 and v2) API calls

This commit is contained in:
Alex Auvolat 2021-10-08 18:35:38 +02:00
parent 1aed317818
commit f3a097abdf
No known key found for this signature in database
GPG key ID: EDABF9711E244EB1

View file

@ -68,26 +68,63 @@ pub async fn handle_list(
let mut result_keys = BTreeMap::<String, ListResultInfo>::new(); let mut result_keys = BTreeMap::<String, ListResultInfo>::new();
let mut result_common_prefixes = BTreeSet::<String>::new(); let mut result_common_prefixes = BTreeSet::<String>::new();
let mut next_chunk_start = if query.is_v2 { // Determine the key from where we want to start fetch objects
// from the database, and whether the object at this key must
// be included or excluded from the response.
// This key can be the prefix in the base case, or intermediate
// points in the dataset if we are continuing a previous listing.
#[allow(clippy::collapsible_else_if)]
let (mut next_chunk_start, mut next_chunk_exclude_start) = if query.is_v2 {
if let Some(ct) = &query.continuation_token { if let Some(ct) = &query.continuation_token {
String::from_utf8(base64::decode(ct.as_bytes())?)? // In V2 mode, the continuation token is defined as an opaque
// string in the spec, so we can do whatever we want with it.
// In our case, it is defined as either [ or ] (for include
// and exclude, respectively), followed by a base64 string
// representing the key to start with.
let exclude = match &ct[..1] {
"[" => false,
"]" => true,
_ => return Err(Error::BadRequest("Invalid continuation token".to_string())),
};
(
String::from_utf8(base64::decode(ct[1..].as_bytes())?)?,
exclude,
)
} else if let Some(sa) = &query.start_after {
// StartAfter has defined semantics in the spec:
// start listing at the first key immediately after.
(sa.clone(), true)
} else { } else {
query // In the case where neither is specified, we start
.start_after // listing at the specified prefix. If an object has this
.clone() // exact same key, we include it. (TODO is this correct?)
.unwrap_or_else(|| query.prefix.clone()) (query.prefix.clone(), false)
} }
} else { } else {
query.marker.clone().unwrap_or_else(|| query.prefix.clone()) if let Some(mk) = &query.marker {
// In V1 mode, the spec defines the Marker value to mean
// the same thing as the StartAfter value in V2 mode.
(mk.clone(), true)
} else {
// Base case, same as in V2 mode
(query.prefix.clone(), false)
}
}; };
debug!( debug!(
"List request: `{:?}` {} `{}`", "List request: `{:?}` {} `{}`, start from {}, exclude first {}",
query.delimiter, query.max_keys, query.prefix query.delimiter, query.max_keys, query.prefix, next_chunk_start, next_chunk_exclude_start
); );
// `truncated` is a boolean that determines whether there are
// more items to be added.
let truncated; let truncated;
// `last_processed_item` is the key of the last item
// that was included in the listing before truncating.
let mut last_processed_item = None;
'query_loop: loop { 'query_loop: loop {
// Fetch objects
let objects = garage let objects = garage
.object_table .object_table
.get_range( .get_range(
@ -103,64 +140,120 @@ pub async fn handle_list(
query.max_keys + 1, query.max_keys + 1,
objects.len() objects.len()
); );
let current_chunk_start = next_chunk_start.clone();
// Iterate on returned objects and add them to the response.
// If a delimiter is specified, we take care of grouping objects
// into CommonPrefixes.
for object in objects.iter() { for object in objects.iter() {
// If we have retrieved an object that doesn't start with
// the prefix, we know we have finished listing our stuff.
if !object.key.starts_with(&query.prefix) { if !object.key.starts_with(&query.prefix) {
truncated = None; truncated = false;
break 'query_loop; break 'query_loop;
} }
if query.is_v2 && query.start_after.as_ref() == Some(&object.key) { // Exclude the starting key if we have to.
if object.key == next_chunk_start && next_chunk_exclude_start {
continue; continue;
} }
if let Some(version) = object.versions().iter().find(|x| x.is_data()) { // Find if this object has a currently valid (non-deleted,
if result_keys.len() + result_common_prefixes.len() >= query.max_keys { // non-still-uploading) version. If not, skip it.
truncated = Some(object.key.to_string()); let version = match object.versions().iter().find(|x| x.is_data()) {
break 'query_loop; Some(v) => v,
} None => continue,
let common_prefix = if let Some(delimiter) = &query.delimiter { };
let relative_key = &object.key[query.prefix.len()..];
relative_key // If we don't have space to add this object to our response,
.find(delimiter) // we will need to stop here and mark the key of this object
.map(|i| &object.key[..query.prefix.len() + i + delimiter.len()]) // as the marker from where
} else { // we want to start again in the next list call.
None let cannot_add = result_keys.len() + result_common_prefixes.len() >= query.max_keys;
};
if let Some(pfx) = common_prefix { // Determine whether this object should be grouped inside
// a CommonPrefix because it contains the delimiter,
// or if it should be returned as an object.
let common_prefix = match &query.delimiter {
Some(delimiter) => object.key[query.prefix.len()..]
.find(delimiter)
.map(|i| &object.key[..query.prefix.len() + i + delimiter.len()]),
None => None,
};
if let Some(pfx) = common_prefix {
// In the case where this object must be grouped in a
// common prefix, handle it here.
if !result_common_prefixes.contains(pfx) {
// Determine the first listing key that starts after
// the common prefix, by finding the next possible
// string by alphabetical order.
let mut first_key_after_prefix = pfx.to_string();
let tail = first_key_after_prefix.pop().unwrap();
first_key_after_prefix.push(((tail as u8) + 1) as char);
// If this were the end of the chunk,
// the next chunk should start after this prefix
next_chunk_start = first_key_after_prefix;
next_chunk_exclude_start = false;
if cannot_add {
truncated = true;
break 'query_loop;
}
result_common_prefixes.insert(pfx.to_string()); result_common_prefixes.insert(pfx.to_string());
} else { }
let meta = match &version.state { last_processed_item = Some(object.key.clone());
ObjectVersionState::Complete(ObjectVersionData::Inline(meta, _)) => meta, continue;
ObjectVersionState::Complete(ObjectVersionData::FirstBlock(meta, _)) => { };
meta
} // This is not a common prefix, we want to add it to our
_ => unreachable!(), // response directly.
}; next_chunk_start = object.key.clone();
let info = match result_keys.get(&object.key) {
None => ListResultInfo { if cannot_add {
last_modified: version.timestamp, truncated = true;
size: meta.size, next_chunk_exclude_start = false;
etag: meta.etag.to_string(), break 'query_loop;
},
Some(_lri) => {
return Err(Error::InternalError(GarageError::Message(format!(
"Duplicate key?? {}",
object.key
))))
}
};
result_keys.insert(object.key.clone(), info);
};
} }
let meta = match &version.state {
ObjectVersionState::Complete(ObjectVersionData::Inline(meta, _)) => meta,
ObjectVersionState::Complete(ObjectVersionData::FirstBlock(meta, _)) => meta,
_ => unreachable!(),
};
let info = match result_keys.get(&object.key) {
None => ListResultInfo {
last_modified: version.timestamp,
size: meta.size,
etag: meta.etag.to_string(),
},
Some(_lri) => {
return Err(Error::InternalError(GarageError::Message(format!(
"Duplicate key?? {} (this is a bug, please report it)",
object.key
))))
}
};
result_keys.insert(object.key.clone(), info);
last_processed_item = Some(object.key.clone());
next_chunk_exclude_start = true;
} }
// If our database returned less objects than what we were asking for,
// it means that no more objects are in the bucket. So we stop here.
if objects.len() < query.max_keys + 1 { if objects.len() < query.max_keys + 1 {
truncated = None; truncated = false;
break 'query_loop; break 'query_loop;
} }
if !objects.is_empty() {
next_chunk_start = objects[objects.len() - 1].key.clone(); // Sanity check: we should have added at least an object
// or a prefix to our returned result.
if next_chunk_start == current_chunk_start || last_processed_item.is_none() {
return Err(Error::InternalError(GarageError::Message(format!(
"S3 ListObject: made no progress, still starting at {} (this is a bug, please report it)", next_chunk_start))));
} }
// Loop and fetch more objects
} }
let mut result = s3_xml::ListBucketResult { let mut result = s3_xml::ListBucketResult {
@ -181,11 +274,10 @@ pub async fn handle_list(
true => Some(s3_xml::Value("url".to_string())), true => Some(s3_xml::Value("url".to_string())),
false => None, false => None,
}, },
key_count: Some(s3_xml::IntValue( key_count: Some(s3_xml::IntValue(
result_keys.len() as i64 + result_common_prefixes.len() as i64, result_keys.len() as i64 + result_common_prefixes.len() as i64,
)), )),
is_truncated: s3_xml::Value(format!("{}", truncated.is_some())), is_truncated: s3_xml::Value(format!("{}", truncated)),
contents: vec![], contents: vec![],
common_prefixes: vec![], common_prefixes: vec![],
}; };
@ -197,16 +289,27 @@ pub async fn handle_list(
if let Some(sa) = &query.start_after { if let Some(sa) = &query.start_after {
result.start_after = Some(uriencode_maybe(sa, query.urlencode_resp)); result.start_after = Some(uriencode_maybe(sa, query.urlencode_resp));
} }
if let Some(nct) = truncated { if truncated {
result.next_continuation_token = Some(s3_xml::Value(base64::encode(nct.as_bytes()))); let b64 = base64::encode(next_chunk_start.as_bytes());
let nct = if next_chunk_exclude_start {
format!("]{}", b64)
} else {
format!("[{}", b64)
};
result.next_continuation_token = Some(s3_xml::Value(nct));
} }
} else { } else {
// TODO: are these supposed to be urlencoded when encoding-type is URL?? // TODO: are these supposed to be urlencoded when encoding-type is URL??
if let Some(mkr) = &query.marker { if let Some(mkr) = &query.marker {
result.marker = Some(uriencode_maybe(mkr, query.urlencode_resp)); result.marker = Some(uriencode_maybe(mkr, query.urlencode_resp));
} }
if let Some(next_marker) = truncated { if truncated {
result.next_marker = Some(uriencode_maybe(&next_marker, query.urlencode_resp)); if let Some(lpi) = last_processed_item {
result.next_marker = Some(uriencode_maybe(&lpi, query.urlencode_resp));
} else {
return Err(Error::InternalError(GarageError::Message(
"S3 ListObject: last_processed_item is None but the response was truncated, indicating that many items were processed (this is a bug, please report it)".to_string())));
}
} }
} }
@ -221,7 +324,6 @@ pub async fn handle_list(
} }
for pfx in result_common_prefixes.iter() { for pfx in result_common_prefixes.iter() {
//TODO: in V1, are these urlencoded when urlencode_resp is true ?? (proably)
result.common_prefixes.push(s3_xml::CommonPrefix { result.common_prefixes.push(s3_xml::CommonPrefix {
prefix: uriencode_maybe(pfx, query.urlencode_resp), prefix: uriencode_maybe(pfx, query.urlencode_resp),
}); });