Improve concurrency for upload streams

This commit is contained in:
asonix 2022-09-24 22:07:06 -05:00
parent 29a998a665
commit bf3c47e457

View file

@ -3,8 +3,9 @@ use crate::{
repo::{Repo, SettingsRepo}, repo::{Repo, SettingsRepo},
store::{Store, StoreConfig}, store::{Store, StoreConfig},
}; };
use actix_rt::task::JoinError;
use actix_web::{ use actix_web::{
error::PayloadError, error::{BlockingError, PayloadError},
http::{ http::{
header::{ByteRangeSpec, Range, CONTENT_LENGTH}, header::{ByteRangeSpec, Range, CONTENT_LENGTH},
StatusCode, StatusCode,
@ -18,6 +19,7 @@ use std::{pin::Pin, string::FromUtf8Error, time::Duration};
use storage_path_generator::{Generator, Path}; use storage_path_generator::{Generator, Path};
use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt}; use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt};
use tokio_util::io::ReaderStream; use tokio_util::io::ReaderStream;
use tracing::Instrument;
use url::Url; use url::Url;
mod object_id; mod object_id;
@ -53,6 +55,9 @@ pub(crate) enum ObjectError {
#[error("Invalid etag response")] #[error("Invalid etag response")]
Etag, Etag,
#[error("Task cancelled")]
Cancelled,
#[error("Invalid status: {0}\n{1}")] #[error("Invalid status: {0}\n{1}")]
Status(StatusCode, String), Status(StatusCode, String),
} }
@ -63,6 +68,18 @@ impl From<SendRequestError> for ObjectError {
} }
} }
impl From<JoinError> for ObjectError {
fn from(_: JoinError) -> Self {
Self::Cancelled
}
}
impl From<BlockingError> for ObjectError {
fn from(_: BlockingError) -> Self {
Self::Cancelled
}
}
#[derive(Clone)] #[derive(Clone)]
pub(crate) struct ObjectStore { pub(crate) struct ObjectStore {
path_gen: Generator, path_gen: Generator,
@ -111,6 +128,34 @@ fn payload_to_io_error(e: PayloadError) -> std::io::Error {
} }
} }
/// Gathers items from `stream` into a single contiguous [`Bytes`] buffer.
///
/// Items are pulled until at least `CHUNK_SIZE` bytes have been collected or the stream
/// ends. Items are never split, so the result can be longer than `CHUNK_SIZE`; it is
/// shorter than `CHUNK_SIZE` only when the stream ran out (an already-exhausted stream
/// yields an empty buffer).
///
/// # Errors
///
/// Returns the first error yielded by the stream. Anything gathered before the error is
/// discarded.
#[tracing::instrument(skip(stream))]
async fn read_chunk<S>(stream: &mut S) -> std::io::Result<Bytes>
where
    S: Stream<Item = std::io::Result<Bytes>> + Unpin + 'static,
{
    let mut parts: Vec<Bytes> = Vec::new();
    let mut gathered = 0;

    while gathered < CHUNK_SIZE {
        match stream.next().await {
            Some(item) => {
                let part = item?;
                gathered += part.len();
                parts.push(part);
            }
            // The stream is exhausted; return whatever has been gathered.
            None => break,
        }
    }

    // The final size is known at this point, so the output is allocated exactly once.
    let mut joined = BytesMut::with_capacity(gathered);
    for part in &parts {
        joined.extend_from_slice(part);
    }

    Ok(joined.freeze())
}
#[async_trait::async_trait(?Send)] #[async_trait::async_trait(?Send)]
impl Store for ObjectStore { impl Store for ObjectStore {
type Identifier = ObjectId; type Identifier = ObjectId;
@ -144,43 +189,61 @@ impl Store for ObjectStore {
let upload_id = &body.upload_id; let upload_id = &body.upload_id;
let res = async { let res = async {
let mut etags = Vec::new();
let mut complete = false; let mut complete = false;
let mut part_number = 0; let mut part_number = 0;
let mut futures = Vec::new();
while !complete { while !complete {
part_number += 1; part_number += 1;
let mut bytes = BytesMut::with_capacity(CHUNK_SIZE);
while bytes.len() < CHUNK_SIZE { let bytes = read_chunk(&mut stream).await?;
if let Some(res) = stream.next().await { complete = bytes.len() < CHUNK_SIZE;
bytes.extend_from_slice(&res?);
} else { let this = self.clone();
complete = true;
break; let object_id2 = object_id.clone();
let upload_id2 = upload_id.clone();
let handle = actix_rt::spawn(
async move {
let mut response = this
.create_upload_part_request(
bytes.clone(),
&object_id2,
part_number,
&upload_id2,
)
.await?
.send_body(bytes)
.await?;
if !response.status().is_success() {
let body = String::from_utf8_lossy(&response.body().await?).to_string();
return Err(ObjectError::Status(response.status(), body).into());
}
let etag = response
.headers()
.get("etag")
.ok_or(ObjectError::Etag)?
.to_str()
.map_err(|_| ObjectError::Etag)?
.to_string();
drop(response);
Ok(etag) as Result<String, Error>
} }
} .instrument(tracing::info_span!("Upload Part")),
);
let mut response = self futures.push(handle);
.create_upload_part_request(&bytes, &object_id, part_number, upload_id) }
.send_body(bytes)
.await?;
if !response.status().is_success() { let mut etags = Vec::new();
let body = String::from_utf8_lossy(&response.body().await?).to_string();
return Err(ObjectError::Status(response.status(), body).into()); for future in futures {
} etags.push(future.await.map_err(ObjectError::from)??);
let etag = response
.headers()
.get("etag")
.ok_or(ObjectError::Etag)?
.to_str()
.map_err(|_| ObjectError::Etag)?
.to_string();
etags.push(etag);
} }
let mut response = self let mut response = self
@ -375,13 +438,13 @@ impl ObjectStore {
Ok((self.build_request(action), ObjectId::from_string(path))) Ok((self.build_request(action), ObjectId::from_string(path)))
} }
fn create_upload_part_request( async fn create_upload_part_request(
&self, &self,
bytes: &[u8], bytes: Bytes,
object_id: &ObjectId, object_id: &ObjectId,
part_number: u16, part_number: u16,
upload_id: &str, upload_id: &str,
) -> ClientRequest { ) -> Result<ClientRequest, Error> {
use md5::Digest; use md5::Digest;
let mut action = self.bucket.upload_part( let mut action = self.bucket.upload_part(
@ -391,17 +454,25 @@ impl ObjectStore {
upload_id, upload_id,
); );
let mut hasher = md5::Md5::new(); let hashing_span = tracing::info_span!("Hashing request body");
hasher.update(bytes); let hash_string = actix_web::web::block(move || {
let hash = hasher.finalize(); let guard = hashing_span.enter();
let hash_string = base64::encode(&hash); let mut hasher = md5::Md5::new();
hasher.update(&bytes);
let hash = hasher.finalize();
let hash_string = base64::encode(&hash);
drop(guard);
hash_string
})
.await
.map_err(ObjectError::from)?;
action action
.headers_mut() .headers_mut()
.insert("content-type", "application/octet-stream"); .insert("content-type", "application/octet-stream");
action.headers_mut().insert("content-md5", hash_string); action.headers_mut().insert("content-md5", hash_string);
self.build_request(action) Ok(self.build_request(action))
} }
fn send_complete_multipart_request<'a, I: Iterator<Item = &'a str>>( fn send_complete_multipart_request<'a, I: Iterator<Item = &'a str>>(