From 71cd3a31f9b3f4b86afe9c24645a3c8211ec7905 Mon Sep 17 00:00:00 2001 From: Rob Ede Date: Mon, 1 Jul 2024 03:55:08 +0100 Subject: [PATCH] fix(multipart): optional content-disposition for non-form-data requests (#3416) --- actix-multipart-derive/src/lib.rs | 6 +- actix-multipart/CHANGES.md | 9 + actix-multipart/Cargo.toml | 2 + actix-multipart/README.md | 12 +- actix-multipart/examples/form.rs | 36 ++ actix-multipart/src/error.rs | 96 +++-- actix-multipart/src/extractor.rs | 6 +- actix-multipart/src/form/bytes.rs | 3 +- actix-multipart/src/form/json.rs | 7 +- actix-multipart/src/form/mod.rs | 83 +++- actix-multipart/src/form/tempfile.rs | 28 +- actix-multipart/src/form/text.rs | 9 +- actix-multipart/src/lib.rs | 16 +- actix-multipart/src/server.rs | 406 +++++++++++------- actix-multipart/src/test.rs | 2 + .../src/http/header/content_disposition.rs | 6 +- 16 files changed, 479 insertions(+), 248 deletions(-) create mode 100644 actix-multipart/examples/form.rs diff --git a/actix-multipart-derive/src/lib.rs b/actix-multipart-derive/src/lib.rs index 9552ad2d9..44db1fa0e 100644 --- a/actix-multipart-derive/src/lib.rs +++ b/actix-multipart-derive/src/lib.rs @@ -138,7 +138,7 @@ struct ParsedField<'t> { /// `#[multipart(duplicate_field = "")]` attribute: /// /// - "ignore": (default) Extra fields are ignored. I.e., the first one is persisted. -/// - "deny": A `MultipartError::UnsupportedField` error response is returned. +/// - "deny": A `MultipartError::UnknownField` error response is returned. /// - "replace": Each field is processed, but only the last one is persisted. /// /// Note that `Vec` fields will ignore this option. @@ -229,7 +229,7 @@ pub fn impl_multipart_form(input: proc_macro::TokenStream) -> proc_macro::TokenS // Return value when a field name is not supported by the form let unknown_field_result = if attrs.deny_unknown_fields { quote!(::std::result::Result::Err( - ::actix_multipart::MultipartError::UnsupportedField(field.name().to_string()) + ::actix_multipart::MultipartError::UnknownField(field.name().unwrap().to_string()) )) } else { quote!(::std::result::Result::Ok(())) @@ -292,7 +292,7 @@ pub fn impl_multipart_form(input: proc_macro::TokenStream) -> proc_macro::TokenS limits: &'t mut ::actix_multipart::form::Limits, state: &'t mut ::actix_multipart::form::State, ) -> ::std::pin::Pin<::std::boxed::Box> + 't>> { - match field.name() { + match field.name().unwrap() { #handle_field_impl _ => return ::std::boxed::Box::pin(::std::future::ready(#unknown_field_result)), } diff --git a/actix-multipart/CHANGES.md b/actix-multipart/CHANGES.md index a91edf9c8..dea24ab9d 100644 --- a/actix-multipart/CHANGES.md +++ b/actix-multipart/CHANGES.md @@ -2,6 +2,15 @@ ## Unreleased +- Add `MultipartError::ContentTypeIncompatible` variant. +- Add `MultipartError::ContentDispositionNameMissing` variant. +- Rename `MultipartError::{NoContentDisposition => ContentDispositionMissing}` variant. +- Rename `MultipartError::{NoContentType => ContentTypeMissing}` variant. +- Rename `MultipartError::{ParseContentType => ContentTypeParse}` variant. +- Rename `MultipartError::{Boundary => BoundaryMissing}` variant. +- Rename `MultipartError::{UnsupportedField => UnknownField}` variant. +- Remove top-level re-exports of `test` utilities. + ## 0.6.2 - Add testing utilities under new module `test`. diff --git a/actix-multipart/Cargo.toml b/actix-multipart/Cargo.toml index 5e9b78d84..c5ff7dd10 100644 --- a/actix-multipart/Cargo.toml +++ b/actix-multipart/Cargo.toml @@ -63,7 +63,9 @@ actix-multipart-rfc7578 = "0.10" actix-rt = "2.2" actix-test = "0.1" actix-web = "4" +assert_matches = "1" awc = "3" +env_logger = "0.11" futures-util = { version = "0.3.17", default-features = false, features = ["alloc"] } multer = "3" tokio = { version = "1.24.2", features = ["sync"] } diff --git a/actix-multipart/README.md b/actix-multipart/README.md index d61347f32..917dceece 100644 --- a/actix-multipart/README.md +++ b/actix-multipart/README.md @@ -54,15 +54,15 @@ async fn main() -> std::io::Result<()> { } ``` - +cURL request: -[More available in the examples repo →](https://github.com/actix/examples/tree/master/forms/multipart) - -Curl request : - -```bash +```sh curl -v --request POST \ --url http://localhost:8080/videos \ -F 'json={"name": "Cargo.lock"};type=application/json' \ -F file=@./Cargo.lock ``` + + + +[More available in the examples repo →](https://github.com/actix/examples/tree/master/forms/multipart) diff --git a/actix-multipart/examples/form.rs b/actix-multipart/examples/form.rs new file mode 100644 index 000000000..a90aeff96 --- /dev/null +++ b/actix-multipart/examples/form.rs @@ -0,0 +1,36 @@ +use actix_multipart::form::{json::Json as MpJson, tempfile::TempFile, MultipartForm}; +use actix_web::{middleware::Logger, post, App, HttpServer, Responder}; +use serde::Deserialize; + +#[derive(Debug, Deserialize)] +struct Metadata { + name: String, +} + +#[derive(Debug, MultipartForm)] +struct UploadForm { + #[multipart(limit = "100MB")] + file: TempFile, + json: MpJson, +} + +#[post("/videos")] +async fn post_video(MultipartForm(form): MultipartForm) -> impl Responder { + format!( + "Uploaded file {}, with size: {}\ntemporary file ({}) was deleted\n", + form.json.name, + form.file.size, + form.file.file.path().display(), + ) +} + +#[actix_web::main] +async fn main() -> std::io::Result<()> { + env_logger::init_from_env(env_logger::Env::new().default_filter_or("info")); + + HttpServer::new(move || App::new().service(post_video).wrap(Logger::default())) + .workers(2) + .bind(("127.0.0.1", 8080))? + .run() + .await +} diff --git a/actix-multipart/src/error.rs b/actix-multipart/src/error.rs index 77b5a559f..30ef63c1a 100644 --- a/actix-multipart/src/error.rs +++ b/actix-multipart/src/error.rs @@ -11,77 +11,95 @@ use derive_more::{Display, Error, From}; #[derive(Debug, Display, From, Error)] #[non_exhaustive] pub enum MultipartError { - /// Content-Disposition header is not found or is not equal to "form-data". + /// Could not find Content-Type header. + #[display(fmt = "Could not find Content-Type header")] + ContentTypeMissing, + + /// Could not parse Content-Type header. + #[display(fmt = "Could not parse Content-Type header")] + ContentTypeParse, + + /// Parsed Content-Type did not have "multipart" top-level media type. /// - /// According to [RFC 7578 §4.2](https://datatracker.ietf.org/doc/html/rfc7578#section-4.2) a - /// Content-Disposition header must always be present and equal to "form-data". - #[display(fmt = "No Content-Disposition `form-data` header")] - NoContentDisposition, + /// Also raised when extracting a [`MultipartForm`] from a request that does not have the + /// "multipart/form-data" media type. + /// + /// [`MultipartForm`]: struct@crate::form::MultipartForm + #[display(fmt = "Parsed Content-Type did not have "multipart" top-level media type")] + ContentTypeIncompatible, - /// Content-Type header is not found - #[display(fmt = "No Content-Type header found")] - NoContentType, - - /// Can not parse Content-Type header - #[display(fmt = "Can not parse Content-Type header")] - ParseContentType, - - /// Multipart boundary is not found + /// Multipart boundary is not found. #[display(fmt = "Multipart boundary is not found")] - Boundary, + BoundaryMissing, - /// Nested multipart is not supported + /// Content-Disposition header was not found or not of disposition type "form-data" when parsing + /// a "form-data" field. + /// + /// As per [RFC 7578 §4.2], a "multipart/form-data" field's Content-Disposition header must + /// always be present and have a disposition type of "form-data". + /// + /// [RFC 7578 §4.2]: https://datatracker.ietf.org/doc/html/rfc7578#section-4.2 + #[display(fmt = "Content-Disposition header was not found when parsing a \"form-data\" field")] + ContentDispositionMissing, + + /// Content-Disposition name parameter was not found when parsing a "form-data" field. + /// + /// As per [RFC 7578 §4.2], a "multipart/form-data" field's Content-Disposition header must + /// always include a "name" parameter. + /// + /// [RFC 7578 §4.2]: https://datatracker.ietf.org/doc/html/rfc7578#section-4.2 + #[display(fmt = "Content-Disposition header was not found when parsing a \"form-data\" field")] + ContentDispositionNameMissing, + + /// Nested multipart is not supported. #[display(fmt = "Nested multipart is not supported")] Nested, - /// Multipart stream is incomplete + /// Multipart stream is incomplete. #[display(fmt = "Multipart stream is incomplete")] Incomplete, - /// Error during field parsing - #[display(fmt = "{}", _0)] + /// Field parsing failed. + #[display(fmt = "Error during field parsing")] Parse(ParseError), - /// Payload error - #[display(fmt = "{}", _0)] + /// HTTP payload error. + #[display(fmt = "Payload error")] Payload(PayloadError), - /// Not consumed - #[display(fmt = "Multipart stream is not consumed")] + /// Stream is not consumed. + #[display(fmt = "Stream is not consumed")] NotConsumed, - /// An error from a field handler in a form - #[display( - fmt = "An error occurred processing field `{}`: {}", - field_name, - source - )] + /// Form field handler raised error. + #[display(fmt = "An error occurred processing field: {name}")] Field { - field_name: String, + name: String, source: actix_web::Error, }, - /// Duplicate field - #[display(fmt = "Duplicate field found for: `{}`", _0)] + /// Duplicate field found (for structure that opted-in to denying duplicate fields). + #[display(fmt = "Duplicate field found: {_0}")] #[from(ignore)] DuplicateField(#[error(not(source))] String), - /// Missing field - #[display(fmt = "Field with name `{}` is required", _0)] + /// Required field is missing. + #[display(fmt = "Required field is missing: {_0}")] #[from(ignore)] MissingField(#[error(not(source))] String), - /// Unknown field - #[display(fmt = "Unsupported field `{}`", _0)] + /// Unknown field (for structure that opted-in to denying unknown fields). + #[display(fmt = "Unknown field: {_0}")] #[from(ignore)] - UnsupportedField(#[error(not(source))] String), + UnknownField(#[error(not(source))] String), } -/// Return `BadRequest` for `MultipartError` +/// Return `BadRequest` for `MultipartError`. impl ResponseError for MultipartError { fn status_code(&self) -> StatusCode { match &self { MultipartError::Field { source, .. } => source.as_response_error().status_code(), + MultipartError::ContentTypeIncompatible => StatusCode::UNSUPPORTED_MEDIA_TYPE, _ => StatusCode::BAD_REQUEST, } } @@ -93,7 +111,7 @@ mod tests { #[test] fn test_multipart_error() { - let resp = MultipartError::Boundary.error_response(); + let resp = MultipartError::BoundaryMissing.error_response(); assert_eq!(resp.status(), StatusCode::BAD_REQUEST); } } diff --git a/actix-multipart/src/extractor.rs b/actix-multipart/src/extractor.rs index 56ed69ae4..ab98f887e 100644 --- a/actix-multipart/src/extractor.rs +++ b/actix-multipart/src/extractor.rs @@ -10,6 +10,7 @@ use crate::server::Multipart; /// Content-type: multipart/form-data; /// /// # Examples +/// /// ``` /// use actix_web::{web, HttpResponse, Error}; /// use actix_multipart::Multipart; @@ -35,9 +36,6 @@ impl FromRequest for Multipart { #[inline] fn from_request(req: &HttpRequest, payload: &mut Payload) -> Self::Future { - ready(Ok(match Multipart::boundary(req.headers()) { - Ok(boundary) => Multipart::from_boundary(boundary, payload.take()), - Err(err) => Multipart::from_error(err), - })) + ready(Ok(Multipart::from_req(req, payload))) } } diff --git a/actix-multipart/src/form/bytes.rs b/actix-multipart/src/form/bytes.rs index 3c5e2eb10..c152db3d0 100644 --- a/actix-multipart/src/form/bytes.rs +++ b/actix-multipart/src/form/bytes.rs @@ -41,8 +41,9 @@ impl<'t> FieldReader<'t> for Bytes { content_type: field.content_type().map(ToOwned::to_owned), file_name: field .content_disposition() + .expect("multipart form fields should have a content-disposition header") .get_filename() - .map(str::to_owned), + .map(ToOwned::to_owned), }) }) } diff --git a/actix-multipart/src/form/json.rs b/actix-multipart/src/form/json.rs index bb4e03bf6..3504c340c 100644 --- a/actix-multipart/src/form/json.rs +++ b/actix-multipart/src/form/json.rs @@ -32,7 +32,6 @@ where fn read_field(req: &'t HttpRequest, field: Field, limits: &'t mut Limits) -> Self::Future { Box::pin(async move { let config = JsonConfig::from_req(req); - let field_name = field.name().to_owned(); if config.validate_content_type { let valid = if let Some(mime) = field.content_type() { @@ -43,17 +42,19 @@ where if !valid { return Err(MultipartError::Field { - field_name, + name: field.form_field_name, source: config.map_error(req, JsonFieldError::ContentType), }); } } + let form_field_name = field.form_field_name.clone(); + let bytes = Bytes::read_field(req, field, limits).await?; Ok(Json(serde_json::from_slice(bytes.data.as_ref()).map_err( |err| MultipartError::Field { - field_name, + name: form_field_name, source: config.map_error(req, JsonFieldError::Deserialize(err)), }, )?)) diff --git a/actix-multipart/src/form/mod.rs b/actix-multipart/src/form/mod.rs index 68cdefec5..9441d249f 100644 --- a/actix-multipart/src/form/mod.rs +++ b/actix-multipart/src/form/mod.rs @@ -80,13 +80,13 @@ where state: &'t mut State, duplicate_field: DuplicateField, ) -> Self::Future { - if state.contains_key(field.name()) { + if state.contains_key(&field.form_field_name) { match duplicate_field { DuplicateField::Ignore => return Box::pin(ready(Ok(()))), DuplicateField::Deny => { return Box::pin(ready(Err(MultipartError::DuplicateField( - field.name().to_owned(), + field.form_field_name, )))) } @@ -95,7 +95,7 @@ where } Box::pin(async move { - let field_name = field.name().to_owned(); + let field_name = field.form_field_name.clone(); let t = T::read_field(req, field, limits).await?; state.insert(field_name, Box::new(t)); Ok(()) @@ -123,10 +123,8 @@ where Box::pin(async move { // Note: Vec GroupReader always allows duplicates - let field_name = field.name().to_owned(); - let vec = state - .entry(field_name) + .entry(field.form_field_name.clone()) .or_insert_with(|| Box::>::default()) .downcast_mut::>() .unwrap(); @@ -159,13 +157,13 @@ where state: &'t mut State, duplicate_field: DuplicateField, ) -> Self::Future { - if state.contains_key(field.name()) { + if state.contains_key(&field.form_field_name) { match duplicate_field { DuplicateField::Ignore => return Box::pin(ready(Ok(()))), DuplicateField::Deny => { return Box::pin(ready(Err(MultipartError::DuplicateField( - field.name().to_owned(), + field.form_field_name, )))) } @@ -174,7 +172,7 @@ where } Box::pin(async move { - let field_name = field.name().to_owned(); + let field_name = field.form_field_name.clone(); let t = T::read_field(req, field, limits).await?; state.insert(field_name, Box::new(t)); Ok(()) @@ -281,6 +279,9 @@ impl Limits { /// [`MultipartCollect`] trait. You should use the [`macro@MultipartForm`] macro to derive this /// for your struct. /// +/// Note that this extractor rejects requests with any other Content-Type such as `multipart/mixed`, +/// `multipart/related`, or non-multipart media types. +/// /// Add a [`MultipartFormConfig`] to your app data to configure extraction. #[derive(Deref, DerefMut)] pub struct MultipartForm(pub T); @@ -294,14 +295,24 @@ impl MultipartForm { impl FromRequest for MultipartForm where - T: MultipartCollect, + T: MultipartCollect + 'static, { type Error = Error; type Future = LocalBoxFuture<'static, Result>; #[inline] fn from_request(req: &HttpRequest, payload: &mut dev::Payload) -> Self::Future { - let mut payload = Multipart::new(req.headers(), payload.take()); + let mut multipart = Multipart::from_req(req, payload); + + let content_type = match multipart.content_type_or_bail() { + Ok(content_type) => content_type, + Err(err) => return Box::pin(ready(Err(err.into()))), + }; + + if content_type.subtype() != mime::FORM_DATA { + // this extractor only supports multipart/form-data + return Box::pin(ready(Err(MultipartError::ContentTypeIncompatible.into()))); + }; let config = MultipartFormConfig::from_req(req); let mut limits = Limits::new(config.total_limit, config.memory_limit); @@ -313,14 +324,20 @@ where Box::pin( async move { let mut state = State::default(); - // We need to ensure field limits are shared for all instances of this field name + + // ensure limits are shared for all fields with this name let mut field_limits = HashMap::>::new(); - while let Some(field) = payload.try_next().await? { + while let Some(field) = multipart.try_next().await? { + debug_assert!( + !field.form_field_name.is_empty(), + "multipart form fields should have names", + ); + // Retrieve the limit for this field let entry = field_limits - .entry(field.name().to_owned()) - .or_insert_with(|| T::limit(field.name())); + .entry(field.form_field_name.clone()) + .or_insert_with(|| T::limit(&field.form_field_name)); limits.field_limit_remaining.clone_from(entry); @@ -329,6 +346,7 @@ where // Update the stored limit *entry = limits.field_limit_remaining; } + let inner = T::from_state(state)?; Ok(MultipartForm(inner)) } @@ -752,6 +770,41 @@ mod tests { assert_eq!(response.status(), StatusCode::BAD_REQUEST); } + #[actix_rt::test] + async fn non_multipart_form_data() { + #[derive(MultipartForm)] + struct TestNonMultipartFormData { + #[allow(unused)] + #[multipart(limit = "30B")] + foo: Text, + } + + async fn non_multipart_form_data_route( + _form: MultipartForm, + ) -> String { + unreachable!("request is sent with multipart/mixed"); + } + + let srv = actix_test::start(|| { + App::new().route("/", web::post().to(non_multipart_form_data_route)) + }); + + let mut form = multipart::Form::default(); + form.add_text("foo", "foo"); + + // mangle content-type, keeping the boundary + let ct = form.content_type().replacen("/form-data", "/mixed", 1); + + let res = Client::default() + .post(srv.url("/")) + .content_type(ct) + .send_body(multipart::Body::from(form)) + .await + .unwrap(); + + assert_eq!(res.status(), StatusCode::UNSUPPORTED_MEDIA_TYPE); + } + #[should_panic(expected = "called `Result::unwrap()` on an `Err` value: Connect(Disconnected)")] #[actix_web::test] async fn field_try_next_panic() { diff --git a/actix-multipart/src/form/tempfile.rs b/actix-multipart/src/form/tempfile.rs index 9371a026b..f329876f2 100644 --- a/actix-multipart/src/form/tempfile.rs +++ b/actix-multipart/src/form/tempfile.rs @@ -42,38 +42,36 @@ impl<'t> FieldReader<'t> for TempFile { fn read_field(req: &'t HttpRequest, mut field: Field, limits: &'t mut Limits) -> Self::Future { Box::pin(async move { let config = TempFileConfig::from_req(req); - let field_name = field.name().to_owned(); let mut size = 0; - let file = config - .create_tempfile() - .map_err(|err| config.map_error(req, &field_name, TempFileError::FileIo(err)))?; + let file = config.create_tempfile().map_err(|err| { + config.map_error(req, &field.form_field_name, TempFileError::FileIo(err)) + })?; - let mut file_async = - tokio::fs::File::from_std(file.reopen().map_err(|err| { - config.map_error(req, &field_name, TempFileError::FileIo(err)) - })?); + let mut file_async = tokio::fs::File::from_std(file.reopen().map_err(|err| { + config.map_error(req, &field.form_field_name, TempFileError::FileIo(err)) + })?); while let Some(chunk) = field.try_next().await? { limits.try_consume_limits(chunk.len(), false)?; size += chunk.len(); file_async.write_all(chunk.as_ref()).await.map_err(|err| { - config.map_error(req, &field_name, TempFileError::FileIo(err)) + config.map_error(req, &field.form_field_name, TempFileError::FileIo(err)) })?; } - file_async - .flush() - .await - .map_err(|err| config.map_error(req, &field_name, TempFileError::FileIo(err)))?; + file_async.flush().await.map_err(|err| { + config.map_error(req, &field.form_field_name, TempFileError::FileIo(err)) + })?; Ok(TempFile { file, content_type: field.content_type().map(ToOwned::to_owned), file_name: field .content_disposition() + .expect("multipart form fields should have a content-disposition header") .get_filename() - .map(str::to_owned), + .map(ToOwned::to_owned), size, }) }) @@ -137,7 +135,7 @@ impl TempFileConfig { }; MultipartError::Field { - field_name: field_name.to_owned(), + name: field_name.to_owned(), source, } } diff --git a/actix-multipart/src/form/text.rs b/actix-multipart/src/form/text.rs index 83e211524..67a434ee6 100644 --- a/actix-multipart/src/form/text.rs +++ b/actix-multipart/src/form/text.rs @@ -36,7 +36,6 @@ where fn read_field(req: &'t HttpRequest, field: Field, limits: &'t mut Limits) -> Self::Future { Box::pin(async move { let config = TextConfig::from_req(req); - let field_name = field.name().to_owned(); if config.validate_content_type { let valid = if let Some(mime) = field.content_type() { @@ -49,22 +48,24 @@ where if !valid { return Err(MultipartError::Field { - field_name, + name: field.form_field_name, source: config.map_error(req, TextError::ContentType), }); } } + let form_field_name = field.form_field_name.clone(); + let bytes = Bytes::read_field(req, field, limits).await?; let text = str::from_utf8(&bytes.data).map_err(|err| MultipartError::Field { - field_name: field_name.clone(), + name: form_field_name.clone(), source: config.map_error(req, TextError::Utf8Error(err)), })?; Ok(Text(serde_plain::from_str(text).map_err(|err| { MultipartError::Field { - field_name, + name: form_field_name, source: config.map_error(req, TextError::Deserialize(err)), } })?)) diff --git a/actix-multipart/src/lib.rs b/actix-multipart/src/lib.rs index 51b06db38..853648beb 100644 --- a/actix-multipart/src/lib.rs +++ b/actix-multipart/src/lib.rs @@ -5,7 +5,7 @@ //! ```no_run //! use actix_web::{post, App, HttpServer, Responder}; //! -//! use actix_multipart::form::{json::Json as MPJson, tempfile::TempFile, MultipartForm}; +//! use actix_multipart::form::{json::Json as MpJson, tempfile::TempFile, MultipartForm}; //! use serde::Deserialize; //! //! #[derive(Debug, Deserialize)] @@ -17,7 +17,7 @@ //! struct UploadForm { //! #[multipart(limit = "100MB")] //! file: TempFile, -//! json: MPJson, +//! json: MpJson, //! } //! //! #[post("/videos")] @@ -36,6 +36,15 @@ //! .await //! } //! ``` +//! +//! cURL request: +//! +//! ```sh +//! curl -v --request POST \ +//! --url http://localhost:8080/videos \ +//! -F 'json={"name": "Cargo.lock"};type=application/json' \ +//! -F file=@./Cargo.lock +//! ``` #![deny(rust_2018_idioms, nonstandard_style)] #![warn(future_incompatible)] @@ -57,7 +66,4 @@ pub mod test; pub use self::{ error::MultipartError, server::{Field, Multipart}, - test::{ - create_form_data_payload_and_headers, create_form_data_payload_and_headers_with_boundary, - }, }; diff --git a/actix-multipart/src/server.rs b/actix-multipart/src/server.rs index 0256aa7bf..76eae11ff 100644 --- a/actix-multipart/src/server.rs +++ b/actix-multipart/src/server.rs @@ -10,12 +10,15 @@ use std::{ }; use actix_web::{ + dev, error::{ParseError, PayloadError}, http::header::{self, ContentDisposition, HeaderMap, HeaderName, HeaderValue}, + HttpRequest, }; use bytes::{Bytes, BytesMut}; use futures_core::stream::{LocalBoxStream, Stream}; use local_waker::LocalWaker; +use mime::Mime; use crate::error::MultipartError; @@ -23,87 +26,79 @@ const MAX_HEADERS: usize = 32; /// The server-side implementation of `multipart/form-data` requests. /// -/// This will parse the incoming stream into `MultipartItem` instances via its -/// Stream implementation. -/// `MultipartItem::Field` contains multipart field. `MultipartItem::Multipart` -/// is used for nested multipart streams. +/// This will parse the incoming stream into `MultipartItem` instances via its `Stream` +/// implementation. `MultipartItem::Field` contains multipart field. `MultipartItem::Multipart` is +/// used for nested multipart streams. pub struct Multipart { safety: Safety, - error: Option, inner: Option, -} - -enum InnerMultipartItem { - None, - Field(Rc>), -} - -#[derive(PartialEq, Debug)] -enum InnerState { - /// Stream eof - Eof, - - /// Skip data until first boundary - FirstBoundary, - - /// Reading boundary - Boundary, - - /// Reading Headers, - Headers, -} - -struct InnerMultipart { - payload: PayloadRef, - boundary: String, - state: InnerState, - item: InnerMultipartItem, + error: Option, } impl Multipart { - /// Create multipart instance for boundary. - pub fn new(headers: &HeaderMap, stream: S) -> Multipart + /// Creates multipart instance from parts. + pub fn new(headers: &HeaderMap, stream: S) -> Self where S: Stream> + 'static, { - match Self::boundary(headers) { - Ok(boundary) => Multipart::from_boundary(boundary, stream), - Err(err) => Multipart::from_error(err), + match Self::find_ct_and_boundary(headers) { + Ok((ct, boundary)) => Self::from_ct_and_boundary(ct, boundary, stream), + Err(err) => Self::from_error(err), } } - /// Extract boundary info from headers. - pub(crate) fn boundary(headers: &HeaderMap) -> Result { - headers - .get(&header::CONTENT_TYPE) - .ok_or(MultipartError::NoContentType)? - .to_str() - .ok() - .and_then(|content_type| content_type.parse::().ok()) - .ok_or(MultipartError::ParseContentType)? - .get_param(mime::BOUNDARY) - .map(|boundary| boundary.as_str().to_owned()) - .ok_or(MultipartError::Boundary) + /// Creates multipart instance from parts. + pub(crate) fn from_req(req: &HttpRequest, payload: &mut dev::Payload) -> Self { + match Self::find_ct_and_boundary(req.headers()) { + Ok((ct, boundary)) => Self::from_ct_and_boundary(ct, boundary, payload.take()), + Err(err) => Self::from_error(err), + } } - /// Create multipart instance for given boundary and stream - pub(crate) fn from_boundary(boundary: String, stream: S) -> Multipart + /// Extract Content-Type and boundary info from headers. + pub(crate) fn find_ct_and_boundary( + headers: &HeaderMap, + ) -> Result<(Mime, String), MultipartError> { + let content_type = headers + .get(&header::CONTENT_TYPE) + .ok_or(MultipartError::ContentTypeMissing)? + .to_str() + .ok() + .and_then(|content_type| content_type.parse::().ok()) + .ok_or(MultipartError::ContentTypeParse)?; + + if content_type.type_() != mime::MULTIPART { + return Err(MultipartError::ContentTypeIncompatible); + } + + let boundary = content_type + .get_param(mime::BOUNDARY) + .ok_or(MultipartError::BoundaryMissing)? + .as_str() + .to_owned(); + + Ok((content_type, boundary)) + } + + /// Constructs a new multipart reader from given Content-Type, boundary, and stream. + pub(crate) fn from_ct_and_boundary(ct: Mime, boundary: String, stream: S) -> Multipart where S: Stream> + 'static, { Multipart { - error: None, safety: Safety::new(), inner: Some(InnerMultipart { - boundary, payload: PayloadRef::new(PayloadBuffer::new(stream)), + content_type: ct, + boundary, state: InnerState::FirstBoundary, item: InnerMultipartItem::None, }), + error: None, } } - /// Create Multipart instance from MultipartError + /// Constructs a new multipart reader from given `MultipartError`. pub(crate) fn from_error(err: MultipartError) -> Multipart { Multipart { error: Some(err), @@ -111,6 +106,21 @@ impl Multipart { inner: None, } } + + /// Return requests parsed Content-Type or raise the stored error. + pub(crate) fn content_type_or_bail(&mut self) -> Result { + if let Some(err) = self.error.take() { + return Err(err); + } + + Ok(self + .inner + .as_ref() + // TODO: look into using enum instead of two options + .expect("multipart requests should have state") + .content_type + .clone()) + } } impl Stream for Multipart { @@ -141,8 +151,46 @@ impl Stream for Multipart { } } +#[derive(PartialEq, Debug)] +enum InnerState { + /// Stream EOF. + Eof, + + /// Skip data until first boundary. + FirstBoundary, + + /// Reading boundary. + Boundary, + + /// Reading Headers. + Headers, +} + +enum InnerMultipartItem { + None, + Field(Rc>), +} + +struct InnerMultipart { + /// Request's payload stream & buffer. + payload: PayloadRef, + + /// Request's Content-Type. + /// + /// Guaranteed to have "multipart" top-level media type, i.e., `multipart/*`. + content_type: Mime, + + /// Field boundary. + boundary: String, + + state: InnerState, + item: InnerMultipartItem, +} + impl InnerMultipart { - fn read_headers(payload: &mut PayloadBuffer) -> Result, MultipartError> { + fn read_field_headers( + payload: &mut PayloadBuffer, + ) -> Result, MultipartError> { match payload.read_until(b"\r\n\r\n")? { None => { if payload.eof { @@ -153,6 +201,7 @@ impl InnerMultipart { } Some(bytes) => { let mut hdrs = [httparse::EMPTY_HEADER; MAX_HEADERS]; + match httparse::parse_headers(&bytes, &mut hdrs) { Ok(httparse::Status::Complete((_, hdrs))) => { // convert headers @@ -193,7 +242,7 @@ impl InnerMultipart { || &chunk[..2] != b"--" || &chunk[2..boundary.len() + 2] != boundary.as_bytes() { - Err(MultipartError::Boundary) + Err(MultipartError::BoundaryMissing) } else if &chunk[boundary.len() + 2..] == b"\r\n" { Ok(Some(false)) } else if &chunk[boundary.len() + 2..boundary.len() + 4] == b"--" @@ -202,7 +251,7 @@ impl InnerMultipart { { Ok(Some(true)) } else { - Err(MultipartError::Boundary) + Err(MultipartError::BoundaryMissing) } } } @@ -217,7 +266,7 @@ impl InnerMultipart { match payload.readline()? { Some(chunk) => { if chunk.is_empty() { - return Err(MultipartError::Boundary); + return Err(MultipartError::BoundaryMissing); } if chunk.len() < boundary.len() { continue; @@ -282,7 +331,7 @@ impl InnerMultipart { } } - let headers = if let Some(mut payload) = self.payload.get_mut(safety) { + let field_headers = if let Some(mut payload) = self.payload.get_mut(safety) { match self.state { // read until first boundary InnerState::FirstBoundary => { @@ -317,7 +366,7 @@ impl InnerMultipart { // read field headers for next field if self.state == InnerState::Headers { - if let Some(headers) = InnerMultipart::read_headers(&mut payload)? { + if let Some(headers) = InnerMultipart::read_field_headers(&mut payload)? { self.state = InnerState::Boundary; headers } else { @@ -331,31 +380,37 @@ impl InnerMultipart { return Poll::Pending; }; - // According to RFC 7578 §4.2, a Content-Disposition header must always be present and - // set to "form-data". - - let content_disposition = headers + let field_content_disposition = field_headers .get(&header::CONTENT_DISPOSITION) .and_then(|cd| ContentDisposition::from_raw(cd).ok()) .filter(|content_disposition| { - let is_form_data = - content_disposition.disposition == header::DispositionType::FormData; - - let has_field_name = content_disposition - .parameters - .iter() - .any(|param| matches!(param, header::DispositionParam::Name(_))); - - is_form_data && has_field_name + matches!( + content_disposition.disposition, + header::DispositionType::FormData, + ) }); - let cd = if let Some(content_disposition) = content_disposition { - content_disposition + let form_field_name = if self.content_type.subtype() == mime::FORM_DATA { + // According to RFC 7578 §4.2, which relates to "multipart/form-data" requests + // specifically, fields must have a Content-Disposition header, its disposition + // type must be set as "form-data", and it must have a name parameter. + + let Some(cd) = &field_content_disposition else { + return Poll::Ready(Some(Err(MultipartError::ContentDispositionMissing))); + }; + + let Some(field_name) = cd.get_name() else { + return Poll::Ready(Some(Err(MultipartError::ContentDispositionNameMissing))); + }; + + Some(field_name.to_owned()) } else { - return Poll::Ready(Some(Err(MultipartError::NoContentDisposition))); + None }; - let ct: Option = headers + // TODO: check out other multipart/* RFCs for specific requirements + + let field_content_type: Option = field_headers .get(&header::CONTENT_TYPE) .and_then(|ct| ct.to_str().ok()) .and_then(|ct| ct.parse().ok()); @@ -363,23 +418,24 @@ impl InnerMultipart { self.state = InnerState::Boundary; // nested multipart stream is not supported - if let Some(mime) = &ct { + if let Some(mime) = &field_content_type { if mime.type_() == mime::MULTIPART { return Poll::Ready(Some(Err(MultipartError::Nested))); } } - let field = - InnerField::new_in_rc(self.payload.clone(), self.boundary.clone(), &headers)?; + let field_inner = + InnerField::new_in_rc(self.payload.clone(), self.boundary.clone(), &field_headers)?; - self.item = InnerMultipartItem::Field(Rc::clone(&field)); + self.item = InnerMultipartItem::Field(Rc::clone(&field_inner)); Poll::Ready(Some(Ok(Field::new( + field_content_type, + field_content_disposition, + form_field_name, + field_headers, safety.clone(cx), - headers, - ct, - cd, - field, + field_inner, )))) } } @@ -392,26 +448,42 @@ impl Drop for InnerMultipart { } } -/// A single field in a multipart stream +/// A single field in a multipart stream. pub struct Field { - ct: Option, - cd: ContentDisposition, + /// Field's Content-Type. + content_type: Option, + + /// Field's Content-Disposition. + content_disposition: Option, + + /// Form field name. + /// + /// A non-optional storage for form field names to avoid unwraps in `form` module. Will be an + /// empty string in non-form contexts. + /// + // INVARIANT: always non-empty when request content-type is multipart/form-data. + pub(crate) form_field_name: String, + + /// Field's header map. headers: HeaderMap, - inner: Rc>, + safety: Safety, + inner: Rc>, } impl Field { fn new( - safety: Safety, + content_type: Option, + content_disposition: Option, + form_field_name: Option, headers: HeaderMap, - ct: Option, - cd: ContentDisposition, + safety: Safety, inner: Rc>, ) -> Self { Field { - ct, - cd, + content_type, + content_disposition, + form_field_name: form_field_name.unwrap_or_default(), headers, inner, safety, @@ -428,34 +500,36 @@ impl Field { /// According to [RFC 7578](https://www.rfc-editor.org/rfc/rfc7578#section-4.4), if it is not /// present, it should default to "text/plain". Note it is the responsibility of the client to /// provide the appropriate content type, there is no attempt to validate this by the server. - pub fn content_type(&self) -> Option<&mime::Mime> { - self.ct.as_ref() + pub fn content_type(&self) -> Option<&Mime> { + self.content_type.as_ref() } - /// Returns the field's Content-Disposition. + /// Returns this field's parsed Content-Disposition header, if set. /// - /// Per [RFC 7578 §4.2]: "Each part MUST contain a Content-Disposition header field where the - /// disposition type is `form-data`. The Content-Disposition header field MUST also contain an - /// additional parameter of `name`; the value of the `name` parameter is the original field name - /// from the form." + /// # Validation /// - /// This crate validates that it exists before returning a `Field`. As such, it is safe to - /// unwrap `.content_disposition().get_name()`. The [name](Self::name) method is provided as - /// a convenience. + /// Per [RFC 7578 §4.2], the parts of a multipart/form-data payload MUST contain a + /// Content-Disposition header field where the disposition type is `form-data` and MUST also + /// contain an additional parameter of `name` with its value being the original field name from + /// the form. This requirement is enforced during extraction for multipart/form-data requests, + /// but not other kinds of multipart requests (such as multipart/related). + /// + /// As such, it is safe to `.unwrap()` calls `.content_disposition()` if you've verified. + /// + /// The [`name()`](Self::name) method is also provided as a convenience for obtaining the + /// aforementioned name parameter. /// /// [RFC 7578 §4.2]: https://datatracker.ietf.org/doc/html/rfc7578#section-4.2 - pub fn content_disposition(&self) -> &ContentDisposition { - &self.cd + pub fn content_disposition(&self) -> Option<&ContentDisposition> { + self.content_disposition.as_ref() } - /// Returns the field's name. + /// Returns the field's name, if set. /// - /// See [content_disposition](Self::content_disposition) regarding guarantees about existence of - /// the name field. - pub fn name(&self) -> &str { - self.content_disposition() - .get_name() - .expect("field name should be guaranteed to exist in multipart form-data") + /// See [`content_disposition()`](Self::content_disposition) regarding guarantees on presence of + /// the "name" field. + pub fn name(&self) -> Option<&str> { + self.content_disposition()?.get_name() } } @@ -465,6 +539,7 @@ impl Stream for Field { fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { let this = self.get_mut(); let mut inner = this.inner.borrow_mut(); + if let Some(mut buffer) = inner .payload .as_ref() @@ -486,7 +561,7 @@ impl Stream for Field { impl fmt::Debug for Field { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - if let Some(ct) = &self.ct { + if let Some(ct) = &self.content_type { writeln!(f, "\nField: {}", ct)?; } else { writeln!(f, "\nField:")?; @@ -570,6 +645,7 @@ impl InnerField { } /// Reads content chunk of body part with unknown length. + /// /// The `Content-Length` header for body part is not necessary. fn read_stream( payload: &mut PayloadBuffer, @@ -704,8 +780,8 @@ impl PayloadRef { } } - fn get_mut(&self, s: &Safety) -> Option> { - if s.current() { + fn get_mut(&self, safety: &Safety) -> Option> { + if safety.current() { Some(self.payload.borrow_mut()) } else { None @@ -722,10 +798,11 @@ impl Clone for PayloadRef { } /// Counter. It tracks of number of clones of payloads and give access to payload only to top most. -/// * When dropped, parent task is awakened. This is to support the case where Field is -/// dropped in a separate task than Multipart. -/// * Assumes that parent owners don't move to different tasks; only the top-most is allowed to. -/// * If dropped and is not top most owner, is_clean flag is set to false. +/// +/// - When dropped, parent task is awakened. This is to support the case where `Field` is dropped in +/// a separate task than `Multipart`. +/// - Assumes that parent owners don't move to different tasks; only the top-most is allowed to. +/// - If dropped and is not top most owner, is_clean flag is set to false. #[derive(Debug)] struct Safety { task: LocalWaker, @@ -876,6 +953,7 @@ mod tests { test::TestRequest, FromRequest, }; + use assert_matches::assert_matches; use bytes::BufMut as _; use futures_util::{future::lazy, StreamExt as _}; use tokio::sync::mpsc; @@ -888,8 +966,8 @@ mod tests { #[actix_rt::test] async fn test_boundary() { let headers = HeaderMap::new(); - match Multipart::boundary(&headers) { - Err(MultipartError::NoContentType) => {} + match Multipart::find_ct_and_boundary(&headers) { + Err(MultipartError::ContentTypeMissing) => {} _ => unreachable!("should not happen"), } @@ -899,8 +977,8 @@ mod tests { header::HeaderValue::from_static("test"), ); - match Multipart::boundary(&headers) { - Err(MultipartError::ParseContentType) => {} + match Multipart::find_ct_and_boundary(&headers) { + Err(MultipartError::ContentTypeParse) => {} _ => unreachable!("should not happen"), } @@ -909,8 +987,8 @@ mod tests { header::CONTENT_TYPE, header::HeaderValue::from_static("multipart/mixed"), ); - match Multipart::boundary(&headers) { - Err(MultipartError::Boundary) => {} + match Multipart::find_ct_and_boundary(&headers) { + Err(MultipartError::BoundaryMissing) => {} _ => unreachable!("should not happen"), } @@ -923,8 +1001,8 @@ mod tests { ); assert_eq!( - Multipart::boundary(&headers).unwrap(), - "5c02368e880e436dab70ed54e1c58209" + Multipart::find_ct_and_boundary(&headers).unwrap().1, + "5c02368e880e436dab70ed54e1c58209", ); } @@ -1059,7 +1137,7 @@ mod tests { let mut multipart = Multipart::new(&headers, payload); match multipart.next().await { Some(Ok(mut field)) => { - let cd = field.content_disposition(); + let cd = field.content_disposition().unwrap(); assert_eq!(cd.disposition, DispositionType::FormData); assert_eq!(cd.parameters[0], DispositionParam::Name("file".into())); @@ -1121,7 +1199,7 @@ mod tests { let mut multipart = Multipart::new(&headers, payload); match multipart.next().await.unwrap() { Ok(mut field) => { - let cd = field.content_disposition(); + let cd = field.content_disposition().unwrap(); assert_eq!(cd.disposition, DispositionType::FormData); assert_eq!(cd.parameters[0], DispositionParam::Name("file".into())); @@ -1245,7 +1323,7 @@ mod tests { #[actix_rt::test] async fn test_multipart_from_error() { - let err = MultipartError::NoContentType; + let err = MultipartError::ContentTypeMissing; let mut multipart = Multipart::from_error(err); assert!(multipart.next().await.unwrap().is_err()) } @@ -1254,9 +1332,8 @@ mod tests { async fn test_multipart_from_boundary() { let (_, payload) = create_stream(); let (_, headers) = create_simple_request_with_header(); - let boundary = Multipart::boundary(&headers); - assert!(boundary.is_ok()); - let _ = Multipart::from_boundary(boundary.unwrap(), payload); + let (ct, boundary) = Multipart::find_ct_and_boundary(&headers).unwrap(); + let _ = Multipart::from_ct_and_boundary(ct, boundary, payload); } #[actix_rt::test] @@ -1278,11 +1355,43 @@ mod tests { } #[actix_rt::test] - async fn no_content_disposition() { + async fn no_content_disposition_form_data() { let bytes = Bytes::from( "testasdadsad\r\n\ --abbc761f78ff4d7cb7573b5a23f96ef0\r\n\ - Content-Type: text/plain; charset=utf-8\r\nContent-Length: 4\r\n\r\n\ + Content-Type: text/plain; charset=utf-8\r\n\ + Content-Length: 4\r\n\ + \r\n\ + test\r\n\ + --abbc761f78ff4d7cb7573b5a23f96ef0\r\n", + ); + let mut headers = HeaderMap::new(); + headers.insert( + header::CONTENT_TYPE, + header::HeaderValue::from_static( + "multipart/form-data; boundary=\"abbc761f78ff4d7cb7573b5a23f96ef0\"", + ), + ); + let payload = SlowStream::new(bytes); + + let mut multipart = Multipart::new(&headers, payload); + let res = multipart.next().await.unwrap(); + assert_matches!( + res.expect_err( + "according to RFC 7578, form-data fields require a content-disposition header" + ), + MultipartError::ContentDispositionMissing + ); + } + + #[actix_rt::test] + async fn no_content_disposition_non_form_data() { + let bytes = Bytes::from( + "testasdadsad\r\n\ + --abbc761f78ff4d7cb7573b5a23f96ef0\r\n\ + Content-Type: text/plain; charset=utf-8\r\n\ + Content-Length: 4\r\n\ + \r\n\ test\r\n\ --abbc761f78ff4d7cb7573b5a23f96ef0\r\n", ); @@ -1297,20 +1406,18 @@ mod tests { let mut multipart = Multipart::new(&headers, payload); let res = multipart.next().await.unwrap(); - assert!(res.is_err()); - assert!(matches!( - res.unwrap_err(), - MultipartError::NoContentDisposition, - )); + res.unwrap(); } #[actix_rt::test] - async fn no_name_in_content_disposition() { + async fn no_name_in_form_data_content_disposition() { let bytes = Bytes::from( "testasdadsad\r\n\ --abbc761f78ff4d7cb7573b5a23f96ef0\r\n\ Content-Disposition: form-data; filename=\"fn.txt\"\r\n\ - Content-Type: text/plain; charset=utf-8\r\nContent-Length: 4\r\n\r\n\ + Content-Type: text/plain; charset=utf-8\r\n\ + Content-Length: 4\r\n\ + \r\n\ test\r\n\ --abbc761f78ff4d7cb7573b5a23f96ef0\r\n", ); @@ -1318,18 +1425,17 @@ mod tests { headers.insert( header::CONTENT_TYPE, header::HeaderValue::from_static( - "multipart/mixed; boundary=\"abbc761f78ff4d7cb7573b5a23f96ef0\"", + "multipart/form-data; boundary=\"abbc761f78ff4d7cb7573b5a23f96ef0\"", ), ); let payload = SlowStream::new(bytes); let mut multipart = Multipart::new(&headers, payload); let res = multipart.next().await.unwrap(); - assert!(res.is_err()); - assert!(matches!( - res.unwrap_err(), - MultipartError::NoContentDisposition, - )); + assert_matches!( + res.expect_err("according to RFC 7578, form-data fields require a name attribute"), + MultipartError::ContentDispositionNameMissing + ); } #[actix_rt::test] @@ -1362,7 +1468,7 @@ mod tests { let mut field = multipart.next().await.unwrap().unwrap(); let task = rt::spawn(async move { - rt::time::sleep(Duration::from_secs(1)).await; + rt::time::sleep(Duration::from_millis(500)).await; assert_eq!(field.next().await.unwrap().unwrap(), "test"); drop(field); }); diff --git a/actix-multipart/src/test.rs b/actix-multipart/src/test.rs index 77d918283..828f37957 100644 --- a/actix-multipart/src/test.rs +++ b/actix-multipart/src/test.rs @@ -1,3 +1,5 @@ +//! Multipart testing utilities. + use actix_web::http::header::{self, HeaderMap}; use bytes::{BufMut as _, Bytes, BytesMut}; use mime::Mime; diff --git a/actix-web/src/http/header/content_disposition.rs b/actix-web/src/http/header/content_disposition.rs index 9725cd19b..824bf1195 100644 --- a/actix-web/src/http/header/content_disposition.rs +++ b/actix-web/src/http/header/content_disposition.rs @@ -154,7 +154,7 @@ impl DispositionParam { #[inline] pub fn as_name(&self) -> Option<&str> { match self { - DispositionParam::Name(ref name) => Some(name.as_str()), + DispositionParam::Name(name) => Some(name.as_str()), _ => None, } } @@ -163,7 +163,7 @@ impl DispositionParam { #[inline] pub fn as_filename(&self) -> Option<&str> { match self { - DispositionParam::Filename(ref filename) => Some(filename.as_str()), + DispositionParam::Filename(filename) => Some(filename.as_str()), _ => None, } } @@ -172,7 +172,7 @@ impl DispositionParam { #[inline] pub fn as_filename_ext(&self) -> Option<&ExtendedValue> { match self { - DispositionParam::FilenameExt(ref value) => Some(value), + DispositionParam::FilenameExt(value) => Some(value), _ => None, } }