actix-files: add an opt-in `prefer_utf8` option for served files.

Summary of this patch:
* New `encoding::equiv_utf8_text` helper: maps six MIME constants
  (application/javascript, text/html, text/css, text/plain, text/csv,
  text/tab-separated-values) to their `*_UTF_8` counterparts and returns
  every other MIME type unchanged.
* New `Flags::PREFER_UTF8` bit plus `Files::prefer_utf8` and
  `NamedFile::prefer_utf8` builder methods. `Flags::default()` no longer
  uses `Flags::all()`, so the new bit is off by default.
* `NamedFile::into_response` sets Content-Type, Content-Disposition,
  Last-Modified and ETag with plain `if` / `if let` blocks instead of the
  `if_true` / `if_some` builder combinators, applying `equiv_utf8_text`
  when `PREFER_UTF8` is set.
* `#[inline]` attributes are moved below the doc comments they follow.
* The "Implementation Quirks" section is dropped from the crate docs.
* Tests: empty-extension case for `file_extension_to_mime`, unit test for
  `equiv_utf8_text`, and an integration test backed by `tests/utf8.txt`.
* The `rust-toolchain` pin (1.42.0) is deleted.

diff --git a/actix-files/CHANGES.md b/actix-files/CHANGES.md
index 75d616ff9..271476d91 100644
--- a/actix-files/CHANGES.md
+++ b/actix-files/CHANGES.md
@@ -1,6 +1,14 @@
 # Changes
 
 ## [Unreleased] - 2020-xx-xx
+* Add `Files::prefer_utf8` option that adds UTF-8 charset on certain response types. [#1714]
+
+[#1714]: https://github.com/actix/actix-web/pull/1714
+
+
+## [0.3.0] - 2020-09-11
+* No significant changes from 0.3.0-beta.1.
+
 
 ## [0.3.0-beta.1] - 2020-07-15
 * Update `v_htmlescape` to 0.10
diff --git a/actix-files/src/encoding.rs b/actix-files/src/encoding.rs
new file mode 100644
index 000000000..95997e313
--- /dev/null
+++ b/actix-files/src/encoding.rs
@@ -0,0 +1,52 @@
+use mime::Mime;
+
+/// Transforms MIME `text/*` types into their UTF-8 equivalent, if supported.
+///
+/// MIME types that are converted
+/// - application/javascript
+/// - text/html
+/// - text/css
+/// - text/plain
+/// - text/csv
+/// - text/tab-separated-values
+pub(crate) fn equiv_utf8_text(ct: Mime) -> Mime {
+    // use (roughly) order of file-type popularity for a web server
+
+    if ct == mime::APPLICATION_JAVASCRIPT {
+        return mime::APPLICATION_JAVASCRIPT_UTF_8;
+    }
+
+    if ct == mime::TEXT_HTML {
+        return mime::TEXT_HTML_UTF_8;
+    }
+
+    if ct == mime::TEXT_CSS {
+        return mime::TEXT_CSS_UTF_8;
+    }
+
+    if ct == mime::TEXT_PLAIN {
+        return mime::TEXT_PLAIN_UTF_8;
+    }
+
+    if ct == mime::TEXT_CSV {
+        return mime::TEXT_CSV_UTF_8;
+    }
+
+    if ct == mime::TEXT_TAB_SEPARATED_VALUES {
+        return mime::TEXT_TAB_SEPARATED_VALUES_UTF_8;
+    }
+
+    ct
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_equiv_utf8_text() {
+        assert_eq!(equiv_utf8_text(mime::TEXT_PLAIN), mime::TEXT_PLAIN_UTF_8);
+        assert_eq!(equiv_utf8_text(mime::TEXT_XML), mime::TEXT_XML);
+        assert_eq!(equiv_utf8_text(mime::IMAGE_PNG), mime::IMAGE_PNG);
+    }
+}
diff --git a/actix-files/src/files.rs b/actix-files/src/files.rs
index 2b55e1aa9..5a783e2dd 100644
--- a/actix-files/src/files.rs
+++ b/actix-files/src/files.rs
@@ -138,24 +138,33 @@ impl Files {
         self
     }
 
-    #[inline]
     /// Specifies whether to use ETag or not.
     ///
     /// Default is true.
+    #[inline]
     pub fn use_etag(mut self, value: bool) -> Self {
         self.file_flags.set(named::Flags::ETAG, value);
         self
     }
 
-    #[inline]
     /// Specifies whether to use Last-Modified or not.
     ///
     /// Default is true.
+    #[inline]
     pub fn use_last_modified(mut self, value: bool) -> Self {
         self.file_flags.set(named::Flags::LAST_MD, value);
         self
     }
 
+    /// Specifies whether text responses should signal a UTF-8 encoding.
+    ///
+    /// Default is false (but will default to true in a future version).
+    #[inline]
+    pub fn prefer_utf8(mut self, value: bool) -> Self {
+        self.file_flags.set(named::Flags::PREFER_UTF8, value);
+        self
+    }
+
     /// Specifies custom guards to use for directory listings and files.
     ///
     /// Default behaviour allows GET and HEAD.
diff --git a/actix-files/src/lib.rs b/actix-files/src/lib.rs
index 1fc7cb3f3..120345c40 100644
--- a/actix-files/src/lib.rs
+++ b/actix-files/src/lib.rs
@@ -8,12 +8,8 @@
 //! use actix_files::Files;
 //!
 //! let app = App::new()
-//!     .service(Files::new("/static", "."));
+//!     .service(Files::new("/static", ".").prefer_utf8(true));
 //! ```
-//!
-//! # Implementation Quirks
-//! - If a filename contains non-ascii characters, that file will be served with the `charset=utf-8`
-//!   extension on the Content-Type header.
 
 #![deny(rust_2018_idioms)]
 #![warn(missing_docs, missing_debug_implementations)]
@@ -30,6 +26,7 @@ use mime_guess::from_ext;
 
 mod chunked;
 mod directory;
+mod encoding;
 mod error;
 mod files;
 mod named;
@@ -93,6 +90,9 @@ mod tests {
 
     #[actix_rt::test]
     async fn test_file_extension_to_mime() {
+        let m = file_extension_to_mime("");
+        assert_eq!(m, mime::APPLICATION_OCTET_STREAM);
+
         let m = file_extension_to_mime("jpg");
         assert_eq!(m, mime::IMAGE_JPEG);
 
diff --git a/actix-files/src/named.rs b/actix-files/src/named.rs
index 3caa4a809..dacb51136 100644
--- a/actix-files/src/named.rs
+++ b/actix-files/src/named.rs
@@ -22,20 +22,21 @@ use bitflags::bitflags;
 use futures_util::future::{ready, Ready};
 use mime_guess::from_path;
 
-use crate::range::HttpRange;
 use crate::ChunkedReadFile;
+use crate::{encoding::equiv_utf8_text, range::HttpRange};
 
 bitflags! {
     pub(crate) struct Flags: u8 {
-        const ETAG = 0b0000_0001;
-        const LAST_MD = 0b0000_0010;
+        const ETAG =                0b0000_0001;
+        const LAST_MD =             0b0000_0010;
         const CONTENT_DISPOSITION = 0b0000_0100;
+        const PREFER_UTF8 =         0b0000_1000;
     }
 }
 
 impl Default for Flags {
     fn default() -> Self {
-        Flags::all()
+        Flags::from_bits_truncate(0b0000_0111)
     }
 }
 
@@ -92,6 +93,7 @@ impl NamedFile {
         };
 
         let ct = from_path(&path).first_or_octet_stream();
+
         let disposition = match ct.type_() {
             mime::IMAGE | mime::TEXT | mime::VIDEO => DispositionType::Inline,
             _ => DispositionType::Attachment,
@@ -215,24 +217,33 @@ impl NamedFile {
         self
     }
 
-    #[inline]
-    ///Specifies whether to use ETag or not.
+    /// Specifies whether to use ETag or not.
     ///
-    ///Default is true.
+    /// Default is true.
+    #[inline]
     pub fn use_etag(mut self, value: bool) -> Self {
         self.flags.set(Flags::ETAG, value);
         self
     }
 
-    #[inline]
-    ///Specifies whether to use Last-Modified or not.
+    /// Specifies whether to use Last-Modified or not.
     ///
-    ///Default is true.
+    /// Default is true.
+    #[inline]
     pub fn use_last_modified(mut self, value: bool) -> Self {
         self.flags.set(Flags::LAST_MD, value);
         self
     }
 
+    /// Specifies whether text responses should signal a UTF-8 encoding.
+    ///
+    /// Default is false (but will default to true in a future version).
+    #[inline]
+    pub fn prefer_utf8(mut self, value: bool) -> Self {
+        self.flags.set(Flags::PREFER_UTF8, value);
+        self
+    }
+
     pub(crate) fn etag(&self) -> Option<header::EntityTag> {
         // This etag format is similar to Apache's.
         self.modified.as_ref().map(|mtime| {
@@ -268,18 +279,24 @@ impl NamedFile {
     /// Creates an `HttpResponse` with file as a streaming body.
     pub fn into_response(self, req: &HttpRequest) -> Result<HttpResponse, Error> {
         if self.status_code != StatusCode::OK {
-            let mut resp = HttpResponse::build(self.status_code);
+            let mut res = HttpResponse::build(self.status_code);
 
-            resp.set(header::ContentType(self.content_type.clone()))
-                .if_true(self.flags.contains(Flags::CONTENT_DISPOSITION), |res| {
-                    res.header(
-                        header::CONTENT_DISPOSITION,
-                        self.content_disposition.to_string(),
-                    );
-                });
+            if self.flags.contains(Flags::PREFER_UTF8) {
+                let ct = equiv_utf8_text(self.content_type.clone());
+                res.header(header::CONTENT_TYPE, ct.to_string());
+            } else {
+                res.header(header::CONTENT_TYPE, self.content_type.to_string());
+            }
+
+            if self.flags.contains(Flags::CONTENT_DISPOSITION) {
+                res.header(
+                    header::CONTENT_DISPOSITION,
+                    self.content_disposition.to_string(),
+                );
+            }
 
             if let Some(current_encoding) = self.encoding {
-                resp.encoding(current_encoding);
+                res.encoding(current_encoding);
             }
 
             let reader = ChunkedReadFile {
@@ -290,7 +307,7 @@ impl NamedFile {
                 counter: 0,
             };
 
-            return Ok(resp.streaming(reader));
+            return Ok(res.streaming(reader));
         }
 
         let etag = if self.flags.contains(Flags::ETAG) {
@@ -342,25 +359,33 @@ impl NamedFile {
         };
 
         let mut resp = HttpResponse::build(self.status_code);
-        resp.set(header::ContentType(self.content_type.clone()))
-            .if_true(self.flags.contains(Flags::CONTENT_DISPOSITION), |res| {
-                res.header(
-                    header::CONTENT_DISPOSITION,
-                    self.content_disposition.to_string(),
-                );
-            });
+
+        if self.flags.contains(Flags::PREFER_UTF8) {
+            let ct = equiv_utf8_text(self.content_type.clone());
+            resp.header(header::CONTENT_TYPE, ct.to_string());
+        } else {
+            resp.header(header::CONTENT_TYPE, self.content_type.to_string());
+        }
+
+        if self.flags.contains(Flags::CONTENT_DISPOSITION) {
+            resp.header(
+                header::CONTENT_DISPOSITION,
+                self.content_disposition.to_string(),
+            );
+        }
 
         // default compressing
         if let Some(current_encoding) = self.encoding {
             resp.encoding(current_encoding);
         }
 
-        resp.if_some(last_modified, |lm, resp| {
-            resp.set(header::LastModified(lm));
-        })
-        .if_some(etag, |etag, resp| {
-            resp.set(header::ETag(etag));
-        });
+        if let Some(lm) = last_modified {
+            resp.header(header::LAST_MODIFIED, lm.to_string());
+        }
+
+        if let Some(etag) = etag {
+            resp.header(header::ETAG, etag.to_string());
+        }
 
         resp.header(header::ACCEPT_RANGES, "bytes");
 
diff --git a/actix-files/tests/encoding.rs b/actix-files/tests/encoding.rs
new file mode 100644
index 000000000..d7e01b305
--- /dev/null
+++ b/actix-files/tests/encoding.rs
@@ -0,0 +1,40 @@
+use actix_files::Files;
+use actix_web::{
+    http::{
+        header::{self, HeaderValue},
+        StatusCode,
+    },
+    test::{self, TestRequest},
+    App,
+};
+
+#[actix_rt::test]
+async fn test_utf8_file_contents() {
+    // use default ISO-8859-1 encoding
+    let mut srv =
+        test::init_service(App::new().service(Files::new("/", "./tests"))).await;
+
+    let req = TestRequest::with_uri("/utf8.txt").to_request();
+    let res = test::call_service(&mut srv, req).await;
+
+    assert_eq!(res.status(), StatusCode::OK);
+    assert_eq!(
+        res.headers().get(header::CONTENT_TYPE),
+        Some(&HeaderValue::from_static("text/plain")),
+    );
+
+    // prefer UTF-8 encoding
+    let mut srv = test::init_service(
+        App::new().service(Files::new("/", "./tests").prefer_utf8(true)),
+    )
+    .await;
+
+    let req = TestRequest::with_uri("/utf8.txt").to_request();
+    let res = test::call_service(&mut srv, req).await;
+
+    assert_eq!(res.status(), StatusCode::OK);
+    assert_eq!(
+        res.headers().get(header::CONTENT_TYPE),
+        Some(&HeaderValue::from_static("text/plain; charset=utf-8")),
+    );
+}
diff --git a/actix-files/tests/utf8.txt b/actix-files/tests/utf8.txt
new file mode 100644
index 000000000..d8590e2f5
--- /dev/null
+++ b/actix-files/tests/utf8.txt
@@ -0,0 +1,3 @@
+中文内容显示正确。
+
+English is OK.
diff --git a/rust-toolchain b/rust-toolchain
deleted file mode 100644
index a50908ca3..000000000
--- a/rust-toolchain
+++ /dev/null
@@ -1 +0,0 @@
-1.42.0