From c10f05a8672e53ecada8ec2f15bf9d927efade57 Mon Sep 17 00:00:00 2001 From: LoveSy Date: Sun, 3 Mar 2024 23:50:16 +0800 Subject: [PATCH] Add `unicode` feature to switch between `regex` and `regex-lite` crates as a trade-off between full unicode support and binary size (#3291) * - Add `unicode` feature to switch between `regex` and `regex-lite` as a trade-off between full unicode support and binary size. * Update CHANGES.md * Update CHANGES.md * refactor: move regexset code selection to own module * docs: add docs within RegexSet module * chore: restore manifests * test: ensure all actix-router codepaths are tested --------- Co-authored-by: Rob Ede --- .github/workflows/ci.yml | 1 + actix-router/CHANGES.md | 1 + actix-router/Cargo.toml | 9 ++- actix-router/src/lib.rs | 1 + actix-router/src/regex_set.rs | 66 +++++++++++++++++++ actix-router/src/resource.rs | 15 +++-- actix-web/CHANGES.md | 1 + actix-web/Cargo.toml | 10 ++- .../src/http/header/content_disposition.rs | 3 + actix-web/src/middleware/logger.rs | 22 ++++--- actix-web/src/middleware/normalize.rs | 3 + 11 files changed, 112 insertions(+), 20 deletions(-) create mode 100644 actix-router/src/regex_set.rs diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index cd293f5dc..7a7adb246 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -70,6 +70,7 @@ jobs: shell: bash run: | set -e + cargo test --lib --tests -p=actix-router --no-default-features cargo test --lib --tests -p=actix-router --all-features cargo test --lib --tests -p=actix-http --all-features cargo test --lib --tests -p=actix-web --features=rustls-0_20,rustls-0_21,rustls-0_22,openssl -- --skip=test_reading_deflate_encoding_large_random_rustls diff --git a/actix-router/CHANGES.md b/actix-router/CHANGES.md index a80b15e69..8aa3c8639 100644 --- a/actix-router/CHANGES.md +++ b/actix-router/CHANGES.md @@ -2,6 +2,7 @@ ## Unreleased +- Add `unicode` crate feature (on-by-default) to switch between `regex` and `regex-lite` as a 
trade-off between full unicode support and binary size. - Minimum supported Rust version (MSRV) is now 1.72. ## 0.5.2 diff --git a/actix-router/Cargo.toml b/actix-router/Cargo.toml index de39944cc..0b02e84b9 100644 --- a/actix-router/Cargo.toml +++ b/actix-router/Cargo.toml @@ -17,12 +17,16 @@ name = "actix_router" path = "src/lib.rs" [features] -default = ["http"] +default = ["http", "unicode"] +http = ["dep:http"] +unicode = ["dep:regex"] [dependencies] bytestring = ">=0.1.5, <2" +cfg-if = "1" http = { version = "0.2.7", optional = true } -regex = "1.5" +regex = { version = "1.5", optional = true } +regex-lite = "0.1" serde = "1" tracing = { version = "0.1.30", default-features = false, features = ["log"] } @@ -35,6 +39,7 @@ percent-encoding = "2.1" [[bench]] name = "router" harness = false +required-features = ["unicode"] [[bench]] name = "quoter" diff --git a/actix-router/src/lib.rs b/actix-router/src/lib.rs index f10093436..c4d0d2c87 100644 --- a/actix-router/src/lib.rs +++ b/actix-router/src/lib.rs @@ -10,6 +10,7 @@ mod de; mod path; mod pattern; mod quoter; +mod regex_set; mod resource; mod resource_path; mod router; diff --git a/actix-router/src/regex_set.rs b/actix-router/src/regex_set.rs new file mode 100644 index 000000000..48f38df2c --- /dev/null +++ b/actix-router/src/regex_set.rs @@ -0,0 +1,66 @@ +//! Abstraction over `regex` and `regex-lite` depending on whether we have `unicode` crate feature +//! enabled. + +use cfg_if::cfg_if; +#[cfg(feature = "unicode")] +pub(crate) use regex::{escape, Regex}; +#[cfg(not(feature = "unicode"))] +pub(crate) use regex_lite::{escape, Regex}; + +#[cfg(feature = "unicode")] +#[derive(Debug, Clone)] +pub(crate) struct RegexSet(regex::RegexSet); + +#[cfg(not(feature = "unicode"))] +#[derive(Debug, Clone)] +pub(crate) struct RegexSet(Vec<Regex>); + +impl RegexSet { + /// Create a new regex set. + /// + /// # Panics + /// + /// Panics if any path patterns are malformed. + pub(crate) fn new(re_set: Vec<String>) -> Self { + cfg_if! 
{ + if #[cfg(feature = "unicode")] { + Self(regex::RegexSet::new(re_set).unwrap()) + } else { + Self(re_set.iter().map(|re| Regex::new(re).unwrap()).collect()) + } + } + } + + /// Create a new empty regex set. + pub(crate) fn empty() -> Self { + cfg_if! { + if #[cfg(feature = "unicode")] { + Self(regex::RegexSet::empty()) + } else { + Self(Vec::new()) + } + } + } + + /// Returns true if regex set matches `path`. + pub(crate) fn is_match(&self, path: &str) -> bool { + cfg_if! { + if #[cfg(feature = "unicode")] { + self.0.is_match(path) + } else { + self.0.iter().any(|re| re.is_match(path)) + } + } + } + + /// Returns index within `path` of first match. + pub(crate) fn first_match_idx(&self, path: &str) -> Option<usize> { + cfg_if! { + if #[cfg(feature = "unicode")] { + self.0.matches(path).into_iter().next() + } else { + Some(self.0.iter().enumerate().find(|(_, re)| re.is_match(path))?.0) + } + } + } +} diff --git a/actix-router/src/resource.rs b/actix-router/src/resource.rs index abd132211..3a102945b 100644 --- a/actix-router/src/resource.rs +++ b/actix-router/src/resource.rs @@ -5,10 +5,13 @@ use std::{ mem, }; -use regex::{escape, Regex, RegexSet}; use tracing::error; -use crate::{path::PathItem, IntoPatterns, Patterns, Resource, ResourcePath}; +use crate::{ + path::PathItem, + regex_set::{escape, Regex, RegexSet}, + IntoPatterns, Patterns, Resource, ResourcePath, +}; const MAX_DYNAMIC_SEGMENTS: usize = 16; @@ -233,7 +236,7 @@ enum PatternSegment { Var(String), } -#[derive(Clone, Debug)] +#[derive(Debug, Clone)] #[allow(clippy::large_enum_variant)] enum PatternType { /// Single constant/literal segment. 
@@ -603,7 +606,7 @@ impl ResourceDef { PatternType::Dynamic(re, _) => Some(re.captures(path)?[1].len()), PatternType::DynamicSet(re, params) => { - let idx = re.matches(path).into_iter().next()?; + let idx = re.first_match_idx(path)?; let (ref pattern, _) = params[idx]; Some(pattern.captures(path)?[1].len()) } @@ -706,7 +709,7 @@ impl ResourceDef { PatternType::DynamicSet(re, params) => { let path = path.unprocessed(); - let (pattern, names) = match re.matches(path).into_iter().next() { + let (pattern, names) = match re.first_match_idx(path) { Some(idx) => &params[idx], _ => return false, }; @@ -870,7 +873,7 @@ impl ResourceDef { } } - let pattern_re_set = RegexSet::new(re_set).unwrap(); + let pattern_re_set = RegexSet::new(re_set); let segments = segments.unwrap_or_default(); ( diff --git a/actix-web/CHANGES.md b/actix-web/CHANGES.md index 88215293a..8fa7ae27d 100644 --- a/actix-web/CHANGES.md +++ b/actix-web/CHANGES.md @@ -4,6 +4,7 @@ ### Changed +- Add `unicode` crate feature (on-by-default) to switch between `regex` and `regex-lite` as a trade-off between full unicode support and binary size. - Minimum supported Rust version (MSRV) is now 1.72. 
## 4.5.1 diff --git a/actix-web/Cargo.toml b/actix-web/Cargo.toml index b045589bd..aafc3dda8 100644 --- a/actix-web/Cargo.toml +++ b/actix-web/Cargo.toml @@ -40,7 +40,7 @@ name = "actix_web" path = "src/lib.rs" [features] -default = ["macros", "compress-brotli", "compress-gzip", "compress-zstd", "cookies", "http2"] +default = ["macros", "compress-brotli", "compress-gzip", "compress-zstd", "cookies", "http2", "unicode"] # Brotli algorithm content-encoding support compress-brotli = ["actix-http/compress-brotli", "__compress"] @@ -72,6 +72,9 @@ rustls-0_21 = ["http2", "actix-http/rustls-0_21", "actix-tls/accept", "actix-tls # TLS via Rustls v0.22 rustls-0_22 = ["http2", "actix-http/rustls-0_22", "actix-tls/accept", "actix-tls/rustls-0_22"] +# Full unicode support +unicode = ["dep:regex", "actix-router/unicode"] + # Internal (PRIVATE!) features used to aid testing and checking feature status. # Don't rely on these whatsoever. They may disappear at anytime. __compress = [] @@ -89,7 +92,7 @@ actix-utils = "3" actix-tls = { version = "3.3", default-features = false, optional = true } actix-http = { version = "3.6", features = ["ws"] } -actix-router = "0.5" +actix-router = { version = "0.5", default-features = false, features = ["http"] } actix-web-codegen = { version = "4.2", optional = true } ahash = "0.8" @@ -107,7 +110,8 @@ log = "0.4" mime = "0.3" once_cell = "1.5" pin-project-lite = "0.2.7" -regex = "1.5.5" +regex = { version = "1.5.5", optional = true } +regex-lite = "0.1" serde = "1.0" serde_json = "1.0" serde_urlencoded = "0.7" diff --git a/actix-web/src/http/header/content_disposition.rs b/actix-web/src/http/header/content_disposition.rs index 0606f5aef..9725cd19b 100644 --- a/actix-web/src/http/header/content_disposition.rs +++ b/actix-web/src/http/header/content_disposition.rs @@ -13,7 +13,10 @@ use std::fmt::{self, Write}; use once_cell::sync::Lazy; +#[cfg(feature = "unicode")] use regex::Regex; +#[cfg(not(feature = "unicode"))] +use regex_lite::Regex; use 
super::{ExtendedValue, Header, TryIntoHeaderValue, Writer}; use crate::http::header; diff --git a/actix-web/src/middleware/logger.rs b/actix-web/src/middleware/logger.rs index ce42c3af1..dc1b02399 100644 --- a/actix-web/src/middleware/logger.rs +++ b/actix-web/src/middleware/logger.rs @@ -18,7 +18,10 @@ use bytes::Bytes; use futures_core::ready; use log::{debug, warn}; use pin_project_lite::pin_project; -use regex::{Regex, RegexSet}; +#[cfg(feature = "unicode")] +use regex::Regex; +#[cfg(not(feature = "unicode"))] +use regex_lite::Regex; use time::{format_description::well_known::Rfc3339, OffsetDateTime}; use crate::{ @@ -87,7 +90,7 @@ pub struct Logger(Rc<Inner>); struct Inner { format: Format, exclude: HashSet<String>, - exclude_regex: RegexSet, + exclude_regex: Vec<Regex>, log_target: Cow<'static, str>, } @@ -97,7 +100,7 @@ impl Logger { Logger(Rc::new(Inner { format: Format::new(format), exclude: HashSet::new(), - exclude_regex: RegexSet::empty(), + exclude_regex: Vec::new(), log_target: Cow::Borrowed(module_path!()), })) } @@ -114,10 +117,7 @@ impl Logger { /// Ignore and do not log access info for paths that match regex. 
pub fn exclude_regex<T: Into<String>>(mut self, path: T) -> Self { let inner = Rc::get_mut(&mut self.0).unwrap(); - let mut patterns = inner.exclude_regex.patterns().to_vec(); - patterns.push(path.into()); - let regex_set = RegexSet::new(patterns).unwrap(); - inner.exclude_regex = regex_set; + inner.exclude_regex.push(Regex::new(&path.into()).unwrap()); self } @@ -240,7 +240,7 @@ impl Default for Logger { Logger(Rc::new(Inner { format: Format::default(), exclude: HashSet::new(), - exclude_regex: RegexSet::empty(), + exclude_regex: Vec::new(), log_target: Cow::Borrowed(module_path!()), })) } @@ -300,7 +300,11 @@ where fn call(&self, req: ServiceRequest) -> Self::Future { let excluded = self.inner.exclude.contains(req.path()) - || self.inner.exclude_regex.is_match(req.path()); + || self + .inner + .exclude_regex + .iter() + .any(|r| r.is_match(req.path())); if excluded { LoggerResponse { diff --git a/actix-web/src/middleware/normalize.rs b/actix-web/src/middleware/normalize.rs index 3f20431c0..482107ecb 100644 --- a/actix-web/src/middleware/normalize.rs +++ b/actix-web/src/middleware/normalize.rs @@ -4,7 +4,10 @@ use actix_http::uri::{PathAndQuery, Uri}; use actix_service::{Service, Transform}; use actix_utils::future::{ready, Ready}; use bytes::Bytes; +#[cfg(feature = "unicode")] use regex::Regex; +#[cfg(not(feature = "unicode"))] +use regex_lite::Regex; use crate::{ service::{ServiceRequest, ServiceResponse},