1
0
Fork 0
mirror of https://github.com/actix/actix-web.git synced 2024-05-19 16:58:14 +00:00

rework actix_router::Quoter (#2709)

Co-authored-by: Rob Ede <robjtede@icloud.com>
This commit is contained in:
Ali MJ Al-Nasrawy 2022-03-28 23:58:35 +03:00 committed by GitHub
parent e942d3e3b1
commit 40048a5811
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 157 additions and 172 deletions

View file

@ -32,7 +32,12 @@ criterion = { version = "0.3", features = ["html_reports"] }
firestorm = { version = "0.5", features = ["enable_system_time"] }
http = "0.2.5"
serde = { version = "1", features = ["derive"] }
percent-encoding = "2.1"
[[bench]]
name = "router"
harness = false
[[bench]]
name = "quoter"
harness = false

View file

@ -0,0 +1,52 @@
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use std::borrow::Cow;
fn compare_quoters(c: &mut Criterion) {
let mut group = c.benchmark_group("Compare Quoters");
let quoter = actix_router::Quoter::new(b"", b"");
let path_quoted = (0..=0x7f)
.map(|c| format!("%{:02X}", c))
.collect::<String>();
let path_unquoted = ('\u{00}'..='\u{7f}').collect::<String>();
group.bench_function("quoter_unquoted", |b| {
b.iter(|| {
for _ in 0..10 {
black_box(quoter.requote(path_unquoted.as_bytes()));
}
});
});
group.bench_function("percent_encode_unquoted", |b| {
b.iter(|| {
for _ in 0..10 {
let decode = percent_encoding::percent_decode(path_unquoted.as_bytes());
black_box(Into::<Cow<'_, [u8]>>::into(decode));
}
});
});
group.bench_function("quoter_quoted", |b| {
b.iter(|| {
for _ in 0..10 {
black_box(quoter.requote(path_quoted.as_bytes()));
}
});
});
group.bench_function("percent_encode_quoted", |b| {
b.iter(|| {
for _ in 0..10 {
let decode = percent_encoding::percent_decode(path_quoted.as_bytes());
black_box(Into::<Cow<'_, [u8]>>::into(decode));
}
});
});
group.finish();
}
criterion_group!(benches, compare_quoters);
criterion_main!(benches);

View file

@ -7,7 +7,7 @@ use crate::path::{Path, PathIter};
use crate::{Quoter, ResourcePath};
thread_local! {
static FULL_QUOTER: Quoter = Quoter::new(b"+/%", b"");
static FULL_QUOTER: Quoter = Quoter::new(b"", b"");
}
macro_rules! unsupported_type {

View file

@ -1,132 +1,89 @@
#[allow(dead_code)]
const GEN_DELIMS: &[u8] = b":/?#[]@";
#[allow(dead_code)]
const SUB_DELIMS_WITHOUT_QS: &[u8] = b"!$'()*,";
#[allow(dead_code)]
const SUB_DELIMS: &[u8] = b"!$'()*,+?=;";
#[allow(dead_code)]
const RESERVED: &[u8] = b":/?#[]@!$'()*,+?=;";
#[allow(dead_code)]
const UNRESERVED: &[u8] = b"abcdefghijklmnopqrstuvwxyz
ABCDEFGHIJKLMNOPQRSTUVWXYZ
1234567890
-._~";
const ALLOWED: &[u8] = b"abcdefghijklmnopqrstuvwxyz
ABCDEFGHIJKLMNOPQRSTUVWXYZ
1234567890
-._~
!$'()*,";
const QS: &[u8] = b"+&=;b";
/// A quoter
/// Partial percent-decoding.
///
/// Performs percent-decoding on a slice but can selectively skip decoding certain sequences.
///
/// # Examples
/// ```
/// # use actix_router::Quoter;
/// // + is set as a protected character and will not be decoded...
/// let q = Quoter::new(&[], b"+");
///
/// // ...but the other encoded characters (like the hyphen below) will.
/// assert_eq!(q.requote(b"/a%2Db%2Bc").unwrap(), b"/a-b%2Bc");
/// ```
pub struct Quoter {
/// Simple bit-map of safe values in the 0-127 ASCII range.
safe_table: [u8; 16],
/// Simple bit-map of protected values in the 0-127 ASCII range.
protected_table: [u8; 16],
protected_table: AsciiBitmap,
}
impl Quoter {
pub fn new(safe: &[u8], protected: &[u8]) -> Quoter {
let mut quoter = Quoter {
safe_table: [0; 16],
protected_table: [0; 16],
};
// prepare safe table
for ch in 0..128 {
if ALLOWED.contains(&ch) {
set_bit(&mut quoter.safe_table, ch);
}
if QS.contains(&ch) {
set_bit(&mut quoter.safe_table, ch);
}
}
for &ch in safe {
set_bit(&mut quoter.safe_table, ch)
}
/// Constructs a new `Quoter` instance given a set of protected ASCII bytes.
///
/// The first argument is ignored but is kept for backward compatibility.
///
/// # Panics
/// Panics if any of the `protected` bytes are not in the 0-127 ASCII range.
pub fn new(_: &[u8], protected: &[u8]) -> Quoter {
let mut protected_table = AsciiBitmap::default();
// prepare protected table
for &ch in protected {
set_bit(&mut quoter.safe_table, ch);
set_bit(&mut quoter.protected_table, ch);
protected_table.set_bit(ch);
}
quoter
Quoter { protected_table }
}
/// Decodes safe percent-encoded sequences from `val`.
///
/// Returns `None` when no modification to the original byte string was required.
///
/// Non-ASCII bytes are accepted as valid input.
///
/// Behavior for invalid/incomplete percent-encoding sequences is unspecified and may include
/// removing the invalid sequence from the output or passing it as-is.
pub fn requote(&self, val: &[u8]) -> Option<Vec<u8>> {
let mut has_pct = 0;
let mut pct = [b'%', 0, 0];
let mut idx = 0;
let mut cloned: Option<Vec<u8>> = None;
let len = val.len();
while idx < len {
let ch = val[idx];
if has_pct != 0 {
pct[has_pct] = val[idx];
has_pct += 1;
if has_pct == 3 {
has_pct = 0;
let buf = cloned.as_mut().unwrap();
if let Some(ch) = hex_pair_to_char(pct[1], pct[2]) {
if ch < 128 {
if bit_at(&self.protected_table, ch) {
buf.extend_from_slice(&pct);
idx += 1;
continue;
}
if bit_at(&self.safe_table, ch) {
buf.push(ch);
idx += 1;
continue;
}
}
buf.push(ch);
} else {
buf.extend_from_slice(&pct[..]);
}
/// Decodes the next escape sequence, if any, and advances `val`.
#[inline(always)]
fn decode_next<'a>(&self, val: &mut &'a [u8]) -> Option<(&'a [u8], u8)> {
for i in 0..val.len() {
if let (prev, [b'%', p1, p2, rem @ ..]) = val.split_at(i) {
if let Some(ch) = hex_pair_to_char(*p1, *p2)
// ignore protected ascii bytes
.filter(|&ch| !(ch < 128 && self.protected_table.bit_at(ch)))
{
*val = rem;
return Some((prev, ch));
}
} else if ch == b'%' {
has_pct = 1;
if cloned.is_none() {
let mut c = Vec::with_capacity(len);
c.extend_from_slice(&val[..idx]);
cloned = Some(c);
}
} else if let Some(ref mut cloned) = cloned {
cloned.push(ch)
}
idx += 1;
}
cloned
None
}
/// Partially percent-decodes the given bytes.
///
/// Escape sequences of the protected set are *not* decoded.
///
/// Returns `None` when no modification to the original bytes was required.
///
/// Invalid/incomplete percent-encoding sequences are passed unmodified.
pub fn requote(&self, val: &[u8]) -> Option<Vec<u8>> {
let mut remaining = val;
// early return indicates that no percent-encoded sequences exist and we can skip allocation
let (pre, decoded_char) = self.decode_next(&mut remaining)?;
// decoded output will always be shorter than the input
let mut decoded = Vec::<u8>::with_capacity(val.len());
// push first segment and decoded char
decoded.extend_from_slice(pre);
decoded.push(decoded_char);
// decode and push rest of segments and decoded chars
while let Some((prev, ch)) = self.decode_next(&mut remaining) {
// this ugly conditional achieves +50% perf in cases where this is a tight loop.
if !prev.is_empty() {
decoded.extend_from_slice(prev);
}
decoded.push(ch);
}
decoded.extend_from_slice(remaining);
Some(decoded)
}
pub(crate) fn requote_str_lossy(&self, val: &str) -> Option<String> {
@ -135,24 +92,6 @@ impl Quoter {
}
}
/// Converts an ASCII character in the hex-encoded set (`0-9`, `A-F`, `a-f`) to its integer
/// representation from `0x0``0xF`.
///
/// - `0x30 ('0') => 0x0`
/// - `0x39 ('9') => 0x9`
/// - `0x41 ('a') => 0xA`
/// - `0x61 ('A') => 0xA`
/// - `0x46 ('f') => 0xF`
/// - `0x66 ('F') => 0xF`
fn from_ascii_hex(v: u8) -> Option<u8> {
match v {
b'0'..=b'9' => Some(v - 0x30), // ord('0') == 0x30
b'A'..=b'F' => Some(v - 0x41 + 10), // ord('A') == 0x41
b'a'..=b'f' => Some(v - 0x61 + 10), // ord('a') == 0x61
_ => None,
}
}
/// Decode a ASCII hex-encoded pair to an integer.
///
/// Returns `None` if either portion of the decoded pair does not evaluate to a valid hex value.
@ -160,64 +99,52 @@ fn from_ascii_hex(v: u8) -> Option<u8> {
/// - `0x33 ('3'), 0x30 ('0') => 0x30 ('0')`
/// - `0x34 ('4'), 0x31 ('1') => 0x41 ('A')`
/// - `0x36 ('6'), 0x31 ('1') => 0x61 ('a')`
#[inline(always)]
fn hex_pair_to_char(d1: u8, d2: u8) -> Option<u8> {
let (d_high, d_low) = (from_ascii_hex(d1)?, from_ascii_hex(d2)?);
let d_high = char::from(d1).to_digit(16)?;
let d_low = char::from(d2).to_digit(16)?;
// left shift high nibble by 4 bits
Some(d_high << 4 | d_low)
Some((d_high as u8) << 4 | (d_low as u8))
}
/// Sets bit in given bit-map to 1=true.
///
/// # Panics
/// Panics if `ch` index is out of bounds.
fn set_bit(array: &mut [u8], ch: u8) {
array[(ch >> 3) as usize] |= 0b1 << (ch & 0b111)
#[derive(Debug, Default, Clone)]
struct AsciiBitmap {
array: [u8; 16],
}
/// Returns true if bit to true in given bit-map.
///
/// # Panics
/// Panics if `ch` index is out of bounds.
fn bit_at(array: &[u8], ch: u8) -> bool {
array[(ch >> 3) as usize] & (0b1 << (ch & 0b111)) != 0
impl AsciiBitmap {
/// Sets bit in given bit-map to 1=true.
///
/// # Panics
/// Panics if `ch` index is out of bounds.
fn set_bit(&mut self, ch: u8) {
self.array[(ch >> 3) as usize] |= 0b1 << (ch & 0b111)
}
/// Returns true if bit to true in given bit-map.
///
/// # Panics
/// Panics if `ch` index is out of bounds.
fn bit_at(&self, ch: u8) -> bool {
self.array[(ch >> 3) as usize] & (0b1 << (ch & 0b111)) != 0
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn hex_encoding() {
let hex = b"0123456789abcdefABCDEF";
for i in 0..256 {
let c = i as u8;
if hex.contains(&c) {
assert!(from_ascii_hex(c).is_some())
} else {
assert!(from_ascii_hex(c).is_none())
}
}
let expected = [
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 10, 11, 12, 13, 14, 15,
];
for i in 0..hex.len() {
assert_eq!(from_ascii_hex(hex[i]).unwrap(), expected[i]);
}
}
#[test]
fn custom_quoter() {
let q = Quoter::new(b"", b"+");
assert_eq!(q.requote(b"/a%25c").unwrap(), b"/a%c");
assert_eq!(q.requote(b"/a%2Bc").unwrap(), b"/a%2Bc");
assert_eq!(q.requote(b"/a%2Bc"), None);
let q = Quoter::new(b"%+", b"/");
assert_eq!(q.requote(b"/a%25b%2Bc").unwrap(), b"/a%b+c");
assert_eq!(q.requote(b"/a%2fb").unwrap(), b"/a%2fb");
assert_eq!(q.requote(b"/a%2Fb").unwrap(), b"/a%2Fb");
assert_eq!(q.requote(b"/a%2fb"), None);
assert_eq!(q.requote(b"/a%2Fb"), None);
assert_eq!(q.requote(b"/a%0Ab").unwrap(), b"/a\nb");
assert_eq!(q.requote(b"/a%FE\xffb").unwrap(), b"/a\xfe\xffb");
assert_eq!(q.requote(b"/a\xfe\xffb"), None);
@ -233,7 +160,8 @@ mod tests {
#[test]
fn invalid_sequences() {
let q = Quoter::new(b"%+", b"/");
assert_eq!(q.requote(b"/a%2x%2X%%").unwrap(), b"/a%2x%2X");
assert_eq!(q.requote(b"/a%2x%2X%%"), None);
assert_eq!(q.requote(b"/a%20%2X%%").unwrap(), b"/a %2X%%");
}
#[test]

View file

@ -3,7 +3,7 @@ use crate::ResourcePath;
use crate::Quoter;
thread_local! {
static DEFAULT_QUOTER: Quoter = Quoter::new(b"@:", b"%/+");
static DEFAULT_QUOTER: Quoter = Quoter::new(b"", b"%/+");
}
#[derive(Debug, Clone, Default)]