From c386353337cff83626941fca2b58628845b440f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois?= Date: Sat, 24 Nov 2018 14:54:11 +0100 Subject: [PATCH] decode reserved characters when extracting path with configuration (#577) * decode reserved characters when extracting path with configuration * remove useless clone * add a method to get decoded parameter by name --- CHANGES.md | 9 ++++ MIGRATION.md | 28 +++++++++++++ src/de.rs | 70 ++++++++++++++++++------------- src/extractor.rs | 76 ++++++++++++++++++++++++++++++++- src/param.rs | 33 ++++++++++++++- src/uri.rs | 95 ++++++++++++++++++++++-------------------- tests/test_handlers.rs | 2 +- 7 files changed, 234 insertions(+), 79 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 2e028d6db..902a84f69 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -8,6 +8,12 @@ * `QueryConfig` and `PathConfig` are made public. +### Added + +* By default, `Path` extractor now percent decode all characters. This behaviour can be disabled + with `PathConfig::default().disable_decoding()` + + ## [0.7.14] - 2018-11-14 ### Added @@ -16,6 +22,9 @@ * Add method to configure `SameSite` option in `CookieIdentityPolicy`. +* By default, `Path` extractor now percent decode all characters. This behaviour can be disabled + with `PathConfig::default().disable_decoding()` + ### Fixed diff --git a/MIGRATION.md b/MIGRATION.md index 3c0bdd943..26a314240 100644 --- a/MIGRATION.md +++ b/MIGRATION.md @@ -1,3 +1,31 @@ +## 0.7.15 + +* The `' '` character is not percent decoded anymore before matching routes. If you need to use it in + your routes, you should use `%20`. + + instead of + + ```rust + fn main() { + let app = App::new().resource("/my index", |r| { + r.method(http::Method::GET) + .with(index); + }); + } + ``` + + use + + ```rust + fn main() { + let app = App::new().resource("/my%20index", |r| { + r.method(http::Method::GET) + .with(index); + }); + } + ``` + + ## 0.7.4 * `Route::with_config()`/`Route::with_async_config()` always passes configuration objects as tuple diff --git a/src/de.rs b/src/de.rs index 59ab79ba9..05f8914f8 100644 --- a/src/de.rs +++ b/src/de.rs @@ -1,7 +1,10 @@ +use std::rc::Rc; + use serde::de::{self, Deserializer, Error as DeError, Visitor}; use httprequest::HttpRequest; use param::ParamsIter; +use uri::RESERVED_QUOTER; macro_rules! unsupported_type { ($trait_fn:ident, $name:expr) => { @@ -13,6 +16,20 @@ macro_rules! unsupported_type { }; } +macro_rules! percent_decode_if_needed { + ($value:expr, $decode:expr) => { + if $decode { + if let Some(ref mut value) = RESERVED_QUOTER.requote($value.as_bytes()) { + Rc::make_mut(value).parse() + } else { + $value.parse() + } + } else { + $value.parse() + } + } +} + macro_rules! parse_single_value { ($trait_fn:ident, $visit_fn:ident, $tp:tt) => { fn $trait_fn(self, visitor: V) -> Result @@ -23,11 +40,11 @@ macro_rules! parse_single_value { format!("wrong number of parameters: {} expected 1", self.req.match_info().len()).as_str())) } else { - let v = self.req.match_info()[0].parse().map_err( - |_| de::value::Error::custom( - format!("can not parse {:?} to a {}", - &self.req.match_info()[0], $tp)))?; - visitor.$visit_fn(v) + let v_parsed = percent_decode_if_needed!(&self.req.match_info()[0], self.decode) + .map_err(|_| de::value::Error::custom( + format!("can not parse {:?} to a {}", &self.req.match_info()[0], $tp) + ))?; + visitor.$visit_fn(v_parsed) } } } @@ -35,11 +52,12 @@ macro_rules! parse_single_value { pub struct PathDeserializer<'de, S: 'de> { req: &'de HttpRequest, + decode: bool, } impl<'de, S: 'de> PathDeserializer<'de, S> { - pub fn new(req: &'de HttpRequest) -> Self { - PathDeserializer { req } + pub fn new(req: &'de HttpRequest, decode: bool) -> Self { + PathDeserializer { req, decode } } } @@ -53,6 +71,7 @@ impl<'de, S: 'de> Deserializer<'de> for PathDeserializer<'de, S> { visitor.visit_map(ParamsDeserializer { params: self.req.match_info().iter(), current: None, + decode: self.decode, }) } @@ -107,6 +126,7 @@ impl<'de, S: 'de> Deserializer<'de> for PathDeserializer<'de, S> { } else { visitor.visit_seq(ParamsSeq { params: self.req.match_info().iter(), + decode: self.decode, }) } } @@ -128,6 +148,7 @@ impl<'de, S: 'de> Deserializer<'de> for PathDeserializer<'de, S> { } else { visitor.visit_seq(ParamsSeq { params: self.req.match_info().iter(), + decode: self.decode, }) } } @@ -141,28 +162,13 @@ impl<'de, S: 'de> Deserializer<'de> for PathDeserializer<'de, S> { Err(de::value::Error::custom("unsupported type: enum")) } - fn deserialize_str(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - if self.req.match_info().len() != 1 { - Err(de::value::Error::custom( - format!( - "wrong number of parameters: {} expected 1", - self.req.match_info().len() - ).as_str(), - )) - } else { - visitor.visit_str(&self.req.match_info()[0]) - } - } - fn deserialize_seq(self, visitor: V) -> Result where V: Visitor<'de>, { visitor.visit_seq(ParamsSeq { params: self.req.match_info().iter(), + decode: self.decode, }) } @@ -184,13 +190,16 @@ impl<'de, S: 'de> Deserializer<'de> for PathDeserializer<'de, S> { parse_single_value!(deserialize_f32, visit_f32, "f32"); parse_single_value!(deserialize_f64, visit_f64, "f64"); parse_single_value!(deserialize_string, visit_string, "String"); + parse_single_value!(deserialize_str, visit_string, "String"); parse_single_value!(deserialize_byte_buf, visit_string, "String"); parse_single_value!(deserialize_char, visit_char, "char"); + } struct ParamsDeserializer<'de> { params: ParamsIter<'de>, current: Option<(&'de str, &'de str)>, + decode: bool, } impl<'de> de::MapAccess<'de> for ParamsDeserializer<'de> { @@ -212,7 +221,7 @@ impl<'de> de::MapAccess<'de> for ParamsDeserializer<'de> { V: de::DeserializeSeed<'de>, { if let Some((_, value)) = self.current.take() { - seed.deserialize(Value { value }) + seed.deserialize(Value { value, decode: self.decode }) } else { Err(de::value::Error::custom("unexpected item")) } @@ -252,16 +261,18 @@ macro_rules! parse_value { fn $trait_fn(self, visitor: V) -> Result where V: Visitor<'de> { - let v = self.value.parse().map_err( - |_| de::value::Error::custom( - format!("can not parse {:?} to a {}", self.value, $tp)))?; - visitor.$visit_fn(v) + let v_parsed = percent_decode_if_needed!(&self.value, self.decode) + .map_err(|_| de::value::Error::custom( + format!("can not parse {:?} to a {}", &self.value, $tp) + ))?; + visitor.$visit_fn(v_parsed) } } } struct Value<'de> { value: &'de str, + decode: bool, } impl<'de> Deserializer<'de> for Value<'de> { @@ -377,6 +388,7 @@ impl<'de> Deserializer<'de> for Value<'de> { struct ParamsSeq<'de> { params: ParamsIter<'de>, + decode: bool, } impl<'de> de::SeqAccess<'de> for ParamsSeq<'de> { @@ -387,7 +399,7 @@ impl<'de> de::SeqAccess<'de> for ParamsSeq<'de> { T: de::DeserializeSeed<'de>, { match self.params.next() { - Some(item) => Ok(Some(seed.deserialize(Value { value: item.1 })?)), + Some(item) => Ok(Some(seed.deserialize(Value { value: item.1, decode: self.decode })?)), None => Ok(None), } } diff --git a/src/extractor.rs b/src/extractor.rs index 45e29ace0..717e0f6c1 100644 --- a/src/extractor.rs +++ b/src/extractor.rs @@ -18,7 +18,8 @@ use httpmessage::{HttpMessage, MessageBody, UrlEncoded}; use httprequest::HttpRequest; #[derive(PartialEq, Eq, PartialOrd, Ord)] -/// Extract typed information from the request's path. +/// Extract typed information from the request's path. Information from the path is +/// URL decoded. Decoding of special characters can be disabled through `PathConfig`. /// /// ## Example /// @@ -119,7 +120,7 @@ where let req = req.clone(); let req2 = req.clone(); let err = Rc::clone(&cfg.ehandler); - de::Deserialize::deserialize(PathDeserializer::new(&req)) + de::Deserialize::deserialize(PathDeserializer::new(&req, cfg.decode)) .map_err(move |e| (*err)(e, &req2)) .map(|inner| Path { inner }) } @@ -149,6 +150,7 @@ where /// ``` pub struct PathConfig { ehandler: Rc) -> Error>, + decode: bool, } impl PathConfig { /// Set custom error handler @@ -159,12 +161,20 @@ impl PathConfig { self.ehandler = Rc::new(f); self } + + /// Disable decoding of URL encoded special charaters from the path + pub fn disable_decoding(&mut self) -> &mut Self + { + self.decode = false; + self + } } impl Default for PathConfig { fn default() -> Self { PathConfig { ehandler: Rc::new(|e, _| ErrorNotFound(e)), + decode: true, } } } @@ -1090,6 +1100,68 @@ mod tests { assert_eq!(*Path::::from_request(&req, &&PathConfig::default()).unwrap(), 32); } + #[test] + fn test_extract_path_decode() { + let mut router = Router::<()>::default(); + router.register_resource(Resource::new(ResourceDef::new("/{value}/"))); + + macro_rules! test_single_value { + ($value:expr, $expected:expr) => { + { + let req = TestRequest::with_uri($value).finish(); + let info = router.recognize(&req, &(), 0); + let req = req.with_route_info(info); + assert_eq!(*Path::::from_request(&req, &PathConfig::default()).unwrap(), $expected); + } + } + } + + test_single_value!("/%25/", "%"); + test_single_value!("/%40%C2%A3%24%25%5E%26%2B%3D/", "@£$%^&+="); + test_single_value!("/%2B/", "+"); + test_single_value!("/%252B/", "%2B"); + test_single_value!("/%2F/", "/"); + test_single_value!("/%252F/", "%2F"); + test_single_value!("/http%3A%2F%2Flocalhost%3A80%2Ffoo/", "http://localhost:80/foo"); + test_single_value!("/%2Fvar%2Flog%2Fsyslog/", "/var/log/syslog"); + test_single_value!( + "/http%3A%2F%2Flocalhost%3A80%2Ffile%2F%252Fvar%252Flog%252Fsyslog/", + "http://localhost:80/file/%2Fvar%2Flog%2Fsyslog" + ); + + let req = TestRequest::with_uri("/%25/7/?id=test").finish(); + + let mut router = Router::<()>::default(); + router.register_resource(Resource::new(ResourceDef::new("/{key}/{value}/"))); + let info = router.recognize(&req, &(), 0); + let req = req.with_route_info(info); + + let s = Path::::from_request(&req, &PathConfig::default()).unwrap(); + assert_eq!(s.key, "%"); + assert_eq!(s.value, 7); + + let s = Path::<(String, String)>::from_request(&req, &PathConfig::default()).unwrap(); + assert_eq!(s.0, "%"); + assert_eq!(s.1, "7"); + } + + #[test] + fn test_extract_path_no_decode() { + let mut router = Router::<()>::default(); + router.register_resource(Resource::new(ResourceDef::new("/{value}/"))); + + let req = TestRequest::with_uri("/%25/").finish(); + let info = router.recognize(&req, &(), 0); + let req = req.with_route_info(info); + assert_eq!( + *Path::::from_request( + &req, + &&PathConfig::default().disable_decoding() + ).unwrap(), + "%25" + ); + } + #[test] fn test_tuple_extract() { let mut router = Router::<()>::default(); diff --git a/src/param.rs b/src/param.rs index d0664df99..a3f602599 100644 --- a/src/param.rs +++ b/src/param.rs @@ -8,7 +8,7 @@ use http::StatusCode; use smallvec::SmallVec; use error::{InternalError, ResponseError, UriSegmentError}; -use uri::Url; +use uri::{Url, RESERVED_QUOTER}; /// A trait to abstract the idea of creating a new instance of a type from a /// path parameter. @@ -103,6 +103,17 @@ impl Params { } } + /// Get URL-decoded matched parameter by name without type conversion + pub fn get_decoded(&self, key: &str) -> Option { + self.get(key).map(|value| { + if let Some(ref mut value) = RESERVED_QUOTER.requote(value.as_bytes()) { + Rc::make_mut(value).to_string() + } else { + value.to_string() + } + }) + } + /// Get unprocessed part of path pub fn unprocessed(&self) -> &str { &self.url.path()[(self.tail as usize)..] @@ -300,4 +311,24 @@ mod tests { Ok(PathBuf::from_iter(vec!["seg2"])) ); } + + #[test] + fn test_get_param_by_name() { + let mut params = Params::new(); + params.add_static("item1", "path"); + params.add_static("item2", "http%3A%2F%2Flocalhost%3A80%2Ffoo"); + + assert_eq!(params.get("item0"), None); + assert_eq!(params.get_decoded("item0"), None); + assert_eq!(params.get("item1"), Some("path")); + assert_eq!(params.get_decoded("item1"), Some("path".to_string())); + assert_eq!( + params.get("item2"), + Some("http%3A%2F%2Flocalhost%3A80%2Ffoo") + ); + assert_eq!( + params.get_decoded("item2"), + Some("http://localhost:80/foo".to_string()) + ); + } } diff --git a/src/uri.rs b/src/uri.rs index 881cf20a8..c87cb3d5b 100644 --- a/src/uri.rs +++ b/src/uri.rs @@ -1,25 +1,12 @@ use http::Uri; use std::rc::Rc; -#[allow(dead_code)] -const GEN_DELIMS: &[u8] = b":/?#[]@"; -#[allow(dead_code)] -const SUB_DELIMS_WITHOUT_QS: &[u8] = b"!$'()*,"; -#[allow(dead_code)] -const SUB_DELIMS: &[u8] = b"!$'()*,+?=;"; -#[allow(dead_code)] -const RESERVED: &[u8] = b":/?#[]@!$'()*,+?=;"; -#[allow(dead_code)] -const UNRESERVED: &[u8] = b"abcdefghijklmnopqrstuvwxyz - ABCDEFGHIJKLMNOPQRSTUVWXYZ - 1234567890 - -._~"; -const ALLOWED: &[u8] = b"abcdefghijklmnopqrstuvwxyz - ABCDEFGHIJKLMNOPQRSTUVWXYZ - 1234567890 - -._~ - !$'()*,"; -const QS: &[u8] = b"+&=;b"; +// https://tools.ietf.org/html/rfc3986#section-2.2 +const RESERVED_PLUS_EXTRA: &[u8] = b":/?#[]@!$&'()*,+?;=%^ <>\"\\`{}|"; + +// https://tools.ietf.org/html/rfc3986#section-2.3 +const UNRESERVED: &[u8] = + b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890-._~"; #[inline] fn bit_at(array: &[u8], ch: u8) -> bool { @@ -32,7 +19,8 @@ fn set_bit(array: &mut [u8], ch: u8) { } lazy_static! { - static ref DEFAULT_QUOTER: Quoter = { Quoter::new(b"@:", b"/+") }; + static ref UNRESERVED_QUOTER: Quoter = { Quoter::new(UNRESERVED) }; + pub(crate) static ref RESERVED_QUOTER: Quoter = { Quoter::new(RESERVED_PLUS_EXTRA) }; } #[derive(Default, Clone, Debug)] @@ -43,7 +31,7 @@ pub(crate) struct Url { impl Url { pub fn new(uri: Uri) -> Url { - let path = DEFAULT_QUOTER.requote(uri.path().as_bytes()); + let path = UNRESERVED_QUOTER.requote(uri.path().as_bytes()); Url { uri, path } } @@ -63,36 +51,19 @@ impl Url { pub(crate) struct Quoter { safe_table: [u8; 16], - protected_table: [u8; 16], } impl Quoter { - pub fn new(safe: &[u8], protected: &[u8]) -> Quoter { + pub fn new(safe: &[u8]) -> Quoter { let mut q = Quoter { safe_table: [0; 16], - protected_table: [0; 16], }; // prepare safe table - for i in 0..128 { - if ALLOWED.contains(&i) { - set_bit(&mut q.safe_table, i); - } - if QS.contains(&i) { - set_bit(&mut q.safe_table, i); - } - } - for ch in safe { set_bit(&mut q.safe_table, *ch) } - // prepare protected table - for ch in protected { - set_bit(&mut q.safe_table, *ch); - set_bit(&mut q.protected_table, *ch); - } - q } @@ -115,19 +86,17 @@ impl Quoter { if let Some(ch) = restore_ch(pct[1], pct[2]) { if ch < 128 { - if bit_at(&self.protected_table, ch) { - buf.extend_from_slice(&pct); - idx += 1; - continue; - } - if bit_at(&self.safe_table, ch) { buf.push(ch); idx += 1; continue; } + + buf.extend_from_slice(&pct); + } else { + // Not ASCII, decode it + buf.push(ch); } - buf.push(ch); } else { buf.extend_from_slice(&pct[..]); } @@ -172,3 +141,37 @@ fn from_hex(v: u8) -> Option { fn restore_ch(d1: u8, d2: u8) -> Option { from_hex(d1).and_then(|d1| from_hex(d2).and_then(move |d2| Some(d1 << 4 | d2))) } + + +#[cfg(test)] +mod tests { + use std::rc::Rc; + + use super::*; + + #[test] + fn decode_path() { + assert_eq!(UNRESERVED_QUOTER.requote(b"https://localhost:80/foo"), None); + + assert_eq!( + Rc::try_unwrap(UNRESERVED_QUOTER.requote( + b"https://localhost:80/foo%25" + ).unwrap()).unwrap(), + "https://localhost:80/foo%25".to_string() + ); + + assert_eq!( + Rc::try_unwrap(UNRESERVED_QUOTER.requote( + b"http://cache-service/http%3A%2F%2Flocalhost%3A80%2Ffoo" + ).unwrap()).unwrap(), + "http://cache-service/http%3A%2F%2Flocalhost%3A80%2Ffoo".to_string() + ); + + assert_eq!( + Rc::try_unwrap(UNRESERVED_QUOTER.requote( + b"http://cache/http%3A%2F%2Flocal%3A80%2Ffile%2F%252Fvar%252Flog%0A" + ).unwrap()).unwrap(), + "http://cache/http%3A%2F%2Flocal%3A80%2Ffile%2F%252Fvar%252Flog%0A".to_string() + ); + } +} \ No newline at end of file diff --git a/tests/test_handlers.rs b/tests/test_handlers.rs index 3ea709c92..debc1626a 100644 --- a/tests/test_handlers.rs +++ b/tests/test_handlers.rs @@ -672,6 +672,6 @@ fn test_unsafe_path_route() { let bytes = srv.execute(response.body()).unwrap(); assert_eq!( bytes, - Bytes::from_static(b"success: http:%2F%2Fexample.com") + Bytes::from_static(b"success: http%3A%2F%2Fexample.com") ); }