1
0
Fork 0
mirror of https://github.com/sile/hls_m3u8.git synced 2024-06-10 01:09:27 +00:00

use chars instead of bytes in the attribute parser

This commit is contained in:
Luro02 2020-03-16 11:17:52 +01:00
parent e338f5f95f
commit 187174042d
No known key found for this signature in database
GPG key ID: B66FD4F74501A9CF

View file

@ -14,7 +14,7 @@ impl<'a> Iterator for AttributePairs<'a> {
type Item = (&'a str, &'a str); type Item = (&'a str, &'a str);
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<Self::Item> {
// return `None`, if there are no more chars // return `None`, if there are no more bytes
self.string.as_bytes().get(self.index + 1)?; self.string.as_bytes().get(self.index + 1)?;
let key = { let key = {
@ -23,41 +23,55 @@ impl<'a> Iterator for AttributePairs<'a> {
// the key ends at an `=`: // the key ends at an `=`:
let end = self let end = self
.string .string
.bytes() .char_indices()
.skip(self.index) .skip_while(|(i, _)| *i < self.index)
.position(|i| i == b'=')? .find_map(|(i, c)| if c == '=' { Some(i) } else { None })?;
+ start;
// advance the index to the 2nd char after the end of the key // advance the index to the char after the end of the key (to skip the `=`)
// (this will skip the `=`) // NOTE: it is okay to add 1 to the index, because an `=` is exactly 1 byte.
self.index = end + 1; self.index = end + 1;
core::str::from_utf8(&self.string.as_bytes()[start..end]).unwrap() ::core::str::from_utf8(&self.string.as_bytes()[start..end]).unwrap()
}; };
let value = { let value = {
let start = self.index; let start = self.index;
let mut end = 0;
// find the end of the value by searching for `,`. // find the end of the value by searching for `,`.
// it should ignore `,` that are inside double quotes. // it should ignore `,` that are inside double quotes.
let mut inside_quotes = false; let mut inside_quotes = false;
while let Some(item) = self.string.as_bytes().get(start + end) {
end += 1;
if *item == b'"' { let end = {
inside_quotes = !inside_quotes; let mut result = self.string.len();
} else if *item == b',' && !inside_quotes {
self.index += 1; for (i, c) in self
end -= 1; .string
break; .char_indices()
.skip_while(|(i, _)| *i < self.index)
{
// if a quote is encountered
if c == '"' {
// update variable
inside_quotes = !inside_quotes;
// terminate if a comma is encountered, which is not in a
// quote
} else if c == ',' && !inside_quotes {
// move the index past the comma
self.index += 1;
// the result is the index of the comma (comma is not included in the
// resulting string)
result = i;
break;
}
} }
}
result
};
self.index += end; self.index += end;
end += start; self.index -= start;
core::str::from_utf8(&self.string.as_bytes()[start..end]).unwrap() ::core::str::from_utf8(&self.string.as_bytes()[start..end]).unwrap()
}; };
Some((key.trim(), value.trim())) Some((key.trim(), value.trim()))
@ -69,10 +83,15 @@ impl<'a> Iterator for AttributePairs<'a> {
// each `=` in the remaining str is an iteration // each `=` in the remaining str is an iteration
// this also ignores `=` inside quotes! // this also ignores `=` inside quotes!
let mut inside_quotes = false; let mut inside_quotes = false;
for c in self.string.as_bytes().iter().skip(self.index) {
if *c == b'=' && !inside_quotes { for (_, c) in self
.string
.char_indices()
.skip_while(|(i, _)| *i < self.index)
{
if c == '=' && !inside_quotes {
remaining += 1; remaining += 1;
} else if *c == b'"' { } else if c == '"' {
inside_quotes = !inside_quotes; inside_quotes = !inside_quotes;
} }
} }
@ -92,44 +111,53 @@ mod test {
#[test] #[test]
fn test_attributes() { fn test_attributes() {
let mut attributes = AttributePairs::new("KEY=VALUE,PAIR=YES"); let mut attributes = AttributePairs::new("KEY=VALUE,PAIR=YES");
assert_eq!((2, Some(2)), attributes.size_hint()); assert_eq!((2, Some(2)), attributes.size_hint());
assert_eq!(Some(("KEY", "VALUE")), attributes.next()); assert_eq!(attributes.next(), Some(("KEY", "VALUE")));
assert_eq!((1, Some(1)), attributes.size_hint()); assert_eq!((1, Some(1)), attributes.size_hint());
assert_eq!(Some(("PAIR", "YES")), attributes.next()); assert_eq!(attributes.next(), Some(("PAIR", "YES")));
assert_eq!((0, Some(0)), attributes.size_hint()); assert_eq!((0, Some(0)), attributes.size_hint());
assert_eq!(None, attributes.next()); assert_eq!(attributes.next(), None);
let mut attributes = AttributePairs::new("garbage"); let mut attributes = AttributePairs::new("garbage");
assert_eq!((0, Some(0)), attributes.size_hint()); assert_eq!((0, Some(0)), attributes.size_hint());
assert_eq!(None, attributes.next()); assert_eq!(attributes.next(), None);
let mut attributes = AttributePairs::new("KEY=,=VALUE,=,"); let mut attributes = AttributePairs::new("KEY=,=VALUE,=,");
assert_eq!((3, Some(3)), attributes.size_hint()); assert_eq!((3, Some(3)), attributes.size_hint());
assert_eq!(Some(("KEY", "")), attributes.next()); assert_eq!(attributes.next(), Some(("KEY", "")));
assert_eq!((2, Some(2)), attributes.size_hint()); assert_eq!((2, Some(2)), attributes.size_hint());
assert_eq!(Some(("", "VALUE")), attributes.next()); assert_eq!(attributes.next(), Some(("", "VALUE")));
assert_eq!((1, Some(1)), attributes.size_hint()); assert_eq!((1, Some(1)), attributes.size_hint());
assert_eq!(Some(("", "")), attributes.next()); assert_eq!(attributes.next(), Some(("", "")));
assert_eq!((0, Some(0)), attributes.size_hint()); assert_eq!((0, Some(0)), attributes.size_hint());
assert_eq!(None, attributes.next()); assert_eq!(attributes.next(), None);
// test quotes: // test quotes:
let mut attributes = AttributePairs::new("KEY=\"VALUE,\","); let mut attributes = AttributePairs::new("KEY=\"VALUE,\",");
assert_eq!((1, Some(1)), attributes.size_hint()); assert_eq!((1, Some(1)), attributes.size_hint());
assert_eq!(Some(("KEY", "\"VALUE,\"")), attributes.next()); assert_eq!(attributes.next(), Some(("KEY", "\"VALUE,\"")));
assert_eq!((0, Some(0)), attributes.size_hint()); assert_eq!((0, Some(0)), attributes.size_hint());
assert_eq!(None, attributes.next()); assert_eq!(attributes.next(), None);
// test with chars, that are larger, than 1 byte // test with chars, that are larger, than 1 byte
let mut attributes = AttributePairs::new( let mut attributes = AttributePairs::new(concat!(
"LANGUAGE=\"fre\",\ "LANGUAGE=\"fre\",",
NAME=\"Français\",\ "NAME=\"Français\",",
AUTOSELECT=YES", "AUTOSELECT=YES"
); ));
assert_eq!(Some(("LANGUAGE", "\"fre\"")), attributes.next()); assert_eq!(attributes.next(), Some(("LANGUAGE", "\"fre\"")));
assert_eq!(Some(("NAME", "\"Français\"")), attributes.next()); assert_eq!(attributes.next(), Some(("NAME", "\"Français\"")));
assert_eq!(Some(("AUTOSELECT", "YES")), attributes.next()); assert_eq!(attributes.next(), Some(("AUTOSELECT", "YES")));
} }
#[test] #[test]
@ -140,5 +168,41 @@ mod test {
assert_eq!(pairs.next(), Some(("BAR", "\"baz,qux\""))); assert_eq!(pairs.next(), Some(("BAR", "\"baz,qux\"")));
assert_eq!(pairs.next(), Some(("ABC", "12.3"))); assert_eq!(pairs.next(), Some(("ABC", "12.3")));
assert_eq!(pairs.next(), None); assert_eq!(pairs.next(), None);
// stress test with foreign input
// got it from https://generator.lorem-ipsum.info/_chinese
let mut pairs = AttributePairs::new(concat!(
"載抗留囲軽来実基供全必式覧領意度振。=著地内方満職控努作期投綱研本模,",
"後文図様改表宮能本園半参裁報作神掲索=\"針支年得率新賞現報発援白少動面。矢拉年世掲注索政平定他込\",",
"ध्वनि स्थिति और्४५० नीचे =देखने लाभो द्वारा करके(विशेष"
));
assert_eq!((3, Some(3)), pairs.size_hint());
assert_eq!(
pairs.next(),
Some((
"載抗留囲軽来実基供全必式覧領意度振。",
"著地内方満職控努作期投綱研本模"
))
);
assert_eq!((2, Some(2)), pairs.size_hint());
assert_eq!(
pairs.next(),
Some((
"後文図様改表宮能本園半参裁報作神掲索",
"\"針支年得率新賞現報発援白少動面。矢拉年世掲注索政平定他込\""
))
);
assert_eq!((1, Some(1)), pairs.size_hint());
assert_eq!(
pairs.next(),
Some(("ध्वनि स्थिति और्४५० नीचे", "देखने लाभो द्वारा करके(विशेष"))
);
assert_eq!((0, Some(0)), pairs.size_hint());
assert_eq!(pairs.next(), None);
} }
} }