closedcaption: Switch SCC parser from combine to nom

nom gives easier to understand compiler errors when something is wrong
and compiles considerably faster.
This commit is contained in:
Sebastian Dröge 2020-11-05 10:33:19 +02:00
parent d56ae71e0e
commit ffae72cb0f
3 changed files with 197 additions and 193 deletions

View file

@ -10,6 +10,7 @@ repository = "https://gitlab.freedesktop.org/gstreamer/gst-plugins-rs"
[dependencies] [dependencies]
glib = { git = "https://github.com/gtk-rs/gtk-rs" } glib = { git = "https://github.com/gtk-rs/gtk-rs" }
combine = "4.0" combine = "4.0"
nom = "6.0"
either = "1" either = "1"
uuid = { version = "0.8", features = ["v4"] } uuid = { version = "0.8", features = ["v4"] }
chrono = "0.4" chrono = "0.4"

View file

@ -47,8 +47,6 @@ struct State {
last_timecode: Option<gst_video::ValidVideoTimeCode>, last_timecode: Option<gst_video::ValidVideoTimeCode>,
} }
type CombineError<'a> = combine::easy::ParseError<&'a [u8]>;
impl Default for State { impl Default for State {
fn default() -> Self { fn default() -> Self {
Self { Self {
@ -64,7 +62,10 @@ impl Default for State {
} }
impl State { impl State {
fn get_line(&mut self, drain: bool) -> Result<Option<SccLine>, (&[u8], CombineError)> { fn get_line(
&mut self,
drain: bool,
) -> Result<Option<SccLine>, (&[u8], nom::error::Error<&[u8]>)> {
let line = match self.reader.get_line_with_drain(drain) { let line = match self.reader.get_line_with_drain(drain) {
None => { None => {
return Ok(None); return Ok(None);

View file

@ -1,4 +1,4 @@
// Copyright (C) 2019 Sebastian Dröge <sebastian@centricular.com> // Copyright (C) 2019,2020 Sebastian Dröge <sebastian@centricular.com>
// Copyright (C) 2019 Jordan Petridis <jordan@centricular.com> // Copyright (C) 2019 Jordan Petridis <jordan@centricular.com>
// //
// This library is free software; you can redistribute it and/or // This library is free software; you can redistribute it and/or
@ -16,11 +16,7 @@
// Free Software Foundation, Inc., 51 Franklin Street, Suite 500, // Free Software Foundation, Inc., 51 Franklin Street, Suite 500,
// Boston, MA 02110-1335, USA. // Boston, MA 02110-1335, USA.
use combine::parser::byte::hex_digit; use nom::IResult;
use combine::parser::range::{range, take_while1};
use combine::parser::EasyParser;
use combine::{choice, eof, from_str, many1, one_of, optional, token, unexpected_any, value};
use combine::{ParseError, Parser, RangeStream};
#[derive(Clone, Debug, PartialEq, Eq)] #[derive(Clone, Debug, PartialEq, Eq)]
pub struct TimeCode { pub struct TimeCode {
@ -51,164 +47,161 @@ pub struct SccParser {
} }
/// Parser for parsing a run of ASCII, decimal digits and converting them into a `u32` /// Parser for parsing a run of ASCII, decimal digits and converting them into a `u32`
fn digits<'a, I: 'a>() -> impl Parser<I, Output = u32> fn digits(s: &[u8]) -> IResult<&[u8], u32> {
where use nom::bytes::complete::take_while;
I: RangeStream<Token = u8, Range = &'a [u8]>, use nom::character::is_digit;
I::Error: ParseError<I::Token, I::Range, I::Position>, use nom::combinator::map_res;
{
from_str(take_while1(|c: u8| c >= b'0' && c <= b'9').message("while parsing digits")) map_res(
map_res(take_while(is_digit), |s: &[u8]| std::str::from_utf8(s)),
|s: &str| s.parse::<u32>(),
)(s)
} }
/// Parser for a run of decimal digits, that converts them into a `u32` and checks if the result is /// Parser for a run of decimal digits, that converts them into a `u32` and checks if the result is
/// in the allowed range. /// in the allowed range.
fn digits_range<'a, I: 'a, R: std::ops::RangeBounds<u32>>(range: R) -> impl Parser<I, Output = u32> fn digits_range<R: std::ops::RangeBounds<u32>>(
where range: R,
I: RangeStream<Token = u8, Range = &'a [u8]>, ) -> impl FnMut(&[u8]) -> IResult<&[u8], u32> {
I::Error: ParseError<I::Token, I::Range, I::Position>, use nom::combinator::verify;
{ use nom::error::context;
digits().then(move |v| {
if range.contains(&v) { move |s: &[u8]| context("digits out of range", verify(digits, |v| range.contains(v)))(s)
value(v).left()
} else {
unexpected_any("digits out of range").right()
}
})
} }
/// Parser for a timecode in the form `hh:mm:ss:fs` /// Parser for a timecode in the form `hh:mm:ss:fs`
fn timecode<'a, I: 'a>() -> impl Parser<I, Output = TimeCode> fn timecode(s: &[u8]) -> IResult<&[u8], TimeCode> {
where use nom::character::complete::{char, one_of};
I: RangeStream<Token = u8, Range = &'a [u8]>, use nom::combinator::map;
I::Error: ParseError<I::Token, I::Range, I::Position>, use nom::error::context;
{ use nom::sequence::tuple;
(
digits(), context(
token(b':'), "invalid timecode",
digits_range(0..60), map(
token(b':'), tuple((
digits_range(0..60), digits,
one_of([b':', b'.', b';', b','].iter().cloned()), char(':'),
digits(), digits_range(0..60),
) char(':'),
.map(|(hours, _, minutes, _, seconds, sep, frames)| TimeCode { digits_range(0..60),
hours, one_of(":.;,"),
minutes, digits,
seconds, )),
frames, |(hours, _, minutes, _, seconds, sep, frames)| TimeCode {
drop_frame: sep == b';' || sep == b',', hours,
}) minutes,
.message("while parsing timecode") seconds,
frames,
drop_frame: sep == ';' || sep == ',',
},
),
)(s)
} }
/// Parser that checks for EOF and optionally `\n` or `\r\n` before EOF /// Parser that checks for EOF and optionally `\n` or `\r\n` before EOF
fn end_of_line<'a, I: 'a>() -> impl Parser<I, Output = ()> + 'a fn end_of_line(s: &[u8]) -> IResult<&[u8], ()> {
where use nom::branch::alt;
I: RangeStream<Token = u8, Range = &'a [u8]>, use nom::bytes::complete::tag;
I::Error: ParseError<I::Token, I::Range, I::Position>, use nom::combinator::{eof, map, opt};
{ use nom::sequence::pair;
(
optional(choice((range(b"\n".as_ref()), range(b"\r\n".as_ref())))), map(pair(opt(alt((tag("\r\n"), tag("\n")))), eof), |_| ())(s)
eof(),
)
.map(|_| ())
.message("while parsing end of line")
} }
/// Parser for the SCC header /// Parser for the SCC header
fn header<'a, I: 'a>() -> impl Parser<I, Output = SccLine> + 'a fn header(s: &[u8]) -> IResult<&[u8], SccLine> {
where use nom::bytes::complete::tag;
I: RangeStream<Token = u8, Range = &'a [u8]>, use nom::combinator::{map, opt};
I::Error: ParseError<I::Token, I::Range, I::Position>, use nom::error::context;
{ use nom::sequence::tuple;
(
optional(range(&[0xEFu8, 0xBBu8, 0xBFu8][..])), context(
range(b"Scenarist_SCC V1.0".as_ref()), "invalid header",
end_of_line(), map(
) tuple((
.map(|_| SccLine::Header) opt(tag(&[0xEFu8, 0xBBu8, 0xBFu8][..])),
.message("while parsing header") tag("Scenarist_SCC V1.0"),
end_of_line,
)),
|_| SccLine::Header,
),
)(s)
} }
/// Parser that accepts only an empty line /// Parser that accepts only an empty line
fn empty_line<'a, I: 'a>() -> impl Parser<I, Output = SccLine> + 'a fn empty_line(s: &[u8]) -> IResult<&[u8], SccLine> {
where use nom::combinator::map;
I: RangeStream<Token = u8, Range = &'a [u8]>, use nom::error::context;
I::Error: ParseError<I::Token, I::Range, I::Position>,
{ context("invalid empty line", map(end_of_line, |_| SccLine::Empty))(s)
end_of_line()
.map(|_| SccLine::Empty)
.message("while parsing empty line")
} }
/// A single SCC payload item. This is ASCII hex encoded bytes. /// A single SCC payload item. This is ASCII hex encoded bytes.
/// It returns an tuple of `(u8, u8)` of the hex encoded bytes. /// It returns an tuple of `(u8, u8)` of the hex encoded bytes.
fn scc_payload_item<'a, I: 'a>() -> impl Parser<I, Output = (u8, u8)> fn scc_payload_item(s: &[u8]) -> IResult<&[u8], (u8, u8)> {
where use nom::bytes::complete::take_while_m_n;
I: RangeStream<Token = u8, Range = &'a [u8]>, use nom::character::is_hex_digit;
I::Error: ParseError<I::Token, I::Range, I::Position>, use nom::combinator::map;
{ use nom::error::context;
((hex_digit(), hex_digit(), hex_digit(), hex_digit()).map(|(u, l, m, n)| {
let hex_to_u8 = |v: u8| match v {
v if v >= b'0' && v <= b'9' => v - b'0',
v if v >= b'A' && v <= b'F' => 10 + v - b'A',
v if v >= b'a' && v <= b'f' => 10 + v - b'a',
_ => unreachable!(),
};
let val = (hex_to_u8(u) << 4) | hex_to_u8(l);
let val2 = (hex_to_u8(m) << 4) | hex_to_u8(n);
(val, val2)
}))
.message("while parsing SCC payload")
}
/// A wrapper around `Vec<u8>` that implements `Extend` in a special way. It can be context(
/// extended from a `(u8, u8)` while the default `Extend` implementation for "invalid SCC payload item",
/// `Vec` only allows to extend over vector items. map(take_while_m_n(4, 4, is_hex_digit), |s: &[u8]| {
struct VecExtend(Vec<u8>); let hex_to_u8 = |v: u8| match v {
v if v >= b'0' && v <= b'9' => v - b'0',
v if v >= b'A' && v <= b'F' => 10 + v - b'A',
v if v >= b'a' && v <= b'f' => 10 + v - b'a',
_ => unreachable!(),
};
impl Default for VecExtend { let val1 = (hex_to_u8(s[0]) << 4) | hex_to_u8(s[1]);
fn default() -> Self { let val2 = (hex_to_u8(s[2]) << 4) | hex_to_u8(s[3]);
VecExtend(Vec::with_capacity(256))
}
}
impl Extend<(u8, u8)> for VecExtend { (val1, val2)
fn extend<T>(&mut self, iter: T) }),
where )(s)
T: IntoIterator<Item = (u8, u8)>,
{
for (item, item2) in iter {
self.0.extend_from_slice(&[item, item2]);
}
}
} }
/// Parser for the whole SCC payload with conversion to the underlying byte values. /// Parser for the whole SCC payload with conversion to the underlying byte values.
fn scc_payload<'a, I: 'a>() -> impl Parser<I, Output = Vec<u8>> + 'a fn scc_payload(s: &[u8]) -> IResult<&[u8], Vec<u8>> {
where use nom::branch::alt;
I: RangeStream<Token = u8, Range = &'a [u8]>, use nom::bytes::complete::tag;
I::Error: ParseError<I::Token, I::Range, I::Position>, use nom::combinator::map;
{ use nom::error::context;
many1( use nom::multi::fold_many1;
( use nom::sequence::pair;
scc_payload_item(), use nom::Parser;
choice!(token(b' ').map(|_| ()), end_of_line()),
) let parse_item = map(
.map(|v| v.0), pair(scc_payload_item, alt((tag(" ").map(|_| ()), end_of_line))),
) |(item, _)| item,
.map(|v: VecExtend| v.0) );
.message("while parsing SCC payloads")
context(
"invalid SCC payload",
fold_many1(parse_item, Vec::new(), |mut acc: Vec<_>, item| {
acc.push(item.0);
acc.push(item.1);
acc
}),
)(s)
} }
/// Parser for a SCC caption line in the form `timecode\tpayload`. /// Parser for a SCC caption line in the form `timecode\tpayload`.
fn caption<'a, I: 'a>() -> impl Parser<I, Output = SccLine> + 'a fn caption(s: &[u8]) -> IResult<&[u8], SccLine> {
where use nom::bytes::complete::tag;
I: RangeStream<Token = u8, Range = &'a [u8]>, use nom::combinator::map;
I::Error: ParseError<I::Token, I::Range, I::Position>, use nom::error::context;
{ use nom::sequence::tuple;
(timecode(), token(b'\t'), scc_payload(), end_of_line())
.map(|(tc, _, value, _)| SccLine::Caption(tc, value)) context(
.message("while parsing caption") "invalid SCC caption line",
map(
tuple((timecode, tag("\t"), scc_payload, end_of_line)),
|(tc, _, value, _)| SccLine::Caption(tc, value),
),
)(s)
} }
/// SCC parser the parses line-by-line and keeps track of the current state in the file. /// SCC parser the parses line-by-line and keeps track of the current state in the file.
@ -226,26 +219,34 @@ impl SccParser {
pub fn parse_line<'a>( pub fn parse_line<'a>(
&mut self, &mut self,
line: &'a [u8], line: &'a [u8],
) -> Result<SccLine, combine::easy::ParseError<&'a [u8]>> { ) -> Result<SccLine, nom::error::Error<&'a [u8]>> {
use nom::branch::alt;
match self.state { match self.state {
State::Header => header() State::Header => header(line)
.message("while in Header state")
.easy_parse(line)
.map(|v| { .map(|v| {
self.state = State::Empty; self.state = State::Empty;
v.0 v.1
})
.map_err(|err| match err {
nom::Err::Incomplete(_) => unreachable!(),
nom::Err::Error(e) | nom::Err::Failure(e) => e,
}), }),
State::Empty => empty_line() State::Empty => empty_line(line)
.message("while in Empty state")
.easy_parse(line)
.map(|v| { .map(|v| {
self.state = State::CaptionOrEmpty; self.state = State::CaptionOrEmpty;
v.0 v.1
})
.map_err(|err| match err {
nom::Err::Incomplete(_) => unreachable!(),
nom::Err::Error(e) | nom::Err::Failure(e) => e,
}),
State::CaptionOrEmpty => alt((caption, empty_line))(line)
.map(|v| v.1)
.map_err(|err| match err {
nom::Err::Incomplete(_) => unreachable!(),
nom::Err::Error(e) | nom::Err::Failure(e) => e,
}), }),
State::CaptionOrEmpty => choice!(caption(), empty_line())
.message("while in CaptionOrEmpty state")
.easy_parse(line)
.map(|v| v.0),
} }
} }
} }
@ -253,14 +254,13 @@ impl SccParser {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
use combine::error::UnexpectedParse;
#[test] #[test]
fn test_timecode() { fn test_timecode() {
let mut parser = timecode();
assert_eq!( assert_eq!(
parser.parse(b"11:12:13;14".as_ref()), timecode(b"11:12:13;14".as_ref()),
Ok(( Ok((
b"".as_ref(),
TimeCode { TimeCode {
hours: 11, hours: 11,
minutes: 12, minutes: 12,
@ -268,13 +268,13 @@ mod tests {
frames: 14, frames: 14,
drop_frame: true drop_frame: true
}, },
b"".as_ref()
)) ))
); );
assert_eq!( assert_eq!(
parser.parse(b"11:12:13:14".as_ref()), timecode(b"11:12:13:14".as_ref()),
Ok(( Ok((
b"".as_ref(),
TimeCode { TimeCode {
hours: 11, hours: 11,
minutes: 12, minutes: 12,
@ -282,13 +282,13 @@ mod tests {
frames: 14, frames: 14,
drop_frame: false drop_frame: false
}, },
b"".as_ref()
)) ))
); );
assert_eq!( assert_eq!(
parser.parse(b"11:12:13:14abcd".as_ref()), timecode(b"11:12:13:14abcd".as_ref()),
Ok(( Ok((
b"abcd".as_ref(),
TimeCode { TimeCode {
hours: 11, hours: 11,
minutes: 12, minutes: 12,
@ -296,70 +296,73 @@ mod tests {
frames: 14, frames: 14,
drop_frame: false drop_frame: false
}, },
b"abcd".as_ref()
)) ))
); );
assert_eq!( assert_eq!(
parser.parse(b"abcd11:12:13:14".as_ref()), timecode(b"abcd11:12:13:14".as_ref()),
Err(UnexpectedParse::Unexpected) Err(nom::Err::Error(nom::error::Error::new(
b"abcd11:12:13:14".as_ref(),
nom::error::ErrorKind::MapRes
))),
); );
} }
#[test] #[test]
fn test_header() { fn test_header() {
let mut parser = header();
assert_eq!( assert_eq!(
parser.parse(b"Scenarist_SCC V1.0".as_ref()), header(b"Scenarist_SCC V1.0".as_ref()),
Ok((SccLine::Header, b"".as_ref())) Ok((b"".as_ref(), SccLine::Header,))
); );
assert_eq!( assert_eq!(
parser.parse(b"Scenarist_SCC V1.0\n".as_ref()), header(b"Scenarist_SCC V1.0\n".as_ref()),
Ok((SccLine::Header, b"".as_ref())) Ok((b"".as_ref(), SccLine::Header))
); );
assert_eq!( assert_eq!(
parser.parse(b"Scenarist_SCC V1.0\r\n".as_ref()), header(b"Scenarist_SCC V1.0\r\n".as_ref()),
Ok((SccLine::Header, b"".as_ref())) Ok((b"".as_ref(), SccLine::Header))
); );
assert_eq!( assert_eq!(
parser.parse(b"Scenarist_SCC V1.1".as_ref()), header(b"Scenarist_SCC V1.1".as_ref()),
Err(UnexpectedParse::Unexpected) Err(nom::Err::Error(nom::error::Error::new(
b"Scenarist_SCC V1.1".as_ref(),
nom::error::ErrorKind::Tag
))),
); );
} }
#[test] #[test]
fn test_empty_line() { fn test_empty_line() {
let mut parser = empty_line(); assert_eq!(empty_line(b"".as_ref()), Ok((b"".as_ref(), SccLine::Empty)));
assert_eq!( assert_eq!(
parser.parse(b"".as_ref()), empty_line(b"\n".as_ref()),
Ok((SccLine::Empty, b"".as_ref())) Ok((b"".as_ref(), SccLine::Empty))
); );
assert_eq!( assert_eq!(
parser.parse(b"\n".as_ref()), empty_line(b"\r\n".as_ref()),
Ok((SccLine::Empty, b"".as_ref())) Ok((b"".as_ref(), SccLine::Empty))
); );
assert_eq!( assert_eq!(
parser.parse(b"\r\n".as_ref()), empty_line(b" \r\n".as_ref()),
Ok((SccLine::Empty, b"".as_ref())) Err(nom::Err::Error(nom::error::Error::new(
); b" \r\n".as_ref(),
nom::error::ErrorKind::Eof
assert_eq!( ))),
parser.parse(b" \r\n".as_ref()),
Err(UnexpectedParse::Unexpected)
); );
} }
#[test] #[test]
fn test_caption() { fn test_caption() {
let mut parser = caption();
assert_eq!( assert_eq!(
parser.parse(b"01:02:53:14\t94ae 94ae 9420 9420 947a 947a 97a2 97a2 a820 68ef f26e 2068 ef6e 6be9 6e67 2029 942c 942c 8080 8080 942f 942f".as_ref()), caption(b"01:02:53:14\t94ae 94ae 9420 9420 947a 947a 97a2 97a2 a820 68ef f26e 2068 ef6e 6be9 6e67 2029 942c 942c 8080 8080 942f 942f".as_ref()),
Ok(( Ok((
b"".as_ref(),
SccLine::Caption( SccLine::Caption(
TimeCode { TimeCode {
hours: 1, hours: 1,
@ -376,13 +379,13 @@ mod tests {
0x94, 0x2f, 0x94, 0x2f,
] ]
), ),
b"".as_ref()
)) ))
); );
assert_eq!( assert_eq!(
parser.parse(b"01:02:55;14 942c 942c".as_ref()), caption(b"01:02:55;14 942c 942c".as_ref()),
Ok(( Ok((
b"".as_ref(),
SccLine::Caption( SccLine::Caption(
TimeCode { TimeCode {
hours: 1, hours: 1,
@ -393,13 +396,13 @@ mod tests {
}, },
vec![0x94, 0x2c, 0x94, 0x2c] vec![0x94, 0x2c, 0x94, 0x2c]
), ),
b"".as_ref()
)) ))
); );
assert_eq!( assert_eq!(
parser.parse(b"01:03:27:29 94ae 94ae 9420 9420 94f2 94f2 c845 d92c 2054 c845 5245 ae80 942c 942c 8080 8080 942f 942f".as_ref()), caption(b"01:03:27:29 94ae 94ae 9420 9420 94f2 94f2 c845 d92c 2054 c845 5245 ae80 942c 942c 8080 8080 942f 942f".as_ref()),
Ok(( Ok((
b"".as_ref(),
SccLine::Caption( SccLine::Caption(
TimeCode { TimeCode {
hours: 1, hours: 1,
@ -414,13 +417,13 @@ mod tests {
0x80, 0x80, 0x80, 0x80, 0x94, 0x2f, 0x94, 0x2f, 0x80, 0x80, 0x80, 0x80, 0x94, 0x2f, 0x94, 0x2f,
] ]
), ),
b"".as_ref()
)) ))
); );
assert_eq!( assert_eq!(
parser.parse(b"00:00:00;00\t942c 942c".as_ref()), caption(b"00:00:00;00\t942c 942c".as_ref()),
Ok(( Ok((
b"".as_ref(),
SccLine::Caption( SccLine::Caption(
TimeCode { TimeCode {
hours: 0, hours: 0,
@ -431,13 +434,13 @@ mod tests {
}, },
vec![0x94, 0x2c, 0x94, 0x2c], vec![0x94, 0x2c, 0x94, 0x2c],
), ),
b"".as_ref()
)) ))
); );
assert_eq!( assert_eq!(
parser.parse(b"00:00:00;00\t942c 942c\r\n".as_ref()), caption(b"00:00:00;00\t942c 942c\r\n".as_ref()),
Ok(( Ok((
b"".as_ref(),
SccLine::Caption( SccLine::Caption(
TimeCode { TimeCode {
hours: 0, hours: 0,
@ -448,7 +451,6 @@ mod tests {
}, },
vec![0x94, 0x2c, 0x94, 0x2c], vec![0x94, 0x2c, 0x94, 0x2c],
), ),
b"".as_ref()
)) ))
); );
} }