From 433acfb2c6224b38b808a5429382f5b21a7d943b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20Dr=C3=B6ge?= Date: Mon, 17 Jun 2024 13:46:50 +0300 Subject: [PATCH] rtp: av1pay: Correctly use N flag for marking keyframes The "first packet of a coded video sequence" means that this should be the first packet of a keyframe that comes together with a sequence header, not the first packet of a new frame. Fixes https://gitlab.freedesktop.org/gstreamer/gst-plugins-rs/-/issues/558 Part-of: --- net/rtp/src/av1/pay/imp.rs | 51 +++++++++++++++++++++++++++----------- 1 file changed, 36 insertions(+), 15 deletions(-) diff --git a/net/rtp/src/av1/pay/imp.rs b/net/rtp/src/av1/pay/imp.rs index ec65e8ac..804437ea 100644 --- a/net/rtp/src/av1/pay/imp.rs +++ b/net/rtp/src/av1/pay/imp.rs @@ -38,6 +38,7 @@ static CAT: Lazy = Lazy::new(|| { struct PacketOBUData { obu_count: usize, payload_size: u32, + start_of_coded_video_sequence: bool, last_obu_fragment_size: Option, omit_last_size_field: bool, ends_temporal_unit: bool, @@ -48,6 +49,7 @@ impl Default for PacketOBUData { PacketOBUData { payload_size: 1, // 1 byte is used for the aggregation header omit_last_size_field: true, + start_of_coded_video_sequence: false, obu_count: 0, last_obu_fragment_size: None, ends_temporal_unit: false, @@ -58,6 +60,7 @@ impl Default for PacketOBUData { #[derive(Clone, Debug, Default, PartialEq, Eq)] struct ObuData { info: SizedObu, + keyframe: bool, bytes: Vec, offset: usize, dts: Option, @@ -74,10 +77,6 @@ struct State { /// left over from the previous RTP packet open_obu_fragment: bool, - /// Indicates the next constructed packet will be the first in its sequence - /// (Corresponds to `N` field in the aggregation header) - first_packet_in_seq: bool, - /// The last observed DTS if upstream does not provide DTS for each OBU last_dts: Option, /// The last observed PTS if upstream does not provide PTS for each OBU @@ -97,7 +96,6 @@ impl Default for State { Self { obus: VecDeque::new(), open_obu_fragment: false, - first_packet_in_seq: true, last_dts: None, last_pts: None, framed: false, @@ -125,6 +123,7 @@ impl RTPAv1Pay { &self, state: &mut State, data: &[u8], + keyframe: bool, marker: bool, dts: Option, pts: Option, @@ -159,6 +158,7 @@ impl RTPAv1Pay { } state.obus.push_back(ObuData { info: obu, + keyframe, bytes: Vec::new(), offset: 0, dts, @@ -191,6 +191,7 @@ impl RTPAv1Pay { state.obus.push_back(ObuData { info: obu, + keyframe, bytes, offset: 0, dts, @@ -243,6 +244,11 @@ impl RTPAv1Pay { let mut pending_bytes = 0; let mut required_ids = None::<(u8, u8)>; + // Detect if this packet starts a keyframe and contains a sequence header, and if so + // set the N flag to indicate that this is the start of a new codec video sequence. + let mut contains_keyframe = false; + let mut contains_sequence_header = false; + // figure out how many OBUs we can fit into this packet for (idx, obu) in state.obus.iter().enumerate() { // for OBUs with extension headers, spatial and temporal IDs must be equal @@ -273,6 +279,8 @@ impl RTPAv1Pay { ); } + packet.start_of_coded_video_sequence = + contains_keyframe && contains_sequence_header; packet.ends_temporal_unit = true; if packet.obu_count > 3 { packet.payload_size += pending_bytes; @@ -282,6 +290,7 @@ impl RTPAv1Pay { return Some(packet); } + contains_keyframe |= obu.keyframe; continue; } else if packet.payload_size >= payload_limit || (packet.obu_count > 0 && current.obu_type == ObuType::SequenceHeader) @@ -291,6 +300,8 @@ impl RTPAv1Pay { packet.payload_size += pending_bytes; packet.omit_last_size_field = false; } + packet.start_of_coded_video_sequence = + contains_keyframe && contains_sequence_header; packet.ends_temporal_unit = marker && idx == state.obus.len() - 1; return Some(packet); } @@ -299,6 +310,8 @@ impl RTPAv1Pay { if packet.payload_size + pending_bytes + current.full_size() <= payload_limit { packet.obu_count += 1; packet.payload_size += current.partial_size() + pending_bytes; + contains_keyframe |= obu.keyframe; + contains_sequence_header |= obu.info.obu_type == ObuType::SequenceHeader; pending_bytes = current.leb_size; } // would it fit without the size field? @@ -307,6 +320,10 @@ impl RTPAv1Pay { { packet.obu_count += 1; packet.payload_size += current.partial_size() + pending_bytes; + contains_keyframe |= obu.keyframe; + contains_sequence_header |= obu.info.obu_type == ObuType::SequenceHeader; + packet.start_of_coded_video_sequence = + contains_keyframe && contains_sequence_header; packet.ends_temporal_unit = marker && idx == state.obus.len() - 1; return Some(packet); @@ -330,11 +347,16 @@ impl RTPAv1Pay { Some(payload_limit - packet.payload_size - pending_bytes - leb_size); packet.payload_size = payload_limit; packet.omit_last_size_field = leb_size == 0; + contains_keyframe |= obu.keyframe; + contains_sequence_header |= obu.info.obu_type == ObuType::SequenceHeader; } else if packet.obu_count > 3 { packet.ends_temporal_unit = marker && idx == state.obus.len() - 1; packet.payload_size += pending_bytes; } + packet.start_of_coded_video_sequence = + contains_keyframe && contains_sequence_header; + return Some(packet); } } @@ -344,6 +366,7 @@ impl RTPAv1Pay { packet.payload_size += pending_bytes; packet.omit_last_size_field = false; } + packet.start_of_coded_video_sequence = contains_keyframe && contains_sequence_header; packet.ends_temporal_unit = true; Some(packet) @@ -421,17 +444,15 @@ impl RTPAv1Pay { }; let aggr_header: [u8; 1] = [ - (state.open_obu_fragment as u8) << 7 | // Z + (state.open_obu_fragment as u8) << 7 | // Z ((packet.last_obu_fragment_size.is_some()) as u8) << 6 | // Y - (w as u8) << 4 | // W - (state.first_packet_in_seq as u8) << 3 // N + (w as u8) << 4 | // W + (packet.start_of_coded_video_sequence as u8) << 3 // N ; 1]; writer .write(&aggr_header) .map_err(err_flow!(self, aggr_header_write))?; - - state.first_packet_in_seq = false; } // append OBUs to the buffer @@ -675,9 +696,10 @@ impl RTPBasePayloadImpl for RTPAv1Pay { gst::FlowError::Error })?; + let keyframe = !buffer.flags().contains(gst::BufferFlags::DELTA_UNIT); // Does the buffer finished a full TU? let marker = buffer.flags().contains(gst::BufferFlags::MARKER) || state.framed; - let list = self.handle_new_obus(&mut state, map.as_slice(), marker, dts, pts)?; + let list = self.handle_new_obus(&mut state, map.as_slice(), keyframe, marker, dts, pts)?; drop(map); drop(state); @@ -806,6 +828,7 @@ mod tests { size: 0, ..base_obu }, + keyframe: true, ..ObuData::default() }, ObuData { @@ -888,6 +911,7 @@ mod tests { Some(PacketOBUData { obu_count: 3, payload_size: 18, + start_of_coded_video_sequence: false, last_obu_fragment_size: None, omit_last_size_field: true, ends_temporal_unit: true, @@ -906,6 +930,7 @@ mod tests { Some(PacketOBUData { obu_count: 5, payload_size: 36, + start_of_coded_video_sequence: true, last_obu_fragment_size: None, omit_last_size_field: false, ends_temporal_unit: true, @@ -955,10 +980,6 @@ mod tests { results[idx].1.obus.iter().cloned().collect::>() ); assert_eq!(state.open_obu_fragment, results[idx].1.open_obu_fragment); - assert_eq!( - state.first_packet_in_seq, - results[idx].1.first_packet_in_seq - ); } } }