rtpvp9depay: Improve SVC parsing, aggregate all layers

- Fix start and end of picture to support multiple layers. Start of
  picture is the first packet of the base layer, while end of picture
  is when the marker bit is set (last packet of the enhancement
  layers).
- All "layers" (aka "frames") of a picture are pushed downstream in a
  single buffer when picture is complete.
- Forgive SID=0 for enhancement layers (invalid, but Chrome and
  Firefox send it)

Part-of: <https://gitlab.freedesktop.org/gstreamer/gst-plugins-good/-/merge_requests/773>
This commit is contained in:
Stian Selnes 2018-08-13 15:35:11 +02:00 committed by Mathieu Duponchelle
parent d77fcf251b
commit 95579a00c0
3 changed files with 132 additions and 6 deletions

View file

@ -68,6 +68,7 @@ gst_rtp_vp9_depay_init (GstRtpVP9Depay * self)
{
self->adapter = gst_adapter_new ();
self->started = FALSE;
self->inter_picture = FALSE;
}
static void
@ -186,7 +187,8 @@ gst_rtp_vp9_depay_process (GstRTPBaseDepayload * depay, GstRTPBuffer * rtp)
guint size;
gint spatial_layer = 0;
guint picture_id = PICTURE_ID_NONE;
gboolean i_bit, p_bit, l_bit, f_bit, b_bit, e_bit, v_bit;
gboolean i_bit, p_bit, l_bit, f_bit, b_bit, e_bit, v_bit, d_bit = 0;
gboolean is_start_of_picture;
if (G_UNLIKELY (GST_BUFFER_IS_DISCONT (rtp->buffer))) {
GST_LOG_OBJECT (self, "Discontinuity, flushing adapter");
@ -229,6 +231,19 @@ gst_rtp_vp9_depay_process (GstRTPBaseDepayload * depay, GstRTPBuffer * rtp)
/* Check L optional header layer indices */
if (l_bit) {
spatial_layer = (data[hdrsize] >> 1) & 0x07;
d_bit = (data[hdrsize] >> 0) & 0x01;
GST_TRACE_OBJECT (self, "TID=%d, U=%d, SID=%d, D=%d",
(data[hdrsize] >> 5) & 0x07, (data[hdrsize] >> 4) & 0x01,
(data[hdrsize] >> 1) & 0x07, (data[hdrsize] >> 0) & 0x01);
if (spatial_layer == 0 && d_bit != 0) {
/* Invalid according to draft-ietf-payload-vp9-06, but Firefox 61 and
* Chrome 66 send enhancement layers with SID=0, so let's not drop the
* packet. */
GST_LOG_OBJECT (self, "Invalid inter-layer dependency for base layer");
}
hdrsize++;
/* Check TL0PICIDX temporal layer zero index (non-flexible mode) */
if (!f_bit)
@ -313,8 +328,9 @@ gst_rtp_vp9_depay_process (GstRTPBaseDepayload * depay, GstRTPBuffer * rtp)
if (G_UNLIKELY (hdrsize >= size))
goto too_small;
is_start_of_picture = b_bit && (!l_bit || !d_bit);
/* If this is a start frame AND we are already processing a frame, we need to flush and wait for next start frame */
if (b_bit) {
if (is_start_of_picture) {
if (G_UNLIKELY (self->started)) {
GST_DEBUG_OBJECT (depay, "Incomplete frame, flushing adapter");
gst_adapter_clear (self->adapter);
@ -340,6 +356,7 @@ gst_rtp_vp9_depay_process (GstRTPBaseDepayload * depay, GstRTPBuffer * rtp)
self->stop_lost_events = FALSE;
}
self->started = TRUE;
self->inter_picture = FALSE;
}
payload = gst_rtp_buffer_get_payload_subbuffer (rtp, hdrsize, -1);
@ -351,11 +368,14 @@ gst_rtp_vp9_depay_process (GstRTPBaseDepayload * depay, GstRTPBuffer * rtp)
}
gst_adapter_push (self->adapter, payload);
self->last_picture_id = picture_id;
self->inter_picture |= p_bit;
/* Marker indicates that it was the last rtp packet for this frame */
/* Marker indicates that it was the last rtp packet for this picture. Note
* that if spatial scalability is used, e_bit will be set for the last
* packet of a frame while the marker bit is not set until the last packet
* of the picture. */
if (gst_rtp_buffer_get_marker (rtp)) {
GstBuffer *out;
gboolean key_frame_first_layer = !p_bit && spatial_layer == 0;
GST_DEBUG_OBJECT (depay,
"Found the end of the frame (%" G_GSIZE_FORMAT " bytes)",
@ -373,7 +393,7 @@ gst_rtp_vp9_depay_process (GstRTPBaseDepayload * depay, GstRTPBuffer * rtp)
out = gst_buffer_make_writable (out);
/* Filter away all metas that are not sensible to copy */
gst_rtp_drop_non_video_meta (self, out);
if (!key_frame_first_layer) {
if (self->inter_picture) {
GST_BUFFER_FLAG_SET (out, GST_BUFFER_FLAG_DELTA_UNIT);
if (!self->caps_sent) {
@ -426,7 +446,6 @@ too_small:
GST_LOG_OBJECT (self, "Invalid rtp packet (too small), ignoring");
gst_adapter_clear (self->adapter);
self->started = FALSE;
goto done;
}

View file

@ -69,6 +69,7 @@ struct _GstRtpVP9Depay
* packets.
*/
gboolean stop_lost_events;
gboolean inter_picture;
};
GType gst_rtp_vp9_depay_get_type (void);

View file

@ -343,6 +343,110 @@ GST_START_TEST (test_depay_resend_gap_event)
GST_END_TEST;
GST_START_TEST (test_depay_svc_merge_layers)
{
/* This simulates a simple SVC stream, for simplicity we handcraft a couple
* of rtp packets. */
/* First packet contains a complete base layer I-frame (s-bit and e-bit).
* Note the marker bit is not set to indicate that there will be more
* packets for this picture. */
guint8 layer0[] = {
0x80, 0x74, 0x00, 0x00, 0x49, 0x88, 0xd9, 0xf8, 0xa0, 0x6c, 0x65, 0x6c,
0xac, 0x80, 0x01, 0x00, 0x01, 0x02, 0x49, 0x3f, 0x1c, 0x12, 0x0e, 0x0c,
0xd0, 0x1b, 0xb9, 0x80, 0x80, 0xb0, 0x18, 0x0f, 0xa6, 0x4d, 0x01, 0xa5
};
/* s-bit, e-bit, d-bit and sid=1 set to indicate a complete enhancement
* frame. marker bit set to indicate last packet of picture. */
guint8 layer1_with_marker[] = {
0x80, 0xf4, 0x00, 0x01, 0x49, 0x88, 0xd9, 0xf8, 0xa0, 0x6c, 0x65, 0x6c,
0xac, 0x80, 0x01, 0x03, 0x01, 0x02, 0x49, 0x3f, 0x1c, 0x12, 0x0e, 0x0c,
0xd0, 0x1b, 0xb9, 0x80, 0x80, 0xb0, 0x18, 0x0f, 0xa6, 0x4d, 0x01, 0xa5
};
GstBuffer *buf;
GstHarness *h = gst_harness_new ("rtpvp9depay");
gst_harness_set_src_caps_str (h, RTP_VP9_CAPS_STR);
/* The first packet contains a complete base layer frame that. Since the
* marker bit is not set, it will wait for an enhancement layer before it
* pushes it downstream. */
gst_harness_push (h, gst_buffer_new_wrapped_full (GST_MEMORY_FLAG_READONLY,
layer0, sizeof (layer0), 0, sizeof (layer0), NULL, NULL));
fail_unless_equals_int (0, gst_harness_buffers_received (h));
/* Next packet contains a complete enhancement frame. The picture is
* complete (marker bit set) and can be pushed */
gst_harness_push (h, gst_buffer_new_wrapped_full (GST_MEMORY_FLAG_READONLY,
layer1_with_marker, sizeof (layer1_with_marker), 0,
sizeof (layer1_with_marker), NULL, NULL));
fail_unless_equals_int (1, gst_harness_buffers_received (h));
/* The buffer should contain both layer 0 and layer 1. */
buf = gst_harness_pull (h);
fail_unless_equals_int (19 * 2, gst_buffer_get_size (buf));
gst_buffer_unref (buf);
gst_harness_teardown (h);
}
GST_END_TEST;
GST_START_TEST (test_depay_svc_forgive_invalid_sid)
{
/* This simulates an invalid stream received from FF61 and Chromium 66
* (Electron). The RTP header signals the same spatial layer ID for all
* packets of a picture (SID=0), but the s-bit, e-bit and d-bit suggests
* there is a second layer. The conservative approach would be to drop the
* enhancement layers since we don't want to push a bitstream we're
* uncertain of to the decoder. However, this reduces the quality
* significantly and also sometimes results in an encoder/decoder mismatch
* (altough it shouldn't). */
/* The first packet contains a complete base layer frame. Since the
* marker bit is not set, it will wait for an enhancement layer before it
* pushes it downstream. s-bit, e-bit set, no marker*/
guint8 layer0[] = {
0x80, 0x74, 0x00, 0x00, 0x49, 0x88, 0xd9, 0xf8, 0xa0, 0x6c, 0x65, 0x6c,
0xac, 0x80, 0x01, 0x00, 0x01, 0x02, 0x49, 0x3f, 0x1c, 0x12, 0x0e, 0x0c,
0xd0, 0x1b, 0xb9, 0x80, 0x80, 0xb0, 0x18, 0x0f, 0xa6, 0x4d, 0x01, 0xa5
};
/* Next packet contains a complete enhancement frame. The picture is
* complete (marker bit set) and picture can be pushed. However, the SID is
* invalid (SID=0, but should be SID=1). Let's forgive that and push the
* packet downstream anyway. s-bit, e-bit, d-bit and sid=0 and marker
* bit. */
guint8 layer1_with_sid0_and_marker[] = {
0x80, 0xf4, 0x00, 0x01, 0x49, 0x88, 0xd9, 0xf8, 0xa0, 0x6c, 0x65, 0x6c,
0xac, 0x80, 0x01, 0x01, 0x01, 0x02, 0x49, 0x3f, 0x1c, 0x12, 0x0e, 0x0c,
0xd0, 0x1b, 0xb9, 0x80, 0x80, 0xb0, 0x18, 0x0f, 0xa6, 0x4d, 0x01, 0xa5
};
GstBuffer *buf;
GstHarness *h = gst_harness_new ("rtpvp9depay");
gst_harness_set_src_caps_str (h, RTP_VP9_CAPS_STR);
gst_harness_push (h, gst_buffer_new_wrapped_full (GST_MEMORY_FLAG_READONLY,
layer0, sizeof (layer0), 0, sizeof (layer0), NULL, NULL));
fail_unless_equals_int (0, gst_harness_buffers_received (h));
gst_harness_push (h, gst_buffer_new_wrapped_full (GST_MEMORY_FLAG_READONLY,
layer1_with_sid0_and_marker, sizeof (layer1_with_sid0_and_marker), 0,
sizeof (layer1_with_sid0_and_marker), NULL, NULL));
fail_unless_equals_int (1, gst_harness_buffers_received (h));
/* The buffer should contain both layer 0 and layer 1. */
buf = gst_harness_pull (h);
fail_unless_equals_int (19 * 2, gst_buffer_get_size (buf));
gst_buffer_unref (buf);
gst_harness_teardown (h);
}
GST_END_TEST;
static Suite *
rtpvp9_suite (void)
{
@ -355,6 +459,8 @@ rtpvp9_suite (void)
G_N_ELEMENTS (stop_gap_events_test_data));
tcase_add_loop_test (tc_chain, test_depay_resend_gap_event, 0,
G_N_ELEMENTS (resend_gap_event_test_data));
tcase_add_test (tc_chain, test_depay_svc_merge_layers);
tcase_add_test (tc_chain, test_depay_svc_forgive_invalid_sid);
return s;
}