smartencoder: clean up and extend accepted formats

* Add support for H265

* Don't overwrite original codec_data / streamheader in the output
  caps, but instead allow them to change and send them to the
  combiner at the right moment: encoder caps, reencoded GOP,
  original caps, original GOP(s), and potentially encoder caps
  and reencoded last GOP.

* For H264 / H265, force usage of a format with in-band SPS / PPS
  (avc3 / hev1). This is cleaner than misadvertising avc1 / hvc1, and
  some muxers like mp4mux will actually advertise both differently.

  Unfortunately, while mp4 supports updating the codec_data and using
  avc1 with no in-band SPS / PPS updates, it turns out some decoders
  (e.g. Chrome / Firefox) don't handle this particularly well and stop
  decoding after the reencoded GOP. We could expose a switch to
  force usage of avc1 / hvc1 nevertheless, but for now stick to
  requiring that the parser output SPS / PPS in-band with
  config-interval=-1 (that has not changed).

Part-of: <https://gitlab.freedesktop.org/gstreamer/gst-plugins-base/-/merge_requests/1249>
This commit is contained in:
Mathieu Duponchelle 2021-08-10 02:09:09 +02:00 committed by GStreamer Marge Bot
parent 1ae8b61ec0
commit 4aa72cea4f
3 changed files with 151 additions and 70 deletions

View file

@ -1178,7 +1178,6 @@ _capsfilter_force_format (GstPad * pad,
GParamSpec * arg G_GNUC_UNUSED, StreamGroup * sgroup) GParamSpec * arg G_GNUC_UNUSED, StreamGroup * sgroup)
{ {
GstCaps *caps; GstCaps *caps;
GstStructure *structure;
GstElement *parent = GstElement *parent =
GST_ELEMENT_CAST (gst_object_get_parent (GST_OBJECT (pad))); GST_ELEMENT_CAST (gst_object_get_parent (GST_OBJECT (pad)));
@ -1190,15 +1189,37 @@ _capsfilter_force_format (GstPad * pad,
g_object_get (pad, "caps", &caps, NULL); g_object_get (pad, "caps", &caps, NULL);
caps = gst_caps_copy (caps); caps = gst_caps_copy (caps);
structure = gst_caps_get_structure (caps, 0);
gst_structure_remove_field (structure, "streamheader");
GST_INFO_OBJECT (pad, "Forcing caps to %" GST_PTR_FORMAT, caps); GST_INFO_OBJECT (pad, "Forcing caps to %" GST_PTR_FORMAT, caps);
if (parent == sgroup->outfilter || parent == sgroup->smart_capsfilter) { if (parent == sgroup->outfilter || parent == sgroup->smart_capsfilter) {
/* outfilter and the smart encoder internal capsfilter need to always be /* outfilter and the smart encoder internal capsfilter need to always be
* in sync so the caps match between the two */ * in sync so the caps match between the two */
if (sgroup->smart_capsfilter) { if (sgroup->smart_capsfilter) {
gst_structure_remove_field (structure, "codec_data"); GstStructure *structure = gst_caps_get_structure (caps, 0);
/* The smart encoder handles codec_data itself */
/* Pick a stream format that allows for in-band SPS updates, and remove
* restrictions on fields that can be updated by codec_data or in-band SPS
*/
if (gst_structure_has_name (structure, "video/x-h264")) {
gst_structure_set (structure, "stream-format",
G_TYPE_STRING, "avc3", NULL);
gst_structure_remove_fields (structure, "codec_data", "profile",
"level", NULL);
} else if (gst_structure_has_name (structure, "video/x-h265")) {
gst_structure_set (structure, "stream-format",
G_TYPE_STRING, "hev1", NULL);
gst_structure_remove_fields (structure, "codec_data", "tier", "profile",
"level", NULL);
}
/* For VP8 / VP9, streamheader in the caps is informative, and
* not actually used by muxers, we can allow it to change */
if (gst_structure_has_name (structure, "video/x-vp8") ||
gst_structure_has_name (structure, "video/x-vp9")) {
gst_structure_remove_field (structure, "streamheader");
}
g_object_set (sgroup->smart_capsfilter, "caps", caps, NULL); g_object_set (sgroup->smart_capsfilter, "caps", caps, NULL);
g_signal_handler_disconnect (sgroup->smart_capsfilter->sinkpads->data, g_signal_handler_disconnect (sgroup->smart_capsfilter->sinkpads->data,
@ -1293,10 +1314,12 @@ setup_smart_encoder (GstEncodeBaseBin * ebin, GstEncodingProfile * sprof,
GstElement *sinkelement, *convert = NULL; GstElement *sinkelement, *convert = NULL;
GstElement *smartencoder = g_object_new (GST_TYPE_SMART_ENCODER, NULL); GstElement *smartencoder = g_object_new (GST_TYPE_SMART_ENCODER, NULL);
GstPad *srcpad = gst_element_get_static_pad (smartencoder, "src"); GstPad *srcpad = gst_element_get_static_pad (smartencoder, "src");
GstCaps *format = gst_encoding_profile_get_format (sprof); GstCaps *format =
gst_caps_make_writable (gst_encoding_profile_get_format (sprof));
GstCaps *tmpcaps = gst_pad_query_caps (srcpad, NULL); GstCaps *tmpcaps = gst_pad_query_caps (srcpad, NULL);
const gboolean native_video = const gboolean native_video =
! !(ebin->flags & GST_ENCODEBIN_FLAG_NO_VIDEO_CONVERSION); ! !(ebin->flags & GST_ENCODEBIN_FLAG_NO_VIDEO_CONVERSION);
GstStructure *structure = gst_caps_get_structure (format, 0);
/* Check if stream format is compatible */ /* Check if stream format is compatible */
if (!gst_caps_can_intersect (tmpcaps, format)) { if (!gst_caps_can_intersect (tmpcaps, format)) {
@ -1315,6 +1338,29 @@ setup_smart_encoder (GstEncodeBaseBin * ebin, GstEncodingProfile * sprof,
parser = _get_parser (ebin, sprof, encoder); parser = _get_parser (ebin, sprof, encoder);
sgroup->smart_capsfilter = gst_element_factory_make ("capsfilter", NULL); sgroup->smart_capsfilter = gst_element_factory_make ("capsfilter", NULL);
reencoder_bin = gst_bin_new (NULL); reencoder_bin = gst_bin_new (NULL);
/* Pick a stream format that allows for in-band SPS updates, and remove
* restrictions on fields that can be updated by codec_data or in-band SPS
*/
if (gst_structure_has_name (structure, "video/x-h264")) {
gst_structure_set (structure, "stream-format", G_TYPE_STRING, "avc3", NULL);
gst_structure_remove_fields (structure, "codec_data", "profile",
"level", NULL);
} else if (gst_structure_has_name (structure, "video/x-h265")) {
gst_structure_set (structure, "stream-format", G_TYPE_STRING, "hev1", NULL);
gst_structure_remove_fields (structure, "codec_data", "tier", "profile",
"level", NULL);
}
/* For VP8 / VP9, streamheader in the caps is informative, and
* not actually used by muxers, we can allow it to change */
if (gst_structure_has_name (structure, "video/x-vp8") ||
gst_structure_has_name (structure, "video/x-vp9")) {
gst_structure_remove_field (structure, "streamheader");
}
g_object_set (sgroup->smart_capsfilter, "caps", format, NULL); g_object_set (sgroup->smart_capsfilter, "caps", format, NULL);
gst_bin_add_many (GST_BIN (reencoder_bin), gst_bin_add_many (GST_BIN (reencoder_bin),
@ -1564,8 +1610,8 @@ _create_stream_group (GstEncodeBaseBin * ebin, GstEncodingProfile * sprof,
/* Expose input queue or identity sink pad as ghostpad */ /* Expose input queue or identity sink pad as ghostpad */
sinkpad = sinkpad =
gst_element_get_static_pad (sgroup->identity ? sgroup-> gst_element_get_static_pad (sgroup->identity ? sgroup->identity : sgroup->
identity : sgroup->inqueue, "sink"); inqueue, "sink");
if (sinkpadname == NULL) { if (sinkpadname == NULL) {
gchar *pname = gchar *pname =
g_strdup_printf ("%s_%u", gst_encoding_profile_get_type_nick (sprof), g_strdup_printf ("%s_%u", gst_encoding_profile_get_type_nick (sprof),

View file

@ -34,6 +34,7 @@ GST_DEBUG_CATEGORY_STATIC (smart_encoder_debug);
"video/x-vp8;"\ "video/x-vp8;"\
"video/x-vp9;"\ "video/x-vp9;"\
"video/x-h264;"\ "video/x-h264;"\
"video/x-h265;"\
"video/mpeg,mpegversion=(int)1,systemstream=(boolean)false;"\ "video/mpeg,mpegversion=(int)1,systemstream=(boolean)false;"\
"video/mpeg,mpegversion=(int)2,systemstream=(boolean)false;" "video/mpeg,mpegversion=(int)2,systemstream=(boolean)false;"
@ -120,50 +121,25 @@ internal_event_func (GstPad * pad, GstObject * parent, GstEvent * event)
break; break;
case GST_EVENT_SEGMENT: case GST_EVENT_SEGMENT:
gst_event_copy_segment (event, &self->internal_segment); gst_event_copy_segment (event, &self->internal_segment);
break;
case GST_EVENT_CAPS:
{
GstCaps *caps;
gst_event_parse_caps (event, &caps); if (self->output_segment.format == GST_FORMAT_UNDEFINED) {
caps = gst_caps_copy (caps); gst_segment_init (&self->output_segment, GST_FORMAT_TIME);
if (self->last_caps) {
GstBuffer *codec_data = NULL, *stream_header;
GstCaps *new_caps;
GstStructure *last_struct = gst_caps_get_structure (self->last_caps, 0);
if (gst_structure_get (last_struct, "codec_data", GST_TYPE_BUFFER, /* Ensure that we can represent negative DTS in our 'single' segment */
&codec_data, NULL) && codec_data) { self->output_segment.start = 60 * 60 * GST_SECOND * 1000;
gst_structure_set (gst_caps_get_structure (caps, 0), "codec_data", if (!gst_pad_push_event (self->srcpad,
GST_TYPE_BUFFER, codec_data, NULL); gst_event_new_segment (&self->output_segment))) {
} GST_ERROR_OBJECT (self, "Could not push segment!");
if (gst_structure_get (last_struct, "stream_header", GST_TYPE_BUFFER, GST_ELEMENT_FLOW_ERROR (self, GST_FLOW_ERROR);
&stream_header, NULL) && stream_header) {
gst_structure_set (gst_caps_get_structure (caps, 0), "stream_header",
GST_TYPE_BUFFER, stream_header, NULL);
}
new_caps = gst_caps_intersect (self->last_caps, caps);
if (!new_caps || gst_caps_is_empty (new_caps)) {
GST_ERROR_OBJECT (parent, "New caps from reencoder %" GST_PTR_FORMAT
" are not compatible with previous caps: %" GST_PTR_FORMAT, caps,
self->last_caps);
g_mutex_lock (&self->internal_flow_lock);
self->internal_flow = GST_FLOW_NOT_NEGOTIATED;
g_cond_signal (&self->internal_flow_cond);
g_mutex_unlock (&self->internal_flow_lock);
return FALSE; return FALSE;
} }
gst_caps_unref (caps);
caps = new_caps;
} }
event = gst_event_new_caps (caps);
self->last_caps = caps;
break;
case GST_EVENT_CAPS:
{
return gst_pad_push_event (self->srcpad, event); return gst_pad_push_event (self->srcpad, event);
} }
default: default:
@ -389,21 +365,6 @@ gst_smart_encoder_push_pending_gop (GstSmartEncoder * self)
GST_DEBUG ("Pushing pending GOP (%" GST_TIME_FORMAT " -- %" GST_TIME_FORMAT GST_DEBUG ("Pushing pending GOP (%" GST_TIME_FORMAT " -- %" GST_TIME_FORMAT
")", GST_TIME_ARGS (self->gop_start), GST_TIME_ARGS (self->gop_stop)); ")", GST_TIME_ARGS (self->gop_start), GST_TIME_ARGS (self->gop_stop));
if (self->output_segment.format == GST_FORMAT_UNDEFINED) {
gst_segment_init (&self->output_segment, GST_FORMAT_TIME);
/* Ensure that we can represent negative DTS in our 'single' segment */
self->output_segment.start = 60 * 60 * GST_SECOND * 1000;
if (!gst_pad_push_event (self->srcpad,
gst_event_new_segment (&self->output_segment))) {
GST_ERROR_OBJECT (self, "Could not push segment!");
GST_ELEMENT_FLOW_ERROR (self, GST_FLOW_ERROR);
return GST_FLOW_ERROR;
}
}
if (!self->pending_gop) { if (!self->pending_gop) {
/* This might happen on EOS */ /* This might happen on EOS */
GST_INFO_OBJECT (self, "Empty gop!"); GST_INFO_OBJECT (self, "Empty gop!");
@ -431,7 +392,31 @@ gst_smart_encoder_push_pending_gop (GstSmartEncoder * self)
GST_TIME_FORMAT " - %" GST_SEGMENT_FORMAT, GST_TIME_ARGS (cstart), GST_TIME_FORMAT " - %" GST_SEGMENT_FORMAT, GST_TIME_ARGS (cstart),
GST_TIME_ARGS (cstop), &self->input_segment); GST_TIME_ARGS (cstop), &self->input_segment);
res = gst_smart_encoder_reencode_gop (self); res = gst_smart_encoder_reencode_gop (self);
/* Make sure we push the original caps when resuming the original stream */
self->push_original_caps = TRUE;
} else { } else {
if (self->push_original_caps) {
gst_pad_push_event (self->srcpad,
gst_event_new_caps (self->original_caps));
self->push_original_caps = FALSE;
}
if (self->output_segment.format == GST_FORMAT_UNDEFINED) {
gst_segment_init (&self->output_segment, GST_FORMAT_TIME);
/* Ensure that we can represent negative DTS in our 'single' segment */
self->output_segment.start = 60 * 60 * GST_SECOND * 1000;
if (!gst_pad_push_event (self->srcpad,
gst_event_new_segment (&self->output_segment))) {
GST_ERROR_OBJECT (self, "Could not push segment!");
GST_ELEMENT_FLOW_ERROR (self, GST_FLOW_ERROR);
return GST_FLOW_ERROR;
}
}
/* The whole GOP is within the segment, push all pending buffers downstream */ /* The whole GOP is within the segment, push all pending buffers downstream */
GST_INFO_OBJECT (self, GST_INFO_OBJECT (self,
"GOP doesn't need to be modified, pushing downstream: %" GST_TIME_FORMAT "GOP doesn't need to be modified, pushing downstream: %" GST_TIME_FORMAT
@ -523,13 +508,18 @@ smart_encoder_sink_event (GstPad * pad, GstObject * ghostpad, GstEvent * event)
smart_encoder_reset (self); smart_encoder_reset (self);
break; break;
case GST_EVENT_CAPS: case GST_EVENT_CAPS:
if (self->last_caps) { {
GstCaps *caps;
gst_event_parse_caps (event, &caps);
if (self->original_caps)
gst_caps_unref (self->original_caps);
self->original_caps = gst_caps_ref (caps);
self->push_original_caps = TRUE;
gst_clear_event (&event); gst_clear_event (&event);
} else {
gst_event_parse_caps (event, &self->last_caps);
self->last_caps = gst_caps_copy (self->last_caps);
}
break; break;
}
case GST_EVENT_STREAM_START: case GST_EVENT_STREAM_START:
gst_event_replace (&self->stream_start_event, gst_event_ref (event)); gst_event_replace (&self->stream_start_event, gst_event_ref (event));
break; break;
@ -620,14 +610,30 @@ _pad_sink_acceptcaps (GstPad * pad, GstSmartEncoder * self, GstCaps * caps)
n = gst_caps_get_size (accepted_caps); n = gst_caps_get_size (accepted_caps);
for (i = 0; i < n; i++) { for (i = 0; i < n; i++) {
s = gst_caps_get_structure (accepted_caps, i); s = gst_caps_get_structure (accepted_caps, i);
gst_structure_remove_fields (s, "codec_data", NULL);
if (gst_structure_has_name (s, "video/x-h264") ||
gst_structure_has_name (s, "video/x-h265")) {
gst_structure_remove_fields (s, "codec_data", "tier", "profile", "level",
NULL);
} else if (gst_structure_has_name (s, "video/x-vp8")
|| gst_structure_has_name (s, "video/x-vp9")) {
gst_structure_remove_field (s, "streamheader");
}
} }
modified_caps = gst_caps_copy (caps); modified_caps = gst_caps_copy (caps);
n = gst_caps_get_size (modified_caps); n = gst_caps_get_size (modified_caps);
for (i = 0; i < n; i++) { for (i = 0; i < n; i++) {
s = gst_caps_get_structure (modified_caps, i); s = gst_caps_get_structure (modified_caps, i);
gst_structure_remove_fields (s, "codec_data", NULL);
if (gst_structure_has_name (s, "video/x-h264") ||
gst_structure_has_name (s, "video/x-h265")) {
gst_structure_remove_fields (s, "codec_data", "tier", "profile", "level",
NULL);
} else if (gst_structure_has_name (s, "video/x-vp8")
|| gst_structure_has_name (s, "video/x-vp9")) {
gst_structure_remove_field (s, "streamheader");
}
} }
ret = gst_caps_can_intersect (modified_caps, accepted_caps); ret = gst_caps_can_intersect (modified_caps, accepted_caps);
@ -689,9 +695,32 @@ gst_smart_encoder_add_parser (GstSmartEncoder * self, GstCaps * format)
goto failed; goto failed;
} }
/* Add SPS/PPS before each gop to ensure that they can be decoded
* independently */
g_object_set (parser, "config-interval", -1, NULL); g_object_set (parser, "config-interval", -1, NULL);
if (!gst_bin_add (GST_BIN (self), parser)) {
GST_ERROR_OBJECT (self, "Could not add parser.");
goto failed;
}
if (!gst_element_link (parser, capsfilter)) {
GST_ERROR_OBJECT (self, "Could not link capfilter and parser.");
goto failed;
}
sinkpad = gst_element_get_static_pad (parser, "sink");
} else if (gst_structure_has_name (gst_caps_get_structure (format, 0),
"video/x-h265")) {
GstElement *parser = gst_element_factory_make ("h265parse", NULL);
if (!parser) {
GST_ERROR_OBJECT (self, "`h265parse` is missing, can't encode smartly");
goto failed;
}
g_object_set (parser, "config-interval", -1, NULL);
if (!gst_bin_add (GST_BIN (self), parser)) { if (!gst_bin_add (GST_BIN (self), parser)) {
GST_ERROR_OBJECT (self, "Could not add parser."); GST_ERROR_OBJECT (self, "Could not add parser.");
@ -793,6 +822,11 @@ gst_smart_encoder_dispose (GObject * object)
gst_clear_object (&self->encoder); gst_clear_object (&self->encoder);
if (self->original_caps) {
gst_caps_unref (self->original_caps);
self->original_caps = NULL;
}
G_OBJECT_CLASS (gst_smart_encoder_parent_class)->dispose (object); G_OBJECT_CLASS (gst_smart_encoder_parent_class)->dispose (object);
} }

View file

@ -45,7 +45,8 @@ struct _GstSmartEncoder {
GstSegment internal_segment; GstSegment internal_segment;
GstClockTime last_dts; GstClockTime last_dts;
GstCaps *last_caps; GstCaps *original_caps;
gboolean push_original_caps;
GstEvent *segment_event; GstEvent *segment_event;
GstEvent *stream_start_event; GstEvent *stream_start_event;