vtenc: apply DTS offset to ensure DTS <= PTS

Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/4888>
This commit is contained in:
Andoni Morales Alastruey 2023-03-30 15:38:22 +02:00 committed by Tim-Philipp Müller
parent 49106eab5d
commit 01d999342f
13 changed files with 157 additions and 30 deletions

View file

@ -15,12 +15,14 @@ applemedia_args = [
'-Dgst_core_media_buffer_get_type=gst_core_media_buffer_priv_get_type',
'-Dgst_core_media_buffer_get_pixel_buffer=gst_core_media_buffer_priv_get_pixel_buffer',
'-Dgst_core_video_buffer_new=gst_core_video_buffer_priv_new',
'-Dgst_core_video_buffer_get_type=gst_core_video_buffer_priv_get_type'
'-Dgst_core_video_buffer_get_type=gst_core_video_buffer_priv_get_type',
'-DGST_USE_UNSTABLE_API'
]
applemedia_objc_args = []
applemedia_option = get_option('applemedia')
if not ['darwin', 'ios'].contains(host_system) or applemedia_option.disabled()
applemedia_found_deps = false
subdir_done()
endif

View file

@ -30,7 +30,6 @@
#if defined(APPLEMEDIA_MOLTENVK)
#include <gst/vulkan/vulkan.h>
#endif
#define GST_USE_UNSTABLE_API
#include <gst/codecparsers/gsth264parser.h>
G_BEGIN_DECLS

View file

@ -86,6 +86,8 @@ GST_DEBUG_CATEGORY (gst_vtenc_debug);
#define GST_VTENC_CODEC_DETAILS_QDATA \
g_quark_from_static_string ("vtenc-codec-details")
/* Convert a CMTime (value / timescale seconds) into a GstClockTime expressed
 * in nanoseconds.
 *
 * The value is rescaled with gst_util_uint64_scale() instead of dividing by
 * (timescale / GST_SECOND): that inner integer division is 0 for every
 * timescale smaller than GST_SECOND and would make the outer division divide
 * by zero. For a timescale equal to GST_SECOND the result is unchanged.
 *
 * The argument is evaluated more than once, so pass a plain lvalue. */
#define CMTIME_TO_GST_CLOCK_TIME(time) \
    (gst_util_uint64_scale ((guint64) (time).value, GST_SECOND, \
        (guint64) (time).timescale))
/* define EnableHardwareAcceleratedVideoEncoder in < 10.9 */
#if defined(MAC_OS_X_VERSION_MAX_ALLOWED) && MAC_OS_X_VERSION_MAX_ALLOWED < 1090
const CFStringRef
@ -240,7 +242,7 @@ gst_vtenc_base_init (GstVTEncClass * klass)
if (__builtin_available (macOS 13.0, *)) {
/* Can't negate a __builtin_available check */
} else {
/* Disable ARGB64/RGBA64 if we're on M1 Pro/Max and macOS < 13.0
/* Disable ARGB64/RGBA64 if we're on M1 Pro/Max and macOS < 13.0
* due to a bug within VideoToolbox which causes encoding to fail. */
retval = sysctlbyname ("machdep.cpu.brand_string", &cpu_name, &cpu_len,
NULL, 0);
@ -713,6 +715,9 @@ gst_vtenc_start (GstVideoEncoder * enc)
{
GstVTEnc *self = GST_VTENC_CAST (enc);
/* DTS can be negative if b-frames are enabled */
gst_video_encoder_set_min_pts (enc, GST_SECOND * 60 * 60 * 1000);
self->cur_outframes = g_async_queue_new ();
return TRUE;
@ -750,13 +755,12 @@ gst_vtenc_stop (GstVideoEncoder * enc)
return TRUE;
}
static CFStringRef
gst_vtenc_h264_profile_level_key (GstVTEnc * self, const gchar * profile,
static gboolean
gst_vtenc_h264_parse_profile_level_key (GstVTEnc * self, const gchar * profile,
const gchar * level_arg)
{
char level[64];
gchar *key = NULL;
CFStringRef ret = NULL;
if (profile == NULL)
profile = "main";
@ -767,13 +771,16 @@ gst_vtenc_h264_profile_level_key (GstVTEnc * self, const gchar * profile,
if (!strcmp (profile, "constrained-baseline") ||
!strcmp (profile, "baseline")) {
profile = "Baseline";
self->h264_profile = GST_H264_PROFILE_BASELINE;
} else if (g_str_has_prefix (profile, "high")) {
profile = "High";
self->h264_profile = GST_H264_PROFILE_HIGH;
} else if (!strcmp (profile, "main")) {
profile = "Main";
self->h264_profile = GST_H264_PROFILE_MAIN;
} else {
GST_ERROR_OBJECT (self, "invalid profile: %s", profile);
return ret;
return FALSE;
}
if (strlen (level) == 1) {
@ -784,22 +791,21 @@ gst_vtenc_h264_profile_level_key (GstVTEnc * self, const gchar * profile,
}
key = g_strdup_printf ("H264_%s_%s", profile, level);
ret = CFStringCreateWithBytes (NULL, (const guint8 *) key, strlen (key),
self->profile_level =
CFStringCreateWithBytes (NULL, (const guint8 *) key, strlen (key),
kCFStringEncodingASCII, 0);
GST_INFO_OBJECT (self, "negotiated profile and level %s", key);
g_free (key);
return ret;
return TRUE;
}
static CFStringRef
gst_vtenc_hevc_profile_level_key (GstVTEnc * self, const gchar * profile,
static gboolean
gst_vtenc_hevc_parse_profile_level_key (GstVTEnc * self, const gchar * profile,
const gchar * level_arg)
{
gchar *key = NULL;
CFStringRef ret = NULL;
if (profile == NULL || !strcmp (profile, "main"))
profile = "Main";
@ -811,18 +817,18 @@ gst_vtenc_hevc_profile_level_key (GstVTEnc * self, const gchar * profile,
profile = "Main42210";
else {
GST_ERROR_OBJECT (self, "invalid profile: %s", profile);
return ret;
return FALSE;
}
/* VT does not support specific levels for HEVC */
key = g_strdup_printf ("HEVC_%s_AutoLevel", profile);
ret = CFStringCreateWithBytes (NULL, (const guint8 *) key, strlen (key),
self->profile_level =
CFStringCreateWithBytes (NULL, (const guint8 *) key, strlen (key),
kCFStringEncodingASCII, 0);
GST_INFO_OBJECT (self, "negotiated profile and level %s", key);
g_free (key);
return ret;
return TRUE;
}
static gboolean
@ -834,20 +840,11 @@ gst_vtenc_negotiate_profile_and_level (GstVTEnc * self, GstStructure * s)
if (self->profile_level)
CFRelease (self->profile_level);
if (self->specific_format_id == kCMVideoCodecType_HEVC)
self->profile_level =
gst_vtenc_hevc_profile_level_key (self, profile, level);
else
self->profile_level =
gst_vtenc_h264_profile_level_key (self, profile, level);
if (self->profile_level == NULL) {
GST_ERROR_OBJECT (self, "unsupported profile '%s' or level '%s'",
profile, level);
return FALSE;
if (self->specific_format_id == kCMVideoCodecType_HEVC) {
return gst_vtenc_hevc_parse_profile_level_key (self, profile, level);
} else {
return gst_vtenc_h264_parse_profile_level_key (self, profile, level);
}
return TRUE;
}
static gboolean
@ -1225,6 +1222,43 @@ gst_vtenc_set_colorimetry (GstVTEnc * self, VTCompressionSessionRef session)
}
}
/*
 * gst_vtenc_compute_dts_offset:
 * @self: the encoder instance; reads allow_frame_reordering,
 *   specific_format_id, h264_profile and video_info, writes dts_offset
 * @fps_n: framerate numerator supplied by the caller
 * @fps_d: framerate denominator supplied by the caller
 *
 * Computes how far output DTS values have to be shifted back so that
 * DTS <= PTS still holds when the encoder reorders frames, and stores the
 * result in self->dts_offset. The offset is a whole number of frame
 * durations: 0 when reordering is disabled or the H.264 baseline profile is
 * in use, 1 for other H.264 profiles, 2 for every other codec.
 *
 * Returns: TRUE on success. FALSE when a non-zero offset is required but the
 * framerate is not a usable fixed fraction, in which case dts_offset is left
 * untouched.
 */
static gboolean
gst_vtenc_compute_dts_offset (GstVTEnc * self, gint fps_n, gint fps_d)
{
  gint num_offset_frames;

  /* kVTCompressionPropertyKey_AllowFrameReordering enables B-Frames */
  if (!self->allow_frame_reordering ||
      (self->specific_format_id == kCMVideoCodecType_H264
          && self->h264_profile == GST_H264_PROFILE_BASELINE)) {
    num_offset_frames = 0;
  } else if (self->specific_format_id == kCMVideoCodecType_H264) {
    /* H264 encoder always sets 2 max_num_ref_frames */
    num_offset_frames = 1;
  } else {
    /* HEVC encoder uses B-pyramid */
    num_offset_frames = 2;
  }

  if (num_offset_frames == 0) {
    /* Nothing to shift. Do not touch the framerate at all: with a variable
     * framerate the numerator is 0 and scaling by it would divide by zero. */
    self->dts_offset = 0;
  } else {
    /* The offset is expressed in frame durations, so every framerate term
     * that takes part in the computation below must be strictly positive.
     * Both the caller supplied values and the stored video info are checked
     * because the scale uses the latter. */
    if (fps_n <= 0 || fps_d <= 0 ||
        self->video_info.fps_n <= 0 || self->video_info.fps_d <= 0) {
      GST_ERROR_OBJECT (self,
          "Variable framerate is not supported with B-Frames");
      return FALSE;
    }

    /* num_offset_frames * (fps_d / fps_n) seconds, in nanoseconds */
    self->dts_offset =
        gst_util_uint64_scale (num_offset_frames * GST_SECOND,
        self->video_info.fps_d, self->video_info.fps_n);
  }

  GST_DEBUG_OBJECT (self, "DTS Offset:%" GST_TIME_FORMAT,
      GST_TIME_ARGS (self->dts_offset));

  return TRUE;
}
static VTCompressionSessionRef
gst_vtenc_create_session (GstVTEnc * self)
{
@ -1270,6 +1304,13 @@ gst_vtenc_create_session (GstVTEnc * self)
/* This was set in gst_vtenc_negotiate_specific_format_details() */
g_assert_cmpint (self->specific_format_id, !=, 0);
/* The DTS offset is only computed once a profile/level has been negotiated;
 * a failure aborts session creation through the beach label.
 *
 * NOTE(review): gst_vtenc_compute_dts_offset() is declared as
 * (self, fps_n, fps_d), but this call passes video_info.fps_d first and
 * video_info.fps_n second. The callee derives the offset itself from
 * self->video_info, so the swap only changes which field its zero check
 * looks at. Confirm the intended order, and change caller and callee
 * together. */
if (self->profile_level) {
if (!gst_vtenc_compute_dts_offset (self, self->video_info.fps_d,
self->video_info.fps_n)) {
goto beach;
}
}
status = VTCompressionSessionCreate (NULL,
self->negotiated_width, self->negotiated_height,
self->specific_format_id, encoder_spec, pb_attrs, NULL,
@ -1586,6 +1627,18 @@ gst_vtenc_update_latency (GstVTEnc * self)
CFRelease (value);
}
/*
 * gst_vtenc_update_timestamps:
 * @self: the encoder instance, provides the dts_offset to apply
 * @frame: codec frame whose pts/dts fields are overwritten
 * @sample_buf: encoded sample the output timestamps are read from
 *
 * Sets frame->pts from the sample's output presentation timestamp. When the
 * sample also carries a valid output decode timestamp, frame->dts is set to
 * that timestamp minus self->dts_offset; otherwise frame->dts is left as is.
 */
static void
gst_vtenc_update_timestamps (GstVTEnc * self, GstVideoCodecFrame * frame,
    CMSampleBufferRef sample_buf)
{
  CMTime out_pts;
  CMTime out_dts;

  out_pts = CMSampleBufferGetOutputPresentationTimeStamp (sample_buf);
  frame->pts = CMTIME_TO_GST_CLOCK_TIME (out_pts);

  out_dts = CMSampleBufferGetOutputDecodeTimeStamp (sample_buf);
  if (!CMTIME_IS_VALID (out_dts))
    return;

  /* Shift the DTS back by the precomputed offset */
  frame->dts = CMTIME_TO_GST_CLOCK_TIME (out_dts) - self->dts_offset;
}
static GstFlowReturn
gst_vtenc_encode_frame (GstVTEnc * self, GstVideoCodecFrame * frame)
{
@ -1886,6 +1939,8 @@ gst_vtenc_enqueue_buffer (void *outputCallbackRefCon,
* to enable the use of the video meta API on the core media buffer */
frame->output_buffer = gst_core_media_buffer_new (sampleBuffer, FALSE, NULL);
gst_vtenc_update_timestamps (self, frame, sampleBuffer);
beach:
/* needed anyway so the frame will be released */
if (frame)

View file

@ -21,6 +21,7 @@
#define __GST_VTENC_H__
#include <gst/gst.h>
#include <gst/codecparsers/gsth264parser.h>
#include <gst/video/video.h>
#include <VideoToolbox/VideoToolbox.h>
@ -60,6 +61,7 @@ struct _GstVTEnc
CMVideoCodecType specific_format_id;
CFStringRef profile_level;
GstH264Profile h264_profile;
guint bitrate;
gboolean allow_frame_reordering;
gboolean realtime;
@ -81,6 +83,7 @@ struct _GstVTEnc
GstVideoInfo video_info;
VTCompressionSessionRef session;
CFDictionaryRef keyframe_props;
GstClockTime dts_offset;
GAsyncQueue * cur_outframes;
};

View file

@ -7,6 +7,10 @@ tests = [
{'path': 'opencv/cvtracker'},
{'path': 'testsrcbin/caps_spec'},
{'path': 'wpe/load_bytes_first', 'skip': not building_wpe},
{'path': 'vtenc/vtenc_h264', 'skip': not applemedia_found_deps},
{'path': 'vtenc/vtenc_h264_b_frames', 'skip': not applemedia_found_deps},
{'path': 'vtenc/vtenc_h265', 'skip': not applemedia_found_deps},
{'path': 'vtenc/vtenc_h265_b_frames', 'skip': not applemedia_found_deps},
]
env = environment()

View file

@ -0,0 +1,7 @@
meta,
args = {
"videotestsrc num-buffers=5 ! video/x-raw,framerate=25/1,width=320,height=240 ! vtenc_h264 name=enc allow-frame-reordering=false ! fakesink",
},
configs = {
"$(validateflow), pad=enc:src, record-buffers=true",
}

View file

@ -0,0 +1,9 @@
event stream-start: GstEventStreamStart, flags=(GstStreamFlags)GST_STREAM_FLAG_NONE, group-id=(uint)1;
event caps: video/x-h264, alignment=(string)au, chroma-site=(string)jpeg, codec_data=(buffer)014d000dffe1000d274d000dab40a0fd350601060201000428ee3c80, colorimetry=(string)bt601, framerate=(fraction)25/1, height=(int)240, interlace-mode=(string)progressive, level=(string)1.3, multiview-flags=(GstVideoMultiviewFlagsSet)0:ffffffff:/right-view-first/left-flipped/left-flopped/right-flipped/right-flopped/half-aspect/mixed-mono, multiview-mode=(string)mono, pixel-aspect-ratio=(fraction)1/1, profile=(string)main, stream-format=(string)avc, width=(int)320;
event segment: format=TIME, start=1000:00:00.000000000, offset=0:00:00.000000000, stop=none, time=0:00:00.000000000, base=0:00:00.000000000, position=1000:00:00.000000000
buffer: dts=1000:00:00.000000000, pts=1000:00:00.000000000, dur=0:00:00.040000000, flags=discont marker tag-memory, meta=GstCoreMediaMeta
buffer: dts=1000:00:00.040000000, pts=1000:00:00.040000000, dur=0:00:00.040000000, flags=marker delta-unit tag-memory, meta=GstCoreMediaMeta
buffer: dts=1000:00:00.080000000, pts=1000:00:00.080000000, dur=0:00:00.040000000, flags=marker delta-unit tag-memory, meta=GstCoreMediaMeta
buffer: dts=1000:00:00.120000000, pts=1000:00:00.120000000, dur=0:00:00.040000000, flags=marker delta-unit tag-memory, meta=GstCoreMediaMeta
buffer: dts=1000:00:00.160000000, pts=1000:00:00.160000000, dur=0:00:00.040000000, flags=marker delta-unit tag-memory, meta=GstCoreMediaMeta
event eos: (no structure)

View file

@ -0,0 +1,7 @@
meta,
args = {
"videotestsrc num-buffers=5 ! video/x-raw,framerate=25/1,width=320,height=240 ! vtenc_h264 name=enc allow-frame-reordering=true ! fakesink",
},
configs = {
"$(validateflow), pad=enc:src, record-buffers=true",
}

View file

@ -0,0 +1,9 @@
event stream-start: GstEventStreamStart, flags=(GstStreamFlags)GST_STREAM_FLAG_NONE, group-id=(uint)1;
event caps: video/x-h264, alignment=(string)au, chroma-site=(string)jpeg, codec_data=(buffer)014d000dffe1000d274d000dab60a0fd350601060201000428ee3c80, colorimetry=(string)bt601, framerate=(fraction)25/1, height=(int)240, interlace-mode=(string)progressive, level=(string)1.3, multiview-flags=(GstVideoMultiviewFlagsSet)0:ffffffff:/right-view-first/left-flipped/left-flopped/right-flipped/right-flopped/half-aspect/mixed-mono, multiview-mode=(string)mono, pixel-aspect-ratio=(fraction)1/1, profile=(string)main, stream-format=(string)avc, width=(int)320;
event segment: format=TIME, start=1000:00:00.000000000, offset=0:00:00.000000000, stop=none, time=0:00:00.000000000, base=0:00:00.000000000, position=1000:00:00.000000000
buffer: dts=999:59:59.960000000, pts=1000:00:00.000000000, dur=0:00:00.040000000, flags=discont marker tag-memory, meta=GstCoreMediaMeta
buffer: dts=1000:00:00.000000000, pts=1000:00:00.080000000, dur=0:00:00.040000000, flags=marker delta-unit tag-memory, meta=GstCoreMediaMeta
buffer: dts=1000:00:00.040000000, pts=1000:00:00.040000000, dur=0:00:00.040000000, flags=marker delta-unit tag-memory, meta=GstCoreMediaMeta
buffer: dts=1000:00:00.080000000, pts=1000:00:00.160000000, dur=0:00:00.040000000, flags=marker delta-unit tag-memory, meta=GstCoreMediaMeta
buffer: dts=1000:00:00.120000000, pts=1000:00:00.120000000, dur=0:00:00.040000000, flags=marker delta-unit tag-memory, meta=GstCoreMediaMeta
event eos: (no structure)

View file

@ -0,0 +1,7 @@
meta,
args = {
"videotestsrc num-buffers=5 ! video/x-raw,framerate=25/1,width=320,height=240 ! vtenc_h264 name=enc allow-frame-reordering=false ! fakesink",
},
configs = {
"$(validateflow), pad=enc:src, record-buffers=true",
}

View file

@ -0,0 +1,9 @@
event stream-start: GstEventStreamStart, flags=(GstStreamFlags)GST_STREAM_FLAG_NONE, group-id=(uint)1;
event caps: video/x-h264, alignment=(string)au, chroma-site=(string)jpeg, codec_data=(buffer)014d000dffe1000d274d000dab40a0fd350601060201000428ee3c80, colorimetry=(string)bt601, framerate=(fraction)25/1, height=(int)240, interlace-mode=(string)progressive, level=(string)1.3, multiview-flags=(GstVideoMultiviewFlagsSet)0:ffffffff:/right-view-first/left-flipped/left-flopped/right-flipped/right-flopped/half-aspect/mixed-mono, multiview-mode=(string)mono, pixel-aspect-ratio=(fraction)1/1, profile=(string)main, stream-format=(string)avc, width=(int)320;
event segment: format=TIME, start=1000:00:00.000000000, offset=0:00:00.000000000, stop=none, time=0:00:00.000000000, base=0:00:00.000000000, position=1000:00:00.000000000
buffer: dts=1000:00:00.000000000, pts=1000:00:00.000000000, dur=0:00:00.040000000, flags=discont marker tag-memory, meta=GstCoreMediaMeta
buffer: dts=1000:00:00.040000000, pts=1000:00:00.040000000, dur=0:00:00.040000000, flags=marker delta-unit tag-memory, meta=GstCoreMediaMeta
buffer: dts=1000:00:00.080000000, pts=1000:00:00.080000000, dur=0:00:00.040000000, flags=marker delta-unit tag-memory, meta=GstCoreMediaMeta
buffer: dts=1000:00:00.120000000, pts=1000:00:00.120000000, dur=0:00:00.040000000, flags=marker delta-unit tag-memory, meta=GstCoreMediaMeta
buffer: dts=1000:00:00.160000000, pts=1000:00:00.160000000, dur=0:00:00.040000000, flags=marker delta-unit tag-memory, meta=GstCoreMediaMeta
event eos: (no structure)

View file

@ -0,0 +1,7 @@
meta,
args = {
"videotestsrc num-buffers=5 ! video/x-raw,framerate=25/1,width=320,height=240 ! vtenc_h264 name=enc allow-frame-reordering=true ! fakesink",
},
configs = {
"$(validateflow), pad=enc:src, record-buffers=true",
}

View file

@ -0,0 +1,9 @@
event stream-start: GstEventStreamStart, flags=(GstStreamFlags)GST_STREAM_FLAG_NONE, group-id=(uint)1;
event caps: video/x-h264, alignment=(string)au, chroma-site=(string)jpeg, codec_data=(buffer)014d000dffe1000d274d000dab60a0fd350601060201000428ee3c80, colorimetry=(string)bt601, framerate=(fraction)25/1, height=(int)240, interlace-mode=(string)progressive, level=(string)1.3, multiview-flags=(GstVideoMultiviewFlagsSet)0:ffffffff:/right-view-first/left-flipped/left-flopped/right-flipped/right-flopped/half-aspect/mixed-mono, multiview-mode=(string)mono, pixel-aspect-ratio=(fraction)1/1, profile=(string)main, stream-format=(string)avc, width=(int)320;
event segment: format=TIME, start=1000:00:00.000000000, offset=0:00:00.000000000, stop=none, time=0:00:00.000000000, base=0:00:00.000000000, position=1000:00:00.000000000
buffer: dts=999:59:59.960000000, pts=1000:00:00.000000000, dur=0:00:00.040000000, flags=discont marker tag-memory, meta=GstCoreMediaMeta
buffer: dts=1000:00:00.000000000, pts=1000:00:00.080000000, dur=0:00:00.040000000, flags=marker delta-unit tag-memory, meta=GstCoreMediaMeta
buffer: dts=1000:00:00.040000000, pts=1000:00:00.040000000, dur=0:00:00.040000000, flags=marker delta-unit tag-memory, meta=GstCoreMediaMeta
buffer: dts=1000:00:00.080000000, pts=1000:00:00.160000000, dur=0:00:00.040000000, flags=marker delta-unit tag-memory, meta=GstCoreMediaMeta
buffer: dts=1000:00:00.120000000, pts=1000:00:00.120000000, dur=0:00:00.040000000, flags=marker delta-unit tag-memory, meta=GstCoreMediaMeta
event eos: (no structure)