From 306200e10e165b798773ee094736adac533764c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim-Philipp=20M=C3=BCller?= Date: Wed, 25 Dec 2024 22:54:16 +0100 Subject: [PATCH] svtjpegxsdec: add support for interlaced video Part-of: --- .../ext/svtjpegxs/gstsvtjpegxsdec.c | 135 ++++++++++++++---- 1 file changed, 107 insertions(+), 28 deletions(-) diff --git a/subprojects/gst-plugins-bad/ext/svtjpegxs/gstsvtjpegxsdec.c b/subprojects/gst-plugins-bad/ext/svtjpegxs/gstsvtjpegxsdec.c index 379c9d395d..cf20723050 100644 --- a/subprojects/gst-plugins-bad/ext/svtjpegxs/gstsvtjpegxsdec.c +++ b/subprojects/gst-plugins-bad/ext/svtjpegxs/gstsvtjpegxsdec.c @@ -7,8 +7,8 @@ /** * SECTION:element-svtjpegxsdec * - * The svtjpegxsdec element does JPEG XS decoding using Scalable - * Video Technology for JPEG XS Decoder (SVT JPEG XS Decoder). + * The svtjpegxsdec element does JPEG XS decoding using the Scalable + * Video Technology for JPEG XS Decoder (SVT JPEG XS Decoder) library. * * See https://jpeg.org/jpegxs/ for more information about the JPEG XS format. * @@ -101,18 +101,17 @@ enum static GstStaticPadTemplate sink_pad_template = GST_STATIC_PAD_TEMPLATE ("sink", GST_PAD_SINK, GST_PAD_ALWAYS, GST_STATIC_CAPS ("image/x-jxsc, alignment = frame, " - "interlace-mode = progressive, " + "interlace-mode = { progressive, fields }, " "sampling = { YCbCr-4:4:4, YCbCr-4:2:2, YCbCr-4:2:0 }, " "depth = { 8, 10, 12 }")); // FIXME: add 4:2:2 and 4:4:4 packed formats -// Only handle progressive mode for now static GstStaticPadTemplate src_pad_template = GST_STATIC_PAD_TEMPLATE ("src", GST_PAD_SRC, GST_PAD_ALWAYS, GST_STATIC_CAPS ("video/x-raw, " // - "format = (string) { " SUPPORTED_FORMATS " }," - "interlace-mode = progressive, " - "width = (int) [16, 16384], " "height = (int) [16, 16384], " + "format = (string) { " SUPPORTED_FORMATS " }, " + "interlace-mode = { progressive, interleaved }, " + "width = (int) [16, 16384], height = (int) [16, 16384], " "framerate = (fraction) [0, MAX]")); #define gst_svt_jpeg_xs_dec_parent_class parent_class @@ -280,7 +279,7 @@ static const uint32_t FAST_SEARCH = 1; static const uint32_t FRAME_BASED = 0; static GstFlowReturn -gst_svt_jpeg_xs_dec_init_decoder (GstSvtJpegXsDec * jxsdec, +gst_svt_jpeg_xs_dec_init_decoder (GstSvtJpegXsDec * jxsdec, guint n_fields, const guint8 * data, gsize size) { svt_jpeg_xs_image_config_t img_config; @@ -295,10 +294,10 @@ gst_svt_jpeg_xs_dec_init_decoder (GstSvtJpegXsDec * jxsdec, ("Couldn't probe input frame headers, error code: 0x%08x", dec_ret)); return GST_FLOW_ERROR; } - // We expect complete frames as input - if (size != expected_frame_size) { + // We expect complete frames as input (or two codestreams, one per field) + if (size < expected_frame_size) { GST_ELEMENT_ERROR (jxsdec, STREAM, DECODE, (NULL), - ("Input frame size does not match expected size, %zu != %u", + ("Input frame size smaller than expected size, %zu < %u", size, expected_frame_size)); return GST_FLOW_ERROR; } @@ -345,9 +344,9 @@ gst_svt_jpeg_xs_dec_init_decoder (GstSvtJpegXsDec * jxsdec, svt_jpeg_xs_image_config_t *cfg = &jxsdec->img_config; - GST_INFO_OBJECT (jxsdec, "Output image configuration:"); + GST_INFO_OBJECT (jxsdec, "Output image configuration (%u fields):", n_fields); GST_INFO_OBJECT (jxsdec, " width: %u", cfg->width); - GST_INFO_OBJECT (jxsdec, " height: %u", cfg->height); + GST_INFO_OBJECT (jxsdec, " height: %u", cfg->height * n_fields); GST_INFO_OBJECT (jxsdec, " depth: %u", cfg->bit_depth); GST_INFO_OBJECT (jxsdec, " format: %s", format_to_format_name (cfg->format)); GST_INFO_OBJECT (jxsdec, " components: %u", cfg->components_num); @@ -418,7 +417,14 @@ gst_svt_jpeg_xs_dec_init_decoder (GstSvtJpegXsDec * jxsdec, jxsdec->output_state = gst_video_decoder_set_output_state (GST_VIDEO_DECODER (jxsdec), fmt, - cfg->width, cfg->height, jxsdec->input_state); + cfg->width, cfg->height * n_fields, jxsdec->input_state); + + // The input caps have interlace-mode=fields since that seems the best + // mapping, but we will output interlace-mode=interleaved. + if (n_fields == 2) { + GST_VIDEO_INFO_INTERLACE_MODE (&jxsdec->output_state->info) = + GST_VIDEO_INTERLACE_MODE_INTERLEAVED; + } return GST_FLOW_OK; } @@ -426,7 +432,8 @@ gst_svt_jpeg_xs_dec_init_decoder (GstSvtJpegXsDec * jxsdec, // The codestream data is either a full progressive image or a single field. static GstFlowReturn gst_svt_jpeg_xs_dec_decode_codestream (GstSvtJpegXsDec * jxsdec, - GstVideoFrame * video_frame, const guint8 * data, gsize data_size) + guint field, guint n_fields, GstVideoFrame * video_frame, + const guint8 * data, gsize data_size) { // Decoder input/output frame struct svt_jpeg_xs_frame_t decoder_frame; @@ -450,24 +457,30 @@ gst_svt_jpeg_xs_dec_decode_codestream (GstSvtJpegXsDec * jxsdec, svt_jpeg_xs_image_buffer_t img = { {0,} }; - img.data_yuv[0] = GST_VIDEO_FRAME_PLANE_DATA (video_frame, 0); - img.data_yuv[1] = GST_VIDEO_FRAME_PLANE_DATA (video_frame, 1); - img.data_yuv[2] = GST_VIDEO_FRAME_PLANE_DATA (video_frame, 2); + img.data_yuv[0] = (guint8 *) GST_VIDEO_FRAME_PLANE_DATA (video_frame, 0) + + field * GST_VIDEO_FRAME_COMP_STRIDE (video_frame, 0); + img.data_yuv[1] = (guint8 *) GST_VIDEO_FRAME_PLANE_DATA (video_frame, 1) + + field * GST_VIDEO_FRAME_COMP_STRIDE (video_frame, 1); + img.data_yuv[2] = (guint8 *) GST_VIDEO_FRAME_PLANE_DATA (video_frame, 2) + + field * GST_VIDEO_FRAME_COMP_STRIDE (video_frame, 2); // Note: wants stride in pixels not in bytes (might need tweaks for 10-bit) - img.stride[0] = GST_VIDEO_FRAME_COMP_STRIDE (video_frame, 0) + img.stride[0] = n_fields * GST_VIDEO_FRAME_COMP_STRIDE (video_frame, 0) / GST_VIDEO_FRAME_COMP_PSTRIDE (video_frame, 0); - img.stride[1] = GST_VIDEO_FRAME_COMP_STRIDE (video_frame, 1) + img.stride[1] = n_fields * GST_VIDEO_FRAME_COMP_STRIDE (video_frame, 1) / GST_VIDEO_FRAME_COMP_PSTRIDE (video_frame, 1); - img.stride[2] = GST_VIDEO_FRAME_COMP_STRIDE (video_frame, 2) + img.stride[2] = n_fields * GST_VIDEO_FRAME_COMP_STRIDE (video_frame, 2) / GST_VIDEO_FRAME_COMP_PSTRIDE (video_frame, 2); img.alloc_size[0] = GST_VIDEO_FRAME_COMP_STRIDE (video_frame, 0) - * GST_VIDEO_FRAME_COMP_HEIGHT (video_frame, 0); + * GST_VIDEO_FRAME_COMP_HEIGHT (video_frame, 0) + - field * GST_VIDEO_FRAME_COMP_STRIDE (video_frame, 0); img.alloc_size[1] = GST_VIDEO_FRAME_COMP_STRIDE (video_frame, 1) - * GST_VIDEO_FRAME_COMP_HEIGHT (video_frame, 1); + * GST_VIDEO_FRAME_COMP_HEIGHT (video_frame, 1) + - field * GST_VIDEO_FRAME_COMP_STRIDE (video_frame, 1); img.alloc_size[2] = GST_VIDEO_FRAME_COMP_STRIDE (video_frame, 2) - * GST_VIDEO_FRAME_COMP_HEIGHT (video_frame, 2); + * GST_VIDEO_FRAME_COMP_HEIGHT (video_frame, 2) + - field * GST_VIDEO_FRAME_COMP_STRIDE (video_frame, 2); for (int i = 0; i < 3; ++i) { GST_TRACE_OBJECT (jxsdec, "img stride[%u] = %u, alloc_size[%u]: %u", @@ -479,7 +492,7 @@ gst_svt_jpeg_xs_dec_decode_codestream (GstSvtJpegXsDec * jxsdec, decoder_frame.user_prv_ctx_ptr = NULL; - GST_TRACE_OBJECT (jxsdec, "Sending frame to decoder .."); + GST_TRACE_OBJECT (jxsdec, "Sending frame or field to decoder .."); SvtJxsErrorType_t dec_ret; @@ -538,6 +551,7 @@ gst_svt_jpeg_xs_dec_handle_frame (GstVideoDecoder * vdecoder, { GstSvtJpegXsDec *jxsdec = GST_SVT_JPEG_XS_DEC (vdecoder); GstFlowReturn flow = GST_FLOW_OK; + guint n_fields; GST_LOG_OBJECT (jxsdec, "Frame to decode, size: %zu bytes", gst_buffer_get_size (frame->input_buffer)); @@ -547,6 +561,12 @@ gst_svt_jpeg_xs_dec_handle_frame (GstVideoDecoder * vdecoder, return GST_FLOW_NOT_NEGOTIATED; } + if (GST_VIDEO_INFO_IS_INTERLACED (&jxsdec->input_state->info)) { + n_fields = 2; + } else { + n_fields = 1; + } + GstMapInfo in_map = GST_MAP_INFO_INIT; GstVideoFrame video_frame = GST_VIDEO_FRAME_INIT; @@ -555,7 +575,10 @@ gst_svt_jpeg_xs_dec_handle_frame (GstVideoDecoder * vdecoder, goto input_buffer_map_failure; if (jxsdec->jxs_decoder == NULL) { - flow = gst_svt_jpeg_xs_dec_init_decoder (jxsdec, in_map.data, in_map.size); + flow = + gst_svt_jpeg_xs_dec_init_decoder (jxsdec, n_fields, in_map.data, + in_map.size); + if (flow != GST_FLOW_OK) goto out_unmap; } @@ -572,8 +595,64 @@ gst_svt_jpeg_xs_dec_handle_frame (GstVideoDecoder * vdecoder, frame->output_buffer, GST_MAP_WRITE)) goto output_frame_map_error; - /* XXX = */ gst_svt_jpeg_xs_dec_decode_codestream (jxsdec, &video_frame, - in_map.data, in_map.size); + // Decode frame or fields + gsize offset = 0; + + for (guint field = 0; field < n_fields; ++field) { + // Figure out where the second codestream/field starts in the data + // (should be size/2 unless vbr encoding was used, which is unlikely). + if (field == 1) { + svt_jpeg_xs_image_config_t img_config; + SvtJxsErrorType_t parse_ret; + uint32_t frame_size = 0; + + parse_ret = svt_jpeg_xs_decoder_get_single_frame_size (in_map.data, + in_map.size, &img_config, &frame_size, FAST_SEARCH); + + // This shouldn't really happen because at this point we have already + // decoded the first field without errors. In the unlikely case it does + // happen, let's check the midpoint for the second codestream header and + // otherwise just decode the first field again into the second field. + if (parse_ret != SvtJxsErrorNone) { + if (in_map.size >= 8 && + GST_READ_UINT32_BE (in_map.data + in_map.size / 2) == 0xff10ff50) { + frame_size = in_map.size / 2; + } else { + GST_ELEMENT_WARNING (jxsdec, STREAM, DECODE, (NULL), + ("Unexpected error 0x%08x getting codestream length of field 1 " + "to determine offset of codestream for field 2. Will decode " + "field 1 again into field 2", parse_ret)); + frame_size = 0; + } + } + + offset = frame_size; + } + + if (n_fields == 2) { + GST_TRACE_OBJECT (jxsdec, + "Decoding field %u of 2 @ %zu", field + 1, offset); + } + + flow = gst_svt_jpeg_xs_dec_decode_codestream (jxsdec, field, n_fields, + &video_frame, in_map.data + offset, in_map.size - offset); + + if (flow != GST_FLOW_OK) + break; + } + + if (n_fields == 2) { + GST_BUFFER_FLAG_SET (video_frame.buffer, GST_VIDEO_BUFFER_FLAG_INTERLACED); + if ((video_frame.flags & GST_VIDEO_FRAME_FLAG_TFF)) { + GST_BUFFER_FLAG_SET (video_frame.buffer, GST_VIDEO_BUFFER_FLAG_TFF); + } else { + GST_BUFFER_FLAG_UNSET (video_frame.buffer, GST_VIDEO_BUFFER_FLAG_TFF); + } + } else { + GST_BUFFER_FLAG_UNSET (video_frame.buffer, + GST_VIDEO_BUFFER_FLAG_INTERLACED); + GST_BUFFER_FLAG_UNSET (video_frame.buffer, GST_VIDEO_BUFFER_FLAG_TFF); + } // Unmap output frame and input buffer {