svtjpegxsdec: add support for interlaced video

Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/8219>
Author: Tim-Philipp Müller 2024-12-25 22:54:16 +01:00, committed by GStreamer Marge Bot
parent 71aacec532
commit 306200e10e

@@ -7,8 +7,8 @@
 /**
  * SECTION:element-svtjpegxsdec
  *
- * The svtjpegxsdec element does JPEG XS decoding using Scalable
- * Video Technology for JPEG XS Decoder (SVT JPEG XS Decoder).
+ * The svtjpegxsdec element does JPEG XS decoding using the Scalable
+ * Video Technology for JPEG XS Decoder (SVT JPEG XS Decoder) library.
  *
  * See https://jpeg.org/jpegxs/ for more information about the JPEG XS format.
  *
@@ -101,18 +101,17 @@ enum
 static GstStaticPadTemplate sink_pad_template =
     GST_STATIC_PAD_TEMPLATE ("sink", GST_PAD_SINK, GST_PAD_ALWAYS,
     GST_STATIC_CAPS ("image/x-jxsc, alignment = frame, "
-        "interlace-mode = progressive, "
+        "interlace-mode = { progressive, fields }, "
         "sampling = { YCbCr-4:4:4, YCbCr-4:2:2, YCbCr-4:2:0 }, "
         "depth = { 8, 10, 12 }"));
 
 // FIXME: add 4:2:2 and 4:4:4 packed formats
-// Only handle progressive mode for now
 static GstStaticPadTemplate src_pad_template =
     GST_STATIC_PAD_TEMPLATE ("src", GST_PAD_SRC, GST_PAD_ALWAYS,
     GST_STATIC_CAPS ("video/x-raw, "    //
-        "format = (string) { " SUPPORTED_FORMATS " },"
-        "interlace-mode = progressive, "
-        "width = (int) [16, 16384], " "height = (int) [16, 16384], "
+        "format = (string) { " SUPPORTED_FORMATS " }, "
+        "interlace-mode = { progressive, interleaved }, "
+        "width = (int) [16, 16384], height = (int) [16, 16384], "
         "framerate = (fraction) [0, MAX]"));
 
 #define gst_svt_jpeg_xs_dec_parent_class parent_class
@@ -280,7 +279,7 @@ static const uint32_t FAST_SEARCH = 1;
 static const uint32_t FRAME_BASED = 0;
 
 static GstFlowReturn
-gst_svt_jpeg_xs_dec_init_decoder (GstSvtJpegXsDec * jxsdec,
+gst_svt_jpeg_xs_dec_init_decoder (GstSvtJpegXsDec * jxsdec, guint n_fields,
     const guint8 * data, gsize size)
 {
   svt_jpeg_xs_image_config_t img_config;
@@ -295,10 +294,10 @@ gst_svt_jpeg_xs_dec_init_decoder (GstSvtJpegXsDec * jxsdec,
         ("Couldn't probe input frame headers, error code: 0x%08x", dec_ret));
     return GST_FLOW_ERROR;
   }
-  // We expect complete frames as input
-  if (size != expected_frame_size) {
+  // We expect complete frames as input (or two codestreams, one per field)
+  if (size < expected_frame_size) {
     GST_ELEMENT_ERROR (jxsdec, STREAM, DECODE, (NULL),
-        ("Input frame size does not match expected size, %zu != %u",
+        ("Input frame size smaller than expected size, %zu < %u",
         size, expected_frame_size));
     return GST_FLOW_ERROR;
   }
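For context on why the check is relaxed from `!=` to `<`: the header probe reports the length of the first codestream in the buffer only, so for field-based input the buffer is expected to be larger than that. A minimal sketch of the relationship, reusing the probe call that appears later in this patch (fragment within the function's scope, illustrative only):

```c
/* Illustrative sketch only: the probe returns the size of the FIRST
 * codestream found in the buffer, not the size of the whole buffer. */
uint32_t first_codestream_size = 0;
svt_jpeg_xs_image_config_t cfg;

if (svt_jpeg_xs_decoder_get_single_frame_size (data, size, &cfg,
        &first_codestream_size, FAST_SEARCH) == SvtJxsErrorNone) {
  /* progressive input: size == first_codestream_size
   * interlaced input:  size ~= 2 * first_codestream_size (for CBR),
   * so only size < first_codestream_size is treated as a fatal error */
}
```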
@@ -345,9 +344,9 @@ gst_svt_jpeg_xs_dec_init_decoder (GstSvtJpegXsDec * jxsdec,
   svt_jpeg_xs_image_config_t *cfg = &jxsdec->img_config;
 
-  GST_INFO_OBJECT (jxsdec, "Output image configuration:");
+  GST_INFO_OBJECT (jxsdec, "Output image configuration (%u fields):", n_fields);
   GST_INFO_OBJECT (jxsdec, " width: %u", cfg->width);
-  GST_INFO_OBJECT (jxsdec, " height: %u", cfg->height);
+  GST_INFO_OBJECT (jxsdec, " height: %u", cfg->height * n_fields);
   GST_INFO_OBJECT (jxsdec, " depth: %u", cfg->bit_depth);
   GST_INFO_OBJECT (jxsdec, " format: %s", format_to_format_name (cfg->format));
   GST_INFO_OBJECT (jxsdec, " components: %u", cfg->components_num);
@@ -418,7 +417,14 @@ gst_svt_jpeg_xs_dec_init_decoder (GstSvtJpegXsDec * jxsdec,
   jxsdec->output_state =
       gst_video_decoder_set_output_state (GST_VIDEO_DECODER (jxsdec), fmt,
-      cfg->width, cfg->height, jxsdec->input_state);
+      cfg->width, cfg->height * n_fields, jxsdec->input_state);
+
+  // The input caps have interlace-mode=fields since that seems the best
+  // mapping, but we will output interlace-mode=interleaved.
+  if (n_fields == 2) {
+    GST_VIDEO_INFO_INTERLACE_MODE (&jxsdec->output_state->info) =
+        GST_VIDEO_INTERLACE_MODE_INTERLEAVED;
+  }
 
   return GST_FLOW_OK;
 }
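As an aside, the negotiation this implements maps roughly as follows for a hypothetical 1080i stream whose per-field codestreams are 1920×540 (the raw format name is illustrative, any entry from SUPPORTED_FORMATS applies):

```
image/x-jxsc, alignment=frame, interlace-mode=fields,
              sampling=YCbCr-4:2:2, depth=10
        |
        v   (height = cfg->height * n_fields, mode rewritten by the code above)
video/x-raw,  format=I422_10LE, width=1920, height=1080,
              interlace-mode=interleaved
```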
@@ -426,7 +432,8 @@ gst_svt_jpeg_xs_dec_init_decoder (GstSvtJpegXsDec * jxsdec,
 
+// The codestream data is either a full progressive image or a single field.
 static GstFlowReturn
 gst_svt_jpeg_xs_dec_decode_codestream (GstSvtJpegXsDec * jxsdec,
-    GstVideoFrame * video_frame, const guint8 * data, gsize data_size)
+    guint field, guint n_fields, GstVideoFrame * video_frame,
+    const guint8 * data, gsize data_size)
 {
   // Decoder input/output frame struct
   svt_jpeg_xs_frame_t decoder_frame;
@@ -450,24 +457,30 @@ gst_svt_jpeg_xs_dec_decode_codestream (GstSvtJpegXsDec * jxsdec,
   svt_jpeg_xs_image_buffer_t img = { {0,}
   };
-  img.data_yuv[0] = GST_VIDEO_FRAME_PLANE_DATA (video_frame, 0);
-  img.data_yuv[1] = GST_VIDEO_FRAME_PLANE_DATA (video_frame, 1);
-  img.data_yuv[2] = GST_VIDEO_FRAME_PLANE_DATA (video_frame, 2);
+  img.data_yuv[0] = (guint8 *) GST_VIDEO_FRAME_PLANE_DATA (video_frame, 0)
+      + field * GST_VIDEO_FRAME_COMP_STRIDE (video_frame, 0);
+  img.data_yuv[1] = (guint8 *) GST_VIDEO_FRAME_PLANE_DATA (video_frame, 1)
+      + field * GST_VIDEO_FRAME_COMP_STRIDE (video_frame, 1);
+  img.data_yuv[2] = (guint8 *) GST_VIDEO_FRAME_PLANE_DATA (video_frame, 2)
+      + field * GST_VIDEO_FRAME_COMP_STRIDE (video_frame, 2);
 
   // Note: wants stride in pixels not in bytes (might need tweaks for 10-bit)
-  img.stride[0] = GST_VIDEO_FRAME_COMP_STRIDE (video_frame, 0)
+  img.stride[0] = n_fields * GST_VIDEO_FRAME_COMP_STRIDE (video_frame, 0)
       / GST_VIDEO_FRAME_COMP_PSTRIDE (video_frame, 0);
-  img.stride[1] = GST_VIDEO_FRAME_COMP_STRIDE (video_frame, 1)
+  img.stride[1] = n_fields * GST_VIDEO_FRAME_COMP_STRIDE (video_frame, 1)
       / GST_VIDEO_FRAME_COMP_PSTRIDE (video_frame, 1);
-  img.stride[2] = GST_VIDEO_FRAME_COMP_STRIDE (video_frame, 2)
+  img.stride[2] = n_fields * GST_VIDEO_FRAME_COMP_STRIDE (video_frame, 2)
       / GST_VIDEO_FRAME_COMP_PSTRIDE (video_frame, 2);
 
   img.alloc_size[0] = GST_VIDEO_FRAME_COMP_STRIDE (video_frame, 0)
-      * GST_VIDEO_FRAME_COMP_HEIGHT (video_frame, 0);
+      * GST_VIDEO_FRAME_COMP_HEIGHT (video_frame, 0)
+      - field * GST_VIDEO_FRAME_COMP_STRIDE (video_frame, 0);
   img.alloc_size[1] = GST_VIDEO_FRAME_COMP_STRIDE (video_frame, 1)
-      * GST_VIDEO_FRAME_COMP_HEIGHT (video_frame, 1);
+      * GST_VIDEO_FRAME_COMP_HEIGHT (video_frame, 1)
+      - field * GST_VIDEO_FRAME_COMP_STRIDE (video_frame, 1);
   img.alloc_size[2] = GST_VIDEO_FRAME_COMP_STRIDE (video_frame, 2)
-      * GST_VIDEO_FRAME_COMP_HEIGHT (video_frame, 2);
+      * GST_VIDEO_FRAME_COMP_HEIGHT (video_frame, 2)
+      - field * GST_VIDEO_FRAME_COMP_STRIDE (video_frame, 2);
 
   for (int i = 0; i < 3; ++i) {
     GST_TRACE_OBJECT (jxsdec, "img stride[%u] = %u, alloc_size[%u]: %u",
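In the interleaved layout each field occupies every other line of the full frame: the plane pointer is advanced by one line for the second field, and the stride is doubled when n_fields is 2, so consecutive decoded field lines land on alternating picture lines (alloc_size shrinks by the lines skipped at the top). A minimal sketch of that addressing, with a hypothetical helper name that is not part of the patch:

```c
/* Hypothetical helper (not in the patch): byte address of decoded field
 * line `line` of field `field` inside a line-interleaved full frame. */
static inline guint8 *
field_line_address (guint8 * plane, guint stride_bytes,
    guint field, guint n_fields, guint line)
{
  /* field 0 -> picture lines 0, 2, 4, ...; field 1 -> lines 1, 3, 5, ... */
  return plane + (field + line * n_fields) * stride_bytes;
}
```

This matches the patch: the base offset is `field * stride` and the effective stride handed to the decoder is `n_fields * stride`.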
@@ -479,7 +492,7 @@ gst_svt_jpeg_xs_dec_decode_codestream (GstSvtJpegXsDec * jxsdec,
   decoder_frame.user_prv_ctx_ptr = NULL;
 
-  GST_TRACE_OBJECT (jxsdec, "Sending frame to decoder ..");
+  GST_TRACE_OBJECT (jxsdec, "Sending frame or field to decoder ..");
 
   SvtJxsErrorType_t dec_ret;
@@ -538,6 +551,7 @@ gst_svt_jpeg_xs_dec_handle_frame (GstVideoDecoder * vdecoder,
 {
   GstSvtJpegXsDec *jxsdec = GST_SVT_JPEG_XS_DEC (vdecoder);
   GstFlowReturn flow = GST_FLOW_OK;
+  guint n_fields;
 
   GST_LOG_OBJECT (jxsdec, "Frame to decode, size: %zu bytes",
       gst_buffer_get_size (frame->input_buffer));
@@ -547,6 +561,12 @@ gst_svt_jpeg_xs_dec_handle_frame (GstVideoDecoder * vdecoder,
     return GST_FLOW_NOT_NEGOTIATED;
   }
 
+  if (GST_VIDEO_INFO_IS_INTERLACED (&jxsdec->input_state->info)) {
+    n_fields = 2;
+  } else {
+    n_fields = 1;
+  }
+
   GstMapInfo in_map = GST_MAP_INFO_INIT;
   GstVideoFrame video_frame = GST_VIDEO_FRAME_INIT;
@@ -555,7 +575,10 @@ gst_svt_jpeg_xs_dec_handle_frame (GstVideoDecoder * vdecoder,
     goto input_buffer_map_failure;
 
   if (jxsdec->jxs_decoder == NULL) {
-    flow = gst_svt_jpeg_xs_dec_init_decoder (jxsdec, in_map.data, in_map.size);
+    flow =
+        gst_svt_jpeg_xs_dec_init_decoder (jxsdec, n_fields, in_map.data,
+        in_map.size);
     if (flow != GST_FLOW_OK)
       goto out_unmap;
   }
@@ -572,8 +595,64 @@ gst_svt_jpeg_xs_dec_handle_frame (GstVideoDecoder * vdecoder,
           frame->output_buffer, GST_MAP_WRITE))
     goto output_frame_map_error;
 
-  /* XXX = */ gst_svt_jpeg_xs_dec_decode_codestream (jxsdec, &video_frame,
-      in_map.data, in_map.size);
+  // Decode frame or fields
+  gsize offset = 0;
+
+  for (guint field = 0; field < n_fields; ++field) {
+    // Figure out where the second codestream/field starts in the data
+    // (should be size/2 unless vbr encoding was used, which is unlikely).
+    if (field == 1) {
+      svt_jpeg_xs_image_config_t img_config;
+      SvtJxsErrorType_t parse_ret;
+      uint32_t frame_size = 0;
+
+      parse_ret = svt_jpeg_xs_decoder_get_single_frame_size (in_map.data,
+          in_map.size, &img_config, &frame_size, FAST_SEARCH);
+
+      // This shouldn't really happen because at this point we have already
+      // decoded the first field without errors. In the unlikely case it does
+      // happen, let's check the midpoint for the second codestream header and
+      // otherwise just decode the first field again into the second field.
+      if (parse_ret != SvtJxsErrorNone) {
+        if (in_map.size >= 8 &&
+            GST_READ_UINT32_BE (in_map.data + in_map.size / 2) == 0xff10ff50) {
+          frame_size = in_map.size / 2;
+        } else {
+          GST_ELEMENT_WARNING (jxsdec, STREAM, DECODE, (NULL),
+              ("Unexpected error 0x%08x getting codestream length of field 1 "
+                  "to determine offset of codestream for field 2. Will decode "
+                  "field 1 again into field 2", parse_ret));
+          frame_size = 0;
+        }
+      }
+
+      offset = frame_size;
+    }
+
+    if (n_fields == 2) {
+      GST_TRACE_OBJECT (jxsdec,
+          "Decoding field %u of 2 @ %zu", field + 1, offset);
+    }
+
+    flow = gst_svt_jpeg_xs_dec_decode_codestream (jxsdec, field, n_fields,
+        &video_frame, in_map.data + offset, in_map.size - offset);
+
+    if (flow != GST_FLOW_OK)
+      break;
+  }
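The midpoint fallback works because a JPEG XS codestream begins with the start-of-codestream marker 0xff10, which the code expects to be immediately followed by 0xff50, so a single big-endian 32-bit read is a cheap header check. If the midpoint heuristic ever proved too fragile, scanning for the same signature would be the obvious generalisation; a hypothetical sketch (find_second_codestream is not part of the patch):

```c
/* Hypothetical generalisation, not in the patch: scan for the start of the
 * second codestream instead of only checking the buffer midpoint. */
static gsize
find_second_codestream (const guint8 * data, gsize size)
{
  /* Start at 4 to skip the signature of the first codestream. */
  for (gsize off = 4; off + 4 <= size; off++) {
    if (GST_READ_UINT32_BE (data + off) == 0xff10ff50)
      return off;
  }
  return 0;                     /* not found: caller re-decodes field 1 */
}
```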
+  if (n_fields == 2) {
+    GST_BUFFER_FLAG_SET (video_frame.buffer, GST_VIDEO_BUFFER_FLAG_INTERLACED);
+    if ((video_frame.flags & GST_VIDEO_FRAME_FLAG_TFF)) {
+      GST_BUFFER_FLAG_SET (video_frame.buffer, GST_VIDEO_BUFFER_FLAG_TFF);
+    } else {
+      GST_BUFFER_FLAG_UNSET (video_frame.buffer, GST_VIDEO_BUFFER_FLAG_TFF);
+    }
+  } else {
+    GST_BUFFER_FLAG_UNSET (video_frame.buffer,
+        GST_VIDEO_BUFFER_FLAG_INTERLACED);
+    GST_BUFFER_FLAG_UNSET (video_frame.buffer, GST_VIDEO_BUFFER_FLAG_TFF);
+  }
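Because the output uses interlace-mode=interleaved rather than alternate, both fields travel in one buffer and these per-buffer flags carry the field order. A sketch of how a downstream element might read them, where `buf` is a hypothetical GstBuffer received on a sink pad:

```c
/* Sketch only: interpreting the flags set above on a received buffer. */
if (GST_BUFFER_FLAG_IS_SET (buf, GST_VIDEO_BUFFER_FLAG_INTERLACED)) {
  /* Both fields are line-interleaved within this single buffer. */
  gboolean tff = GST_BUFFER_FLAG_IS_SET (buf, GST_VIDEO_BUFFER_FLAG_TFF);

  GST_LOG ("interlaced frame, %s field first", tff ? "top" : "bottom");
}
```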
+
   // Unmap output frame and input buffer
   {