From cefd1a665f33a65b8cc38a8f90fe3383428813e3 Mon Sep 17 00:00:00 2001 From: He Junyan Date: Thu, 5 Mar 2020 17:56:51 +0800 Subject: [PATCH] libs: encoder: h265: Add ensure_tile to calculate tiles. We need consider tiles and slices together, separate tiles uniformly and then assign slices uniformly to each tiles. Part-of: --- gst-libs/gst/vaapi/gstvaapiencoder_h265.c | 310 ++++++++++++++++++++++ 1 file changed, 310 insertions(+) diff --git a/gst-libs/gst/vaapi/gstvaapiencoder_h265.c b/gst-libs/gst/vaapi/gstvaapiencoder_h265.c index 16fb90d720..f48cdaf4e5 100644 --- a/gst-libs/gst/vaapi/gstvaapiencoder_h265.c +++ b/gst-libs/gst/vaapi/gstvaapiencoder_h265.c @@ -119,6 +119,13 @@ struct _GstVaapiEncoderH265 gboolean low_delay_b; guint32 num_tile_cols; guint32 num_tile_rows; + /* CTUs start address used in stream pack */ + guint32 *tile_slice_address; + /* CTUs in this slice */ + guint32 *tile_slice_ctu_num; + /* map the tile_slice_address to CTU start address in picture, + which is used by VA API. */ + guint32 *tile_slice_address_map; /* maximum required size of the decoded picture buffer */ guint32 max_dec_pic_buffering; @@ -1774,6 +1781,11 @@ fill_sequence (GstVaapiEncoderH265 * encoder, GstVaapiEncSequence * sequence) return TRUE; } +/* CTUs in each tile column */ +static guint32 tile_ctu_cols[GST_VAAPI_H265_MAX_COL_TILES]; +/* CTUs in each tile row */ +static guint32 tile_ctu_rows[GST_VAAPI_H265_MAX_ROW_TILES]; + /* Fills in VA picture parameter buffer */ static gboolean fill_picture (GstVaapiEncoderH265 * encoder, GstVaapiEncPicture * picture, @@ -2374,6 +2386,299 @@ reset_properties (GstVaapiEncoderH265 * encoder) reorder_pool->frame_index = 0; } +static void +reset_tile (GstVaapiEncoderH265 * encoder) +{ + memset (tile_ctu_cols, 0, sizeof (tile_ctu_cols)); + memset (tile_ctu_rows, 0, sizeof (tile_ctu_rows)); + + if (encoder->tile_slice_address) + g_free (encoder->tile_slice_address); + encoder->tile_slice_address = NULL; + + if (encoder->tile_slice_ctu_num) + g_free (encoder->tile_slice_ctu_num); + encoder->tile_slice_ctu_num = NULL; + + if (encoder->tile_slice_address_map) + g_free (encoder->tile_slice_address_map); + encoder->tile_slice_address_map = NULL; +} + +static void +recalculate_slices_num_by_tile (GstVaapiEncoderH265 * encoder) +{ + GstVaapiDisplay *const display = GST_VAAPI_ENCODER_DISPLAY (encoder); + + /* If driver has the requirement that the slice should not span tiles, + we need to increase slice number if needed. */ + if (gst_vaapi_display_has_driver_quirks (display, + GST_VAAPI_DRIVER_QUIRK_HEVC_ENC_SLICE_NOT_SPAN_TILE)) { + if (encoder->num_slices < encoder->num_tile_cols * encoder->num_tile_rows) { + /* encoder->num_slices > 1 means user set it */ + if (encoder->num_slices > 1) + GST_WARNING ("user set num-slices to %d, which is smaller than tile" + " num %d. We should make slice not span tiles, just set the" + " num-slices to tile num here.", + encoder->num_slices, + encoder->num_tile_cols * encoder->num_tile_rows); + else + GST_INFO ("set default slice num to %d, the same as the tile num.", + encoder->num_tile_cols * encoder->num_tile_rows); + encoder->num_slices = encoder->num_tile_cols * encoder->num_tile_rows; + } + } +} + +static GstVaapiEncoderStatus +calculate_slices_start_address (GstVaapiEncoderH265 * encoder) +{ + GstVaapiDisplay *const display = GST_VAAPI_ENCODER_DISPLAY (encoder); + guint32 ctu_per_slice; + guint32 left_slices; + gint32 i, j, k; + + /* If driver has the requirement that the slice should not span tiles, + firstly we should scatter slices uniformly into each tile, bigger + tile gets more slices. Then we should assign CTUs within one tile + uniformly to each slice in that tile. */ + if (gst_vaapi_display_has_driver_quirks (display, + GST_VAAPI_DRIVER_QUIRK_HEVC_ENC_SLICE_NOT_SPAN_TILE)) { + guint32 *slices_per_tile = g_malloc (encoder->num_tile_cols * + encoder->num_tile_rows * sizeof (guint32)); + if (!slices_per_tile) + return GST_VAAPI_ENCODER_STATUS_ERROR_ALLOCATION_FAILED; + + ctu_per_slice = (encoder->ctu_width * encoder->ctu_height + + encoder->num_slices - 1) / encoder->num_slices; + g_assert (ctu_per_slice > 0); + left_slices = encoder->num_slices; + + for (i = 0; i < encoder->num_tile_cols * encoder->num_tile_rows; i++) { + slices_per_tile[i] = 1; + left_slices--; + } + while (left_slices) { + /* Find the biggest CTUs/slices, and assign more. */ + gfloat largest = 0.0f; + k = -1; + for (i = 0; i < encoder->num_tile_cols * encoder->num_tile_rows; i++) { + gfloat f; + f = ((gfloat) (tile_ctu_cols[i % encoder->num_tile_cols] * + tile_ctu_rows[i / encoder->num_tile_cols])) / + (gfloat) slices_per_tile[i]; + g_assert (f >= 1.0f); + if (f > largest) { + k = i; + largest = f; + } + } + + g_assert (k >= 0); + slices_per_tile[k]++; + left_slices--; + } + + /* Assign CTUs in one tile uniformly to each slice. Note: the slice start + address is CTB address in tile scan(see spec 6.5), that is, we accumulate + all CTUs in tile0, then tile1, and tile2..., not from the picture's + perspective. */ + encoder->tile_slice_address[0] = 0; + k = 1; + for (i = 0; i < encoder->num_tile_rows; i++) { + for (j = 0; j < encoder->num_tile_cols; j++) { + guint32 s_num = slices_per_tile[i * encoder->num_tile_cols + j]; + guint32 one_tile_ctus = tile_ctu_cols[j] * tile_ctu_rows[i]; + guint32 s; + + GST_LOG ("Tile(row %d col %d), has CTU in col %d," + " CTU in row is %d, total CTU %d, assigned %d slices", i, j, + tile_ctu_cols[j], tile_ctu_rows[i], one_tile_ctus, s_num); + + g_assert (s_num > 0); + for (s = 0; s < s_num; s++) { + encoder->tile_slice_address[k] = + encoder->tile_slice_address[k - 1] + ((s + + 1) * one_tile_ctus) / s_num - (s * one_tile_ctus) / s_num; + encoder->tile_slice_ctu_num[k - 1] = + encoder->tile_slice_address[k] - encoder->tile_slice_address[k - + 1]; + k++; + } + } + } + + g_assert (k == encoder->num_slices + 1); + /* Calculate the last one */ + encoder->tile_slice_ctu_num[encoder->num_slices - 1] = + encoder->ctu_width * encoder->ctu_height - + encoder->tile_slice_address[encoder->num_slices - 1]; + + g_free (slices_per_tile); + } + /* The easy way, just assign CTUs to each slice uniformly */ + else { + ctu_per_slice = (encoder->ctu_width * encoder->ctu_height + + encoder->num_slices - 1) / encoder->num_slices; + g_assert (ctu_per_slice > 0); + + for (i = 0; i < encoder->num_slices - 1; i++) + encoder->tile_slice_ctu_num[i] = ctu_per_slice; + encoder->tile_slice_ctu_num[encoder->num_slices - 1] = + encoder->ctu_width * encoder->ctu_height - + (encoder->num_slices - 1) * ctu_per_slice; + + encoder->tile_slice_address[0] = 0; + for (i = 1; i <= encoder->num_slices; i++) + encoder->tile_slice_address[i] = encoder->tile_slice_address[i - 1] + + encoder->tile_slice_ctu_num[i - 1]; + } + + return GST_VAAPI_ENCODER_STATUS_SUCCESS; +} + +static GstVaapiEncoderStatus +ensure_tile (GstVaapiEncoderH265 * encoder) +{ + gint32 i, j, k; + guint32 ctu_tile_width_accu[GST_VAAPI_H265_MAX_COL_TILES + 1]; + guint32 ctu_tile_height_accu[GST_VAAPI_H265_MAX_ROW_TILES + 1]; + guint32 num_slices; + GstVaapiEncoderStatus ret; + + reset_tile (encoder); + + if (!h265_is_tile_enabled (encoder)) + return GST_VAAPI_ENCODER_STATUS_SUCCESS; + + if (!gst_vaapi_encoder_ensure_tile_support (GST_VAAPI_ENCODER (encoder), + encoder->profile, encoder->entrypoint)) { + GST_ERROR ("The profile:%s, entrypoint:%d does not support tile.", + gst_vaapi_utils_h265_get_profile_string (encoder->profile), + encoder->entrypoint); + return GST_VAAPI_ENCODER_STATUS_ERROR_UNKNOWN; + } + + if (encoder->num_tile_cols > + gst_vaapi_utils_h265_get_level_limits (encoder->level)->MaxTileColumns) { + GST_ERROR ("num_tile_cols:%d exceeds MaxTileColumns:%d", + encoder->num_tile_cols, + gst_vaapi_utils_h265_get_level_limits (encoder->level)->MaxTileColumns); + return GST_VAAPI_ENCODER_STATUS_ERROR_UNKNOWN; + } + if (encoder->num_tile_rows > + gst_vaapi_utils_h265_get_level_limits (encoder->level)->MaxTileRows) { + GST_ERROR ("num_tile_rows:%d exceeds MaxTileRows:%d", + encoder->num_tile_rows, + gst_vaapi_utils_h265_get_level_limits (encoder->level)->MaxTileRows); + return GST_VAAPI_ENCODER_STATUS_ERROR_UNKNOWN; + } + + if (encoder->ctu_width < encoder->num_tile_cols) { + GST_WARNING + ("Only %d CTUs in width, not enough to split into %d tile columns", + encoder->ctu_width, encoder->num_tile_cols); + return GST_VAAPI_ENCODER_STATUS_ERROR_UNKNOWN; + } + if (encoder->ctu_height < encoder->num_tile_rows) { + GST_WARNING + ("Only %d CTUs in height, not enough to split into %d tile rows", + encoder->ctu_height, encoder->num_tile_rows); + return GST_VAAPI_ENCODER_STATUS_ERROR_UNKNOWN; + } + + recalculate_slices_num_by_tile (encoder); + + /* ensure not exceed max supported slices */ + num_slices = encoder->num_slices; + gst_vaapi_encoder_ensure_num_slices (GST_VAAPI_ENCODER_CAST (encoder), + encoder->profile, encoder->entrypoint, + (encoder->ctu_width * encoder->ctu_height + 1) / 2, &num_slices); + if (num_slices != encoder->num_slices) { + GST_ERROR ("The tile setting need at least %d slices, but the max" + " slice number is just %d", encoder->num_slices, num_slices); + return GST_VAAPI_ENCODER_STATUS_ERROR_UNKNOWN; + } + + encoder->tile_slice_address = + /* Add one as sentinel, hold val to calculate ctu_num */ + g_malloc ((encoder->num_slices + 1) * sizeof (guint32)); + if (!encoder->tile_slice_address) + return GST_VAAPI_ENCODER_STATUS_ERROR_ALLOCATION_FAILED; + encoder->tile_slice_ctu_num = + g_malloc (encoder->num_slices * sizeof (guint32)); + if (!encoder->tile_slice_ctu_num) + return GST_VAAPI_ENCODER_STATUS_ERROR_ALLOCATION_FAILED; + encoder->tile_slice_address_map = + g_malloc (encoder->ctu_width * encoder->ctu_height * sizeof (guint32)); + if (!encoder->tile_slice_address_map) + return GST_VAAPI_ENCODER_STATUS_ERROR_ALLOCATION_FAILED; + + /* firstly uniformly separate CTUs into tiles, as the spec 6.5.1 define */ + for (i = 0; i < encoder->num_tile_cols; i++) + tile_ctu_cols[i] = + ((i + 1) * encoder->ctu_width) / encoder->num_tile_cols - + (i * encoder->ctu_width) / encoder->num_tile_cols; + for (i = 0; i < encoder->num_tile_rows; i++) + tile_ctu_rows[i] = + ((i + 1) * encoder->ctu_height) / encoder->num_tile_rows - + (i * encoder->ctu_height) / encoder->num_tile_rows; + + ret = calculate_slices_start_address (encoder); + if (ret != GST_VAAPI_ENCODER_STATUS_SUCCESS) + return ret; + + /* Build the map to specifying the conversion between a CTB address in CTB + raster scan of a picture and a CTB address in tile scan(see spec 6.5.1 + for details). */ + ctu_tile_width_accu[0] = 0; + for (i = 1; i <= encoder->num_tile_cols; i++) + ctu_tile_width_accu[i] = ctu_tile_width_accu[i - 1] + tile_ctu_cols[i - 1]; + ctu_tile_height_accu[0] = 0; + for (i = 1; i <= encoder->num_tile_rows; i++) + ctu_tile_height_accu[i] = + ctu_tile_height_accu[i - 1] + tile_ctu_rows[i - 1]; + + for (k = 0; k < encoder->ctu_width * encoder->ctu_height; k++) { + /* The ctu coordinate in the picture. */ + guint32 x = k % encoder->ctu_width; + guint32 y = k / encoder->ctu_width; + /* The ctu coordinate in the tile mode. */ + guint32 tile_x = 0; + guint32 tile_y = 0; + /* The index of the CTU in the tile mode. */ + guint32 tso = 0; + + for (i = 0; i < encoder->num_tile_cols; i++) + if (x >= ctu_tile_width_accu[i]) + tile_x = i; + g_assert (tile_x <= encoder->num_tile_cols - 1); + + for (j = 0; j < encoder->num_tile_rows; j++) + if (y >= ctu_tile_height_accu[j]) + tile_y = j; + g_assert (tile_y <= encoder->num_tile_rows - 1); + + /* add all ctus in the tiles the same line before us */ + for (i = 0; i < tile_x; i++) + tso += tile_ctu_rows[tile_y] * tile_ctu_cols[i]; + + /* add all ctus in the tiles above us */ + for (j = 0; j < tile_y; j++) + tso += encoder->ctu_width * tile_ctu_rows[j]; + + /* add the ctus inside the same tile before us */ + tso += (y - ctu_tile_height_accu[tile_y]) * tile_ctu_cols[tile_x] + + x - ctu_tile_width_accu[tile_x]; + + g_assert (tso < encoder->ctu_width * encoder->ctu_height); + + encoder->tile_slice_address_map[tso] = k; + } + + return GST_VAAPI_ENCODER_STATUS_SUCCESS; +} + static GstVaapiEncoderStatus gst_vaapi_encoder_h265_encode (GstVaapiEncoder * base_encoder, GstVaapiEncPicture * picture, GstVaapiCodedBufferProxy * codedbuf) @@ -2837,6 +3142,9 @@ gst_vaapi_encoder_h265_reconfigure (GstVaapiEncoder * base_encoder) } reset_properties (encoder); + status = ensure_tile (encoder); + if (status != GST_VAAPI_ENCODER_STATUS_SUCCESS) + return status; ensure_control_rate_params (encoder); return set_context_info (base_encoder); } @@ -2907,6 +3215,8 @@ gst_vaapi_encoder_h265_finalize (GObject * object) } g_queue_clear (&reorder_pool->reorder_frame_list); + reset_tile (encoder); + G_OBJECT_CLASS (gst_vaapi_encoder_h265_parent_class)->finalize (object); }