mirror of
https://gitlab.freedesktop.org/gstreamer/gstreamer.git
synced 2024-12-01 22:21:13 +00:00
libs: encoder: h265: Add ensure_tile to calculate tiles.
We need consider tiles and slices together, separate tiles uniformly and then assign slices uniformly to each tiles. Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer-vaapi/-/merge_requests/294>
This commit is contained in:
parent
32db615685
commit
cefd1a665f
1 changed files with 310 additions and 0 deletions
|
@ -119,6 +119,13 @@ struct _GstVaapiEncoderH265
|
||||||
gboolean low_delay_b;
|
gboolean low_delay_b;
|
||||||
guint32 num_tile_cols;
|
guint32 num_tile_cols;
|
||||||
guint32 num_tile_rows;
|
guint32 num_tile_rows;
|
||||||
|
/* CTUs start address used in stream pack */
|
||||||
|
guint32 *tile_slice_address;
|
||||||
|
/* CTUs in this slice */
|
||||||
|
guint32 *tile_slice_ctu_num;
|
||||||
|
/* map the tile_slice_address to CTU start address in picture,
|
||||||
|
which is used by VA API. */
|
||||||
|
guint32 *tile_slice_address_map;
|
||||||
|
|
||||||
/* maximum required size of the decoded picture buffer */
|
/* maximum required size of the decoded picture buffer */
|
||||||
guint32 max_dec_pic_buffering;
|
guint32 max_dec_pic_buffering;
|
||||||
|
@ -1774,6 +1781,11 @@ fill_sequence (GstVaapiEncoderH265 * encoder, GstVaapiEncSequence * sequence)
|
||||||
return TRUE;
|
return TRUE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* CTUs in each tile column */
|
||||||
|
static guint32 tile_ctu_cols[GST_VAAPI_H265_MAX_COL_TILES];
|
||||||
|
/* CTUs in each tile row */
|
||||||
|
static guint32 tile_ctu_rows[GST_VAAPI_H265_MAX_ROW_TILES];
|
||||||
|
|
||||||
/* Fills in VA picture parameter buffer */
|
/* Fills in VA picture parameter buffer */
|
||||||
static gboolean
|
static gboolean
|
||||||
fill_picture (GstVaapiEncoderH265 * encoder, GstVaapiEncPicture * picture,
|
fill_picture (GstVaapiEncoderH265 * encoder, GstVaapiEncPicture * picture,
|
||||||
|
@ -2374,6 +2386,299 @@ reset_properties (GstVaapiEncoderH265 * encoder)
|
||||||
reorder_pool->frame_index = 0;
|
reorder_pool->frame_index = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
reset_tile (GstVaapiEncoderH265 * encoder)
|
||||||
|
{
|
||||||
|
memset (tile_ctu_cols, 0, sizeof (tile_ctu_cols));
|
||||||
|
memset (tile_ctu_rows, 0, sizeof (tile_ctu_rows));
|
||||||
|
|
||||||
|
if (encoder->tile_slice_address)
|
||||||
|
g_free (encoder->tile_slice_address);
|
||||||
|
encoder->tile_slice_address = NULL;
|
||||||
|
|
||||||
|
if (encoder->tile_slice_ctu_num)
|
||||||
|
g_free (encoder->tile_slice_ctu_num);
|
||||||
|
encoder->tile_slice_ctu_num = NULL;
|
||||||
|
|
||||||
|
if (encoder->tile_slice_address_map)
|
||||||
|
g_free (encoder->tile_slice_address_map);
|
||||||
|
encoder->tile_slice_address_map = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
recalculate_slices_num_by_tile (GstVaapiEncoderH265 * encoder)
|
||||||
|
{
|
||||||
|
GstVaapiDisplay *const display = GST_VAAPI_ENCODER_DISPLAY (encoder);
|
||||||
|
|
||||||
|
/* If driver has the requirement that the slice should not span tiles,
|
||||||
|
we need to increase slice number if needed. */
|
||||||
|
if (gst_vaapi_display_has_driver_quirks (display,
|
||||||
|
GST_VAAPI_DRIVER_QUIRK_HEVC_ENC_SLICE_NOT_SPAN_TILE)) {
|
||||||
|
if (encoder->num_slices < encoder->num_tile_cols * encoder->num_tile_rows) {
|
||||||
|
/* encoder->num_slices > 1 means user set it */
|
||||||
|
if (encoder->num_slices > 1)
|
||||||
|
GST_WARNING ("user set num-slices to %d, which is smaller than tile"
|
||||||
|
" num %d. We should make slice not span tiles, just set the"
|
||||||
|
" num-slices to tile num here.",
|
||||||
|
encoder->num_slices,
|
||||||
|
encoder->num_tile_cols * encoder->num_tile_rows);
|
||||||
|
else
|
||||||
|
GST_INFO ("set default slice num to %d, the same as the tile num.",
|
||||||
|
encoder->num_tile_cols * encoder->num_tile_rows);
|
||||||
|
encoder->num_slices = encoder->num_tile_cols * encoder->num_tile_rows;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static GstVaapiEncoderStatus
|
||||||
|
calculate_slices_start_address (GstVaapiEncoderH265 * encoder)
|
||||||
|
{
|
||||||
|
GstVaapiDisplay *const display = GST_VAAPI_ENCODER_DISPLAY (encoder);
|
||||||
|
guint32 ctu_per_slice;
|
||||||
|
guint32 left_slices;
|
||||||
|
gint32 i, j, k;
|
||||||
|
|
||||||
|
/* If driver has the requirement that the slice should not span tiles,
|
||||||
|
firstly we should scatter slices uniformly into each tile, bigger
|
||||||
|
tile gets more slices. Then we should assign CTUs within one tile
|
||||||
|
uniformly to each slice in that tile. */
|
||||||
|
if (gst_vaapi_display_has_driver_quirks (display,
|
||||||
|
GST_VAAPI_DRIVER_QUIRK_HEVC_ENC_SLICE_NOT_SPAN_TILE)) {
|
||||||
|
guint32 *slices_per_tile = g_malloc (encoder->num_tile_cols *
|
||||||
|
encoder->num_tile_rows * sizeof (guint32));
|
||||||
|
if (!slices_per_tile)
|
||||||
|
return GST_VAAPI_ENCODER_STATUS_ERROR_ALLOCATION_FAILED;
|
||||||
|
|
||||||
|
ctu_per_slice = (encoder->ctu_width * encoder->ctu_height +
|
||||||
|
encoder->num_slices - 1) / encoder->num_slices;
|
||||||
|
g_assert (ctu_per_slice > 0);
|
||||||
|
left_slices = encoder->num_slices;
|
||||||
|
|
||||||
|
for (i = 0; i < encoder->num_tile_cols * encoder->num_tile_rows; i++) {
|
||||||
|
slices_per_tile[i] = 1;
|
||||||
|
left_slices--;
|
||||||
|
}
|
||||||
|
while (left_slices) {
|
||||||
|
/* Find the biggest CTUs/slices, and assign more. */
|
||||||
|
gfloat largest = 0.0f;
|
||||||
|
k = -1;
|
||||||
|
for (i = 0; i < encoder->num_tile_cols * encoder->num_tile_rows; i++) {
|
||||||
|
gfloat f;
|
||||||
|
f = ((gfloat) (tile_ctu_cols[i % encoder->num_tile_cols] *
|
||||||
|
tile_ctu_rows[i / encoder->num_tile_cols])) /
|
||||||
|
(gfloat) slices_per_tile[i];
|
||||||
|
g_assert (f >= 1.0f);
|
||||||
|
if (f > largest) {
|
||||||
|
k = i;
|
||||||
|
largest = f;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
g_assert (k >= 0);
|
||||||
|
slices_per_tile[k]++;
|
||||||
|
left_slices--;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Assign CTUs in one tile uniformly to each slice. Note: the slice start
|
||||||
|
address is CTB address in tile scan(see spec 6.5), that is, we accumulate
|
||||||
|
all CTUs in tile0, then tile1, and tile2..., not from the picture's
|
||||||
|
perspective. */
|
||||||
|
encoder->tile_slice_address[0] = 0;
|
||||||
|
k = 1;
|
||||||
|
for (i = 0; i < encoder->num_tile_rows; i++) {
|
||||||
|
for (j = 0; j < encoder->num_tile_cols; j++) {
|
||||||
|
guint32 s_num = slices_per_tile[i * encoder->num_tile_cols + j];
|
||||||
|
guint32 one_tile_ctus = tile_ctu_cols[j] * tile_ctu_rows[i];
|
||||||
|
guint32 s;
|
||||||
|
|
||||||
|
GST_LOG ("Tile(row %d col %d), has CTU in col %d,"
|
||||||
|
" CTU in row is %d, total CTU %d, assigned %d slices", i, j,
|
||||||
|
tile_ctu_cols[j], tile_ctu_rows[i], one_tile_ctus, s_num);
|
||||||
|
|
||||||
|
g_assert (s_num > 0);
|
||||||
|
for (s = 0; s < s_num; s++) {
|
||||||
|
encoder->tile_slice_address[k] =
|
||||||
|
encoder->tile_slice_address[k - 1] + ((s +
|
||||||
|
1) * one_tile_ctus) / s_num - (s * one_tile_ctus) / s_num;
|
||||||
|
encoder->tile_slice_ctu_num[k - 1] =
|
||||||
|
encoder->tile_slice_address[k] - encoder->tile_slice_address[k -
|
||||||
|
1];
|
||||||
|
k++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
g_assert (k == encoder->num_slices + 1);
|
||||||
|
/* Calculate the last one */
|
||||||
|
encoder->tile_slice_ctu_num[encoder->num_slices - 1] =
|
||||||
|
encoder->ctu_width * encoder->ctu_height -
|
||||||
|
encoder->tile_slice_address[encoder->num_slices - 1];
|
||||||
|
|
||||||
|
g_free (slices_per_tile);
|
||||||
|
}
|
||||||
|
/* The easy way, just assign CTUs to each slice uniformly */
|
||||||
|
else {
|
||||||
|
ctu_per_slice = (encoder->ctu_width * encoder->ctu_height +
|
||||||
|
encoder->num_slices - 1) / encoder->num_slices;
|
||||||
|
g_assert (ctu_per_slice > 0);
|
||||||
|
|
||||||
|
for (i = 0; i < encoder->num_slices - 1; i++)
|
||||||
|
encoder->tile_slice_ctu_num[i] = ctu_per_slice;
|
||||||
|
encoder->tile_slice_ctu_num[encoder->num_slices - 1] =
|
||||||
|
encoder->ctu_width * encoder->ctu_height -
|
||||||
|
(encoder->num_slices - 1) * ctu_per_slice;
|
||||||
|
|
||||||
|
encoder->tile_slice_address[0] = 0;
|
||||||
|
for (i = 1; i <= encoder->num_slices; i++)
|
||||||
|
encoder->tile_slice_address[i] = encoder->tile_slice_address[i - 1] +
|
||||||
|
encoder->tile_slice_ctu_num[i - 1];
|
||||||
|
}
|
||||||
|
|
||||||
|
return GST_VAAPI_ENCODER_STATUS_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
static GstVaapiEncoderStatus
|
||||||
|
ensure_tile (GstVaapiEncoderH265 * encoder)
|
||||||
|
{
|
||||||
|
gint32 i, j, k;
|
||||||
|
guint32 ctu_tile_width_accu[GST_VAAPI_H265_MAX_COL_TILES + 1];
|
||||||
|
guint32 ctu_tile_height_accu[GST_VAAPI_H265_MAX_ROW_TILES + 1];
|
||||||
|
guint32 num_slices;
|
||||||
|
GstVaapiEncoderStatus ret;
|
||||||
|
|
||||||
|
reset_tile (encoder);
|
||||||
|
|
||||||
|
if (!h265_is_tile_enabled (encoder))
|
||||||
|
return GST_VAAPI_ENCODER_STATUS_SUCCESS;
|
||||||
|
|
||||||
|
if (!gst_vaapi_encoder_ensure_tile_support (GST_VAAPI_ENCODER (encoder),
|
||||||
|
encoder->profile, encoder->entrypoint)) {
|
||||||
|
GST_ERROR ("The profile:%s, entrypoint:%d does not support tile.",
|
||||||
|
gst_vaapi_utils_h265_get_profile_string (encoder->profile),
|
||||||
|
encoder->entrypoint);
|
||||||
|
return GST_VAAPI_ENCODER_STATUS_ERROR_UNKNOWN;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (encoder->num_tile_cols >
|
||||||
|
gst_vaapi_utils_h265_get_level_limits (encoder->level)->MaxTileColumns) {
|
||||||
|
GST_ERROR ("num_tile_cols:%d exceeds MaxTileColumns:%d",
|
||||||
|
encoder->num_tile_cols,
|
||||||
|
gst_vaapi_utils_h265_get_level_limits (encoder->level)->MaxTileColumns);
|
||||||
|
return GST_VAAPI_ENCODER_STATUS_ERROR_UNKNOWN;
|
||||||
|
}
|
||||||
|
if (encoder->num_tile_rows >
|
||||||
|
gst_vaapi_utils_h265_get_level_limits (encoder->level)->MaxTileRows) {
|
||||||
|
GST_ERROR ("num_tile_rows:%d exceeds MaxTileRows:%d",
|
||||||
|
encoder->num_tile_rows,
|
||||||
|
gst_vaapi_utils_h265_get_level_limits (encoder->level)->MaxTileRows);
|
||||||
|
return GST_VAAPI_ENCODER_STATUS_ERROR_UNKNOWN;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (encoder->ctu_width < encoder->num_tile_cols) {
|
||||||
|
GST_WARNING
|
||||||
|
("Only %d CTUs in width, not enough to split into %d tile columns",
|
||||||
|
encoder->ctu_width, encoder->num_tile_cols);
|
||||||
|
return GST_VAAPI_ENCODER_STATUS_ERROR_UNKNOWN;
|
||||||
|
}
|
||||||
|
if (encoder->ctu_height < encoder->num_tile_rows) {
|
||||||
|
GST_WARNING
|
||||||
|
("Only %d CTUs in height, not enough to split into %d tile rows",
|
||||||
|
encoder->ctu_height, encoder->num_tile_rows);
|
||||||
|
return GST_VAAPI_ENCODER_STATUS_ERROR_UNKNOWN;
|
||||||
|
}
|
||||||
|
|
||||||
|
recalculate_slices_num_by_tile (encoder);
|
||||||
|
|
||||||
|
/* ensure not exceed max supported slices */
|
||||||
|
num_slices = encoder->num_slices;
|
||||||
|
gst_vaapi_encoder_ensure_num_slices (GST_VAAPI_ENCODER_CAST (encoder),
|
||||||
|
encoder->profile, encoder->entrypoint,
|
||||||
|
(encoder->ctu_width * encoder->ctu_height + 1) / 2, &num_slices);
|
||||||
|
if (num_slices != encoder->num_slices) {
|
||||||
|
GST_ERROR ("The tile setting need at least %d slices, but the max"
|
||||||
|
" slice number is just %d", encoder->num_slices, num_slices);
|
||||||
|
return GST_VAAPI_ENCODER_STATUS_ERROR_UNKNOWN;
|
||||||
|
}
|
||||||
|
|
||||||
|
encoder->tile_slice_address =
|
||||||
|
/* Add one as sentinel, hold val to calculate ctu_num */
|
||||||
|
g_malloc ((encoder->num_slices + 1) * sizeof (guint32));
|
||||||
|
if (!encoder->tile_slice_address)
|
||||||
|
return GST_VAAPI_ENCODER_STATUS_ERROR_ALLOCATION_FAILED;
|
||||||
|
encoder->tile_slice_ctu_num =
|
||||||
|
g_malloc (encoder->num_slices * sizeof (guint32));
|
||||||
|
if (!encoder->tile_slice_ctu_num)
|
||||||
|
return GST_VAAPI_ENCODER_STATUS_ERROR_ALLOCATION_FAILED;
|
||||||
|
encoder->tile_slice_address_map =
|
||||||
|
g_malloc (encoder->ctu_width * encoder->ctu_height * sizeof (guint32));
|
||||||
|
if (!encoder->tile_slice_address_map)
|
||||||
|
return GST_VAAPI_ENCODER_STATUS_ERROR_ALLOCATION_FAILED;
|
||||||
|
|
||||||
|
/* firstly uniformly separate CTUs into tiles, as the spec 6.5.1 define */
|
||||||
|
for (i = 0; i < encoder->num_tile_cols; i++)
|
||||||
|
tile_ctu_cols[i] =
|
||||||
|
((i + 1) * encoder->ctu_width) / encoder->num_tile_cols -
|
||||||
|
(i * encoder->ctu_width) / encoder->num_tile_cols;
|
||||||
|
for (i = 0; i < encoder->num_tile_rows; i++)
|
||||||
|
tile_ctu_rows[i] =
|
||||||
|
((i + 1) * encoder->ctu_height) / encoder->num_tile_rows -
|
||||||
|
(i * encoder->ctu_height) / encoder->num_tile_rows;
|
||||||
|
|
||||||
|
ret = calculate_slices_start_address (encoder);
|
||||||
|
if (ret != GST_VAAPI_ENCODER_STATUS_SUCCESS)
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
/* Build the map to specifying the conversion between a CTB address in CTB
|
||||||
|
raster scan of a picture and a CTB address in tile scan(see spec 6.5.1
|
||||||
|
for details). */
|
||||||
|
ctu_tile_width_accu[0] = 0;
|
||||||
|
for (i = 1; i <= encoder->num_tile_cols; i++)
|
||||||
|
ctu_tile_width_accu[i] = ctu_tile_width_accu[i - 1] + tile_ctu_cols[i - 1];
|
||||||
|
ctu_tile_height_accu[0] = 0;
|
||||||
|
for (i = 1; i <= encoder->num_tile_rows; i++)
|
||||||
|
ctu_tile_height_accu[i] =
|
||||||
|
ctu_tile_height_accu[i - 1] + tile_ctu_rows[i - 1];
|
||||||
|
|
||||||
|
for (k = 0; k < encoder->ctu_width * encoder->ctu_height; k++) {
|
||||||
|
/* The ctu coordinate in the picture. */
|
||||||
|
guint32 x = k % encoder->ctu_width;
|
||||||
|
guint32 y = k / encoder->ctu_width;
|
||||||
|
/* The ctu coordinate in the tile mode. */
|
||||||
|
guint32 tile_x = 0;
|
||||||
|
guint32 tile_y = 0;
|
||||||
|
/* The index of the CTU in the tile mode. */
|
||||||
|
guint32 tso = 0;
|
||||||
|
|
||||||
|
for (i = 0; i < encoder->num_tile_cols; i++)
|
||||||
|
if (x >= ctu_tile_width_accu[i])
|
||||||
|
tile_x = i;
|
||||||
|
g_assert (tile_x <= encoder->num_tile_cols - 1);
|
||||||
|
|
||||||
|
for (j = 0; j < encoder->num_tile_rows; j++)
|
||||||
|
if (y >= ctu_tile_height_accu[j])
|
||||||
|
tile_y = j;
|
||||||
|
g_assert (tile_y <= encoder->num_tile_rows - 1);
|
||||||
|
|
||||||
|
/* add all ctus in the tiles the same line before us */
|
||||||
|
for (i = 0; i < tile_x; i++)
|
||||||
|
tso += tile_ctu_rows[tile_y] * tile_ctu_cols[i];
|
||||||
|
|
||||||
|
/* add all ctus in the tiles above us */
|
||||||
|
for (j = 0; j < tile_y; j++)
|
||||||
|
tso += encoder->ctu_width * tile_ctu_rows[j];
|
||||||
|
|
||||||
|
/* add the ctus inside the same tile before us */
|
||||||
|
tso += (y - ctu_tile_height_accu[tile_y]) * tile_ctu_cols[tile_x]
|
||||||
|
+ x - ctu_tile_width_accu[tile_x];
|
||||||
|
|
||||||
|
g_assert (tso < encoder->ctu_width * encoder->ctu_height);
|
||||||
|
|
||||||
|
encoder->tile_slice_address_map[tso] = k;
|
||||||
|
}
|
||||||
|
|
||||||
|
return GST_VAAPI_ENCODER_STATUS_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
static GstVaapiEncoderStatus
|
static GstVaapiEncoderStatus
|
||||||
gst_vaapi_encoder_h265_encode (GstVaapiEncoder * base_encoder,
|
gst_vaapi_encoder_h265_encode (GstVaapiEncoder * base_encoder,
|
||||||
GstVaapiEncPicture * picture, GstVaapiCodedBufferProxy * codedbuf)
|
GstVaapiEncPicture * picture, GstVaapiCodedBufferProxy * codedbuf)
|
||||||
|
@ -2837,6 +3142,9 @@ gst_vaapi_encoder_h265_reconfigure (GstVaapiEncoder * base_encoder)
|
||||||
}
|
}
|
||||||
|
|
||||||
reset_properties (encoder);
|
reset_properties (encoder);
|
||||||
|
status = ensure_tile (encoder);
|
||||||
|
if (status != GST_VAAPI_ENCODER_STATUS_SUCCESS)
|
||||||
|
return status;
|
||||||
ensure_control_rate_params (encoder);
|
ensure_control_rate_params (encoder);
|
||||||
return set_context_info (base_encoder);
|
return set_context_info (base_encoder);
|
||||||
}
|
}
|
||||||
|
@ -2907,6 +3215,8 @@ gst_vaapi_encoder_h265_finalize (GObject * object)
|
||||||
}
|
}
|
||||||
g_queue_clear (&reorder_pool->reorder_frame_list);
|
g_queue_clear (&reorder_pool->reorder_frame_list);
|
||||||
|
|
||||||
|
reset_tile (encoder);
|
||||||
|
|
||||||
G_OBJECT_CLASS (gst_vaapi_encoder_h265_parent_class)->finalize (object);
|
G_OBJECT_CLASS (gst_vaapi_encoder_h265_parent_class)->finalize (object);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue