gstreamer/gst-libs/gst/video/video-scaler.c
Sebastian Dröge 166f979761 video-scaler: Ensure we're always using the correct lines for scaling in gst_video_scaler_2d()
Especially for interlaced input make sure to
  a) never mix both fields
  b) never read lines after the end of the input frame
  c) allocate enough space in the temporary lines to not write outside
     the allocated memory area

This fixes various memory corruptions and rescaling artefacts.
2019-11-03 10:46:43 +01:00

1640 lines
42 KiB
C

/* GStreamer
* Copyright (C) <2014> Wim Taymans <wim.taymans@gmail.com>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
#include <string.h>
#include <stdio.h>
#include <math.h>
/**
* SECTION:gstvideoscaler
* @title: GstVideoScaler
* @short_description: Utility object for rescaling video frames
*
* #GstVideoScaler is a utility object for rescaling and resampling
* video frames using various interpolation / sampling methods.
*
*/
#ifndef DISABLE_ORC
#include <orc/orcfunctions.h>
#else
#define orc_memcpy memcpy
#endif
#include "video-orc.h"
#include "video-scaler.h"
#ifndef GST_DISABLE_GST_DEBUG
#define GST_CAT_DEFAULT ensure_debug_category()
static GstDebugCategory *
ensure_debug_category (void)
{
static gsize cat_gonce = 0;
if (g_once_init_enter (&cat_gonce)) {
gsize cat_done;
cat_done = (gsize) _gst_debug_category_new ("video-scaler", 0,
"video-scaler object");
g_once_init_leave (&cat_gonce, cat_done);
}
return (GstDebugCategory *) cat_gonce;
}
#else
#define ensure_debug_category() /* NOOP */
#endif /* GST_DISABLE_GST_DEBUG */
#define SCALE_U8 12
#define SCALE_U8_ROUND (1 << (SCALE_U8 -1))
#define SCALE_U8_LQ 6
#define SCALE_U8_LQ_ROUND (1 << (SCALE_U8_LQ -1))
#define SCALE_U16 12
#define SCALE_U16_ROUND (1 << (SCALE_U16 -1))
#define LQ
typedef void (*GstVideoScalerHFunc) (GstVideoScaler * scale,
gpointer src, gpointer dest, guint dest_offset, guint width, guint n_elems);
typedef void (*GstVideoScalerVFunc) (GstVideoScaler * scale,
gpointer srcs[], gpointer dest, guint dest_offset, guint width,
guint n_elems);
struct _GstVideoScaler
{
GstVideoResamplerMethod method;
GstVideoScalerFlags flags;
GstVideoResampler resampler;
gboolean merged;
gint in_y_offset;
gint out_y_offset;
/* cached integer coefficients */
gint16 *taps_s16;
gint16 *taps_s16_4;
guint32 *offset_n;
/* for ORC */
gint inc;
gint tmpwidth;
gpointer tmpline1;
gpointer tmpline2;
};
static void
resampler_zip (GstVideoResampler * resampler, const GstVideoResampler * r1,
const GstVideoResampler * r2)
{
guint i, out_size, max_taps, n_phases;
gdouble *taps;
guint32 *offset, *phase;
g_return_if_fail (r1->max_taps == r2->max_taps);
out_size = r1->out_size + r2->out_size;
max_taps = r1->max_taps;
n_phases = out_size;
offset = g_malloc (sizeof (guint32) * out_size);
phase = g_malloc (sizeof (guint32) * n_phases);
taps = g_malloc (sizeof (gdouble) * max_taps * n_phases);
resampler->in_size = r1->in_size + r2->in_size;
resampler->out_size = out_size;
resampler->max_taps = max_taps;
resampler->n_phases = n_phases;
resampler->offset = offset;
resampler->phase = phase;
resampler->n_taps = g_malloc (sizeof (guint32) * out_size);
resampler->taps = taps;
for (i = 0; i < out_size; i++) {
guint idx = i / 2;
const GstVideoResampler *r;
r = (i & 1) ? r2 : r1;
offset[i] = r->offset[idx] * 2 + (i & 1);
phase[i] = i;
memcpy (taps + i * max_taps, r->taps + r->phase[idx] * max_taps,
max_taps * sizeof (gdouble));
}
}
static void
realloc_tmplines (GstVideoScaler * scale, gint n_elems, gint width)
{
gint n_taps = scale->resampler.max_taps;
if (scale->flags & GST_VIDEO_SCALER_FLAG_INTERLACED)
n_taps *= 2;
scale->tmpline1 =
g_realloc (scale->tmpline1, sizeof (gint32) * width * n_elems * n_taps);
scale->tmpline2 =
g_realloc (scale->tmpline2, sizeof (gint32) * width * n_elems);
scale->tmpwidth = width;
}
static void
scaler_dump (GstVideoScaler * scale)
{
#if 0
gint i, j, in_size, out_size, max_taps;
guint32 *offset, *phase;
gdouble *taps;
GstVideoResampler *r = &scale->resampler;
in_size = r->in_size;
out_size = r->out_size;
offset = r->offset;
phase = r->phase;
max_taps = r->max_taps;
taps = r->taps;
g_print ("in %d, out %d, max_taps %d, n_phases %d\n", in_size, out_size,
max_taps, r->n_phases);
for (i = 0; i < out_size; i++) {
g_print ("%d: \t%d \t%d:", i, offset[i], phase[i]);
for (j = 0; j < max_taps; j++) {
g_print ("\t%f", taps[i * max_taps + j]);
}
g_print ("\n");
}
#endif
}
#define INTERLACE_SHIFT 0.5
/**
* gst_video_scaler_new: (skip)
* @method: a #GstVideoResamplerMethod
* @flags: #GstVideoScalerFlags
* @n_taps: number of taps to use
* @in_size: number of source elements
* @out_size: number of destination elements
* @options: (allow-none): extra options
*
* Make a new @method video scaler. @in_size source lines/pixels will
* be scaled to @out_size destination lines/pixels.
*
* @n_taps specifies the amount of pixels to use from the source for one output
* pixel. If n_taps is 0, this function chooses a good value automatically based
* on the @method and @in_size/@out_size.
*
* Returns: a #GstVideoScaler
*/
GstVideoScaler *
gst_video_scaler_new (GstVideoResamplerMethod method, GstVideoScalerFlags flags,
guint n_taps, guint in_size, guint out_size, GstStructure * options)
{
GstVideoScaler *scale;
g_return_val_if_fail (in_size != 0, NULL);
g_return_val_if_fail (out_size != 0, NULL);
scale = g_slice_new0 (GstVideoScaler);
GST_DEBUG ("%d %u %u->%u", method, n_taps, in_size, out_size);
scale->method = method;
scale->flags = flags;
if (flags & GST_VIDEO_SCALER_FLAG_INTERLACED) {
GstVideoResampler tresamp, bresamp;
gdouble shift;
shift = (INTERLACE_SHIFT * out_size) / in_size;
gst_video_resampler_init (&tresamp, method,
GST_VIDEO_RESAMPLER_FLAG_HALF_TAPS, (out_size + 1) / 2, n_taps, shift,
(in_size + 1) / 2, (out_size + 1) / 2, options);
n_taps = tresamp.max_taps;
gst_video_resampler_init (&bresamp, method, 0, out_size - tresamp.out_size,
n_taps, -shift, in_size - tresamp.in_size,
out_size - tresamp.out_size, options);
resampler_zip (&scale->resampler, &tresamp, &bresamp);
gst_video_resampler_clear (&tresamp);
gst_video_resampler_clear (&bresamp);
} else {
gst_video_resampler_init (&scale->resampler, method,
GST_VIDEO_RESAMPLER_FLAG_NONE, out_size, n_taps, 0.0, in_size, out_size,
options);
}
if (out_size == 1)
scale->inc = 0;
else
scale->inc = ((in_size - 1) << 16) / (out_size - 1) - 1;
scaler_dump (scale);
GST_DEBUG ("max_taps %d", scale->resampler.max_taps);
return scale;
}
/**
* gst_video_scaler_free:
* @scale: a #GstVideoScaler
*
* Free a previously allocated #GstVideoScaler @scale.
*/
void
gst_video_scaler_free (GstVideoScaler * scale)
{
g_return_if_fail (scale != NULL);
gst_video_resampler_clear (&scale->resampler);
g_free (scale->taps_s16);
g_free (scale->taps_s16_4);
g_free (scale->offset_n);
g_free (scale->tmpline1);
g_free (scale->tmpline2);
g_slice_free (GstVideoScaler, scale);
}
/**
* gst_video_scaler_get_max_taps:
* @scale: a #GstVideoScaler
*
* Get the maximum number of taps for @scale.
*
* Returns: the maximum number of taps
*/
guint
gst_video_scaler_get_max_taps (GstVideoScaler * scale)
{
g_return_val_if_fail (scale != NULL, 0);
return scale->resampler.max_taps;
}
/**
* gst_video_scaler_get_coeff:
* @scale: a #GstVideoScaler
* @out_offset: an output offset
* @in_offset: result input offset
* @n_taps: result n_taps
*
* For a given pixel at @out_offset, get the first required input pixel at
* @in_offset and the @n_taps filter coefficients.
*
* Note that for interlaced content, @in_offset needs to be incremented with
* 2 to get the next input line.
*
* Returns: an array of @n_tap gdouble values with filter coefficients.
*/
const gdouble *
gst_video_scaler_get_coeff (GstVideoScaler * scale,
guint out_offset, guint * in_offset, guint * n_taps)
{
guint offset, phase;
g_return_val_if_fail (scale != NULL, NULL);
g_return_val_if_fail (out_offset < scale->resampler.out_size, NULL);
offset = scale->resampler.offset[out_offset];
phase = scale->resampler.phase[out_offset];
if (in_offset)
*in_offset = offset;
if (n_taps) {
*n_taps = scale->resampler.max_taps;
if (scale->flags & GST_VIDEO_SCALER_FLAG_INTERLACED)
*n_taps *= 2;
}
return scale->resampler.taps + phase * scale->resampler.max_taps;
}
static gboolean
resampler_convert_coeff (const gdouble * src,
gpointer dest, guint n, guint bits, guint precision)
{
gdouble multiplier;
gint i, j;
gdouble offset, l_offset, h_offset;
gboolean exact = FALSE;
multiplier = (1 << precision);
/* Round to integer, but with an adjustable bias that we use to
* eliminate the DC error. */
l_offset = 0.0;
h_offset = 1.0;
offset = 0.5;
for (i = 0; i < 64; i++) {
gint sum = 0;
for (j = 0; j < n; j++) {
gint16 tap = floor (offset + src[j] * multiplier);
((gint16 *) dest)[j] = tap;
sum += tap;
}
if (sum == (1 << precision)) {
exact = TRUE;
break;
}
if (l_offset == h_offset)
break;
if (sum < (1 << precision)) {
if (offset > l_offset)
l_offset = offset;
offset += (h_offset - l_offset) / 2;
} else {
if (offset < h_offset)
h_offset = offset;
offset -= (h_offset - l_offset) / 2;
}
}
if (!exact)
GST_WARNING ("can't find exact taps");
return exact;
}
static void
make_s16_taps (GstVideoScaler * scale, gint n_elems, gint precision)
{
gint i, j, max_taps, n_phases, out_size, src_inc;
gint16 *taps_s16, *taps_s16_4;
gdouble *taps;
guint32 *phase, *offset, *offset_n;
n_phases = scale->resampler.n_phases;
max_taps = scale->resampler.max_taps;
taps = scale->resampler.taps;
taps_s16 = scale->taps_s16 = g_malloc (sizeof (gint16) * n_phases * max_taps);
for (i = 0; i < n_phases; i++) {
resampler_convert_coeff (taps, taps_s16, max_taps, 16, precision);
taps += max_taps;
taps_s16 += max_taps;
}
out_size = scale->resampler.out_size;
taps_s16 = scale->taps_s16;
phase = scale->resampler.phase;
offset = scale->resampler.offset;
taps_s16_4 = scale->taps_s16_4 =
g_malloc (sizeof (gint16) * out_size * max_taps * 4);
offset_n = scale->offset_n =
g_malloc (sizeof (guint32) * out_size * max_taps);
if (scale->flags & GST_VIDEO_SCALER_FLAG_INTERLACED)
src_inc = 2;
else
src_inc = 1;
for (j = 0; j < max_taps; j++) {
for (i = 0; i < out_size; i++) {
gint16 tap;
if (scale->merged) {
if ((i & 1) == scale->out_y_offset)
offset_n[j * out_size + i] = offset[i] + (2 * j);
else
offset_n[j * out_size + i] = offset[i] + (4 * j);
} else {
offset_n[j * out_size + i] = offset[i] + j * src_inc;
}
tap = taps_s16[phase[i] * max_taps + j];
taps_s16_4[(j * out_size + i) * n_elems + 0] = tap;
if (n_elems > 1)
taps_s16_4[(j * out_size + i) * n_elems + 1] = tap;
if (n_elems > 2)
taps_s16_4[(j * out_size + i) * n_elems + 2] = tap;
if (n_elems > 3)
taps_s16_4[(j * out_size + i) * n_elems + 3] = tap;
}
}
}
#undef ACC_SCALE
static void
video_scale_h_near_u8 (GstVideoScaler * scale,
gpointer src, gpointer dest, guint dest_offset, guint width, guint n_elems)
{
guint8 *s, *d;
gint i;
d = (guint8 *) dest + dest_offset;
s = (guint8 *) src;
{
#ifndef ACC_SCALE
guint32 *offset = scale->resampler.offset + dest_offset;
for (i = 0; i < width; i++)
d[i] = s[offset[i]];
#else
gint acc = 0;
for (i = 0; i < width; i++) {
gint j = (acc + 0x8000) >> 16;
d[i] = s[j];
acc += scale->inc;
}
#endif
}
}
static void
video_scale_h_near_3u8 (GstVideoScaler * scale,
gpointer src, gpointer dest, guint dest_offset, guint width, guint n_elems)
{
guint8 *s, *d;
gint i;
d = (guint8 *) dest + dest_offset;
s = (guint8 *) src;
{
#ifndef ACC_SCALE
guint32 *offset = scale->resampler.offset + dest_offset;
for (i = 0; i < width; i++) {
gint j = offset[i] * 3;
d[i * 3 + 0] = s[j + 0];
d[i * 3 + 1] = s[j + 1];
d[i * 3 + 2] = s[j + 2];
}
#else
gint acc = 0;
for (i = 0; i < width; i++) {
gint j = ((acc + 0x8000) >> 16) * 3;
d[i * 3 + 0] = s[j + 0];
d[i * 3 + 1] = s[j + 1];
d[i * 3 + 2] = s[j + 2];
acc += scale->inc;
}
#endif
}
}
static void
video_scale_h_near_u16 (GstVideoScaler * scale,
gpointer src, gpointer dest, guint dest_offset, guint width, guint n_elems)
{
guint16 *s, *d;
gint i;
d = (guint16 *) dest + dest_offset;
s = (guint16 *) src;
{
#ifndef ACC_SCALE
guint32 *offset = scale->resampler.offset + dest_offset;
for (i = 0; i < width; i++)
d[i] = s[offset[i]];
#else
gint acc = 0;
for (i = 0; i < width; i++) {
gint j = (acc + 0x8000) >> 16;
d[i] = s[j];
acc += scale->inc;
}
#endif
}
}
static void
video_scale_h_near_u32 (GstVideoScaler * scale,
gpointer src, gpointer dest, guint dest_offset, guint width, guint n_elems)
{
guint32 *s, *d;
d = (guint32 *) dest + dest_offset;
s = (guint32 *) src;
#if 0
/* ORC is slower on this */
video_orc_resample_h_near_u32_lq (d, s, 0, scale->inc, width);
#elif 0
video_orc_resample_h_near_u32 (d, s, offset, width);
#else
{
gint i;
#ifndef ACC_SCALE
guint32 *offset = scale->resampler.offset + dest_offset;
for (i = 0; i < width; i++)
d[i] = s[offset[i]];
#else
gint acc = 0;
for (i = 0; i < width; i++) {
gint j = (acc + 0x8000) >> 16;
d[i] = s[j];
acc += scale->inc;
}
#endif
}
#endif
}
static void
video_scale_h_near_u64 (GstVideoScaler * scale,
gpointer src, gpointer dest, guint dest_offset, guint width, guint n_elems)
{
guint64 *s, *d;
gint i;
guint32 *offset;
d = (guint64 *) dest + dest_offset;
s = (guint64 *) src;
offset = scale->resampler.offset + dest_offset;
for (i = 0; i < width; i++)
d[i] = s[offset[i]];
}
static void
video_scale_h_2tap_1u8 (GstVideoScaler * scale,
gpointer src, gpointer dest, guint dest_offset, guint width, guint n_elems)
{
guint8 *s, *d;
d = (guint8 *) dest + dest_offset;
s = (guint8 *) src;
video_orc_resample_h_2tap_1u8_lq (d, s, 0, scale->inc, width);
}
static void
video_scale_h_2tap_4u8 (GstVideoScaler * scale,
gpointer src, gpointer dest, guint dest_offset, guint width, guint n_elems)
{
guint32 *s, *d;
d = (guint32 *) dest + dest_offset;
s = (guint32 *) src;
video_orc_resample_h_2tap_4u8_lq (d, s, 0, scale->inc, width);
}
static void
video_scale_h_ntap_u8 (GstVideoScaler * scale,
gpointer src, gpointer dest, guint dest_offset, guint width, guint n_elems)
{
gint16 *taps;
gint i, max_taps, count;
gpointer d;
guint32 *offset_n;
guint8 *pixels;
gint16 *temp;
if (scale->taps_s16 == NULL)
#ifdef LQ
make_s16_taps (scale, n_elems, SCALE_U8_LQ);
#else
make_s16_taps (scale, n_elems, SCALE_U8);
#endif
max_taps = scale->resampler.max_taps;
offset_n = scale->offset_n;
pixels = (guint8 *) scale->tmpline1;
/* prepare the arrays */
count = width * max_taps;
switch (n_elems) {
case 1:
{
guint8 *s = (guint8 *) src;
for (i = 0; i < count; i++)
pixels[i] = s[offset_n[i]];
d = (guint8 *) dest + dest_offset;
break;
}
case 2:
{
guint16 *p16 = (guint16 *) pixels;
guint16 *s = (guint16 *) src;
for (i = 0; i < count; i++)
p16[i] = s[offset_n[i]];
d = (guint16 *) dest + dest_offset;
break;
}
case 3:
{
guint8 *s = (guint8 *) src;
for (i = 0; i < count; i++) {
gint j = offset_n[i] * 3;
pixels[i * 3 + 0] = s[j + 0];
pixels[i * 3 + 1] = s[j + 1];
pixels[i * 3 + 2] = s[j + 2];
}
d = (guint8 *) dest + dest_offset * 3;
break;
}
case 4:
{
guint32 *p32 = (guint32 *) pixels;
guint32 *s = (guint32 *) src;
#if 0
video_orc_resample_h_near_u32 (p32, s, offset_n, count);
#else
for (i = 0; i < count; i++)
p32[i] = s[offset_n[i]];
#endif
d = (guint32 *) dest + dest_offset;
break;
}
default:
return;
}
temp = (gint16 *) scale->tmpline2;
taps = scale->taps_s16_4;
count = width * n_elems;
#ifdef LQ
if (max_taps == 2) {
video_orc_resample_h_2tap_u8_lq (d, pixels, pixels + count, taps,
taps + count, count);
} else {
/* first pixels with first tap to temp */
if (max_taps >= 3) {
video_orc_resample_h_multaps3_u8_lq (temp, pixels, pixels + count,
pixels + count * 2, taps, taps + count, taps + count * 2, count);
max_taps -= 3;
pixels += count * 3;
taps += count * 3;
} else {
gint first = max_taps % 3;
video_orc_resample_h_multaps_u8_lq (temp, pixels, taps, count);
video_orc_resample_h_muladdtaps_u8_lq (temp, 0, pixels + count, count,
taps + count, count * 2, count, first - 1);
max_taps -= first;
pixels += count * first;
taps += count * first;
}
while (max_taps > 3) {
if (max_taps >= 6) {
video_orc_resample_h_muladdtaps3_u8_lq (temp, pixels, pixels + count,
pixels + count * 2, taps, taps + count, taps + count * 2, count);
max_taps -= 3;
pixels += count * 3;
taps += count * 3;
} else {
video_orc_resample_h_muladdtaps_u8_lq (temp, 0, pixels, count,
taps, count * 2, count, max_taps - 3);
pixels += count * (max_taps - 3);
taps += count * (max_taps - 3);
max_taps = 3;
}
}
if (max_taps == 3) {
video_orc_resample_h_muladdscaletaps3_u8_lq (d, pixels, pixels + count,
pixels + count * 2, taps, taps + count, taps + count * 2, temp,
count);
} else {
if (max_taps) {
/* add other pixels with other taps to t4 */
video_orc_resample_h_muladdtaps_u8_lq (temp, 0, pixels, count,
taps, count * 2, count, max_taps);
}
/* scale and write final result */
video_orc_resample_scaletaps_u8_lq (d, temp, count);
}
}
#else
/* first pixels with first tap to t4 */
video_orc_resample_h_multaps_u8 (temp, pixels, taps, count);
/* add other pixels with other taps to t4 */
video_orc_resample_h_muladdtaps_u8 (temp, 0, pixels + count, count,
taps + count, count * 2, count, max_taps - 1);
/* scale and write final result */
video_orc_resample_scaletaps_u8 (d, temp, count);
#endif
}
static void
video_scale_h_ntap_u16 (GstVideoScaler * scale,
gpointer src, gpointer dest, guint dest_offset, guint width, guint n_elems)
{
gint16 *taps;
gint i, max_taps, count;
gpointer d;
guint32 *offset_n;
guint16 *pixels;
gint32 *temp;
if (scale->taps_s16 == NULL)
make_s16_taps (scale, n_elems, SCALE_U16);
max_taps = scale->resampler.max_taps;
offset_n = scale->offset_n;
pixels = (guint16 *) scale->tmpline1;
/* prepare the arrays FIXME, we can add this into ORC */
count = width * max_taps;
switch (n_elems) {
case 1:
{
guint16 *s = (guint16 *) src;
for (i = 0; i < count; i++)
pixels[i] = s[offset_n[i]];
d = (guint16 *) dest + dest_offset;
break;
}
case 4:
{
guint64 *p64 = (guint64 *) pixels;
guint64 *s = (guint64 *) src;
#if 0
video_orc_resample_h_near_u32 (p32, s, offset_n, count);
#else
for (i = 0; i < count; i++)
p64[i] = s[offset_n[i]];
#endif
d = (guint64 *) dest + dest_offset;
break;
}
default:
return;
}
temp = (gint32 *) scale->tmpline2;
taps = scale->taps_s16_4;
count = width * n_elems;
if (max_taps == 2) {
video_orc_resample_h_2tap_u16 (d, pixels, pixels + count, taps,
taps + count, count);
} else {
/* first pixels with first tap to t4 */
video_orc_resample_h_multaps_u16 (temp, pixels, taps, count);
/* add other pixels with other taps to t4 */
video_orc_resample_h_muladdtaps_u16 (temp, 0, pixels + count, count * 2,
taps + count, count * 2, count, max_taps - 1);
/* scale and write final result */
video_orc_resample_scaletaps_u16 (d, temp, count);
}
}
static void
video_scale_v_near_u8 (GstVideoScaler * scale,
gpointer srcs[], gpointer dest, guint dest_offset, guint width,
guint n_elems)
{
if (dest != srcs[0])
memcpy (dest, srcs[0], n_elems * width);
}
static void
video_scale_v_near_u16 (GstVideoScaler * scale,
gpointer srcs[], gpointer dest, guint dest_offset, guint width,
guint n_elems)
{
if (dest != srcs[0])
memcpy (dest, srcs[0], n_elems * 2 * width);
}
static void
video_scale_v_2tap_u8 (GstVideoScaler * scale,
gpointer srcs[], gpointer dest, guint dest_offset, guint width,
guint n_elems)
{
gint max_taps, src_inc;
guint8 *s1, *s2, *d;
gint16 p1;
if (scale->taps_s16 == NULL)
#ifdef LQ
make_s16_taps (scale, n_elems, SCALE_U8_LQ + 2);
#else
make_s16_taps (scale, n_elems, SCALE_U8);
#endif
max_taps = scale->resampler.max_taps;
if (scale->flags & GST_VIDEO_SCALER_FLAG_INTERLACED)
src_inc = 2;
else
src_inc = 1;
d = (guint8 *) dest;
s1 = (guint8 *) srcs[0 * src_inc];
s2 = (guint8 *) srcs[1 * src_inc];
p1 = scale->taps_s16[dest_offset * max_taps + 1];
#ifdef LQ
video_orc_resample_v_2tap_u8_lq (d, s1, s2, p1, width * n_elems);
#else
video_orc_resample_v_2tap_u8 (d, s1, s2, p1, width * n_elems);
#endif
}
static void
video_scale_v_2tap_u16 (GstVideoScaler * scale,
gpointer srcs[], gpointer dest, guint dest_offset, guint width,
guint n_elems)
{
gint max_taps, src_inc;
guint16 *s1, *s2, *d;
gint16 p1;
if (scale->taps_s16 == NULL)
make_s16_taps (scale, n_elems, SCALE_U16);
max_taps = scale->resampler.max_taps;
if (scale->flags & GST_VIDEO_SCALER_FLAG_INTERLACED)
src_inc = 2;
else
src_inc = 1;
d = (guint16 *) dest;
s1 = (guint16 *) srcs[0 * src_inc];
s2 = (guint16 *) srcs[1 * src_inc];
p1 = scale->taps_s16[dest_offset * max_taps + 1];
video_orc_resample_v_2tap_u16 (d, s1, s2, p1, width * n_elems);
}
#if 0
static void
video_scale_h_4tap_8888 (GstVideoScaler * scale,
gpointer src, gpointer dest, guint dest_offset, guint width)
{
gint16 *taps;
gint i, max_taps, count;
guint8 *d;
guint32 *offset_n;
guint32 *pixels;
if (scale->taps_s16 == NULL)
make_s16_taps (scale, n_elems, S16_SCALE);
max_taps = scale->resampler.max_taps;
offset_n = scale->offset_n;
d = (guint8 *) dest + 4 * dest_offset;
/* prepare the arrays FIXME, we can add this into ORC */
count = width * max_taps;
pixels = (guint32 *) scale->tmpline1;
for (i = 0; i < count; i++)
pixels[i] = ((guint32 *) src)[offset_n[i]];
taps = scale->taps_s16_4;
count = width * 4;
video_orc_resample_h_4tap_8 (d, pixels, pixels + width, pixels + 2 * width,
pixels + 3 * width, taps, taps + count, taps + 2 * count,
taps + 3 * count, count);
}
#endif
static void
video_scale_v_4tap_u8 (GstVideoScaler * scale,
gpointer srcs[], gpointer dest, guint dest_offset, guint width,
guint n_elems)
{
gint max_taps;
guint8 *s1, *s2, *s3, *s4, *d;
gint p1, p2, p3, p4, src_inc;
gint16 *taps;
if (scale->taps_s16 == NULL)
#ifdef LQ
make_s16_taps (scale, n_elems, SCALE_U8_LQ);
#else
make_s16_taps (scale, n_elems, SCALE_U8);
#endif
max_taps = scale->resampler.max_taps;
taps = scale->taps_s16 + dest_offset * max_taps;
if (scale->flags & GST_VIDEO_SCALER_FLAG_INTERLACED)
src_inc = 2;
else
src_inc = 1;
d = (guint8 *) dest;
s1 = (guint8 *) srcs[0 * src_inc];
s2 = (guint8 *) srcs[1 * src_inc];
s3 = (guint8 *) srcs[2 * src_inc];
s4 = (guint8 *) srcs[3 * src_inc];
p1 = taps[0];
p2 = taps[1];
p3 = taps[2];
p4 = taps[3];
#ifdef LQ
video_orc_resample_v_4tap_u8_lq (d, s1, s2, s3, s4, p1, p2, p3, p4,
width * n_elems);
#else
video_orc_resample_v_4tap_u8 (d, s1, s2, s3, s4, p1, p2, p3, p4,
width * n_elems);
#endif
}
static void
video_scale_v_ntap_u8 (GstVideoScaler * scale,
gpointer srcs[], gpointer dest, guint dest_offset, guint width,
guint n_elems)
{
gint16 *taps;
gint i, max_taps, count, src_inc;
gpointer d;
gint16 *temp;
if (scale->taps_s16 == NULL)
#ifdef LQ
make_s16_taps (scale, n_elems, SCALE_U8_LQ);
#else
make_s16_taps (scale, n_elems, SCALE_U8);
#endif
max_taps = scale->resampler.max_taps;
taps = scale->taps_s16 + (scale->resampler.phase[dest_offset] * max_taps);
d = (guint32 *) dest;
if (scale->flags & GST_VIDEO_SCALER_FLAG_INTERLACED)
src_inc = 2;
else
src_inc = 1;
temp = (gint16 *) scale->tmpline2;
count = width * n_elems;
#ifdef LQ
if (max_taps >= 4) {
video_orc_resample_v_multaps4_u8_lq (temp, srcs[0], srcs[1 * src_inc],
srcs[2 * src_inc], srcs[3 * src_inc], taps[0], taps[1], taps[2],
taps[3], count);
max_taps -= 4;
srcs += 4 * src_inc;
taps += 4;
} else {
gint first = (max_taps % 4);
video_orc_resample_v_multaps_u8_lq (temp, srcs[0], taps[0], count);
for (i = 1; i < first; i++) {
video_orc_resample_v_muladdtaps_u8_lq (temp, srcs[i * src_inc], taps[i],
count);
}
max_taps -= first;
srcs += first * src_inc;
taps += first;
}
while (max_taps > 4) {
if (max_taps >= 8) {
video_orc_resample_v_muladdtaps4_u8_lq (temp, srcs[0], srcs[1 * src_inc],
srcs[2 * src_inc], srcs[3 * src_inc], taps[0], taps[1], taps[2],
taps[3], count);
max_taps -= 4;
srcs += 4 * src_inc;
taps += 4;
} else {
for (i = 0; i < max_taps - 4; i++)
video_orc_resample_v_muladdtaps_u8_lq (temp, srcs[i * src_inc], taps[i],
count);
srcs += (max_taps - 4) * src_inc;
taps += (max_taps - 4);
max_taps = 4;
}
}
if (max_taps == 4) {
video_orc_resample_v_muladdscaletaps4_u8_lq (d, srcs[0], srcs[1 * src_inc],
srcs[2 * src_inc], srcs[3 * src_inc], temp, taps[0], taps[1], taps[2],
taps[3], count);
} else {
for (i = 0; i < max_taps; i++)
video_orc_resample_v_muladdtaps_u8_lq (temp, srcs[i * src_inc], taps[i],
count);
video_orc_resample_scaletaps_u8_lq (d, temp, count);
}
#else
video_orc_resample_v_multaps_u8 (temp, srcs[0], taps[0], count);
for (i = 1; i < max_taps; i++) {
video_orc_resample_v_muladdtaps_u8 (temp, srcs[i * src_inc], taps[i],
count);
}
video_orc_resample_scaletaps_u8 (d, temp, count);
#endif
}
static void
video_scale_v_ntap_u16 (GstVideoScaler * scale,
gpointer srcs[], gpointer dest, guint dest_offset, guint width,
guint n_elems)
{
gint16 *taps;
gint i, max_taps, count, src_inc;
gpointer d;
gint32 *temp;
if (scale->taps_s16 == NULL)
make_s16_taps (scale, n_elems, SCALE_U16);
max_taps = scale->resampler.max_taps;
taps = scale->taps_s16 + (scale->resampler.phase[dest_offset] * max_taps);
d = (guint16 *) dest;
if (scale->flags & GST_VIDEO_SCALER_FLAG_INTERLACED)
src_inc = 2;
else
src_inc = 1;
temp = (gint32 *) scale->tmpline2;
count = width * n_elems;
video_orc_resample_v_multaps_u16 (temp, srcs[0], taps[0], count);
for (i = 1; i < max_taps; i++) {
video_orc_resample_v_muladdtaps_u16 (temp, srcs[i * src_inc], taps[i],
count);
}
video_orc_resample_scaletaps_u16 (d, temp, count);
}
static gint
get_y_offset (GstVideoFormat format)
{
switch (format) {
case GST_VIDEO_FORMAT_YUY2:
case GST_VIDEO_FORMAT_YVYU:
return 0;
default:
case GST_VIDEO_FORMAT_UYVY:
return 1;
}
}
/**
* gst_video_scaler_combine_packed_YUV: (skip)
* @y_scale: a scaler for the Y component
* @uv_scale: a scaler for the U and V components
* @in_format: the input video format
* @out_format: the output video format
*
* Combine a scaler for Y and UV into one scaler for the packed @format.
*
* Returns: a new horizontal videoscaler for @format.
*
* Since: 1.6
*/
GstVideoScaler *
gst_video_scaler_combine_packed_YUV (GstVideoScaler * y_scale,
GstVideoScaler * uv_scale, GstVideoFormat in_format,
GstVideoFormat out_format)
{
GstVideoScaler *scale;
GstVideoResampler *resampler;
guint i, out_size, max_taps, n_phases;
gdouble *taps;
guint32 *offset, *phase;
g_return_val_if_fail (y_scale != NULL, NULL);
g_return_val_if_fail (uv_scale != NULL, NULL);
g_return_val_if_fail (uv_scale->resampler.max_taps ==
y_scale->resampler.max_taps, NULL);
scale = g_slice_new0 (GstVideoScaler);
scale->method = y_scale->method;
scale->flags = y_scale->flags;
scale->merged = TRUE;
resampler = &scale->resampler;
out_size = GST_ROUND_UP_4 (y_scale->resampler.out_size * 2);
max_taps = y_scale->resampler.max_taps;
n_phases = out_size;
offset = g_malloc (sizeof (guint32) * out_size);
phase = g_malloc (sizeof (guint32) * n_phases);
taps = g_malloc (sizeof (gdouble) * max_taps * n_phases);
resampler->in_size = y_scale->resampler.in_size * 2;
resampler->out_size = out_size;
resampler->max_taps = max_taps;
resampler->n_phases = n_phases;
resampler->offset = offset;
resampler->phase = phase;
resampler->n_taps = g_malloc (sizeof (guint32) * out_size);
resampler->taps = taps;
scale->in_y_offset = get_y_offset (in_format);
scale->out_y_offset = get_y_offset (out_format);
scale->inc = y_scale->inc;
for (i = 0; i < out_size; i++) {
gint ic;
if ((i & 1) == scale->out_y_offset) {
ic = MIN (i / 2, y_scale->resampler.out_size - 1);
offset[i] = y_scale->resampler.offset[ic] * 2 + scale->in_y_offset;
memcpy (taps + i * max_taps, y_scale->resampler.taps +
y_scale->resampler.phase[ic] * max_taps, max_taps * sizeof (gdouble));
} else {
ic = MIN (i / 4, uv_scale->resampler.out_size - 1);
offset[i] = uv_scale->resampler.offset[ic] * 4 + (i & 3);
memcpy (taps + i * max_taps, uv_scale->resampler.taps +
uv_scale->resampler.phase[ic] * max_taps,
max_taps * sizeof (gdouble));
}
phase[i] = i;
}
scaler_dump (scale);
return scale;
}
static gboolean
get_functions (GstVideoScaler * hscale, GstVideoScaler * vscale,
GstVideoFormat format,
GstVideoScalerHFunc * hfunc, GstVideoScalerVFunc * vfunc,
gint * n_elems, guint * width, gint * bits)
{
gboolean mono = FALSE;
switch (format) {
case GST_VIDEO_FORMAT_GRAY8:
*bits = 8;
*n_elems = 1;
mono = TRUE;
break;
case GST_VIDEO_FORMAT_YUY2:
case GST_VIDEO_FORMAT_YVYU:
case GST_VIDEO_FORMAT_UYVY:
*bits = 8;
*n_elems = 1;
*width = GST_ROUND_UP_4 (*width * 2);
break;
case GST_VIDEO_FORMAT_RGB:
case GST_VIDEO_FORMAT_BGR:
case GST_VIDEO_FORMAT_v308:
case GST_VIDEO_FORMAT_IYU2:
*bits = 8;
*n_elems = 3;
break;
case GST_VIDEO_FORMAT_AYUV:
case GST_VIDEO_FORMAT_RGBx:
case GST_VIDEO_FORMAT_BGRx:
case GST_VIDEO_FORMAT_xRGB:
case GST_VIDEO_FORMAT_xBGR:
case GST_VIDEO_FORMAT_RGBA:
case GST_VIDEO_FORMAT_BGRA:
case GST_VIDEO_FORMAT_ARGB:
case GST_VIDEO_FORMAT_ABGR:
*bits = 8;
*n_elems = 4;
break;
case GST_VIDEO_FORMAT_ARGB64:
case GST_VIDEO_FORMAT_AYUV64:
*bits = 16;
*n_elems = 4;
break;
case GST_VIDEO_FORMAT_GRAY16_LE:
case GST_VIDEO_FORMAT_GRAY16_BE:
*bits = 16;
*n_elems = 1;
mono = TRUE;
break;
case GST_VIDEO_FORMAT_NV12:
case GST_VIDEO_FORMAT_NV16:
case GST_VIDEO_FORMAT_NV21:
case GST_VIDEO_FORMAT_NV24:
case GST_VIDEO_FORMAT_NV61:
*bits = 8;
*n_elems = 2;
break;
default:
return FALSE;
}
if (*bits == 8) {
switch (hscale ? hscale->resampler.max_taps : 0) {
case 0:
break;
case 1:
if (*n_elems == 1)
*hfunc = video_scale_h_near_u8;
else if (*n_elems == 2)
*hfunc = video_scale_h_near_u16;
else if (*n_elems == 3)
*hfunc = video_scale_h_near_3u8;
else if (*n_elems == 4)
*hfunc = video_scale_h_near_u32;
break;
case 2:
if (*n_elems == 1 && mono)
*hfunc = video_scale_h_2tap_1u8;
else if (*n_elems == 4)
*hfunc = video_scale_h_2tap_4u8;
else
*hfunc = video_scale_h_ntap_u8;
break;
default:
*hfunc = video_scale_h_ntap_u8;
break;
}
switch (vscale ? vscale->resampler.max_taps : 0) {
case 0:
break;
case 1:
*vfunc = video_scale_v_near_u8;
break;
case 2:
*vfunc = video_scale_v_2tap_u8;
break;
case 4:
*vfunc = video_scale_v_4tap_u8;
break;
default:
*vfunc = video_scale_v_ntap_u8;
break;
}
} else if (*bits == 16) {
switch (hscale ? hscale->resampler.max_taps : 0) {
case 0:
break;
case 1:
if (*n_elems == 1)
*hfunc = video_scale_h_near_u16;
else
*hfunc = video_scale_h_near_u64;
break;
default:
*hfunc = video_scale_h_ntap_u16;
break;
}
switch (vscale ? vscale->resampler.max_taps : 0) {
case 0:
break;
case 1:
*vfunc = video_scale_v_near_u16;
break;
case 2:
*vfunc = video_scale_v_2tap_u16;
break;
default:
*vfunc = video_scale_v_ntap_u16;
break;
}
}
return TRUE;
}
/**
* gst_video_scaler_horizontal:
* @scale: a #GstVideoScaler
* @format: a #GstVideoFormat for @src and @dest
* @src: source pixels
* @dest: destination pixels
* @dest_offset: the horizontal destination offset
* @width: the number of pixels to scale
*
* Horizontally scale the pixels in @src to @dest, starting from @dest_offset
* for @width samples.
*/
void
gst_video_scaler_horizontal (GstVideoScaler * scale, GstVideoFormat format,
gpointer src, gpointer dest, guint dest_offset, guint width)
{
gint n_elems, bits;
GstVideoScalerHFunc func = NULL;
g_return_if_fail (scale != NULL);
g_return_if_fail (src != NULL);
g_return_if_fail (dest != NULL);
g_return_if_fail (dest_offset + width <= scale->resampler.out_size);
if (!get_functions (scale, NULL, format, &func, NULL, &n_elems, &width, &bits)
|| func == NULL)
goto no_func;
if (scale->tmpwidth < width)
realloc_tmplines (scale, n_elems, width);
func (scale, src, dest, dest_offset, width, n_elems);
return;
no_func:
{
GST_WARNING ("no scaler function for format");
}
}
/**
* gst_video_scaler_vertical:
* @scale: a #GstVideoScaler
* @format: a #GstVideoFormat for @srcs and @dest
* @src_lines: source pixels lines
* @dest: destination pixels
* @dest_offset: the vertical destination offset
* @width: the number of pixels to scale
*
* Vertically combine @width pixels in the lines in @src_lines to @dest.
* @dest is the location of the target line at @dest_offset and
* @srcs are the input lines for @dest_offset.
*/
void
gst_video_scaler_vertical (GstVideoScaler * scale, GstVideoFormat format,
gpointer src_lines[], gpointer dest, guint dest_offset, guint width)
{
gint n_elems, bits;
GstVideoScalerVFunc func = NULL;
g_return_if_fail (scale != NULL);
g_return_if_fail (src_lines != NULL);
g_return_if_fail (dest != NULL);
g_return_if_fail (dest_offset < scale->resampler.out_size);
if (!get_functions (NULL, scale, format, NULL, &func, &n_elems, &width, &bits)
|| func == NULL)
goto no_func;
if (scale->tmpwidth < width)
realloc_tmplines (scale, n_elems, width);
func (scale, src_lines, dest, dest_offset, width, n_elems);
return;
no_func:
{
GST_WARNING ("no scaler function for format");
}
}
/**
* gst_video_scaler_2d:
* @hscale: a horizontal #GstVideoScaler
* @vscale: a vertical #GstVideoScaler
* @format: a #GstVideoFormat for @srcs and @dest
* @src: source pixels
* @src_stride: source pixels stride
* @dest: destination pixels
* @dest_stride: destination pixels stride
* @x: the horizontal destination offset
* @y: the vertical destination offset
* @width: the number of output pixels to scale
* @height: the number of output lines to scale
*
* Scale a rectangle of pixels in @src with @src_stride to @dest with
* @dest_stride using the horizontal scaler @hscaler and the vertical
* scaler @vscale.
*
* One or both of @hscale and @vscale can be NULL to only perform scaling in
* one dimension or do a copy without scaling.
*
* @x and @y are the coordinates in the destination image to process.
*/
void
gst_video_scaler_2d (GstVideoScaler * hscale, GstVideoScaler * vscale,
GstVideoFormat format, gpointer src, gint src_stride,
gpointer dest, gint dest_stride, guint x, guint y,
guint width, guint height)
{
gint n_elems, bits;
GstVideoScalerHFunc hfunc = NULL;
GstVideoScalerVFunc vfunc = NULL;
gint i;
gboolean interlaced;
g_return_if_fail (src != NULL);
g_return_if_fail (dest != NULL);
if (!get_functions (hscale, vscale, format, &hfunc, &vfunc, &n_elems, &width,
&bits))
goto no_func;
interlaced = vscale && ! !(vscale->flags & GST_VIDEO_SCALER_FLAG_INTERLACED);
#define LINE(s,ss,i) ((guint8 *)(s) + ((i) * (ss)))
#define TMP_LINE(s,i) ((guint8 *)((s)->tmpline1) + (i) * (sizeof (gint32) * width * n_elems))
if (vscale == NULL) {
if (hscale == NULL) {
guint xo, xw;
guint8 *s, *d;
xo = x * n_elems;
xw = width * n_elems * (bits / 8);
s = LINE (src, src_stride, y) + xo;
d = LINE (dest, dest_stride, y) + xo;
/* no scaling, do memcpy */
for (i = y; i < height; i++) {
memcpy (d, s, xw);
d += dest_stride;
s += src_stride;
}
} else {
if (hscale->tmpwidth < width)
realloc_tmplines (hscale, n_elems, width);
/* only horizontal scaling */
for (i = y; i < height; i++) {
hfunc (hscale, LINE (src, src_stride, i), LINE (dest, dest_stride, i),
x, width, n_elems);
}
}
} else {
guint v_taps;
gpointer *lines;
if (vscale->tmpwidth < width)
realloc_tmplines (vscale, n_elems, width);
v_taps = vscale->resampler.max_taps;
lines = g_alloca ((interlaced ? 2 : 1) * v_taps * sizeof (gpointer));
memset (lines, 0, (interlaced ? 2 : 1) * v_taps * sizeof (gpointer));
if (hscale == NULL) {
guint src_inc = interlaced ? 2 : 1;
/* only vertical scaling */
for (i = y; i < height; i++) {
guint in, j;
in = vscale->resampler.offset[i];
for (j = 0; j < v_taps; j++) {
guint l = in + j * src_inc;
g_assert (l < vscale->resampler.in_size);
lines[j * src_inc] = LINE (src, src_stride, l);
}
vfunc (vscale, lines, LINE (dest, dest_stride, i), i, width, n_elems);
}
} else {
gint s1, s2;
guint *tmpline_lines;
tmpline_lines = g_newa (guint, (interlaced ? 2 : 1) * v_taps);
/* initialize with -1 */
memset (tmpline_lines, 0xff,
(interlaced ? 2 : 1) * v_taps * sizeof (guint));
if (hscale->tmpwidth < width)
realloc_tmplines (hscale, n_elems, width);
s1 = width * vscale->resampler.offset[height - 1];
s2 = width * height;
if (s1 <= s2) {
for (i = y; i < height; i++) {
guint in, j;
guint src_inc = interlaced ? 2 : 1;
guint f2_offset = (interlaced && (i % 2 == 1)) * v_taps;
in = vscale->resampler.offset[i];
for (j = 0; j < v_taps; j++) {
guint k;
guint l = in + j * src_inc;
g_assert (l < vscale->resampler.in_size);
/* First check if we already have this line in tmplines */
for (k = f2_offset; k < v_taps + f2_offset; k++) {
if (tmpline_lines[k] == l) {
lines[j * src_inc] = TMP_LINE (vscale, k);
break;
}
}
/* Found */
if (k < v_taps + f2_offset)
continue;
/* Otherwise find an empty line we can clear */
for (k = f2_offset; k < v_taps + f2_offset; k++) {
if (tmpline_lines[k] < in || tmpline_lines[k] == -1)
break;
}
/* Must not happen, that would mean we don't have enough space to
* begin with */
g_assert (k < v_taps + f2_offset);
hfunc (hscale, LINE (src, src_stride, l), TMP_LINE (vscale, k), x,
width, n_elems);
tmpline_lines[k] = l;
lines[j * src_inc] = TMP_LINE (vscale, k);
}
vfunc (vscale, lines, LINE (dest, dest_stride, i), i, width, n_elems);
}
} else {
guint vx, vw, w1, ws;
guint h_taps;
h_taps = hscale->resampler.max_taps;
w1 = x + width - 1;
ws = hscale->resampler.offset[w1];
/* we need to estimate the area that we first need to scale in the
* vertical direction. Scale x and width to find the lower bound and
* overshoot the width to find the upper bound */
vx = (hscale->inc * x) >> 16;
vx = MIN (vx, hscale->resampler.offset[x]);
vw = (hscale->inc * (x + width)) >> 16;
if (hscale->merged) {
if ((w1 & 1) == hscale->out_y_offset)
vw = MAX (vw, ws + (2 * h_taps));
else
vw = MAX (vw, ws + (4 * h_taps));
} else {
vw = MAX (vw, ws + h_taps);
}
vw += 1;
/* but clamp to max size */
vw = MIN (vw, hscale->resampler.in_size);
if (vscale->tmpwidth < vw)
realloc_tmplines (vscale, n_elems, vw);
for (i = y; i < height; i++) {
guint in, j;
guint src_inc = interlaced ? 2 : 1;
in = vscale->resampler.offset[i];
for (j = 0; j < v_taps; j++) {
guint l = in + j * src_inc;
g_assert (l < vscale->resampler.in_size);
lines[j * src_inc] =
LINE (src, src_stride, in + j * src_inc) + vx * n_elems;
}
vfunc (vscale, lines, TMP_LINE (vscale, 0) + vx * n_elems, i,
vw - vx, n_elems);
hfunc (hscale, TMP_LINE (vscale, 0), LINE (dest, dest_stride,
i), x, width, n_elems);
}
}
}
}
return;
no_func:
{
GST_WARNING ("no scaler function for format");
}
}
#undef LINE
#undef TMP_LINE