gstreamer/gst-libs/gst/video/video-scaler.c
Wim Taymans 49d909dabf video-scaler: add more ORC functions
Add the old ORC functions for nearest and linear. Label them as Low
quality because they are not as accurate but ORC lacks opcodes to
express this for now.
2014-10-30 11:48:00 +01:00

588 lines
15 KiB
C

/* GStreamer
* Copyright (C) <2014> Wim Taymans <wim.taymans@gmail.com>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
#include <string.h>
#include <stdio.h>
#include <math.h>
#include <orc/orcfunctions.h>
#include "video-orc.h"
#include "video-scaler.h"
/* Number of fractional bits of the 16-bit fixed-point filter taps used by
 * the C n-tap kernels in this file. */
#define S16_SCALE 12
/* Half of one fixed-point unit; the kernels add it before shifting right by
 * S16_SCALE so the result is rounded rather than truncated. */
#define S16_SCALE_ROUND (1 << (S16_SCALE -1))
/* When defined, the vertical 2-tap and 4-tap kernels call the "_lq" ORC
 * functions and build their taps with a lower precision (8 and 6 bits). */
#define LQ
/* Signature of the internal horizontal kernels that
 * gst_video_scaler_horizontal() dispatches to: one source line in @src,
 * @width output pixels written to @dest starting at @dest_offset. */
typedef void (*GstVideoScalerHFunc) (GstVideoScaler * scale,
gpointer src, gpointer dest, guint dest_offset, guint width);
/* Signature of the internal vertical kernels that
 * gst_video_scaler_vertical() dispatches to: several source lines in @srcs
 * are combined into the single output line @dest. */
typedef void (*GstVideoScalerVFunc) (GstVideoScaler * scale,
gpointer srcs[], gpointer dest, guint dest_offset, guint width);
/* Private state behind a GstVideoScaler handle. Instances are created
 * zero-filled by gst_video_scaler_new(). */
struct _GstVideoScaler
{
/* resampling method passed to gst_video_scaler_new() */
GstVideoResamplerMethod method;
/* flags passed to gst_video_scaler_new() */
GstVideoScalerFlags flags;
/* per-output offsets, phases and floating-point taps read by the kernels */
GstVideoResampler resampler;
/* cached integer coefficients, built on first use by make_s16_taps();
 * NULL until then */
gint16 *taps_s16;
/* for ORC: source step per output element, scaled by 1 << 16 */
gint inc;
};
/*
 * resampler_zip:
 * @resampler: destination; receives newly allocated offset/phase/n_taps/taps
 * @r1: resampler providing the even output entries
 * @r2: resampler providing the odd output entries
 *
 * Interleave @r1 and @r2 into @resampler: output entry i is taken from entry
 * i / 2 of @r1 (i even) or @r2 (i odd). Source offsets are doubled (plus one
 * for odd entries) so that they address the interleaved input. Every output
 * entry gets a phase of its own, so the zipped resampler has as many phases
 * as outputs.
 *
 * Both inputs must use the same max_taps; otherwise @resampler is left
 * untouched.
 */
static void
resampler_zip (GstVideoResampler * resampler, const GstVideoResampler * r1,
    const GstVideoResampler * r2)
{
  guint i, out_size, max_taps, n_phases;
  gdouble *taps;
  guint32 *offset, *phase, *n_taps;

  g_return_if_fail (r1->max_taps == r2->max_taps);

  out_size = r1->out_size + r2->out_size;
  max_taps = r1->max_taps;
  /* one phase per output entry, see phase[i] = i below */
  n_phases = out_size;

  offset = g_malloc (sizeof (guint32) * out_size);
  phase = g_malloc (sizeof (guint32) * n_phases);
  n_taps = g_malloc (sizeof (guint32) * n_phases);
  taps = g_malloc (sizeof (gdouble) * max_taps * n_phases);

  resampler->in_size = r1->in_size + r2->in_size;
  resampler->out_size = out_size;
  resampler->max_taps = max_taps;
  /* make_s16_taps() sizes and fills its cache from n_phases, so it has to be
   * set here as well */
  resampler->n_phases = n_phases;
  resampler->offset = offset;
  resampler->phase = phase;
  resampler->n_taps = n_taps;
  resampler->taps = taps;

  for (i = 0; i < out_size; i++) {
    const GstVideoResampler *r = (i & 1) ? r2 : r1;
    guint idx = i / 2;

    offset[i] = r->offset[idx] * 2 + (i & 1);
    phase[i] = i;
    /* each zipped phase holds exactly max_taps coefficients */
    n_taps[i] = max_taps;

    /* taps are stored per phase, so look the source row up through the
     * phase of entry idx instead of using idx directly */
    memcpy (taps + i * max_taps, r->taps + r->phase[idx] * max_taps,
        max_taps * sizeof (gdouble));
  }
}
/**
 * gst_video_scaler_new:
 * @method: a #GstVideoResamplerMethod
 * @flags: #GstVideoScalerFlags
 * @n_taps: number of taps to use
 * @in_size: number of source elements
 * @out_size: number of destination elements
 * @options: (allow-none): extra options
 *
 * Make a new @method video scaler. @in_size source lines/pixels will
 * be scaled to @out_size destination lines/pixels.
 *
 * @n_taps specifies the amount of pixels to use from the source for one output
 * pixel. If n_taps is 0, this function chooses a good value automatically based
 * on the @method and @in_size/@out_size.
 *
 * When @flags contains #GST_VIDEO_SCALER_FLAG_INTERLACED, two half-size
 * resamplers are created and interleaved into one.
 *
 * Returns: a #GstVideoScaler, or %NULL when @in_size or @out_size is 0. Free
 * with gst_video_scaler_free().
 */
GstVideoScaler *
gst_video_scaler_new (GstVideoResamplerMethod method, GstVideoScalerFlags flags,
    guint n_taps, guint in_size, guint out_size, GstStructure * options)
{
  GstVideoScaler *scale;

  g_return_val_if_fail (in_size != 0, NULL);
  g_return_val_if_fail (out_size != 0, NULL);

  scale = g_slice_new0 (GstVideoScaler);

  GST_DEBUG ("%d %u %u->%u", method, n_taps, in_size, out_size);

  scale->method = method;
  scale->flags = flags;

  if (flags & GST_VIDEO_SCALER_FLAG_INTERLACED) {
    GstVideoResampler tresamp, bresamp;

    /* first half: ceil (size / 2) entries */
    gst_video_resampler_init (&tresamp, method, 0, (out_size + 1) / 2, n_taps,
        0.0, (in_size + 1) / 2, (out_size + 1) / 2, options);

    /* resampler_zip() requires both halves to use the same number of taps.
     * With n_taps == 0 each half would pick its own value, so pin the second
     * half to whatever the first one ended up with. */
    n_taps = tresamp.max_taps;

    /* second half: the remaining entries */
    gst_video_resampler_init (&bresamp, method, 0, out_size - tresamp.out_size,
        n_taps, -1.0, in_size - tresamp.in_size,
        out_size - tresamp.out_size, options);

    resampler_zip (&scale->resampler, &tresamp, &bresamp);
    gst_video_resampler_clear (&tresamp);
    gst_video_resampler_clear (&bresamp);
  } else {
    gst_video_resampler_init (&scale->resampler, method, flags, out_size,
        n_taps, 0.0, in_size, out_size, options);
  }

  if (out_size == 1) {
    scale->inc = 0;
  } else {
    /* Source step per output element, scaled by 1 << 16. Computed in 64 bits
     * so the shift cannot wrap for in_size > 65536. */
    guint64 step = (((guint64) in_size - 1) << 16) / (out_size - 1);

    /* Keep the "- 1" bias of the step, but do not let it turn a zero step
     * (in_size == 1) into a negative increment. */
    if (step > 0)
      step--;

    scale->inc = (gint) MIN (step, (guint64) G_MAXINT);
  }

  return scale;
}
/**
 * gst_video_scaler_free:
 * @scale: a #GstVideoScaler
 *
 * Release @scale together with its resampler data and the cached integer
 * taps. @scale must not be used afterwards.
 */
void
gst_video_scaler_free (GstVideoScaler * scale)
{
  g_return_if_fail (scale != NULL);

  /* the integer tap cache is only built on demand and may still be NULL,
   * which g_free() accepts */
  g_free (scale->taps_s16);
  gst_video_resampler_clear (&scale->resampler);

  g_slice_free (GstVideoScaler, scale);
}
/**
 * gst_video_scaler_get_coeff:
 * @scale: a #GstVideoScaler
 * @out_offset: an output offset
 * @in_offset: (allow-none): result input offset
 * @n_taps: (allow-none): result n_taps
 *
 * For the output element at @out_offset, report the first required input
 * element in @in_offset and the tap count in @n_taps. Either result pointer
 * may be %NULL when the value is not needed.
 *
 * For interlaced scalers the value stored in @n_taps is twice the resampler's
 * max_taps, and @in_offset needs to be incremented with 2 to get the next
 * input line.
 *
 * Returns: the filter coefficients for the phase of @out_offset, or %NULL if
 * @scale is %NULL or @out_offset is out of range.
 */
const gdouble *
gst_video_scaler_get_coeff (GstVideoScaler * scale,
    guint out_offset, guint * in_offset, guint * n_taps)
{
  const GstVideoResampler *r;

  g_return_val_if_fail (scale != NULL, NULL);
  g_return_val_if_fail (out_offset < scale->resampler.out_size, NULL);

  r = &scale->resampler;

  if (in_offset != NULL)
    *in_offset = r->offset[out_offset];

  if (n_taps != NULL) {
    if (scale->flags & GST_VIDEO_SCALER_FLAG_INTERLACED)
      *n_taps = r->max_taps * 2;
    else
      *n_taps = r->max_taps;
  }

  /* the tap table is laid out as one row of max_taps values per phase */
  return r->taps + r->phase[out_offset] * r->max_taps;
}
/*
 * resampler_convert_coeff:
 * @src: @n floating-point coefficients
 * @dest: receives @n gint16 coefficients
 * @n: number of coefficients
 * @bits: not used by the current implementation
 * @precision: number of fractional bits of the integer coefficients
 *
 * Convert @src to fixed point so that the integer coefficients sum up to
 * exactly 1 << @precision. Each value is computed as
 * floor (bias + coefficient * (1 << precision)); the bias starts at 0.5 and is
 * bisected within [0.0, 1.0] for at most 64 rounds until the sum matches.
 *
 * @dest always holds the result of the last attempt, even on failure.
 *
 * Returns: TRUE if an exact set of coefficients was found, FALSE (after
 * logging a warning) otherwise.
 */
static gboolean
resampler_convert_coeff (const gdouble * src,
    gpointer dest, guint n, guint bits, guint precision)
{
  gint16 *out = dest;
  const gint target = (1 << precision);
  const gdouble multiplier = target;
  gdouble bias = 0.5, lo = 0.0, hi = 1.0;
  gint attempt;

  for (attempt = 0; attempt < 64; attempt++) {
    gint sum = 0;
    guint k;

    for (k = 0; k < n; k++) {
      gint16 tap = floor (bias + src[k] * multiplier);

      out[k] = tap;
      sum += tap;
    }

    /* no DC error left: done */
    if (sum == target)
      return TRUE;

    /* search interval collapsed, no better bias available */
    if (lo == hi)
      break;

    if (sum < target) {
      /* taps too small: raise the lower bound and move the bias up */
      if (bias > lo)
        lo = bias;
      bias += (hi - lo) / 2;
    } else {
      /* taps too large: lower the upper bound and move the bias down */
      if (bias < hi)
        hi = bias;
      bias -= (hi - lo) / 2;
    }
  }

  GST_WARNING ("can't find exact taps");
  return FALSE;
}
/*
 * make_s16_taps:
 * @scale: a #GstVideoScaler
 * @precision: number of fractional bits for the integer taps
 *
 * Allocate scale->taps_s16 with n_phases * max_taps entries and fill it, one
 * phase at a time, with the fixed-point version of the resampler taps. The
 * result of each conversion (exact or not) is not checked.
 *
 * NOTE(review): callers only invoke this while taps_s16 is NULL, yet they pass
 * different precisions (S16_SCALE, 8, 6); the cache keeps whatever precision
 * was requested first.
 */
static void
make_s16_taps (GstVideoScaler * scale, gint precision)
{
  const GstVideoResampler *r = &scale->resampler;
  gint n_phases = r->n_phases;
  gint max_taps = r->max_taps;
  gint phase;

  scale->taps_s16 = g_malloc (sizeof (gint16) * n_phases * max_taps);

  for (phase = 0; phase < n_phases; phase++) {
    resampler_convert_coeff (r->taps + phase * max_taps,
        scale->taps_s16 + phase * max_taps, max_taps, 16, precision);
  }
}
/*
 * Horizontal nearest-neighbour kernel for 4-byte pixels: every output pixel
 * is a copy of the source pixel named by the resampler offset table.
 *
 * This is a plain C lookup on purpose; the ORC variant
 * (video_orc_resample_h_near_8888_lq) was noted to be slower for this case.
 */
static void
video_scale_h_near_8888 (GstVideoScaler * scale,
    gpointer src, gpointer dest, guint dest_offset, guint width)
{
  const guint32 *in = src;
  const guint32 *pick = scale->resampler.offset + dest_offset;
  guint32 *out = (guint32 *) dest + dest_offset;
  guint n;

  for (n = 0; n < width; n++)
    out[n] = in[pick[n]];
}
/* Disabled plain C implementation of the horizontal 2-tap kernel. It blends
 * the pixel at the resampler offset with its right neighbour using the second
 * tap of the phase (S16_SCALE precision). The whole section is compiled out;
 * the active video_scale_h_2tap_8888() that follows uses ORC instead. */
#if 0
#define BLEND_2TAP(a,b,p) (((((b)-(guint16)(a)) * p + S16_SCALE_ROUND) >> S16_SCALE) + (a))
static void
video_scale_h_2tap_8888 (GstVideoScaler * scale,
gpointer src, gpointer dest, guint dest_offset, guint width)
{
gint i, max_taps, sum0, sum1, sum2, sum3;
guint8 *s1, *s2, *d;
guint32 *offset, *phase;
gint16 *taps, *t;
if (scale->taps_s16 == NULL)
make_s16_taps (scale, S16_SCALE);
max_taps = scale->resampler.max_taps;
offset = scale->resampler.offset + dest_offset;
phase = scale->resampler.phase + dest_offset;
taps = scale->taps_s16;
d = (guint8 *) dest + 4 * dest_offset;
for (i = 0; i < width; i++) {
s1 = (guint8 *) src + 4 * offset[i];
s2 = s1 + 4;
t = taps + (phase[i] * max_taps);
sum0 = BLEND_2TAP (s1[0], s2[0], t[1]);
sum1 = BLEND_2TAP (s1[1], s2[1], t[1]);
sum2 = BLEND_2TAP (s1[2], s2[2], t[1]);
sum3 = BLEND_2TAP (s1[3], s2[3], t[1]);
d[i * 4 + 0] = CLAMP (sum0, 0, 255);
d[i * 4 + 1] = CLAMP (sum1, 0, 255);
d[i * 4 + 2] = CLAMP (sum2, 0, 255);
d[i * 4 + 3] = CLAMP (sum3, 0, 255);
}
}
#endif
/*
 * Horizontal 2-tap kernel for 4-byte pixels, delegated to the low-quality ORC
 * function. The resampler offset/tap tables are not consulted here; only the
 * precomputed scale->inc is handed to ORC.
 *
 * NOTE(review): the third ORC argument is always 0 while only the destination
 * pointer is advanced by @dest_offset. Presumably that argument is the start
 * position in the source; if so this is only right for dest_offset == 0 -
 * confirm against the ORC function and the callers.
 */
static void
video_scale_h_2tap_8888 (GstVideoScaler * scale,
    gpointer src, gpointer dest, guint dest_offset, guint width)
{
  guint32 *out = (guint32 *) dest + dest_offset;
  guint32 *in = (guint32 *) src;

  video_orc_resample_h_2tap_8888_lq (out, in, 0, scale->inc, width);
}
/*
 * Generic horizontal kernel for 4-byte pixels. For every output pixel the
 * max_taps consecutive source pixels starting at the resampler offset are
 * weighted with the fixed-point taps of the pixel's phase, per byte channel.
 * The sums are rounded, shifted back by S16_SCALE and clamped to 0..255.
 *
 * The integer taps are built with S16_SCALE precision on first use.
 */
static void
video_scale_h_ntap_8888 (GstVideoScaler * scale,
    gpointer src, gpointer dest, guint dest_offset, guint width)
{
  const guint32 *offsets, *phases;
  const gint16 *coeffs;
  guint8 *out;
  gint n_taps, px, tap, c;

  if (scale->taps_s16 == NULL)
    make_s16_taps (scale, S16_SCALE);

  n_taps = scale->resampler.max_taps;
  offsets = scale->resampler.offset + dest_offset;
  phases = scale->resampler.phase + dest_offset;
  out = (guint8 *) dest + 4 * dest_offset;

  for (px = 0; px < width; px++, out += 4) {
    const guint8 *in = (const guint8 *) src + 4 * offsets[px];
    gint acc[4] = { 0, 0, 0, 0 };

    coeffs = scale->taps_s16 + (phases[px] * n_taps);

    /* accumulate tap * sample for each of the four channels */
    for (tap = 0; tap < n_taps; tap++, in += 4) {
      for (c = 0; c < 4; c++)
        acc[c] += coeffs[tap] * in[c];
    }

    for (c = 0; c < 4; c++) {
      gint v = (acc[c] + S16_SCALE_ROUND) >> S16_SCALE;

      out[c] = CLAMP (v, 0, 255);
    }
  }
}
/*
 * Vertical nearest-neighbour kernel for 4-byte pixels: the output line is a
 * straight copy of the first source line. @scale and @dest_offset are not
 * used.
 */
static void
video_scale_v_near_8888 (GstVideoScaler * scale,
    gpointer srcs[], gpointer dest, guint dest_offset, guint width)
{
  /* 4 bytes per pixel */
  const guint n_bytes = 4 * width;

  orc_memcpy (dest, srcs[0], n_bytes);
}
/*
 * Vertical 2-tap kernel for 4-byte pixels, implemented with ORC. Two source
 * lines are blended byte by byte (width * 4 bytes) with the second tap of the
 * phase belonging to output line @dest_offset. @dest is the output line
 * itself; @dest_offset only selects the taps.
 *
 * With LQ defined the taps are built with 8 bits of precision and the "_lq"
 * ORC function is used, otherwise S16_SCALE bits and the regular one.
 *
 * For interlaced scalers every second entry of @srcs is used, the same way
 * video_scale_v_ntap_8888() walks @srcs.
 */
static void
video_scale_v_2tap_8888 (GstVideoScaler * scale,
    gpointer srcs[], gpointer dest, guint dest_offset, guint width)
{
  gint max_taps, src_inc;
  guint32 *s1, *s2, *d;
  /* taps are signed 16-bit values; keep them signed on the way to ORC */
  gint p1;

  if (scale->taps_s16 == NULL)
#ifdef LQ
    make_s16_taps (scale, 8);
#else
    make_s16_taps (scale, S16_SCALE);
#endif

  max_taps = scale->resampler.max_taps;

  if (scale->flags & GST_VIDEO_SCALER_FLAG_INTERLACED)
    src_inc = 2;
  else
    src_inc = 1;

  d = (guint32 *) dest;
  s1 = (guint32 *) srcs[0];
  s2 = (guint32 *) srcs[src_inc];

  /* the integer tap table has one row per phase, so go through the phase of
   * this output line */
  p1 = scale->taps_s16[scale->resampler.phase[dest_offset] * max_taps + 1];

#ifdef LQ
  video_orc_resample_v_2tap_8_lq (d, s1, s2, p1, width * 4);
#else
  video_orc_resample_v_2tap_8 (d, s1, s2, p1, width * 4);
#endif
}
/*
 * Vertical 4-tap kernel for 4-byte pixels, implemented with ORC. Four source
 * lines are combined byte by byte (width * 4 bytes) with the four taps of the
 * phase belonging to output line @dest_offset. @dest is the output line
 * itself; @dest_offset only selects the taps.
 *
 * With LQ defined the taps are built with 6 bits of precision and the "_lq"
 * ORC function is used, otherwise S16_SCALE bits and the regular one.
 *
 * For interlaced scalers every second entry of @srcs is used, the same way
 * video_scale_v_ntap_8888() walks @srcs.
 */
static void
video_scale_v_4tap_8888 (GstVideoScaler * scale,
    gpointer srcs[], gpointer dest, guint dest_offset, guint width)
{
  gint max_taps, src_inc;
  guint32 *s1, *s2, *s3, *s4, *d;
  gint p1, p2, p3, p4;
  gint16 *taps;

  if (scale->taps_s16 == NULL)
#ifdef LQ
    make_s16_taps (scale, 6);
#else
    make_s16_taps (scale, S16_SCALE);
#endif

  max_taps = scale->resampler.max_taps;

  /* the integer tap table has one row per phase, so go through the phase of
   * this output line */
  taps = scale->taps_s16 + scale->resampler.phase[dest_offset] * max_taps;

  if (scale->flags & GST_VIDEO_SCALER_FLAG_INTERLACED)
    src_inc = 2;
  else
    src_inc = 1;

  d = (guint32 *) dest;
  s1 = (guint32 *) srcs[0 * src_inc];
  s2 = (guint32 *) srcs[1 * src_inc];
  s3 = (guint32 *) srcs[2 * src_inc];
  s4 = (guint32 *) srcs[3 * src_inc];

  p1 = taps[0];
  p2 = taps[1];
  p3 = taps[2];
  p4 = taps[3];

#ifdef LQ
  video_orc_resample_v_4tap_8_lq (d, s1, s2, s3, s4, p1, p2, p3, p4, width * 4);
#else
  video_orc_resample_v_4tap_8 (d, s1, s2, s3, s4, p1, p2, p3, p4, width * 4);
#endif
}
/*
 * Generic vertical kernel for 4-byte pixels. For every pixel of the output
 * line, the same pixel of max_taps source lines is weighted with the
 * fixed-point taps of the phase belonging to @dest_offset, per byte channel.
 * The sums are rounded, shifted back by S16_SCALE and clamped to 0..255.
 *
 * For interlaced scalers only every second entry of @srcs is used. The
 * integer taps are built with S16_SCALE precision on first use.
 */
static void
video_scale_v_ntap_8888 (GstVideoScaler * scale,
    gpointer srcs[], gpointer dest, guint dest_offset, guint width)
{
  const gint16 *coeffs;
  guint8 *out = (guint8 *) dest;
  gint n_taps, line_step, px, tap, c;

  if (scale->taps_s16 == NULL)
    make_s16_taps (scale, S16_SCALE);

  n_taps = scale->resampler.max_taps;
  coeffs = scale->taps_s16 + (scale->resampler.phase[dest_offset] * n_taps);
  line_step = (scale->flags & GST_VIDEO_SCALER_FLAG_INTERLACED) ? 2 : 1;

  for (px = 0; px < width; px++, out += 4) {
    gint acc[4] = { 0, 0, 0, 0 };

    /* accumulate tap * sample for each of the four channels */
    for (tap = 0; tap < n_taps; tap++) {
      const guint8 *in = (const guint8 *) (srcs[tap * line_step]) + 4 * px;

      for (c = 0; c < 4; c++)
        acc[c] += coeffs[tap] * in[c];
    }

    for (c = 0; c < 4; c++) {
      gint v = (acc[c] + S16_SCALE_ROUND) >> S16_SCALE;

      out[c] = CLAMP (v, 0, 255);
    }
  }
}
/**
 * gst_video_scaler_horizontal:
 * @scale: a #GstVideoScaler
 * @format: a #GstVideoFormat for @src and @dest
 * @src: source pixels
 * @dest: destination pixels
 * @dest_offset: the horizontal destination offset
 * @width: the number of pixels to scale
 *
 * Horizontally scale the pixels in @src to @dest, starting from @dest_offset
 * for @width samples. @dest_offset + @width must not exceed the output size
 * of @scale.
 *
 * The kernel is chosen from the number of taps of @scale only. @format is not
 * consulted at the moment; all kernels operate on 4 bytes per pixel.
 */
void
gst_video_scaler_horizontal (GstVideoScaler * scale, GstVideoFormat format,
    gpointer src, gpointer dest, guint dest_offset, guint width)
{
  GstVideoScalerHFunc func;

  g_return_if_fail (scale != NULL);
  g_return_if_fail (src != NULL);
  g_return_if_fail (dest != NULL);
  /* range check in two steps so that dest_offset + width cannot wrap around
   * and slip past the limit */
  g_return_if_fail (dest_offset <= scale->resampler.out_size);
  g_return_if_fail (width <= scale->resampler.out_size - dest_offset);

  switch (scale->resampler.max_taps) {
    case 1:
      func = video_scale_h_near_8888;
      break;
    case 2:
      func = video_scale_h_2tap_8888;
      break;
    default:
      func = video_scale_h_ntap_8888;
      break;
  }
  func (scale, src, dest, dest_offset, width);
}
/**
 * gst_video_scaler_vertical:
 * @scale: a #GstVideoScaler
 * @format: a #GstVideoFormat for @srcs and @dest
 * @srcs: source pixels lines
 * @dest: destination pixels
 * @dest_offset: the vertical destination offset
 * @width: the number of pixels to scale
 *
 * Vertically combine @width pixels in the lines in @srcs to @dest.
 * @dest is the location of the target line at @dest_offset and
 * @srcs are the input lines for @dest_offset, as obtained with
 * gst_video_scaler_get_coeff(). @dest_offset must be smaller than the output
 * size of @scale.
 *
 * The kernel is chosen from the number of taps of @scale only. @format is not
 * consulted at the moment; all kernels operate on 4 bytes per pixel.
 */
void
gst_video_scaler_vertical (GstVideoScaler * scale, GstVideoFormat format,
    gpointer srcs[], gpointer dest, guint dest_offset, guint width)
{
  GstVideoScalerVFunc func;

  g_return_if_fail (scale != NULL);
  g_return_if_fail (srcs != NULL);
  g_return_if_fail (dest != NULL);
  /* dest_offset indexes the per-output phase table, so out_size itself is
   * already one past the end */
  g_return_if_fail (dest_offset < scale->resampler.out_size);

  switch (scale->resampler.max_taps) {
    case 1:
      func = video_scale_v_near_8888;
      break;
    case 2:
      func = video_scale_v_2tap_8888;
      break;
    case 4:
      func = video_scale_v_4tap_8888;
      break;
    default:
      func = video_scale_v_ntap_8888;
      break;
  }
  func (scale, srcs, dest, dest_offset, width);
}