/* GStreamer * Copyright (C) <2014> Wim Taymans * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Library General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Library General Public License for more details. * * You should have received a copy of the GNU Library General Public * License along with this library; if not, write to the * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, * Boston, MA 02110-1301, USA. */ #ifdef HAVE_CONFIG_H # include "config.h" #endif #include #include #include #ifndef DISABLE_ORC #include #else #define orc_memcpy memcpy #endif #include "video-orc.h" #include "video-scaler.h" #define S16_SCALE 12 #define S16_SCALE_ROUND (1 << (S16_SCALE -1)) #define LQ typedef void (*GstVideoScalerHFunc) (GstVideoScaler * scale, gpointer src, gpointer dest, guint dest_offset, guint width); typedef void (*GstVideoScalerVFunc) (GstVideoScaler * scale, gpointer srcs[], gpointer dest, guint dest_offset, guint width); struct _GstVideoScaler { GstVideoResamplerMethod method; GstVideoScalerFlags flags; GstVideoResampler resampler; /* cached integer coefficients */ gint16 *taps_s16; /* for ORC */ gint inc; }; static void resampler_zip (GstVideoResampler * resampler, const GstVideoResampler * r1, const GstVideoResampler * r2) { guint i, out_size, max_taps; gdouble *taps; guint32 *offset, *phase; g_return_if_fail (r1->max_taps == r2->max_taps); out_size = r1->out_size + r2->out_size; max_taps = r1->max_taps; offset = g_malloc (sizeof (guint32) * out_size); phase = g_malloc (sizeof (guint32) * out_size); taps = g_malloc (sizeof (gdouble) * max_taps * out_size); resampler->in_size = r1->in_size + r2->in_size; 
resampler->out_size = out_size; resampler->max_taps = max_taps; resampler->offset = offset; resampler->phase = phase; resampler->n_taps = g_malloc (sizeof (guint32) * out_size); resampler->taps = taps; for (i = 0; i < out_size; i++) { guint idx = i / 2; const GstVideoResampler *r; r = (i & 1) ? r2 : r1; offset[i] = r->offset[idx] * 2 + (i & 1); phase[i] = i; memcpy (taps + i * max_taps, r->taps + idx * max_taps, max_taps * sizeof (gdouble)); } } /** * gst_video_scaler_new: * @method: a #GstVideoResamplerMethod * @flags: #GstVideoScalerFlags * @n_taps: number of taps to use * @in_size: number of source elements * @out_size: number of destination elements * @options: (allow none): extra options * * Make a new @method video scaler. @in_size source lines/pixels will * be scaled to @out_size destination lines/pixels. * * @n_taps specifies the amount of pixels to use from the source for one output * pixel. If n_taps is 0, this function chooses a good value automatically based * on the @method and @in_size/@out_size. 
*
 * Returns: a #GstVideoScaler
 */
GstVideoScaler *
gst_video_scaler_new (GstVideoResamplerMethod method, GstVideoScalerFlags flags,
    guint n_taps, guint in_size, guint out_size, GstStructure * options)
{
  GstVideoScaler *scale;

  g_return_val_if_fail (in_size != 0, NULL);
  g_return_val_if_fail (out_size != 0, NULL);

  scale = g_slice_new0 (GstVideoScaler);

  GST_DEBUG ("%d %u  %u->%u", method, n_taps, in_size, out_size);

  scale->method = method;
  scale->flags = flags;

  if (flags & GST_VIDEO_SCALER_FLAG_INTERLACED) {
    GstVideoResampler tresamp, bresamp;

    /* first field: the even entries, rounded up when the size is odd */
    gst_video_resampler_init (&tresamp, method, 0, (out_size + 1) / 2, n_taps,
        0.0, (in_size + 1) / 2, (out_size + 1) / 2, options);

    /* second field: whatever is left, resampled with a shift of -1.0 */
    gst_video_resampler_init (&bresamp, method, 0, out_size - tresamp.out_size,
        n_taps, -1.0, in_size - tresamp.in_size,
        out_size - tresamp.out_size, options);

    /* interleave both fields into the scaler's resampler; the temporaries
     * are copied, so they can be cleared afterwards */
    resampler_zip (&scale->resampler, &tresamp, &bresamp);
    gst_video_resampler_clear (&tresamp);
    gst_video_resampler_clear (&bresamp);
  } else {
    /* the third argument takes resampler flags, not scaler flags; pass none,
     * like the interlaced branch does */
    gst_video_resampler_init (&scale->resampler, method, 0, out_size, n_taps,
        0.0, in_size, out_size, options);
  }

  if (out_size == 1)
    scale->inc = 0;
  else
    /* 16.16 fixed point step; the 64-bit intermediate keeps
     * (in_size - 1) << 16 from wrapping once in_size exceeds 65536 */
    scale->inc =
        (gint) ((((guint64) in_size - 1) << 16) / (out_size - 1)) - 1;

  return scale;
}

/**
 * gst_video_scaler_free:
 * @scale: a #GstVideoScaler
 *
 * Free a previously allocated #GstVideoScaler @scale.
 */
void
gst_video_scaler_free (GstVideoScaler * scale)
{
  g_return_if_fail (scale != NULL);

  gst_video_resampler_clear (&scale->resampler);
  /* taps_s16 is only allocated on first use; g_free() accepts NULL */
  g_free (scale->taps_s16);
  g_slice_free (GstVideoScaler, scale);
}

/**
 * gst_video_scaler_get_coeff:
 * @scale: a #GstVideoScaler
 * @out_offset: an output offset
 * @in_offset: result input offset
 * @n_taps: result n_taps
 *
 * For a given pixel at @out_offset, get the first required input pixel at
 * @in_offset and the @n_taps filter coefficients.
 *
 * Note that for interlaced content, @in_offset needs to be incremented with
 * 2 to get the next input line.
 *
 * Returns: an array of @n_taps gdouble values with filter coefficients.
*/ const gdouble * gst_video_scaler_get_coeff (GstVideoScaler * scale, guint out_offset, guint * in_offset, guint * n_taps) { guint offset, phase; g_return_val_if_fail (scale != NULL, NULL); g_return_val_if_fail (out_offset < scale->resampler.out_size, NULL); offset = scale->resampler.offset[out_offset]; phase = scale->resampler.phase[out_offset]; if (in_offset) *in_offset = offset; if (n_taps) { *n_taps = scale->resampler.max_taps; if (scale->flags & GST_VIDEO_SCALER_FLAG_INTERLACED) *n_taps *= 2; } return scale->resampler.taps + phase * scale->resampler.max_taps; } static gboolean resampler_convert_coeff (const gdouble * src, gpointer dest, guint n, guint bits, guint precision) { gdouble multiplier; gint i, j; gdouble offset, l_offset, h_offset; gboolean exact = FALSE; multiplier = (1 << precision); /* Round to integer, but with an adjustable bias that we use to * eliminate the DC error. */ l_offset = 0.0; h_offset = 1.0; offset = 0.5; for (i = 0; i < 64; i++) { gint sum = 0; for (j = 0; j < n; j++) { gint16 tap = floor (offset + src[j] * multiplier); ((gint16 *) dest)[j] = tap; sum += tap; } if (sum == (1 << precision)) { exact = TRUE; break; } if (l_offset == h_offset) break; if (sum < (1 << precision)) { if (offset > l_offset) l_offset = offset; offset += (h_offset - l_offset) / 2; } else { if (offset < h_offset) h_offset = offset; offset -= (h_offset - l_offset) / 2; } } if (!exact) GST_WARNING ("can't find exact taps"); return exact; } static void make_s16_taps (GstVideoScaler * scale, gint precision) { gint i, max_taps, n_phases; gint16 *taps_s16; gdouble *taps; n_phases = scale->resampler.n_phases; max_taps = scale->resampler.max_taps; taps = scale->resampler.taps; taps_s16 = scale->taps_s16 = g_malloc (sizeof (gint16) * n_phases * max_taps); for (i = 0; i < n_phases; i++) { resampler_convert_coeff (taps, taps_s16, max_taps, 16, precision); taps += max_taps; taps_s16 += max_taps; } } static void video_scale_h_near_8888 (GstVideoScaler * scale, gpointer 
src, gpointer dest, guint dest_offset, guint width) { guint32 *s, *d; d = (guint32 *) dest + dest_offset; s = (guint32 *) src; #if 0 /* ORC is slower on this */ video_orc_resample_h_near_8888_lq (d, s, 0, scale->inc, width); #else { gint i; guint32 *offset; offset = scale->resampler.offset + dest_offset; for (i = 0; i < width; i++) d[i] = s[offset[i]]; } #endif } #if 0 #define BLEND_2TAP(a,b,p) (((((b)-(guint16)(a)) * p + S16_SCALE_ROUND) >> S16_SCALE) + (a)) static void video_scale_h_2tap_8888 (GstVideoScaler * scale, gpointer src, gpointer dest, guint dest_offset, guint width) { gint i, max_taps, sum0, sum1, sum2, sum3; guint8 *s1, *s2, *d; guint32 *offset, *phase; gint16 *taps, *t; if (scale->taps_s16 == NULL) make_s16_taps (scale, S16_SCALE); max_taps = scale->resampler.max_taps; offset = scale->resampler.offset + dest_offset; phase = scale->resampler.phase + dest_offset; taps = scale->taps_s16; d = (guint8 *) dest + 4 * dest_offset; for (i = 0; i < width; i++) { s1 = (guint8 *) src + 4 * offset[i]; s2 = s1 + 4; t = taps + (phase[i] * max_taps); sum0 = BLEND_2TAP (s1[0], s2[0], t[1]); sum1 = BLEND_2TAP (s1[1], s2[1], t[1]); sum2 = BLEND_2TAP (s1[2], s2[2], t[1]); sum3 = BLEND_2TAP (s1[3], s2[3], t[1]); d[i * 4 + 0] = CLAMP (sum0, 0, 255); d[i * 4 + 1] = CLAMP (sum1, 0, 255); d[i * 4 + 2] = CLAMP (sum2, 0, 255); d[i * 4 + 3] = CLAMP (sum3, 0, 255); } } #endif static void video_scale_h_2tap_8888 (GstVideoScaler * scale, gpointer src, gpointer dest, guint dest_offset, guint width) { guint32 *s, *d; d = (guint32 *) dest + dest_offset; s = (guint32 *) src; video_orc_resample_h_2tap_8888_lq (d, s, 0, scale->inc, width); } static void video_scale_h_ntap_8888 (GstVideoScaler * scale, gpointer src, gpointer dest, guint dest_offset, guint width) { gint16 *taps, *t; gint i, j, max_taps, sum0, sum1, sum2, sum3; guint8 *s, *d; guint32 *offset, *phase; if (scale->taps_s16 == NULL) make_s16_taps (scale, S16_SCALE); max_taps = scale->resampler.max_taps; offset = 
scale->resampler.offset + dest_offset; phase = scale->resampler.phase + dest_offset; taps = scale->taps_s16; d = (guint8 *) dest + 4 * dest_offset; for (i = 0; i < width; i++) { s = (guint8 *) src + 4 * offset[i]; t = taps + (phase[i] * max_taps); sum0 = sum1 = sum2 = sum3 = 0; for (j = 0; j < max_taps; j++) { sum0 += t[j] * s[j * 4 + 0]; sum1 += t[j] * s[j * 4 + 1]; sum2 += t[j] * s[j * 4 + 2]; sum3 += t[j] * s[j * 4 + 3]; } sum0 = (sum0 + S16_SCALE_ROUND) >> S16_SCALE; sum1 = (sum1 + S16_SCALE_ROUND) >> S16_SCALE; sum2 = (sum2 + S16_SCALE_ROUND) >> S16_SCALE; sum3 = (sum3 + S16_SCALE_ROUND) >> S16_SCALE; d[i * 4 + 0] = CLAMP (sum0, 0, 255); d[i * 4 + 1] = CLAMP (sum1, 0, 255); d[i * 4 + 2] = CLAMP (sum2, 0, 255); d[i * 4 + 3] = CLAMP (sum3, 0, 255); } } static void video_scale_v_near_8888 (GstVideoScaler * scale, gpointer srcs[], gpointer dest, guint dest_offset, guint width) { orc_memcpy (dest, srcs[0], 4 * width); } static void video_scale_v_2tap_8888 (GstVideoScaler * scale, gpointer srcs[], gpointer dest, guint dest_offset, guint width) { gint max_taps; guint32 *s1, *s2, *d; guint64 p1; if (scale->taps_s16 == NULL) #ifdef LQ make_s16_taps (scale, 8); #else make_s16_taps (scale, S16_SCALE); #endif max_taps = scale->resampler.max_taps; d = (guint32 *) dest; s1 = (guint32 *) srcs[0]; s2 = (guint32 *) srcs[1]; p1 = scale->taps_s16[dest_offset * max_taps + 1]; #ifdef LQ video_orc_resample_v_2tap_8_lq (d, s1, s2, p1, width * 4); #else video_orc_resample_v_2tap_8 (d, s1, s2, p1, width * 4); #endif } static void video_scale_v_4tap_8888 (GstVideoScaler * scale, gpointer srcs[], gpointer dest, guint dest_offset, guint width) { gint max_taps; guint32 *s1, *s2, *s3, *s4, *d; gint p1, p2, p3, p4; gint16 *taps; if (scale->taps_s16 == NULL) #ifdef LQ make_s16_taps (scale, 6); #else make_s16_taps (scale, S16_SCALE); #endif max_taps = scale->resampler.max_taps; taps = scale->taps_s16 + dest_offset * max_taps; d = (guint32 *) dest; s1 = (guint32 *) srcs[0]; s2 = (guint32 *) 
srcs[1]; s3 = (guint32 *) srcs[2]; s4 = (guint32 *) srcs[3]; p1 = taps[0]; p2 = taps[1]; p3 = taps[2]; p4 = taps[3]; #ifdef LQ video_orc_resample_v_4tap_8_lq (d, s1, s2, s3, s4, p1, p2, p3, p4, width * 4); #else video_orc_resample_v_4tap_8 (d, s1, s2, s3, s4, p1, p2, p3, p4, width * 4); #endif } static void video_scale_v_ntap_8888 (GstVideoScaler * scale, gpointer srcs[], gpointer dest, guint dest_offset, guint width) { gint16 *t; gint i, j, k, max_taps, sum0, sum1, sum2, sum3, src_inc; guint8 *s, *d; if (scale->taps_s16 == NULL) make_s16_taps (scale, S16_SCALE); max_taps = scale->resampler.max_taps; t = scale->taps_s16 + (scale->resampler.phase[dest_offset] * max_taps); d = (guint8 *) dest; if (scale->flags & GST_VIDEO_SCALER_FLAG_INTERLACED) src_inc = 2; else src_inc = 1; for (i = 0; i < width; i++) { sum0 = sum1 = sum2 = sum3 = 0; for (j = 0, k = 0; j < max_taps; j++, k += src_inc) { s = (guint8 *) (srcs[k]); sum0 += t[j] * s[4 * i + 0]; sum1 += t[j] * s[4 * i + 1]; sum2 += t[j] * s[4 * i + 2]; sum3 += t[j] * s[4 * i + 3]; } sum0 = (sum0 + S16_SCALE_ROUND) >> S16_SCALE; sum1 = (sum1 + S16_SCALE_ROUND) >> S16_SCALE; sum2 = (sum2 + S16_SCALE_ROUND) >> S16_SCALE; sum3 = (sum3 + S16_SCALE_ROUND) >> S16_SCALE; d[i * 4 + 0] = CLAMP (sum0, 0, 255); d[i * 4 + 1] = CLAMP (sum1, 0, 255); d[i * 4 + 2] = CLAMP (sum2, 0, 255); d[i * 4 + 3] = CLAMP (sum3, 0, 255); } } /** * gst_video_scaler_horizontal: * @scale: a #GstVideoScaler * @format: a #GstVideoFormat for @src and @dest * @src: source pixels * @dest: destination pixels * @dest_offset: the horizontal destination offset * @width: the number of pixels to scale * * Horizontally scale the pixels in @src to @dest, starting from @dest_offset * for @width samples. 
*/ void gst_video_scaler_horizontal (GstVideoScaler * scale, GstVideoFormat format, gpointer src, gpointer dest, guint dest_offset, guint width) { GstVideoScalerHFunc func; g_return_if_fail (scale != NULL); g_return_if_fail (src != NULL); g_return_if_fail (dest != NULL); g_return_if_fail (dest_offset + width <= scale->resampler.out_size); switch (scale->resampler.max_taps) { case 1: func = video_scale_h_near_8888; break; case 2: func = video_scale_h_2tap_8888; break; default: func = video_scale_h_ntap_8888; break; } func (scale, src, dest, dest_offset, width); } /** * gst_video_scaler_vertical: * @scale: a #GstVideoScaler * @format: a #GstVideoFormat for @srcs and @dest * @srcs: source pixels lines * @dest: destination pixels * @dest_offset: the vertical destination offset * @width: the number of pixels to scale * * Vertically combine @width pixels in the lines in @srcs to @dest. * @dest is the location of the target line at @dest_offset and * @srcs are the input lines for @dest_offset, as obtained with * gst_video_scaler_get_info(). */ void gst_video_scaler_vertical (GstVideoScaler * scale, GstVideoFormat format, gpointer srcs[], gpointer dest, guint dest_offset, guint width) { GstVideoScalerVFunc func; g_return_if_fail (scale != NULL); g_return_if_fail (srcs != NULL); g_return_if_fail (dest != NULL); g_return_if_fail (dest_offset <= scale->resampler.out_size); switch (scale->resampler.max_taps) { case 1: func = video_scale_v_near_8888; break; case 2: func = video_scale_v_2tap_8888; break; case 4: func = video_scale_v_4tap_8888; break; default: func = video_scale_v_ntap_8888; break; } func (scale, srcs, dest, dest_offset, width); }