video-scaler: add support for 16-bit formats

Add scaler functions for 16-bit formats.
Rename the scaler functions so that the 16-bit versions don't look too
weird.
Remove the old, unused h_2tap functions.
Fix the v_ntap functions, which were using one tap too few.
This commit is contained in:
Wim Taymans 2014-11-03 15:36:26 +01:00
parent e72a01e949
commit bd6d2b40d1
2 changed files with 355 additions and 140 deletions

View file

@ -1285,7 +1285,7 @@ x4 addssw aq, aq, q1
x4 convssswb ayuv2, aq x4 convssswb ayuv2, aq
x4 addb ayuv, ayuv2, c128 x4 addb ayuv, ayuv2, c128
#.function video_orc_resample_h_near_8888 #.function video_orc_resample_h_near_u32
#.source 4 src guint32 #.source 4 src guint32
#.source 4 idx #.source 4 idx
#.dest 4 dest guint32 #.dest 4 dest guint32
@ -1294,7 +1294,7 @@ x4 addb ayuv, ayuv2, c128
#loadidxl t, src, idx #loadidxl t, src, idx
#storel dest, t #storel dest, t
.function video_orc_resample_h_near_8888_lq .function video_orc_resample_h_near_u32_lq
.dest 4 d1 guint32 .dest 4 d1 guint32
.source 4 s1 guint32 .source 4 s1 guint32
.param 4 p1 .param 4 p1
@ -1302,52 +1302,7 @@ x4 addb ayuv, ayuv2, c128
ldresnearl d1, s1, p1, p2 ldresnearl d1, s1, p1, p2
#.function video_orc_resample_h_2tap_8888_16 .function video_orc_resample_h_2tap_4u8_lq
#.source 4 src1 guint32
#.source 4 src2 guint32
#.source 8 coef1 guint64
#.source 8 coef2 guint64
#.source 4 idx
#.dest 4 dest guint32
#.temp 4 t1
#.temp 4 t2
#.temp 8 q1
#.temp 8 q2
#
#loadidxl t1, src1, idx
#x4 convubw q1, t1
#x4 mulhuw q1, q1, coef1
#
#loadidxl t2, src2, idx
#x4 convubw q2, t2
#x4 mulhuw q2, q2, coef2
#
#x4 addw q2, q2, q1
#x4 convuuswb dest, q2
#
#.function video_orc_resample_h_2tap_8888_lq
#.source 4 src1 guint32
#.source 4 src2 guint32
#.source 8 coef1 guint64
#.source 4 idx
#.dest 4 dest guint32
#.temp 4 t1
#.temp 4 t2
#.temp 8 q1
#.temp 8 q2
#
#loadidxl t1, src1, idx
#x4 convubw q1, t1
#loadidxl t2, src2, idx
#x4 convubw q2, t2
#x4 subw q2, q2, q1
#
#x4 mullw q2, q2, coef1
#x4 addw q2, q2, 128
#x4 convhwb t2, q2
#x4 addb dest, t2, t1
.function video_orc_resample_h_2tap_8888_lq
.dest 4 d1 guint32 .dest 4 d1 guint32
.source 4 s1 guint32 .source 4 s1 guint32
.param 4 p1 .param 4 p1
@ -1355,11 +1310,11 @@ ldresnearl d1, s1, p1, p2
ldreslinl d1, s1, p1, p2 ldreslinl d1, s1, p1, p2
.function video_orc_resample_v_2tap_8_lq .function video_orc_resample_v_2tap_u8_lq
.source 1 src1 guint32 .source 1 src1 guint32
.source 1 src2 guint32 .source 1 src2 guint32
.dest 1 dest guint32 .dest 1 dest guint32
.param 2 p1 .param 2 p1 gint16
.temp 1 t .temp 1 t
.temp 2 w1 .temp 2 w1
.temp 2 w2 .temp 2 w2
@ -1372,11 +1327,31 @@ addw w2, w2, 128
convhwb t, w2 convhwb t, w2
addb dest, t, src1 addb dest, t, src1
.function video_orc_resample_v_2tap_8 .function video_orc_resample_v_2tap_u16
.source 2 src1 guint64
.source 2 src2 guint64
.dest 2 dest guint64
.param 4 p1 gint16
.temp 2 t
.temp 4 l1
.temp 4 l2
.temp 4 l3
convuwl l1, src1
convuwl l2, src2
subl l2, l2, l1
convuwl l3, p1
mulll l2, l2, l3
addl l2, l2, 4096
shrul l2, l2, 12
convlw t, l2
addw dest, t, src1
.function video_orc_resample_v_2tap_u8
.source 1 s1 guint32 .source 1 s1 guint32
.source 1 s2 guint32 .source 1 s2 guint32
.dest 1 d1 guint32 .dest 1 d1 guint32
.param 2 p1 .param 2 p1 gint16
.temp 1 t .temp 1 t
.temp 2 w1 .temp 2 w1
.temp 2 w2 .temp 2 w2
@ -1393,16 +1368,16 @@ convlw w2, t2
addw w2, w2, w1 addw w2, w2, w1
convsuswb d1, w2 convsuswb d1, w2
.function video_orc_resample_v_4tap_8_lq .function video_orc_resample_v_4tap_u8_lq
.source 1 s1 guint32 .source 1 s1 guint32
.source 1 s2 guint32 .source 1 s2 guint32
.source 1 s3 guint32 .source 1 s3 guint32
.source 1 s4 guint32 .source 1 s4 guint32
.dest 1 d1 guint32 .dest 1 d1 guint32
.param 2 p1 .param 2 p1 gint16
.param 2 p2 .param 2 p2 gint16
.param 2 p3 .param 2 p3 gint16
.param 2 p4 .param 2 p4 gint16
.temp 2 w1 .temp 2 w1
.temp 2 w2 .temp 2 w2
@ -1421,16 +1396,16 @@ addw w1, w1, 32
shrsw w1, w1, 6 shrsw w1, w1, 6
convsuswb d1, w1 convsuswb d1, w1
.function video_orc_resample_v_4tap_8 .function video_orc_resample_v_4tap_u8
.source 1 s1 guint32 .source 1 s1 guint32
.source 1 s2 guint32 .source 1 s2 guint32
.source 1 s3 guint32 .source 1 s3 guint32
.source 1 s4 guint32 .source 1 s4 guint32
.dest 1 d1 guint32 .dest 1 d1 guint32
.param 2 p1 .param 2 p1 gint16
.param 2 p2 .param 2 p2 gint16
.param 2 p3 .param 2 p3 gint16
.param 2 p4 .param 2 p4 gint16
.temp 2 w1 .temp 2 w1
.temp 2 w2 .temp 2 w2
.temp 4 t1 .temp 4 t1
@ -1452,7 +1427,8 @@ shrsl t1, t1, 12
convlw w1, t1 convlw w1, t1
convsuswb d1, w1 convsuswb d1, w1
#.function video_orc_resample_h_4tap_8 # crashes ORC for now but is potentially faster
#.function video_orc_resample_h_4tap_u8
#.source 1 s1 guint32 #.source 1 s1 guint32
#.source 1 s2 guint32 #.source 1 s2 guint32
#.source 1 s3 guint32 #.source 1 s3 guint32
@ -1484,7 +1460,7 @@ convsuswb d1, w1
#convsuswb d1, w1 #convsuswb d1, w1
.function video_orc_resample_h_multaps_8 .function video_orc_resample_h_multaps_u8
.source 1 s guint32 .source 1 s guint32
.source 2 t gint16 .source 2 t gint16
.dest 4 d gint32 .dest 4 d gint32
@ -1493,7 +1469,7 @@ convsuswb d1, w1
convubw w1, s convubw w1, s
mulswl d, w1, t mulswl d, w1, t
.function video_orc_resample_h_muladdtaps_8 .function video_orc_resample_h_muladdtaps_u8
.flags 2d .flags 2d
.source 1 s guint32 .source 1 s guint32
.source 2 t gint16 .source 2 t gint16
@ -1505,9 +1481,9 @@ convubw w1, s
mulswl t1, w1, t mulswl t1, w1, t
addl d, d, t1 addl d, d, t1
.function video_orc_resample_scaletaps_8 .function video_orc_resample_scaletaps_u8
.source 4 s gint32 .source 4 s gint32
.dest 1 d guint8 .dest 1 d guint32
.temp 2 w1 .temp 2 w1
.temp 4 t1 .temp 4 t1
@ -1516,7 +1492,7 @@ shrsl t1, t1, 12
convlw w1, t1 convlw w1, t1
convsuswb d, w1 convsuswb d, w1
.function video_orc_resample_h_multaps_8_lq .function video_orc_resample_h_multaps_u8_lq
.source 1 s guint32 .source 1 s guint32
.source 2 t gint16 .source 2 t gint16
.dest 2 d gint32 .dest 2 d gint32
@ -1525,28 +1501,60 @@ convsuswb d, w1
convubw w1, s convubw w1, s
mullw d, w1, t mullw d, w1, t
.function video_orc_resample_h_muladdtaps_8_lq .function video_orc_resample_h_muladdtaps_u8_lq
.flags 2d .flags 2d
.source 1 s guint32 .source 1 s guint32
.source 2 t gint16 .source 2 t gint16
.dest 2 d gint32 .dest 2 d gint32
.temp 2 w1 .temp 2 w1
.temp 2 t1
convubw w1, s convubw w1, s
mullw t1, w1, t mullw w1, w1, t
addw d, d, t1 addw d, d, w1
.function video_orc_resample_scaletaps_8_lq .function video_orc_resample_scaletaps_u8_lq
.source 2 s gint32 .source 2 s gint32
.dest 1 d guint8 .dest 1 d guint32
.temp 2 w1 .temp 2 w1
addw w1, s, 32 addw w1, s, 32
shrsw w1, w1, 6 shrsw w1, w1, 6
convsuswb d, w1 convsuswb d, w1
.function video_orc_resample_v_multaps_8 .function video_orc_resample_h_multaps_u16
.source 2 s guint64
.source 2 t gint16
.dest 4 d gint32
.temp 4 l1
.temp 4 l2
convuwl l1, s
convswl l2, t
mulll d, l1, l2
# Multiply-accumulate step of the horizontal n-tap filter for 16-bit samples:
# each unsigned 16-bit sample in s is multiplied by the signed 16-bit tap in t
# and the 32-bit product is added to the value already stored in d.
.function video_orc_resample_h_muladdtaps_u16
# 2d kernel: the C caller supplies a stride for every array plus a row count
.flags 2d
.source 2 s guint64
.source 2 t gint16
.dest 4 d gint32
.temp 4 l1
.temp 4 l2
# widen the unsigned sample to 32 bits
convuwl l1, s
# widen the signed tap to 32 bits
convswl l2, t
# 32-bit product of sample and tap
mulll l1, l1, l2
# accumulate into the destination
addl d, d, l1
# Final step of the n-tap filters for 16-bit samples: scale the 32-bit
# accumulated sums in s back down and store them as unsigned 16-bit samples.
.function video_orc_resample_scaletaps_u16
.source 4 s gint32
.dest 2 d guint64
.temp 4 t1
# add the bias before shifting
# NOTE(review): the bias is 4095, whereas SCALE_U16_ROUND in video-scaler.c is
# 1 << 11 (2048) - confirm which rounding is intended
addl t1, s, 4095
# signed shift right by 12, the same value as SCALE_U16 in video-scaler.c
shrsl t1, t1, 12
# convert signed 32 bit to unsigned 16 bit with saturation
convsuslw d, t1
.function video_orc_resample_v_multaps_u8
.source 1 s guint32 .source 1 s guint32
.param 2 t gint16 .param 2 t gint16
.dest 4 d gint32 .dest 4 d gint32
@ -1555,7 +1563,7 @@ convsuswb d, w1
convubw w1, s convubw w1, s
mulswl d, w1, t mulswl d, w1, t
.function video_orc_resample_v_muladdtaps_8 .function video_orc_resample_v_muladdtaps_u8
.source 1 s guint32 .source 1 s guint32
.param 2 t gint16 .param 2 t gint16
.dest 4 d gint32 .dest 4 d gint32
@ -1566,7 +1574,28 @@ convubw w1, s
mulswl t1, w1, t mulswl t1, w1, t
addl d, d, t1 addl d, d, t1
.function video_orc_resample_v_multaps_8_lq .function video_orc_resample_v_multaps_u16
.source 2 s guint64
.param 2 t gint16
.dest 4 d gint32
.temp 4 l1
convuwl l1, s
mulll d, l1, t
# Multiply-accumulate step of the vertical n-tap filter for 16-bit samples:
# every unsigned 16-bit sample of the line s is multiplied by the single signed
# 16-bit tap t (a parameter, identical for the whole line) and the 32-bit
# product is added to the value already stored in d.
.function video_orc_resample_v_muladdtaps_u16
.source 2 s guint64
.param 2 t gint16
.dest 4 d gint32
.temp 4 t1
.temp 4 t2
# widen the unsigned sample to 32 bits
convuwl t1, s
# widen the signed tap to 32 bits
convswl t2, t
# 32-bit product of sample and tap
mulll t1, t1, t2
# accumulate into the destination
addl d, d, t1
.function video_orc_resample_v_multaps_u8_lq
.source 1 s guint32 .source 1 s guint32
.param 2 t gint16 .param 2 t gint16
.dest 2 d gint32 .dest 2 d gint32
@ -1575,13 +1604,12 @@ addl d, d, t1
convubw w1, s convubw w1, s
mullw d, w1, t mullw d, w1, t
.function video_orc_resample_v_muladdtaps_8_lq .function video_orc_resample_v_muladdtaps_u8_lq
.source 1 s guint32 .source 1 s guint32
.param 2 t gint16 .param 2 t gint16
.dest 2 d gint32 .dest 2 d gint32
.temp 2 w1 .temp 2 w1
.temp 2 t1
convubw w1, s convubw w1, s
mullw t1, w1, t mullw w1, w1, t
addw d, d, t1 addw d, d, w1

View file

@ -34,8 +34,12 @@
#include "video-orc.h" #include "video-orc.h"
#include "video-scaler.h" #include "video-scaler.h"
#define S16_SCALE 12 #define SCALE_U8 12
#define S16_SCALE_ROUND (1 << (S16_SCALE -1)) #define SCALE_U8_ROUND (1 << (SCALE_U8 -1))
#define SCALE_U8_LQ 6
#define SCALE_U8_LQ_ROUND (1 << (SCALE_U8_LQ -1))
#define SCALE_U16 12
#define SCALE_U16_ROUND (1 << (SCALE_U16 -1))
#define LQ #define LQ
@ -321,7 +325,7 @@ make_s16_taps (GstVideoScaler * scale, gint precision)
} }
static void static void
video_scale_h_near_8888 (GstVideoScaler * scale, video_scale_h_near_u32 (GstVideoScaler * scale,
gpointer src, gpointer dest, guint dest_offset, guint width) gpointer src, gpointer dest, guint dest_offset, guint width)
{ {
guint32 *s, *d; guint32 *s, *d;
@ -331,7 +335,7 @@ video_scale_h_near_8888 (GstVideoScaler * scale,
#if 0 #if 0
/* ORC is slower on this */ /* ORC is slower on this */
video_orc_resample_h_near_8888_lq (d, s, 0, scale->inc, width); video_orc_resample_h_near_u32 (d, s, 0, scale->inc, width);
#else #else
{ {
gint i; gint i;
@ -345,7 +349,24 @@ video_scale_h_near_8888 (GstVideoScaler * scale,
} }
static void static void
video_scale_h_2tap_8888 (GstVideoScaler * scale, video_scale_h_near_u64 (GstVideoScaler * scale,
gpointer src, gpointer dest, guint dest_offset, guint width)
{
guint64 *s, *d;
gint i;
guint32 *offset;
d = (guint64 *) dest + dest_offset;
s = (guint64 *) src;
offset = scale->resampler.offset + dest_offset;
for (i = 0; i < width; i++)
d[i] = s[offset[i]];
}
static void
video_scale_h_2tap_4u8 (GstVideoScaler * scale,
gpointer src, gpointer dest, guint dest_offset, guint width) gpointer src, gpointer dest, guint dest_offset, guint width)
{ {
guint32 *s, *d; guint32 *s, *d;
@ -353,31 +374,31 @@ video_scale_h_2tap_8888 (GstVideoScaler * scale,
d = (guint32 *) dest + dest_offset; d = (guint32 *) dest + dest_offset;
s = (guint32 *) src; s = (guint32 *) src;
video_orc_resample_h_2tap_8888_lq (d, s, 0, scale->inc, width); video_orc_resample_h_2tap_4u8_lq (d, s, 0, scale->inc, width);
} }
static void static void
video_scale_h_ntap_8888 (GstVideoScaler * scale, video_scale_h_ntap_4u8 (GstVideoScaler * scale,
gpointer src, gpointer dest, guint dest_offset, guint width) gpointer src, gpointer dest, guint dest_offset, guint width)
{ {
gint16 *taps; gint16 *taps;
gint i, max_taps, count; gint i, max_taps, count;
guint8 *d; guint32 *d;
guint32 *offset_n; guint32 *offset_n;
guint32 *pixels; guint32 *pixels;
gint32 *temp; gint32 *temp;
if (scale->taps_s16 == NULL) if (scale->taps_s16 == NULL)
#ifdef LQ #ifdef LQ
make_s16_taps (scale, 6); make_s16_taps (scale, SCALE_U8_LQ);
#else #else
make_s16_taps (scale, S16_SCALE); make_s16_taps (scale, SCALE_U8);
#endif #endif
max_taps = scale->resampler.max_taps; max_taps = scale->resampler.max_taps;
offset_n = scale->offset_n; offset_n = scale->offset_n;
d = (guint8 *) dest + 4 * dest_offset; d = (guint32 *) dest + dest_offset;
/* prepare the arrays FIXME, we can add this into ORC */ /* prepare the arrays FIXME, we can add this into ORC */
count = width * max_taps; count = width * max_taps;
@ -391,43 +412,88 @@ video_scale_h_ntap_8888 (GstVideoScaler * scale,
#ifdef LQ #ifdef LQ
/* first pixels with first tap to t4 */ /* first pixels with first tap to t4 */
video_orc_resample_h_multaps_8_lq (temp, pixels, taps, count); video_orc_resample_h_multaps_u8_lq (temp, pixels, taps, count);
/* add other pixels with other taps to t4 */ /* add other pixels with other taps to t4 */
video_orc_resample_h_muladdtaps_8_lq (temp, 0, pixels + width, count, video_orc_resample_h_muladdtaps_u8_lq (temp, 0, pixels + width, count,
taps + count, count * 2, count, max_taps - 1); taps + count, count * 2, count, max_taps - 1);
/* scale and write final result */ /* scale and write final result */
video_orc_resample_scaletaps_8_lq (d, temp, count); video_orc_resample_scaletaps_u8_lq (d, temp, count);
#else #else
/* first pixels with first tap to t4 */ /* first pixels with first tap to t4 */
video_orc_resample_h_multaps_8 (temp, pixels, taps, count); video_orc_resample_h_multaps_u8 (temp, pixels, taps, count);
/* add other pixels with other taps to t4 */ /* add other pixels with other taps to t4 */
video_orc_resample_h_muladdtaps_8 (temp, 0, pixels + width, count, video_orc_resample_h_muladdtaps_u8 (temp, 0, pixels + width, count,
taps + count, count * 2, count, max_taps - 1); taps + count, count * 2, count, max_taps - 1);
/* scale and write final result */ /* scale and write final result */
video_orc_resample_scaletaps_8 (d, temp, count); video_orc_resample_scaletaps_u8 (d, temp, count);
#endif #endif
} }
static void static void
video_scale_v_near_8888 (GstVideoScaler * scale, video_scale_h_ntap_4u16 (GstVideoScaler * scale,
gpointer src, gpointer dest, guint dest_offset, guint width)
{
gint16 *taps;
gint i, max_taps, count;
guint64 *d;
guint32 *offset_n;
guint64 *pixels;
gint32 *temp;
if (scale->taps_s16 == NULL)
make_s16_taps (scale, SCALE_U16);
max_taps = scale->resampler.max_taps;
offset_n = scale->offset_n;
d = (guint64 *) dest + dest_offset;
/* prepare the arrays FIXME, we can add this into ORC */
count = width * max_taps;
pixels = (guint64 *) scale->tmpline1;
for (i = 0; i < count; i++)
pixels[i] = ((guint64 *) src)[offset_n[i]];
temp = (gint32 *) scale->tmpline2;
taps = scale->taps_s16_4;
count = width * 4;
/* first pixels with first tap to t4 */
video_orc_resample_h_multaps_u16 (temp, pixels, taps, count);
/* add other pixels with other taps to t4 */
video_orc_resample_h_muladdtaps_u16 (temp, 0, pixels + width, count * 2,
taps + count, count * 2, count, max_taps - 1);
/* scale and write final result */
video_orc_resample_scaletaps_u16 (d, temp, count);
}
static void
video_scale_v_near_u32 (GstVideoScaler * scale,
gpointer srcs[], gpointer dest, guint dest_offset, guint width) gpointer srcs[], gpointer dest, guint dest_offset, guint width)
{ {
orc_memcpy (dest, srcs[0], 4 * width); orc_memcpy (dest, srcs[0], 4 * width);
} }
static void static void
video_scale_v_2tap_8888 (GstVideoScaler * scale, video_scale_v_near_u64 (GstVideoScaler * scale,
gpointer srcs[], gpointer dest, guint dest_offset, guint width)
{
orc_memcpy (dest, srcs[0], 8 * width);
}
static void
video_scale_v_2tap_4u8 (GstVideoScaler * scale,
gpointer srcs[], gpointer dest, guint dest_offset, guint width) gpointer srcs[], gpointer dest, guint dest_offset, guint width)
{ {
gint max_taps; gint max_taps;
guint32 *s1, *s2, *d; guint32 *s1, *s2, *d;
guint64 p1; gint16 p1;
if (scale->taps_s16 == NULL) if (scale->taps_s16 == NULL)
#ifdef LQ #ifdef LQ
make_s16_taps (scale, 8); make_s16_taps (scale, SCALE_U8_LQ + 2);
#else #else
make_s16_taps (scale, S16_SCALE); make_s16_taps (scale, SCALE_U8);
#endif #endif
max_taps = scale->resampler.max_taps; max_taps = scale->resampler.max_taps;
@ -438,12 +504,33 @@ video_scale_v_2tap_8888 (GstVideoScaler * scale,
p1 = scale->taps_s16[dest_offset * max_taps + 1]; p1 = scale->taps_s16[dest_offset * max_taps + 1];
#ifdef LQ #ifdef LQ
video_orc_resample_v_2tap_8_lq (d, s1, s2, p1, width * 4); video_orc_resample_v_2tap_u8_lq (d, s1, s2, p1, width * 4);
#else #else
video_orc_resample_v_2tap_8 (d, s1, s2, p1, width * 4); video_orc_resample_v_2tap_u8 (d, s1, s2, p1, width * 4);
#endif #endif
} }
/* Vertical 2-tap scaling of one line of 64-bit pixels (four 16-bit samples
 * each).  The two input lines srcs[0] and srcs[1] are blended into @dest by
 * the video_orc_resample_v_2tap_u16 kernel, weighted with the second tap
 * stored for @dest_offset.
 *
 * NOTE(review): the tap is looked up with dest_offset directly, while the
 * n-tap variant goes through resampler.phase[dest_offset] - confirm that this
 * difference is intended. */
static void
video_scale_v_2tap_4u16 (GstVideoScaler * scale,
    gpointer srcs[], gpointer dest, guint dest_offset, guint width)
{
  guint64 *line0, *line1, *out;
  gint n_taps;
  gint16 weight;

  /* the 16-bit fixed point taps are created lazily on first use */
  if (scale->taps_s16 == NULL)
    make_s16_taps (scale, SCALE_U16);

  n_taps = scale->resampler.max_taps;
  weight = scale->taps_s16[dest_offset * n_taps + 1];

  line0 = (guint64 *) srcs[0];
  line1 = (guint64 *) srcs[1];
  out = (guint64 *) dest;

  /* the kernel works on 16-bit samples, four per pixel */
  video_orc_resample_v_2tap_u16 (out, line0, line1, weight, width * 4);
}
#if 0 #if 0
static void static void
video_scale_h_4tap_8888 (GstVideoScaler * scale, video_scale_h_4tap_8888 (GstVideoScaler * scale,
@ -479,7 +566,7 @@ video_scale_h_4tap_8888 (GstVideoScaler * scale,
#endif #endif
static void static void
video_scale_v_4tap_8888 (GstVideoScaler * scale, video_scale_v_4tap_4u8 (GstVideoScaler * scale,
gpointer srcs[], gpointer dest, guint dest_offset, guint width) gpointer srcs[], gpointer dest, guint dest_offset, guint width)
{ {
gint max_taps; gint max_taps;
@ -489,9 +576,9 @@ video_scale_v_4tap_8888 (GstVideoScaler * scale,
if (scale->taps_s16 == NULL) if (scale->taps_s16 == NULL)
#ifdef LQ #ifdef LQ
make_s16_taps (scale, 6); make_s16_taps (scale, SCALE_U8_LQ);
#else #else
make_s16_taps (scale, S16_SCALE); make_s16_taps (scale, SCALE_U8);
#endif #endif
max_taps = scale->resampler.max_taps; max_taps = scale->resampler.max_taps;
@ -513,32 +600,33 @@ video_scale_v_4tap_8888 (GstVideoScaler * scale,
p4 = taps[3]; p4 = taps[3];
#ifdef LQ #ifdef LQ
video_orc_resample_v_4tap_8_lq (d, s1, s2, s3, s4, p1, p2, p3, p4, width * 4); video_orc_resample_v_4tap_u8_lq (d, s1, s2, s3, s4, p1, p2, p3, p4,
width * 4);
#else #else
video_orc_resample_v_4tap_8 (d, s1, s2, s3, s4, p1, p2, p3, p4, width * 4); video_orc_resample_v_4tap_u8 (d, s1, s2, s3, s4, p1, p2, p3, p4, width * 4);
#endif #endif
} }
static void static void
video_scale_v_ntap_8888 (GstVideoScaler * scale, video_scale_v_ntap_4u8 (GstVideoScaler * scale,
gpointer srcs[], gpointer dest, guint dest_offset, guint width) gpointer srcs[], gpointer dest, guint dest_offset, guint width)
{ {
gint16 *taps; gint16 *taps;
gint i, max_taps, count, src_inc; gint i, max_taps, count, src_inc;
guint8 *d; guint32 *d;
gint32 *temp; gint32 *temp;
if (scale->taps_s16 == NULL) if (scale->taps_s16 == NULL)
#ifdef LQ #ifdef LQ
make_s16_taps (scale, 6); make_s16_taps (scale, SCALE_U8_LQ);
#else #else
make_s16_taps (scale, S16_SCALE); make_s16_taps (scale, SCALE_U8);
#endif #endif
max_taps = scale->resampler.max_taps; max_taps = scale->resampler.max_taps;
taps = scale->taps_s16 + (scale->resampler.phase[dest_offset] * max_taps); taps = scale->taps_s16 + (scale->resampler.phase[dest_offset] * max_taps);
d = (guint8 *) dest; d = (guint32 *) dest;
if (scale->flags & GST_VIDEO_SCALER_FLAG_INTERLACED) if (scale->flags & GST_VIDEO_SCALER_FLAG_INTERLACED)
src_inc = 2; src_inc = 2;
@ -549,21 +637,55 @@ video_scale_v_ntap_8888 (GstVideoScaler * scale,
count = width * 4; count = width * 4;
#ifdef LQ #ifdef LQ
video_orc_resample_v_multaps_8_lq (temp, srcs[0], taps[0], count); video_orc_resample_v_multaps_u8_lq (temp, srcs[0], taps[0], count);
for (i = 1; i < max_taps - 1; i++) { for (i = 1; i < max_taps; i++) {
video_orc_resample_v_muladdtaps_8_lq (temp, srcs[i * src_inc], taps[i], video_orc_resample_v_muladdtaps_u8_lq (temp, srcs[i * src_inc], taps[i],
count); count);
} }
video_orc_resample_scaletaps_8_lq (d, temp, count); video_orc_resample_scaletaps_u8_lq (d, temp, count);
#else #else
video_orc_resample_v_multaps_8 (temp, srcs[0], taps[0], count); video_orc_resample_v_multaps_u8 (temp, srcs[0], taps[0], count);
for (i = 1; i < max_taps - 1; i++) { for (i = 1; i < max_taps; i++) {
video_orc_resample_v_muladdtaps_8 (temp, srcs[i * src_inc], taps[i], count); video_orc_resample_v_muladdtaps_u8 (temp, srcs[i * src_inc], taps[i],
count);
} }
video_orc_resample_scaletaps_8 (d, temp, count); video_orc_resample_scaletaps_u8 (d, temp, count);
#endif #endif
} }
/* Vertical n-tap scaling of one line of 64-bit pixels (four 16-bit samples
 * each).  The first source line is multiplied with the first tap into a
 * 32-bit temporary line, the remaining max_taps - 1 lines are
 * multiply-accumulated on top of it, and the sum is finally scaled back to
 * 16 bits and written to @dest. */
static void
video_scale_v_ntap_4u16 (GstVideoScaler * scale,
    gpointer srcs[], gpointer dest, guint dest_offset, guint width)
{
  gint16 *coeffs;
  gint tap, n_taps, n_samples, line_step;
  guint64 *out;
  gint32 *acc;

  /* the 16-bit fixed point taps are created lazily on first use */
  if (scale->taps_s16 == NULL)
    make_s16_taps (scale, SCALE_U16);

  n_taps = scale->resampler.max_taps;
  /* taps of the phase that belongs to this output line */
  coeffs = scale->taps_s16 + (scale->resampler.phase[dest_offset] * n_taps);
  out = (guint64 *) dest;

  /* with the interlaced flag set only every second source line is used */
  line_step = (scale->flags & GST_VIDEO_SCALER_FLAG_INTERLACED) ? 2 : 1;

  acc = (gint32 *) scale->tmpline1;
  /* the kernels work on 16-bit samples, four per pixel */
  n_samples = width * 4;

  video_orc_resample_v_multaps_u16 (acc, srcs[0], coeffs[0], n_samples);

  tap = 1;
  while (tap < n_taps) {
    video_orc_resample_v_muladdtaps_u16 (acc, srcs[tap * line_step],
        coeffs[tap], n_samples);
    tap++;
  }

  video_orc_resample_scaletaps_u16 (out, acc, n_samples);
}
/** /**
* gst_video_scaler_horizontal: * gst_video_scaler_horizontal:
* @scale: a #GstVideoScaler * @scale: a #GstVideoScaler
@ -580,25 +702,56 @@ void
gst_video_scaler_horizontal (GstVideoScaler * scale, GstVideoFormat format, gst_video_scaler_horizontal (GstVideoScaler * scale, GstVideoFormat format,
gpointer src, gpointer dest, guint dest_offset, guint width) gpointer src, gpointer dest, guint dest_offset, guint width)
{ {
gint pstride;
GstVideoScalerHFunc func; GstVideoScalerHFunc func;
const GstVideoFormatInfo *finfo;
g_return_if_fail (scale != NULL); g_return_if_fail (scale != NULL);
g_return_if_fail (src != NULL); g_return_if_fail (src != NULL);
g_return_if_fail (dest != NULL); g_return_if_fail (dest != NULL);
g_return_if_fail (dest_offset + width <= scale->resampler.out_size); g_return_if_fail (dest_offset + width <= scale->resampler.out_size);
switch (scale->resampler.max_taps) { finfo = gst_video_format_get_info (format);
case 1: g_return_if_fail (finfo->n_planes == 1);
func = video_scale_h_near_8888;
pstride = finfo->pixel_stride[0];
g_return_if_fail (pstride == 4 || pstride == 8);
switch (pstride) {
case 4:
switch (scale->resampler.max_taps) {
case 1:
func = video_scale_h_near_u32;
break;
case 2:
func = video_scale_h_2tap_4u8;
break;
default:
func = video_scale_h_ntap_4u8;
break;
}
break; break;
case 2: case 8:
func = video_scale_h_2tap_8888; switch (scale->resampler.max_taps) {
case 1:
func = video_scale_h_near_u64;
break;
default:
func = video_scale_h_ntap_4u16;
break;
}
break; break;
default: default:
func = video_scale_h_ntap_8888; goto no_func;
break;
} }
func (scale, src, dest, dest_offset, width); func (scale, src, dest, dest_offset, width);
return;
no_func:
{
GST_WARNING ("no scaler function for format");
func = NULL;
}
} }
/** /**
@ -619,26 +772,60 @@ void
gst_video_scaler_vertical (GstVideoScaler * scale, GstVideoFormat format, gst_video_scaler_vertical (GstVideoScaler * scale, GstVideoFormat format,
gpointer src_lines[], gpointer dest, guint dest_offset, guint width) gpointer src_lines[], gpointer dest, guint dest_offset, guint width)
{ {
gint pstride;
GstVideoScalerVFunc func; GstVideoScalerVFunc func;
const GstVideoFormatInfo *finfo;
g_return_if_fail (scale != NULL); g_return_if_fail (scale != NULL);
g_return_if_fail (src_lines != NULL); g_return_if_fail (src_lines != NULL);
g_return_if_fail (dest != NULL); g_return_if_fail (dest != NULL);
g_return_if_fail (dest_offset < scale->resampler.out_size); g_return_if_fail (dest_offset < scale->resampler.out_size);
switch (scale->resampler.max_taps) { finfo = gst_video_format_get_info (format);
case 1: g_return_if_fail (finfo->n_planes == 1);
func = video_scale_v_near_8888;
break; pstride = finfo->pixel_stride[0];
case 2: g_return_if_fail (pstride == 4 || pstride == 8);
func = video_scale_v_2tap_8888;
break; switch (pstride) {
case 4: case 4:
func = video_scale_v_4tap_8888; switch (scale->resampler.max_taps) {
case 1:
func = video_scale_v_near_u32;
break;
case 2:
func = video_scale_v_2tap_4u8;
break;
case 4:
func = video_scale_v_4tap_4u8;
break;
default:
func = video_scale_v_ntap_4u8;
break;
}
break;
case 8:
switch (scale->resampler.max_taps) {
case 1:
func = video_scale_v_near_u64;
break;
case 2:
func = video_scale_v_2tap_4u16;
break;
default:
func = video_scale_v_ntap_4u16;
break;
}
break; break;
default: default:
func = video_scale_v_ntap_8888; goto no_func;
break;
} }
func (scale, src_lines, dest, dest_offset, width); func (scale, src_lines, dest, dest_offset, width);
return;
no_func:
{
GST_WARNING ("no scaler function for format");
func = NULL;
}
} }