video-chroma: ORCify 2x vertical upsampling

Make an ORC version of the 2x vertical upsampling code.
Improve unit tests, test chroma up and down sampling.
Memset the input buffer in the color-conversion test to make valgrind happy.
This commit is contained in:
Wim Taymans 2014-11-07 12:06:10 +01:00
parent ffb43c0591
commit 39662d0393
5 changed files with 892 additions and 48 deletions

View file

@ -169,11 +169,8 @@ static void \
video_chroma_up_v2_##name (GstVideoChromaResample *resample, \
gpointer lines[], gint width) \
{ \
gint i; \
type *l0 = lines[0]; \
type *l1 = lines[1]; \
type tr0, tr1; \
type tb0, tb1; \
\
if (resample->h_resample) { \
resample->h_resample (resample, l0, width); \
@ -181,15 +178,7 @@ video_chroma_up_v2_##name (GstVideoChromaResample *resample, \
resample->h_resample (resample, l1, width); \
} \
if (l0 != l1) { \
for (i = 0; i < width; i++) { \
tr0 = PR0(i), tr1 = PR1(i); \
tb0 = PB0(i), tb1 = PB1(i); \
\
PR0(i) = FILT_3_1 (tr0, tr1); \
PB0(i) = FILT_3_1 (tb0, tb1); \
PR1(i) = FILT_1_3 (tr0, tr1); \
PB1(i) = FILT_1_3 (tb0, tb1); \
} \
video_orc_chroma_up_v2_##name (l0, l1, l0, l1, width); \
} \
}
/* 2x vertical upsampling interlaced without cositing

View file

@ -348,6 +348,12 @@ void video_orc_chroma_down_h2_u8 (guint8 * ORC_RESTRICT d1,
const guint8 * ORC_RESTRICT s1, int n);
void video_orc_chroma_down_v2_u8 (guint8 * ORC_RESTRICT d1,
const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2, int n);
/* 2x vertical chroma upsampling kernels (8-bit and 16-bit samples).
 * Each call processes n elements (4 bytes per element for _u8, 8 bytes for
 * _u16). Every output element keeps the first half of the matching source
 * element and replaces the second half with a rounded weighted blend:
 *   d1 <- (3 * s1 + s2 + 2) >> 2
 *   d2 <- (s1 + 3 * s2 + 2) >> 2 */
void video_orc_chroma_up_v2_u8 (guint8 * ORC_RESTRICT d1,
guint8 * ORC_RESTRICT d2, const guint8 * ORC_RESTRICT s1,
const guint8 * ORC_RESTRICT s2, int n);
void video_orc_chroma_up_v2_u16 (guint16 * ORC_RESTRICT d1,
guint16 * ORC_RESTRICT d2, const guint16 * ORC_RESTRICT s1,
const guint16 * ORC_RESTRICT s2, int n);
void video_orc_chroma_down_v2_u16 (guint16 * ORC_RESTRICT d1,
const guint16 * ORC_RESTRICT s1, const guint16 * ORC_RESTRICT s2, int n);
void video_orc_chroma_down_v4_u8 (guint8 * ORC_RESTRICT d1,
@ -20862,6 +20868,744 @@ video_orc_chroma_down_v2_u8 (guint8 * ORC_RESTRICT d1,
#endif
/* video_orc_chroma_up_v2_u8 */
#ifdef DISABLE_ORC
/* Plain-C implementation of the video_orc_chroma_up_v2_u8 program, used when
 * ORC is disabled.
 *
 * The buffers are walked as n 32-bit elements. Each element is viewed as two
 * 16-bit halves: half [0] is copied through untouched, while the two bytes
 * of half [1] are filtered between the s1 and s2 rows:
 *   d1 <- (3 * s1 + s2 + 2) >> 2
 *   d2 <- (s1 + 3 * s2 + 2) >> 2
 */
void
video_orc_chroma_up_v2_u8 (guint8 * ORC_RESTRICT d1, guint8 * ORC_RESTRICT d2,
    const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2, int n)
{
  int px;
  int lane;
  orc_union32 *ORC_RESTRICT out_first = (orc_union32 *) d1;
  orc_union32 *ORC_RESTRICT out_second = (orc_union32 *) d2;
  const orc_union32 *ORC_RESTRICT in_first = (orc_union32 *) s1;
  const orc_union32 *ORC_RESTRICT in_second = (orc_union32 *) s2;
  /* The volatile qualifier on this one compiler/target is kept exactly as
   * emitted by the ORC code generator. */
#if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
  volatile orc_union32 weight;
  volatile orc_union32 bias;
#else
  orc_union32 weight;
  orc_union32 bias;
#endif

  /* Per-lane constants: filter weight 3 and rounding bias 2. */
  weight.x2[0] = (int) 0x00000003;
  weight.x2[1] = (int) 0x00000003;
  bias.x2[0] = (int) 0x00000002;
  bias.x2[1] = (int) 0x00000002;

  for (px = 0; px < n; px++) {
    orc_union32 pix_first;
    orc_union32 pix_second;
    orc_union16 keep_first, keep_second;
    orc_union16 chroma_first, chroma_second;
    orc_union32 wide_first, wide_second;
    orc_union32 acc;
    orc_union16 narrow;
    orc_union32 result;

    /* Both source elements are read before anything is written. */
    pix_first = in_first[px];
    chroma_first.i = pix_first.x2[1];
    keep_first.i = pix_first.x2[0];

    pix_second = in_second[px];
    chroma_second.i = pix_second.x2[1];
    keep_second.i = pix_second.x2[0];

    /* Zero-extend the two bytes of each filtered half to 16 bits. */
    for (lane = 0; lane < 2; lane++)
      wide_first.x2[lane] = (orc_uint8) chroma_first.x2[lane];
    for (lane = 0; lane < 2; lane++)
      wide_second.x2[lane] = (orc_uint8) chroma_second.x2[lane];

    /* d1: (3 * first + second + 2) >> 2, narrowed back to bytes. */
    for (lane = 0; lane < 2; lane++) {
      acc.x2[lane] = (wide_first.x2[lane] * weight.x2[lane]) & 0xffff;
      acc.x2[lane] = acc.x2[lane] + wide_second.x2[lane];
      acc.x2[lane] = acc.x2[lane] + bias.x2[lane];
      acc.x2[lane] = ((orc_uint16) acc.x2[lane]) >> 2;
      narrow.x2[lane] = acc.x2[lane];
    }
    result.x2[0] = keep_first.i;
    result.x2[1] = narrow.i;
    out_first[px] = result;

    /* d2: (3 * second + first + 2) >> 2, narrowed back to bytes. */
    for (lane = 0; lane < 2; lane++) {
      acc.x2[lane] = (wide_second.x2[lane] * weight.x2[lane]) & 0xffff;
      acc.x2[lane] = acc.x2[lane] + wide_first.x2[lane];
      acc.x2[lane] = acc.x2[lane] + bias.x2[lane];
      acc.x2[lane] = ((orc_uint16) acc.x2[lane]) >> 2;
      narrow.x2[lane] = acc.x2[lane];
    }
    result.x2[0] = keep_second.i;
    result.x2[1] = narrow.i;
    out_second[px] = result;
  }
}
#else
/* Backup (plain C) implementation registered for the
 * video_orc_chroma_up_v2_u8 program.
 *
 * Reads the element count from ex->n, the destinations from ex->arrays[0]
 * and ex->arrays[1], and the sources from ex->arrays[4] and ex->arrays[5].
 * Every 32-bit element keeps its 16-bit half [0]; the two bytes of half [1]
 * are filtered:
 *   arrays[0] <- (3 * arrays[4] + arrays[5] + 2) >> 2
 *   arrays[1] <- (arrays[4] + 3 * arrays[5] + 2) >> 2
 */
static void
_backup_video_orc_chroma_up_v2_u8 (OrcExecutor * ORC_RESTRICT ex)
{
  int px;
  int lane;
  int n = ex->n;
  orc_union32 *ORC_RESTRICT out_first = (orc_union32 *) ex->arrays[0];
  orc_union32 *ORC_RESTRICT out_second = (orc_union32 *) ex->arrays[1];
  const orc_union32 *ORC_RESTRICT in_first = (orc_union32 *) ex->arrays[4];
  const orc_union32 *ORC_RESTRICT in_second = (orc_union32 *) ex->arrays[5];
  /* The volatile qualifier on this one compiler/target is kept exactly as
   * emitted by the ORC code generator. */
#if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
  volatile orc_union32 weight;
  volatile orc_union32 bias;
#else
  orc_union32 weight;
  orc_union32 bias;
#endif

  /* Per-lane constants: filter weight 3 and rounding bias 2. */
  weight.x2[0] = (int) 0x00000003;
  weight.x2[1] = (int) 0x00000003;
  bias.x2[0] = (int) 0x00000002;
  bias.x2[1] = (int) 0x00000002;

  for (px = 0; px < n; px++) {
    orc_union32 pix_first;
    orc_union32 pix_second;
    orc_union16 keep_first, keep_second;
    orc_union16 chroma_first, chroma_second;
    orc_union32 wide_first, wide_second;
    orc_union32 acc;
    orc_union16 narrow;
    orc_union32 result;

    /* Both source elements are read before anything is written. */
    pix_first = in_first[px];
    chroma_first.i = pix_first.x2[1];
    keep_first.i = pix_first.x2[0];

    pix_second = in_second[px];
    chroma_second.i = pix_second.x2[1];
    keep_second.i = pix_second.x2[0];

    /* Zero-extend the two bytes of each filtered half to 16 bits. */
    for (lane = 0; lane < 2; lane++)
      wide_first.x2[lane] = (orc_uint8) chroma_first.x2[lane];
    for (lane = 0; lane < 2; lane++)
      wide_second.x2[lane] = (orc_uint8) chroma_second.x2[lane];

    /* First destination: (3 * first + second + 2) >> 2. */
    for (lane = 0; lane < 2; lane++) {
      acc.x2[lane] = (wide_first.x2[lane] * weight.x2[lane]) & 0xffff;
      acc.x2[lane] = acc.x2[lane] + wide_second.x2[lane];
      acc.x2[lane] = acc.x2[lane] + bias.x2[lane];
      acc.x2[lane] = ((orc_uint16) acc.x2[lane]) >> 2;
      narrow.x2[lane] = acc.x2[lane];
    }
    result.x2[0] = keep_first.i;
    result.x2[1] = narrow.i;
    out_first[px] = result;

    /* Second destination: (3 * second + first + 2) >> 2. */
    for (lane = 0; lane < 2; lane++) {
      acc.x2[lane] = (wide_second.x2[lane] * weight.x2[lane]) & 0xffff;
      acc.x2[lane] = acc.x2[lane] + wide_first.x2[lane];
      acc.x2[lane] = acc.x2[lane] + bias.x2[lane];
      acc.x2[lane] = ((orc_uint16) acc.x2[lane]) >> 2;
      narrow.x2[lane] = acc.x2[lane];
    }
    result.x2[0] = keep_second.i;
    result.x2[1] = narrow.i;
    out_second[px] = result;
  }
}
/* ORC-enabled entry point for the 2x vertical chroma upsampler (8-bit).
 *
 * On the first call the ORC program is created, given
 * _backup_video_orc_chroma_up_v2_u8 as its backup function, compiled, and the
 * resulting OrcCode is cached in a function-local static. Every call then
 * fills an OrcExecutor with n and the four array pointers and invokes
 * c->exec on it.
 *
 * d1, d2: destination arrays; s1, s2: source arrays; n: element count
 * (the program declares 4-byte elements for all four arrays).
 */
void
video_orc_chroma_up_v2_u8 (guint8 * ORC_RESTRICT d1, guint8 * ORC_RESTRICT d2,
const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2, int n)
{
OrcExecutor _ex, *ex = &_ex;
static volatile int p_inited = 0;
static OrcCode *c = 0;
void (*func) (OrcExecutor *);
/* One-time setup: p_inited is tested again after taking the ORC once-mutex
 * so that only one caller builds and compiles the program. */
if (!p_inited) {
orc_once_mutex_lock ();
if (!p_inited) {
OrcProgram *p;
/* The "#if 1" branch loads the program from pre-encoded bytecode; the
 * "#else" branch is compiled out and spells out the same program through
 * the builder API. */
#if 1
static const orc_uint8 bc[] = {
1, 9, 25, 118, 105, 100, 101, 111, 95, 111, 114, 99, 95, 99, 104, 114,
111, 109, 97, 95, 117, 112, 95, 118, 50, 95, 117, 56, 11, 4, 4, 11,
4, 4, 12, 4, 4, 12, 4, 4, 14, 2, 3, 0, 0, 0, 14, 2,
2, 0, 0, 0, 20, 2, 20, 2, 20, 2, 20, 2, 20, 4, 20, 4,
20, 4, 198, 34, 32, 4, 198, 35, 33, 5, 21, 1, 150, 36, 34, 21,
1, 150, 37, 35, 21, 1, 89, 38, 36, 16, 21, 1, 70, 38, 38, 37,
21, 1, 70, 38, 38, 17, 21, 1, 95, 38, 38, 17, 21, 1, 157, 34,
38, 195, 0, 32, 34, 21, 1, 89, 38, 37, 16, 21, 1, 70, 38, 38,
36, 21, 1, 70, 38, 38, 17, 21, 1, 95, 38, 38, 17, 21, 1, 157,
35, 38, 195, 1, 33, 35, 2, 0,
};
p = orc_program_new_from_static_bytecode (bc);
orc_program_set_backup_function (p, _backup_video_orc_chroma_up_v2_u8);
#else
p = orc_program_new ();
orc_program_set_name (p, "video_orc_chroma_up_v2_u8");
orc_program_set_backup_function (p, _backup_video_orc_chroma_up_v2_u8);
orc_program_add_destination (p, 4, "d1");
orc_program_add_destination (p, 4, "d2");
orc_program_add_source (p, 4, "s1");
orc_program_add_source (p, 4, "s2");
orc_program_add_constant (p, 2, 0x00000003, "c1");
orc_program_add_constant (p, 2, 0x00000002, "c2");
orc_program_add_temporary (p, 2, "t1");
orc_program_add_temporary (p, 2, "t2");
orc_program_add_temporary (p, 2, "t3");
orc_program_add_temporary (p, 2, "t4");
orc_program_add_temporary (p, 4, "t5");
orc_program_add_temporary (p, 4, "t6");
orc_program_add_temporary (p, 4, "t7");
orc_program_append_2 (p, "splitlw", 0, ORC_VAR_T3, ORC_VAR_T1, ORC_VAR_S1,
ORC_VAR_D1);
orc_program_append_2 (p, "splitlw", 0, ORC_VAR_T4, ORC_VAR_T2, ORC_VAR_S2,
ORC_VAR_D1);
orc_program_append_2 (p, "convubw", 1, ORC_VAR_T5, ORC_VAR_T3, ORC_VAR_D1,
ORC_VAR_D1);
orc_program_append_2 (p, "convubw", 1, ORC_VAR_T6, ORC_VAR_T4, ORC_VAR_D1,
ORC_VAR_D1);
orc_program_append_2 (p, "mullw", 1, ORC_VAR_T7, ORC_VAR_T5, ORC_VAR_C1,
ORC_VAR_D1);
orc_program_append_2 (p, "addw", 1, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_T6,
ORC_VAR_D1);
orc_program_append_2 (p, "addw", 1, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_C2,
ORC_VAR_D1);
orc_program_append_2 (p, "shruw", 1, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_C2,
ORC_VAR_D1);
orc_program_append_2 (p, "convwb", 1, ORC_VAR_T3, ORC_VAR_T7, ORC_VAR_D1,
ORC_VAR_D1);
orc_program_append_2 (p, "mergewl", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_T3,
ORC_VAR_D1);
orc_program_append_2 (p, "mullw", 1, ORC_VAR_T7, ORC_VAR_T6, ORC_VAR_C1,
ORC_VAR_D1);
orc_program_append_2 (p, "addw", 1, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_T5,
ORC_VAR_D1);
orc_program_append_2 (p, "addw", 1, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_C2,
ORC_VAR_D1);
orc_program_append_2 (p, "shruw", 1, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_C2,
ORC_VAR_D1);
orc_program_append_2 (p, "convwb", 1, ORC_VAR_T4, ORC_VAR_T7, ORC_VAR_D1,
ORC_VAR_D1);
orc_program_append_2 (p, "mergewl", 0, ORC_VAR_D2, ORC_VAR_T2, ORC_VAR_T4,
ORC_VAR_D1);
#endif
/* Compile, keep only the generated code object, and drop the program. */
orc_program_compile (p);
c = orc_program_take_code (p);
orc_program_free (p);
}
p_inited = TRUE;
orc_once_mutex_unlock ();
}
/* Describe this invocation to the executor. The cached code object is also
 * stored in the ORC_VAR_A2 array slot. */
ex->arrays[ORC_VAR_A2] = c;
ex->program = 0;
ex->n = n;
ex->arrays[ORC_VAR_D1] = d1;
ex->arrays[ORC_VAR_D2] = d2;
/* The casts drop const because the executor's array slots are assigned
 * from plain void pointers here. */
ex->arrays[ORC_VAR_S1] = (void *) s1;
ex->arrays[ORC_VAR_S2] = (void *) s2;
/* NOTE(review): c->exec presumably points at either the compiled code or
 * the backup function; ORC internals are not visible here - confirm. */
func = c->exec;
func (ex);
}
#endif
/* video_orc_chroma_up_v2_u16 */
#ifdef DISABLE_ORC
/* Plain-C implementation of the video_orc_chroma_up_v2_u16 program, used when
 * ORC is disabled.
 *
 * The buffers are walked as n 64-bit elements. Each element is viewed as two
 * 32-bit halves: half [0] is copied through untouched, while the two 16-bit
 * values of half [1] are filtered between the s1 and s2 rows:
 *   d1 <- (3 * s1 + s2 + 2) >> 2
 *   d2 <- (s1 + 3 * s2 + 2) >> 2
 */
void
video_orc_chroma_up_v2_u16 (guint16 * ORC_RESTRICT d1,
    guint16 * ORC_RESTRICT d2, const guint16 * ORC_RESTRICT s1,
    const guint16 * ORC_RESTRICT s2, int n)
{
  int px;
  int lane;
  orc_union64 *ORC_RESTRICT out_first = (orc_union64 *) d1;
  orc_union64 *ORC_RESTRICT out_second = (orc_union64 *) d2;
  const orc_union64 *ORC_RESTRICT in_first = (orc_union64 *) s1;
  const orc_union64 *ORC_RESTRICT in_second = (orc_union64 *) s2;
  /* The volatile qualifier on this one compiler/target is kept exactly as
   * emitted by the ORC code generator. */
#if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
  volatile orc_union64 weight;
  volatile orc_union64 bias;
#else
  orc_union64 weight;
  orc_union64 bias;
#endif

  /* Per-lane constants: filter weight 3 and rounding bias 2. */
  weight.x2[0] = (int) 0x00000003;
  weight.x2[1] = (int) 0x00000003;
  bias.x2[0] = (int) 0x00000002;
  bias.x2[1] = (int) 0x00000002;

  for (px = 0; px < n; px++) {
    orc_union64 pix_first;
    orc_union64 pix_second;
    orc_union32 keep_first, keep_second;
    orc_union32 chroma_first, chroma_second;
    orc_union64 wide_first, wide_second;
    orc_union64 acc;
    orc_union32 narrow;
    orc_union64 result;

    /* Both source elements are read before anything is written. */
    pix_first = in_first[px];
    chroma_first.i = pix_first.x2[1];
    keep_first.i = pix_first.x2[0];

    pix_second = in_second[px];
    chroma_second.i = pix_second.x2[1];
    keep_second.i = pix_second.x2[0];

    /* Zero-extend the two 16-bit values of each filtered half to 32 bits. */
    for (lane = 0; lane < 2; lane++)
      wide_first.x2[lane] = (orc_uint16) chroma_first.x2[lane];
    for (lane = 0; lane < 2; lane++)
      wide_second.x2[lane] = (orc_uint16) chroma_second.x2[lane];

    /* d1: (3 * first + second + 2) >> 2, narrowed back to 16 bits. */
    for (lane = 0; lane < 2; lane++) {
      acc.x2[lane] =
          (((orc_uint32) wide_first.x2[lane]) *
          ((orc_uint32) weight.x2[lane])) & 0xffffffff;
      acc.x2[lane] =
          ((orc_uint32) acc.x2[lane]) + ((orc_uint32) wide_second.x2[lane]);
      acc.x2[lane] = ((orc_uint32) acc.x2[lane]) + ((orc_uint32) bias.x2[lane]);
      acc.x2[lane] = ((orc_uint32) acc.x2[lane]) >> 2;
      narrow.x2[lane] = acc.x2[lane];
    }
    result.x2[0] = keep_first.i;
    result.x2[1] = narrow.i;
    out_first[px] = result;

    /* d2: (3 * second + first + 2) >> 2, narrowed back to 16 bits. */
    for (lane = 0; lane < 2; lane++) {
      acc.x2[lane] =
          (((orc_uint32) wide_second.x2[lane]) *
          ((orc_uint32) weight.x2[lane])) & 0xffffffff;
      acc.x2[lane] =
          ((orc_uint32) acc.x2[lane]) + ((orc_uint32) wide_first.x2[lane]);
      acc.x2[lane] = ((orc_uint32) acc.x2[lane]) + ((orc_uint32) bias.x2[lane]);
      acc.x2[lane] = ((orc_uint32) acc.x2[lane]) >> 2;
      narrow.x2[lane] = acc.x2[lane];
    }
    result.x2[0] = keep_second.i;
    result.x2[1] = narrow.i;
    out_second[px] = result;
  }
}
#else
/* Backup (plain C) implementation registered for the
 * video_orc_chroma_up_v2_u16 program.
 *
 * Reads the element count from ex->n, the destinations from ex->arrays[0]
 * and ex->arrays[1], and the sources from ex->arrays[4] and ex->arrays[5].
 * Every 64-bit element keeps its 32-bit half [0]; the two 16-bit values of
 * half [1] are filtered:
 *   arrays[0] <- (3 * arrays[4] + arrays[5] + 2) >> 2
 *   arrays[1] <- (arrays[4] + 3 * arrays[5] + 2) >> 2
 */
static void
_backup_video_orc_chroma_up_v2_u16 (OrcExecutor * ORC_RESTRICT ex)
{
  int px;
  int lane;
  int n = ex->n;
  orc_union64 *ORC_RESTRICT out_first = (orc_union64 *) ex->arrays[0];
  orc_union64 *ORC_RESTRICT out_second = (orc_union64 *) ex->arrays[1];
  const orc_union64 *ORC_RESTRICT in_first = (orc_union64 *) ex->arrays[4];
  const orc_union64 *ORC_RESTRICT in_second = (orc_union64 *) ex->arrays[5];
  /* The volatile qualifier on this one compiler/target is kept exactly as
   * emitted by the ORC code generator. */
#if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
  volatile orc_union64 weight;
  volatile orc_union64 bias;
#else
  orc_union64 weight;
  orc_union64 bias;
#endif

  /* Per-lane constants: filter weight 3 and rounding bias 2. */
  weight.x2[0] = (int) 0x00000003;
  weight.x2[1] = (int) 0x00000003;
  bias.x2[0] = (int) 0x00000002;
  bias.x2[1] = (int) 0x00000002;

  for (px = 0; px < n; px++) {
    orc_union64 pix_first;
    orc_union64 pix_second;
    orc_union32 keep_first, keep_second;
    orc_union32 chroma_first, chroma_second;
    orc_union64 wide_first, wide_second;
    orc_union64 acc;
    orc_union32 narrow;
    orc_union64 result;

    /* Both source elements are read before anything is written. */
    pix_first = in_first[px];
    chroma_first.i = pix_first.x2[1];
    keep_first.i = pix_first.x2[0];

    pix_second = in_second[px];
    chroma_second.i = pix_second.x2[1];
    keep_second.i = pix_second.x2[0];

    /* Zero-extend the two 16-bit values of each filtered half to 32 bits. */
    for (lane = 0; lane < 2; lane++)
      wide_first.x2[lane] = (orc_uint16) chroma_first.x2[lane];
    for (lane = 0; lane < 2; lane++)
      wide_second.x2[lane] = (orc_uint16) chroma_second.x2[lane];

    /* First destination: (3 * first + second + 2) >> 2. */
    for (lane = 0; lane < 2; lane++) {
      acc.x2[lane] =
          (((orc_uint32) wide_first.x2[lane]) *
          ((orc_uint32) weight.x2[lane])) & 0xffffffff;
      acc.x2[lane] =
          ((orc_uint32) acc.x2[lane]) + ((orc_uint32) wide_second.x2[lane]);
      acc.x2[lane] = ((orc_uint32) acc.x2[lane]) + ((orc_uint32) bias.x2[lane]);
      acc.x2[lane] = ((orc_uint32) acc.x2[lane]) >> 2;
      narrow.x2[lane] = acc.x2[lane];
    }
    result.x2[0] = keep_first.i;
    result.x2[1] = narrow.i;
    out_first[px] = result;

    /* Second destination: (3 * second + first + 2) >> 2. */
    for (lane = 0; lane < 2; lane++) {
      acc.x2[lane] =
          (((orc_uint32) wide_second.x2[lane]) *
          ((orc_uint32) weight.x2[lane])) & 0xffffffff;
      acc.x2[lane] =
          ((orc_uint32) acc.x2[lane]) + ((orc_uint32) wide_first.x2[lane]);
      acc.x2[lane] = ((orc_uint32) acc.x2[lane]) + ((orc_uint32) bias.x2[lane]);
      acc.x2[lane] = ((orc_uint32) acc.x2[lane]) >> 2;
      narrow.x2[lane] = acc.x2[lane];
    }
    result.x2[0] = keep_second.i;
    result.x2[1] = narrow.i;
    out_second[px] = result;
  }
}
/* ORC-enabled entry point for the 2x vertical chroma upsampler (16-bit).
 *
 * On the first call the ORC program is created, given
 * _backup_video_orc_chroma_up_v2_u16 as its backup function, compiled, and
 * the resulting OrcCode is cached in a function-local static. Every call
 * then fills an OrcExecutor with n and the four array pointers and invokes
 * c->exec on it.
 *
 * d1, d2: destination arrays; s1, s2: source arrays; n: element count
 * (the program declares 8-byte elements for all four arrays).
 */
void
video_orc_chroma_up_v2_u16 (guint16 * ORC_RESTRICT d1,
guint16 * ORC_RESTRICT d2, const guint16 * ORC_RESTRICT s1,
const guint16 * ORC_RESTRICT s2, int n)
{
OrcExecutor _ex, *ex = &_ex;
static volatile int p_inited = 0;
static OrcCode *c = 0;
void (*func) (OrcExecutor *);
/* One-time setup: p_inited is tested again after taking the ORC once-mutex
 * so that only one caller builds and compiles the program. */
if (!p_inited) {
orc_once_mutex_lock ();
if (!p_inited) {
OrcProgram *p;
/* The "#if 1" branch loads the program from pre-encoded bytecode; the
 * "#else" branch is compiled out and spells out the same program through
 * the builder API. */
#if 1
static const orc_uint8 bc[] = {
1, 9, 26, 118, 105, 100, 101, 111, 95, 111, 114, 99, 95, 99, 104, 114,
111, 109, 97, 95, 117, 112, 95, 118, 50, 95, 117, 49, 54, 11, 8, 8,
11, 8, 8, 12, 8, 8, 12, 8, 8, 14, 4, 3, 0, 0, 0, 14,
4, 2, 0, 0, 0, 20, 4, 20, 4, 20, 4, 20, 4, 20, 8, 20,
8, 20, 8, 197, 34, 32, 4, 197, 35, 33, 5, 21, 1, 154, 36, 34,
21, 1, 154, 37, 35, 21, 1, 120, 38, 36, 16, 21, 1, 103, 38, 38,
37, 21, 1, 103, 38, 38, 17, 21, 1, 126, 38, 38, 17, 21, 1, 163,
34, 38, 194, 0, 32, 34, 21, 1, 120, 38, 37, 16, 21, 1, 103, 38,
38, 36, 21, 1, 103, 38, 38, 17, 21, 1, 126, 38, 38, 17, 21, 1,
163, 35, 38, 194, 1, 33, 35, 2, 0,
};
p = orc_program_new_from_static_bytecode (bc);
orc_program_set_backup_function (p, _backup_video_orc_chroma_up_v2_u16);
#else
p = orc_program_new ();
orc_program_set_name (p, "video_orc_chroma_up_v2_u16");
orc_program_set_backup_function (p, _backup_video_orc_chroma_up_v2_u16);
orc_program_add_destination (p, 8, "d1");
orc_program_add_destination (p, 8, "d2");
orc_program_add_source (p, 8, "s1");
orc_program_add_source (p, 8, "s2");
orc_program_add_constant (p, 4, 0x00000003, "c1");
orc_program_add_constant (p, 4, 0x00000002, "c2");
orc_program_add_temporary (p, 4, "t1");
orc_program_add_temporary (p, 4, "t2");
orc_program_add_temporary (p, 4, "t3");
orc_program_add_temporary (p, 4, "t4");
orc_program_add_temporary (p, 8, "t5");
orc_program_add_temporary (p, 8, "t6");
orc_program_add_temporary (p, 8, "t7");
orc_program_append_2 (p, "splitql", 0, ORC_VAR_T3, ORC_VAR_T1, ORC_VAR_S1,
ORC_VAR_D1);
orc_program_append_2 (p, "splitql", 0, ORC_VAR_T4, ORC_VAR_T2, ORC_VAR_S2,
ORC_VAR_D1);
orc_program_append_2 (p, "convuwl", 1, ORC_VAR_T5, ORC_VAR_T3, ORC_VAR_D1,
ORC_VAR_D1);
orc_program_append_2 (p, "convuwl", 1, ORC_VAR_T6, ORC_VAR_T4, ORC_VAR_D1,
ORC_VAR_D1);
orc_program_append_2 (p, "mulll", 1, ORC_VAR_T7, ORC_VAR_T5, ORC_VAR_C1,
ORC_VAR_D1);
orc_program_append_2 (p, "addl", 1, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_T6,
ORC_VAR_D1);
orc_program_append_2 (p, "addl", 1, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_C2,
ORC_VAR_D1);
orc_program_append_2 (p, "shrul", 1, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_C2,
ORC_VAR_D1);
orc_program_append_2 (p, "convlw", 1, ORC_VAR_T3, ORC_VAR_T7, ORC_VAR_D1,
ORC_VAR_D1);
orc_program_append_2 (p, "mergelq", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_T3,
ORC_VAR_D1);
orc_program_append_2 (p, "mulll", 1, ORC_VAR_T7, ORC_VAR_T6, ORC_VAR_C1,
ORC_VAR_D1);
orc_program_append_2 (p, "addl", 1, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_T5,
ORC_VAR_D1);
orc_program_append_2 (p, "addl", 1, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_C2,
ORC_VAR_D1);
orc_program_append_2 (p, "shrul", 1, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_C2,
ORC_VAR_D1);
orc_program_append_2 (p, "convlw", 1, ORC_VAR_T4, ORC_VAR_T7, ORC_VAR_D1,
ORC_VAR_D1);
orc_program_append_2 (p, "mergelq", 0, ORC_VAR_D2, ORC_VAR_T2, ORC_VAR_T4,
ORC_VAR_D1);
#endif
/* Compile, keep only the generated code object, and drop the program. */
orc_program_compile (p);
c = orc_program_take_code (p);
orc_program_free (p);
}
p_inited = TRUE;
orc_once_mutex_unlock ();
}
/* Describe this invocation to the executor. The cached code object is also
 * stored in the ORC_VAR_A2 array slot. */
ex->arrays[ORC_VAR_A2] = c;
ex->program = 0;
ex->n = n;
ex->arrays[ORC_VAR_D1] = d1;
ex->arrays[ORC_VAR_D2] = d2;
/* The casts drop const because the executor's array slots are assigned
 * from plain void pointers here. */
ex->arrays[ORC_VAR_S1] = (void *) s1;
ex->arrays[ORC_VAR_S2] = (void *) s2;
/* NOTE(review): c->exec presumably points at either the compiled code or
 * the backup function; ORC internals are not visible here - confirm. */
func = c->exec;
func (ex);
}
#endif
/* video_orc_chroma_down_v2_u16 */
#ifdef DISABLE_ORC
void

View file

@ -177,6 +177,8 @@ void video_orc_resample_v_multaps_u8_lq (gint32 * ORC_RESTRICT d1, const guint32
void video_orc_resample_v_muladdtaps_u8_lq (gint32 * ORC_RESTRICT d1, const guint32 * ORC_RESTRICT s1, int p1, int n);
void video_orc_chroma_down_h2_u8 (guint8 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, int n);
void video_orc_chroma_down_v2_u8 (guint8 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2, int n);
/* 2x vertical chroma upsampling over n elements (4 bytes each for _u8,
 * 8 bytes each for _u16): the first half of every element is copied, the
 * second half becomes d1 <- (3*s1 + s2 + 2) >> 2, d2 <- (s1 + 3*s2 + 2) >> 2. */
void video_orc_chroma_up_v2_u8 (guint8 * ORC_RESTRICT d1, guint8 * ORC_RESTRICT d2, const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2, int n);
void video_orc_chroma_up_v2_u16 (guint16 * ORC_RESTRICT d1, guint16 * ORC_RESTRICT d2, const guint16 * ORC_RESTRICT s1, const guint16 * ORC_RESTRICT s2, int n);
void video_orc_chroma_down_v2_u16 (guint16 * ORC_RESTRICT d1, const guint16 * ORC_RESTRICT s1, const guint16 * ORC_RESTRICT s2, int n);
void video_orc_chroma_down_v4_u8 (guint8 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2, const guint8 * ORC_RESTRICT s3, const guint8 * ORC_RESTRICT s4, int n);
void video_orc_chroma_down_v4_u16 (guint16 * ORC_RESTRICT d1, const guint16 * ORC_RESTRICT s1, const guint16 * ORC_RESTRICT s2, const guint16 * ORC_RESTRICT s3, const guint16 * ORC_RESTRICT s4, int n);

View file

@ -1672,6 +1672,70 @@ select1lw uv2, s2
x2 avgub uv1, uv1, uv2
mergewl d, ay1, uv1
# 2x vertical chroma upsampling for 8-bit samples, 4 bytes per element.
# Two input lines (s1, s2) produce two output lines:
#   d1 = (3 * s1 + s2 + 2) >> 2
#   d2 = (s1 + 3 * s2 + 2) >> 2
# Only the uv half of every element is filtered; the ay half is passed
# through unchanged.
# NOTE(review): the ay/uv names suggest packed AYUV pixels - confirm with the
# caller.
.function video_orc_chroma_up_v2_u8
.source 4 s1 guint8
.source 4 s2 guint8
.dest 4 d1 guint8
.dest 4 d2 guint8
.temp 2 ay1
.temp 2 ay2
.temp 2 uv1
.temp 2 uv2
.temp 4 uuvv1
.temp 4 uuvv2
.temp 4 uuvv3
# Split each 4-byte element into its two 2-byte halves.
splitlw uv1, ay1, s1
splitlw uv2, ay2, s2
# Widen the two bytes of each uv half to 16 bits so the sums cannot overflow.
x2 convubw uuvv1, uv1
x2 convubw uuvv2, uv2
# First output line: 3 * line1 + line2, rounded, divided by 4.
x2 mullw uuvv3, uuvv1, 3
x2 addw uuvv3, uuvv3, uuvv2
x2 addw uuvv3, uuvv3, 2
x2 shruw uuvv3, uuvv3, 2
x2 convwb uv1, uuvv3
mergewl d1, ay1, uv1
# Second output line: 3 * line2 + line1, rounded, divided by 4.
x2 mullw uuvv3, uuvv2, 3
x2 addw uuvv3, uuvv3, uuvv1
x2 addw uuvv3, uuvv3, 2
x2 shruw uuvv3, uuvv3, 2
x2 convwb uv2, uuvv3
mergewl d2, ay2, uv2
# 2x vertical chroma upsampling for 16-bit samples, 8 bytes per element.
# Two input lines (s1, s2) produce two output lines:
#   d1 = (3 * s1 + s2 + 2) >> 2
#   d2 = (s1 + 3 * s2 + 2) >> 2
# Only the uv half of every element is filtered; the ay half is passed
# through unchanged.
# NOTE(review): the ay/uv names suggest packed 16-bit AYUV pixels - confirm
# with the caller.
.function video_orc_chroma_up_v2_u16
.source 8 s1 guint16
.source 8 s2 guint16
.dest 8 d1 guint16
.dest 8 d2 guint16
.temp 4 ay1
.temp 4 ay2
.temp 4 uv1
.temp 4 uv2
.temp 8 uuvv1
.temp 8 uuvv2
.temp 8 uuvv3
# Split each 8-byte element into its two 4-byte halves.
splitql uv1, ay1, s1
splitql uv2, ay2, s2
# Widen the two 16-bit values of each uv half to 32 bits so the sums cannot
# overflow.
x2 convuwl uuvv1, uv1
x2 convuwl uuvv2, uv2
# First output line: 3 * line1 + line2, rounded, divided by 4.
x2 mulll uuvv3, uuvv1, 3
x2 addl uuvv3, uuvv3, uuvv2
x2 addl uuvv3, uuvv3, 2
x2 shrul uuvv3, uuvv3, 2
x2 convlw uv1, uuvv3
mergelq d1, ay1, uv1
# Second output line: 3 * line2 + line1, rounded, divided by 4.
x2 mulll uuvv3, uuvv2, 3
x2 addl uuvv3, uuvv3, uuvv1
x2 addl uuvv3, uuvv3, 2
x2 shrul uuvv3, uuvv3, 2
x2 convlw uv2, uuvv3
mergelq d2, ay2, uv2
.function video_orc_chroma_down_v2_u16
.source 8 s1 guint16
.source 8 s2 guint16

View file

@ -1801,10 +1801,10 @@ GST_START_TEST (test_video_pack_unpack2)
GTimer *timer;
gint num_formats;
#define WIDTH 1920
#define HEIGHT 1080
#define WIDTH 320
#define HEIGHT 240
/* set to something larger to do benchmarks */
#define TIME 0.0
#define TIME 0.01
timer = g_timer_new ();
@ -1880,8 +1880,9 @@ GST_START_TEST (test_video_pack_unpack2)
/* compare the frame */
diff = compare_frame (finfo, depth, outpixels, pixels, WIDTH, HEIGHT);
GST_DEBUG ("%f \t %f \t %f \t %f \t %s", pack_sec, unpack_sec,
info.size * pack_sec, info.size * unpack_sec, finfo->name);
GST_DEBUG ("%f \t %f \t %f \t %f \t %s %d/%f", pack_sec, unpack_sec,
info.size * pack_sec, info.size * unpack_sec, finfo->name, count,
elapsed);
if (diff != 0) {
gst_util_dump_mem (outpixels, 128);
@ -1901,50 +1902,89 @@ GST_END_TEST;
#undef HEIGHT
#undef TIME
#define WIDTH 1920
#define HEIGHT 1080
#define WIDTH 320
#define HEIGHT 240
#define TIME 0.1
#define GET_LINE(l) (pixels + CLAMP (l, 0, HEIGHT-1) * WIDTH * 4)
GST_START_TEST (test_video_chroma)
{
guint8 *pixels;
GstVideoChromaResample *resample;
guint n_lines;
gint i, j, offset, count;
gpointer lines[2];
gint i, j, k, offset, count;
gpointer lines[10];
GTimer *timer;
gdouble elapsed, subsample_sec;
GstVideoChromaSite sites[] = {
GST_VIDEO_CHROMA_SITE_NONE,
GST_VIDEO_CHROMA_SITE_H_COSITED,
};
timer = g_timer_new ();
pixels = make_pixels (8, WIDTH, HEIGHT);
resample = gst_video_chroma_resample_new (GST_VIDEO_CHROMA_METHOD_LINEAR,
GST_VIDEO_CHROMA_SITE_NONE, GST_VIDEO_CHROMA_FLAG_NONE,
GST_VIDEO_FORMAT_AYUV, -1, -1);
for (k = 0; k < G_N_ELEMENTS (sites); k++) {
GstVideoChromaResample *resample;
gst_video_chroma_resample_get_info (resample, &n_lines, &offset);
fail_unless (n_lines == 2);
fail_unless (offset == 0);
resample = gst_video_chroma_resample_new (GST_VIDEO_CHROMA_METHOD_LINEAR,
sites[k], GST_VIDEO_CHROMA_FLAG_NONE, GST_VIDEO_FORMAT_AYUV, -1, -1);
count = 0;
g_timer_start (timer);
while (TRUE) {
for (i = 0; i < HEIGHT; i += n_lines) {
for (j = 0; j < n_lines; j++)
lines[j] = GET_LINE (i + offset + j);
gst_video_chroma_resample_get_info (resample, &n_lines, &offset);
fail_unless (n_lines < 10);
gst_video_chroma_resample (resample, lines, WIDTH);
/* warmup */
for (j = 0; j < n_lines; j++)
lines[j] = GET_LINE (offset + j);
gst_video_chroma_resample (resample, lines, WIDTH);
count = 0;
g_timer_start (timer);
while (TRUE) {
for (i = 0; i < HEIGHT; i += n_lines) {
for (j = 0; j < n_lines; j++)
lines[j] = GET_LINE (i + offset + j);
gst_video_chroma_resample (resample, lines, WIDTH);
}
count++;
elapsed = g_timer_elapsed (timer, NULL);
if (elapsed >= TIME)
break;
}
count++;
elapsed = g_timer_elapsed (timer, NULL);
if (elapsed >= TIME)
break;
subsample_sec = count / elapsed;
GST_DEBUG ("%f downsamples/sec %d/%f", subsample_sec, count, elapsed);
gst_video_chroma_resample_free (resample);
resample = gst_video_chroma_resample_new (GST_VIDEO_CHROMA_METHOD_LINEAR,
sites[k], GST_VIDEO_CHROMA_FLAG_NONE, GST_VIDEO_FORMAT_AYUV, 1, 1);
gst_video_chroma_resample_get_info (resample, &n_lines, &offset);
fail_unless (n_lines < 10);
/* warmup */
for (j = 0; j < n_lines; j++)
lines[j] = GET_LINE (offset + j);
gst_video_chroma_resample (resample, lines, WIDTH);
count = 0;
g_timer_start (timer);
while (TRUE) {
for (i = 0; i < HEIGHT; i += n_lines) {
for (j = 0; j < n_lines; j++)
lines[j] = GET_LINE (i + offset + j);
gst_video_chroma_resample (resample, lines, WIDTH);
}
count++;
elapsed = g_timer_elapsed (timer, NULL);
if (elapsed >= TIME)
break;
}
subsample_sec = count / elapsed;
GST_DEBUG ("%f upsamples/sec %d/%f", subsample_sec, count, elapsed);
gst_video_chroma_resample_free (resample);
}
subsample_sec = count / elapsed;
GST_DEBUG ("%f subsamples/sec", subsample_sec);
gst_video_chroma_resample_free (resample);
g_free (pixels);
g_timer_destroy (timer);
}
@ -1968,9 +2008,9 @@ GST_START_TEST (test_video_scaler)
GST_END_TEST;
#define WIDTH 192
#define HEIGHT 108
#define TIME 0.0
#define WIDTH 320
#define HEIGHT 240
#define TIME 0.01
#define GET_LINE(l) (pixels + CLAMP (l, 0, HEIGHT-1) * WIDTH * 4)
typedef struct
@ -2011,6 +2051,7 @@ GST_START_TEST (test_video_color_convert)
gst_video_info_set_format (&ininfo, infmt, WIDTH, HEIGHT);
inbuffer = gst_buffer_new_and_alloc (ininfo.size);
gst_buffer_memset (inbuffer, 0, 0, -1);
gst_video_frame_map (&inframe, &ininfo, inbuffer, GST_MAP_READ);
for (outfmt = GST_VIDEO_FORMAT_I420; outfmt < num_formats; outfmt++) {
@ -2027,6 +2068,8 @@ GST_START_TEST (test_video_color_convert)
gst_video_frame_map (&outframe, &outinfo, outbuffer, GST_MAP_WRITE);
convert = gst_video_converter_new (&ininfo, &outinfo, NULL);
/* warmup */
gst_video_converter_frame (convert, &inframe, &outframe);
count = 0;
g_timer_start (timer);
@ -2043,9 +2086,9 @@ GST_START_TEST (test_video_color_convert)
res.outfmt = outfmt;
res.convert_sec = count / elapsed;
GST_DEBUG ("%f conversions/sec %s->%s", res.convert_sec,
GST_DEBUG ("%f conversions/sec %s->%s, %d/%f", res.convert_sec,
gst_video_format_to_string (infmt),
gst_video_format_to_string (outfmt));
gst_video_format_to_string (outfmt), count, elapsed);
g_array_append_val (array, res);
@ -2083,6 +2126,8 @@ video_suite (void)
Suite *s = suite_create ("video support library");
TCase *tc_chain = tcase_create ("general");
tcase_set_timeout (tc_chain, 60 * 60);
suite_add_tcase (s, tc_chain);
tcase_add_test (tc_chain, test_video_formats);
tcase_add_test (tc_chain, test_video_formats_rgb);