diff --git a/gst-libs/gst/video/video-chroma.c b/gst-libs/gst/video/video-chroma.c index aab0ac2fbe..fc44a21589 100644 --- a/gst-libs/gst/video/video-chroma.c +++ b/gst-libs/gst/video/video-chroma.c @@ -169,11 +169,8 @@ static void \ video_chroma_up_v2_##name (GstVideoChromaResample *resample, \ gpointer lines[], gint width) \ { \ - gint i; \ type *l0 = lines[0]; \ type *l1 = lines[1]; \ - type tr0, tr1; \ - type tb0, tb1; \ \ if (resample->h_resample) { \ resample->h_resample (resample, l0, width); \ @@ -181,15 +178,7 @@ video_chroma_up_v2_##name (GstVideoChromaResample *resample, \ resample->h_resample (resample, l1, width); \ } \ if (l0 != l1) { \ - for (i = 0; i < width; i++) { \ - tr0 = PR0(i), tr1 = PR1(i); \ - tb0 = PB0(i), tb1 = PB1(i); \ - \ - PR0(i) = FILT_3_1 (tr0, tr1); \ - PB0(i) = FILT_3_1 (tb0, tb1); \ - PR1(i) = FILT_1_3 (tr0, tr1); \ - PB1(i) = FILT_1_3 (tb0, tb1); \ - } \ + video_orc_chroma_up_v2_##name (l0, l1, l0, l1, width); \ } \ } /* 2x vertical upsampling interlaced without cositing diff --git a/gst-libs/gst/video/video-orc-dist.c b/gst-libs/gst/video/video-orc-dist.c index 9df7dce44f..97b1b1106a 100644 --- a/gst-libs/gst/video/video-orc-dist.c +++ b/gst-libs/gst/video/video-orc-dist.c @@ -348,6 +348,12 @@ void video_orc_chroma_down_h2_u8 (guint8 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, int n); void video_orc_chroma_down_v2_u8 (guint8 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2, int n); +void video_orc_chroma_up_v2_u8 (guint8 * ORC_RESTRICT d1, + guint8 * ORC_RESTRICT d2, const guint8 * ORC_RESTRICT s1, + const guint8 * ORC_RESTRICT s2, int n); +void video_orc_chroma_up_v2_u16 (guint16 * ORC_RESTRICT d1, + guint16 * ORC_RESTRICT d2, const guint16 * ORC_RESTRICT s1, + const guint16 * ORC_RESTRICT s2, int n); void video_orc_chroma_down_v2_u16 (guint16 * ORC_RESTRICT d1, const guint16 * ORC_RESTRICT s1, const guint16 * ORC_RESTRICT s2, int n); void video_orc_chroma_down_v4_u8 (guint8 * ORC_RESTRICT d1, @@ -20862,6 +20868,744 @@ video_orc_chroma_down_v2_u8 (guint8 * ORC_RESTRICT d1, #endif +/* video_orc_chroma_up_v2_u8 */ +#ifdef DISABLE_ORC +void +video_orc_chroma_up_v2_u8 (guint8 * ORC_RESTRICT d1, guint8 * ORC_RESTRICT d2, + const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2, int n) +{ + int i; + orc_union32 *ORC_RESTRICT ptr0; + orc_union32 *ORC_RESTRICT ptr1; + const orc_union32 *ORC_RESTRICT ptr4; + const orc_union32 *ORC_RESTRICT ptr5; + orc_union32 var39; + orc_union32 var40; +#if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__) + volatile orc_union32 var41; +#else + orc_union32 var41; +#endif +#if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__) + volatile orc_union32 var42; +#else + orc_union32 var42; +#endif + orc_union32 var43; + orc_union32 var44; + orc_union16 var45; + orc_union16 var46; + orc_union16 var47; + orc_union16 var48; + orc_union32 var49; + orc_union32 var50; + orc_union32 var51; + orc_union32 var52; + orc_union32 var53; + orc_union32 var54; + orc_union16 var55; + orc_union32 var56; + orc_union32 var57; + orc_union32 var58; + orc_union32 var59; + orc_union16 var60; + + ptr0 = (orc_union32 *) d1; + ptr1 = (orc_union32 *) d2; + ptr4 = (orc_union32 *) s1; + ptr5 = (orc_union32 *) s2; + + /* 6: loadpw */ + var41.x2[0] = (int) 0x00000003; /* 3 or 1.4822e-323f */ + var41.x2[1] = (int) 0x00000003; /* 3 or 1.4822e-323f */ + /* 9: loadpw */ + var42.x2[0] = (int) 0x00000002; /* 2 or 9.88131e-324f */ + var42.x2[1] = (int) 0x00000002; /* 2 or 9.88131e-324f */ + + for (i = 0; i < n; i++) { + /* 0: loadl */ + var39 = ptr4[i]; + /* 1: splitlw */ + { + orc_union32 _src; + _src.i = var39.i; + var45.i = _src.x2[1]; + var46.i = _src.x2[0]; + } + /* 2: loadl */ + var40 = ptr5[i]; + /* 3: splitlw */ + { + orc_union32 _src; + _src.i = var40.i; + var47.i = _src.x2[1]; + var48.i = _src.x2[0]; + } + /* 4: convubw */ + var49.x2[0] = (orc_uint8) var45.x2[0]; + var49.x2[1] = (orc_uint8) var45.x2[1]; + /* 5: convubw */ + var50.x2[0] = (orc_uint8) var47.x2[0]; + var50.x2[1] = (orc_uint8) var47.x2[1]; + /* 7: mullw */ + var51.x2[0] = (var49.x2[0] * var41.x2[0]) & 0xffff; + var51.x2[1] = (var49.x2[1] * var41.x2[1]) & 0xffff; + /* 8: addw */ + var52.x2[0] = var51.x2[0] + var50.x2[0]; + var52.x2[1] = var51.x2[1] + var50.x2[1]; + /* 10: addw */ + var53.x2[0] = var52.x2[0] + var42.x2[0]; + var53.x2[1] = var52.x2[1] + var42.x2[1]; + /* 11: shruw */ + var54.x2[0] = ((orc_uint16) var53.x2[0]) >> 2; + var54.x2[1] = ((orc_uint16) var53.x2[1]) >> 2; + /* 12: convwb */ + var55.x2[0] = var54.x2[0]; + var55.x2[1] = var54.x2[1]; + /* 13: mergewl */ + { + orc_union32 _dest; + _dest.x2[0] = var46.i; + _dest.x2[1] = var55.i; + var43.i = _dest.i; + } + /* 14: storel */ + ptr0[i] = var43; + /* 15: mullw */ + var56.x2[0] = (var50.x2[0] * var41.x2[0]) & 0xffff; + var56.x2[1] = (var50.x2[1] * var41.x2[1]) & 0xffff; + /* 16: addw */ + var57.x2[0] = var56.x2[0] + var49.x2[0]; + var57.x2[1] = var56.x2[1] + var49.x2[1]; + /* 17: addw */ + var58.x2[0] = var57.x2[0] + var42.x2[0]; + var58.x2[1] = var57.x2[1] + var42.x2[1]; + /* 18: shruw */ + var59.x2[0] = ((orc_uint16) var58.x2[0]) >> 2; + var59.x2[1] = ((orc_uint16) var58.x2[1]) >> 2; + /* 19: convwb */ + var60.x2[0] = var59.x2[0]; + var60.x2[1] = var59.x2[1]; + /* 20: mergewl */ + { + orc_union32 _dest; + _dest.x2[0] = var48.i; + _dest.x2[1] = var60.i; + var44.i = _dest.i; + } + /* 21: storel */ + ptr1[i] = var44; + } + +} + +#else +static void +_backup_video_orc_chroma_up_v2_u8 (OrcExecutor * ORC_RESTRICT ex) +{ + int i; + int n = ex->n; + orc_union32 *ORC_RESTRICT ptr0; + orc_union32 *ORC_RESTRICT ptr1; + const orc_union32 *ORC_RESTRICT ptr4; + const orc_union32 *ORC_RESTRICT ptr5; + orc_union32 var39; + orc_union32 var40; +#if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__) + volatile orc_union32 var41; +#else + orc_union32 var41; +#endif +#if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__) + volatile orc_union32 var42; +#else + orc_union32 var42; +#endif + orc_union32 var43; + orc_union32 var44; + orc_union16 var45; + orc_union16 var46; + orc_union16 var47; + orc_union16 var48; + orc_union32 var49; + orc_union32 var50; + orc_union32 var51; + orc_union32 var52; + orc_union32 var53; + orc_union32 var54; + orc_union16 var55; + orc_union32 var56; + orc_union32 var57; + orc_union32 var58; + orc_union32 var59; + orc_union16 var60; + + ptr0 = (orc_union32 *) ex->arrays[0]; + ptr1 = (orc_union32 *) ex->arrays[1]; + ptr4 = (orc_union32 *) ex->arrays[4]; + ptr5 = (orc_union32 *) ex->arrays[5]; + + /* 6: loadpw */ + var41.x2[0] = (int) 0x00000003; /* 3 or 1.4822e-323f */ + var41.x2[1] = (int) 0x00000003; /* 3 or 1.4822e-323f */ + /* 9: loadpw */ + var42.x2[0] = (int) 0x00000002; /* 2 or 9.88131e-324f */ + var42.x2[1] = (int) 0x00000002; /* 2 or 9.88131e-324f */ + + for (i = 0; i < n; i++) { + /* 0: loadl */ + var39 = ptr4[i]; + /* 1: splitlw */ + { + orc_union32 _src; + _src.i = var39.i; + var45.i = _src.x2[1]; + var46.i = _src.x2[0]; + } + /* 2: loadl */ + var40 = ptr5[i]; + /* 3: splitlw */ + { + orc_union32 _src; + _src.i = var40.i; + var47.i = _src.x2[1]; + var48.i = _src.x2[0]; + } + /* 4: convubw */ + var49.x2[0] = (orc_uint8) var45.x2[0]; + var49.x2[1] = (orc_uint8) var45.x2[1]; + /* 5: convubw */ + var50.x2[0] = (orc_uint8) var47.x2[0]; + var50.x2[1] = (orc_uint8) var47.x2[1]; + /* 7: mullw */ + var51.x2[0] = (var49.x2[0] * var41.x2[0]) & 0xffff; + var51.x2[1] = (var49.x2[1] * var41.x2[1]) & 0xffff; + /* 8: addw */ + var52.x2[0] = var51.x2[0] + var50.x2[0]; + var52.x2[1] = var51.x2[1] + var50.x2[1]; + /* 10: addw */ + var53.x2[0] = var52.x2[0] + var42.x2[0]; + var53.x2[1] = var52.x2[1] + var42.x2[1]; + /* 11: shruw */ + var54.x2[0] = ((orc_uint16) var53.x2[0]) >> 2; + var54.x2[1] = ((orc_uint16) var53.x2[1]) >> 2; + /* 12: convwb */ + var55.x2[0] = var54.x2[0]; + var55.x2[1] = var54.x2[1]; + /* 13: mergewl */ + { + orc_union32 _dest; + _dest.x2[0] = var46.i; + _dest.x2[1] = var55.i; + var43.i = _dest.i; + } + /* 14: storel */ + ptr0[i] = var43; + /* 15: mullw */ + var56.x2[0] = (var50.x2[0] * var41.x2[0]) & 0xffff; + var56.x2[1] = (var50.x2[1] * var41.x2[1]) & 0xffff; + /* 16: addw */ + var57.x2[0] = var56.x2[0] + var49.x2[0]; + var57.x2[1] = var56.x2[1] + var49.x2[1]; + /* 17: addw */ + var58.x2[0] = var57.x2[0] + var42.x2[0]; + var58.x2[1] = var57.x2[1] + var42.x2[1]; + /* 18: shruw */ + var59.x2[0] = ((orc_uint16) var58.x2[0]) >> 2; + var59.x2[1] = ((orc_uint16) var58.x2[1]) >> 2; + /* 19: convwb */ + var60.x2[0] = var59.x2[0]; + var60.x2[1] = var59.x2[1]; + /* 20: mergewl */ + { + orc_union32 _dest; + _dest.x2[0] = var48.i; + _dest.x2[1] = var60.i; + var44.i = _dest.i; + } + /* 21: storel */ + ptr1[i] = var44; + } + +} + +void +video_orc_chroma_up_v2_u8 (guint8 * ORC_RESTRICT d1, guint8 * ORC_RESTRICT d2, + const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2, int n) +{ + OrcExecutor _ex, *ex = &_ex; + static volatile int p_inited = 0; + static OrcCode *c = 0; + void (*func) (OrcExecutor *); + + if (!p_inited) { + orc_once_mutex_lock (); + if (!p_inited) { + OrcProgram *p; + +#if 1 + static const orc_uint8 bc[] = { + 1, 9, 25, 118, 105, 100, 101, 111, 95, 111, 114, 99, 95, 99, 104, 114, + 111, 109, 97, 95, 117, 112, 95, 118, 50, 95, 117, 56, 11, 4, 4, 11, + 4, 4, 12, 4, 4, 12, 4, 4, 14, 2, 3, 0, 0, 0, 14, 2, + 2, 0, 0, 0, 20, 2, 20, 2, 20, 2, 20, 2, 20, 4, 20, 4, + 20, 4, 198, 34, 32, 4, 198, 35, 33, 5, 21, 1, 150, 36, 34, 21, + 1, 150, 37, 35, 21, 1, 89, 38, 36, 16, 21, 1, 70, 38, 38, 37, + 21, 1, 70, 38, 38, 17, 21, 1, 95, 38, 38, 17, 21, 1, 157, 34, + 38, 195, 0, 32, 34, 21, 1, 89, 38, 37, 16, 21, 1, 70, 38, 38, + 36, 21, 1, 70, 38, 38, 17, 21, 1, 95, 38, 38, 17, 21, 1, 157, + 35, 38, 195, 1, 33, 35, 2, 0, + }; + p = orc_program_new_from_static_bytecode (bc); + orc_program_set_backup_function (p, _backup_video_orc_chroma_up_v2_u8); +#else + p = orc_program_new (); + orc_program_set_name (p, "video_orc_chroma_up_v2_u8"); + orc_program_set_backup_function (p, _backup_video_orc_chroma_up_v2_u8); + orc_program_add_destination (p, 4, "d1"); + orc_program_add_destination (p, 4, "d2"); + orc_program_add_source (p, 4, "s1"); + orc_program_add_source (p, 4, "s2"); + orc_program_add_constant (p, 2, 0x00000003, "c1"); + orc_program_add_constant (p, 2, 0x00000002, "c2"); + orc_program_add_temporary (p, 2, "t1"); + orc_program_add_temporary (p, 2, "t2"); + orc_program_add_temporary (p, 2, "t3"); + orc_program_add_temporary (p, 2, "t4"); + orc_program_add_temporary (p, 4, "t5"); + orc_program_add_temporary (p, 4, "t6"); + orc_program_add_temporary (p, 4, "t7"); + + orc_program_append_2 (p, "splitlw", 0, ORC_VAR_T3, ORC_VAR_T1, ORC_VAR_S1, + ORC_VAR_D1); + orc_program_append_2 (p, "splitlw", 0, ORC_VAR_T4, ORC_VAR_T2, ORC_VAR_S2, + ORC_VAR_D1); + orc_program_append_2 (p, "convubw", 1, ORC_VAR_T5, ORC_VAR_T3, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "convubw", 1, ORC_VAR_T6, ORC_VAR_T4, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "mullw", 1, ORC_VAR_T7, ORC_VAR_T5, ORC_VAR_C1, + ORC_VAR_D1); + orc_program_append_2 (p, "addw", 1, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_T6, + ORC_VAR_D1); + orc_program_append_2 (p, "addw", 1, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_C2, + ORC_VAR_D1); + orc_program_append_2 (p, "shruw", 1, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_C2, + ORC_VAR_D1); + orc_program_append_2 (p, "convwb", 1, ORC_VAR_T3, ORC_VAR_T7, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "mergewl", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_T3, + ORC_VAR_D1); + orc_program_append_2 (p, "mullw", 1, ORC_VAR_T7, ORC_VAR_T6, ORC_VAR_C1, + ORC_VAR_D1); + orc_program_append_2 (p, "addw", 1, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_T5, + ORC_VAR_D1); + orc_program_append_2 (p, "addw", 1, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_C2, + ORC_VAR_D1); + orc_program_append_2 (p, "shruw", 1, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_C2, + ORC_VAR_D1); + orc_program_append_2 (p, "convwb", 1, ORC_VAR_T4, ORC_VAR_T7, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "mergewl", 0, ORC_VAR_D2, ORC_VAR_T2, ORC_VAR_T4, + ORC_VAR_D1); +#endif + + orc_program_compile (p); + c = orc_program_take_code (p); + orc_program_free (p); + } + p_inited = TRUE; + orc_once_mutex_unlock (); + } + ex->arrays[ORC_VAR_A2] = c; + ex->program = 0; + + ex->n = n; + ex->arrays[ORC_VAR_D1] = d1; + ex->arrays[ORC_VAR_D2] = d2; + ex->arrays[ORC_VAR_S1] = (void *) s1; + ex->arrays[ORC_VAR_S2] = (void *) s2; + + func = c->exec; + func (ex); +} +#endif + + +/* video_orc_chroma_up_v2_u16 */ +#ifdef DISABLE_ORC +void +video_orc_chroma_up_v2_u16 (guint16 * ORC_RESTRICT d1, + guint16 * ORC_RESTRICT d2, const guint16 * ORC_RESTRICT s1, + const guint16 * ORC_RESTRICT s2, int n) +{ + int i; + orc_union64 *ORC_RESTRICT ptr0; + orc_union64 *ORC_RESTRICT ptr1; + const orc_union64 *ORC_RESTRICT ptr4; + const orc_union64 *ORC_RESTRICT ptr5; + orc_union64 var39; + orc_union64 var40; +#if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__) + volatile orc_union64 var41; +#else + orc_union64 var41; +#endif +#if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__) + volatile orc_union64 var42; +#else + orc_union64 var42; +#endif + orc_union64 var43; + orc_union64 var44; + orc_union32 var45; + orc_union32 var46; + orc_union32 var47; + orc_union32 var48; + orc_union64 var49; + orc_union64 var50; + orc_union64 var51; + orc_union64 var52; + orc_union64 var53; + orc_union64 var54; + orc_union32 var55; + orc_union64 var56; + orc_union64 var57; + orc_union64 var58; + orc_union64 var59; + orc_union32 var60; + + ptr0 = (orc_union64 *) d1; + ptr1 = (orc_union64 *) d2; + ptr4 = (orc_union64 *) s1; + ptr5 = (orc_union64 *) s2; + + /* 6: loadpl */ + var41.x2[0] = (int) 0x00000003; /* 3 or 1.4822e-323f */ + var41.x2[1] = (int) 0x00000003; /* 3 or 1.4822e-323f */ + /* 9: loadpl */ + var42.x2[0] = (int) 0x00000002; /* 2 or 9.88131e-324f */ + var42.x2[1] = (int) 0x00000002; /* 2 or 9.88131e-324f */ + + for (i = 0; i < n; i++) { + /* 0: loadq */ + var39 = ptr4[i]; + /* 1: splitql */ + { + orc_union64 _src; + _src.i = var39.i; + var45.i = _src.x2[1]; + var46.i = _src.x2[0]; + } + /* 2: loadq */ + var40 = ptr5[i]; + /* 3: splitql */ + { + orc_union64 _src; + _src.i = var40.i; + var47.i = _src.x2[1]; + var48.i = _src.x2[0]; + } + /* 4: convuwl */ + var49.x2[0] = (orc_uint16) var45.x2[0]; + var49.x2[1] = (orc_uint16) var45.x2[1]; + /* 5: convuwl */ + var50.x2[0] = (orc_uint16) var47.x2[0]; + var50.x2[1] = (orc_uint16) var47.x2[1]; + /* 7: mulll */ + var51.x2[0] = + (((orc_uint32) var49.x2[0]) * ((orc_uint32) var41.x2[0])) & 0xffffffff; + var51.x2[1] = + (((orc_uint32) var49.x2[1]) * ((orc_uint32) var41.x2[1])) & 0xffffffff; + /* 8: addl */ + var52.x2[0] = ((orc_uint32) var51.x2[0]) + ((orc_uint32) var50.x2[0]); + var52.x2[1] = ((orc_uint32) var51.x2[1]) + ((orc_uint32) var50.x2[1]); + /* 10: addl */ + var53.x2[0] = ((orc_uint32) var52.x2[0]) + ((orc_uint32) var42.x2[0]); + var53.x2[1] = ((orc_uint32) var52.x2[1]) + ((orc_uint32) var42.x2[1]); + /* 11: shrul */ + var54.x2[0] = ((orc_uint32) var53.x2[0]) >> 2; + var54.x2[1] = ((orc_uint32) var53.x2[1]) >> 2; + /* 12: convlw */ + var55.x2[0] = var54.x2[0]; + var55.x2[1] = var54.x2[1]; + /* 13: mergelq */ + { + orc_union64 _dest; + _dest.x2[0] = var46.i; + _dest.x2[1] = var55.i; + var43.i = _dest.i; + } + /* 14: storeq */ + ptr0[i] = var43; + /* 15: mulll */ + var56.x2[0] = + (((orc_uint32) var50.x2[0]) * ((orc_uint32) var41.x2[0])) & 0xffffffff; + var56.x2[1] = + (((orc_uint32) var50.x2[1]) * ((orc_uint32) var41.x2[1])) & 0xffffffff; + /* 16: addl */ + var57.x2[0] = ((orc_uint32) var56.x2[0]) + ((orc_uint32) var49.x2[0]); + var57.x2[1] = ((orc_uint32) var56.x2[1]) + ((orc_uint32) var49.x2[1]); + /* 17: addl */ + var58.x2[0] = ((orc_uint32) var57.x2[0]) + ((orc_uint32) var42.x2[0]); + var58.x2[1] = ((orc_uint32) var57.x2[1]) + ((orc_uint32) var42.x2[1]); + /* 18: shrul */ + var59.x2[0] = ((orc_uint32) var58.x2[0]) >> 2; + var59.x2[1] = ((orc_uint32) var58.x2[1]) >> 2; + /* 19: convlw */ + var60.x2[0] = var59.x2[0]; + var60.x2[1] = var59.x2[1]; + /* 20: mergelq */ + { + orc_union64 _dest; + _dest.x2[0] = var48.i; + _dest.x2[1] = var60.i; + var44.i = _dest.i; + } + /* 21: storeq */ + ptr1[i] = var44; + } + +} + +#else +static void +_backup_video_orc_chroma_up_v2_u16 (OrcExecutor * ORC_RESTRICT ex) +{ + int i; + int n = ex->n; + orc_union64 *ORC_RESTRICT ptr0; + orc_union64 *ORC_RESTRICT ptr1; + const orc_union64 *ORC_RESTRICT ptr4; + const orc_union64 *ORC_RESTRICT ptr5; + orc_union64 var39; + orc_union64 var40; +#if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__) + volatile orc_union64 var41; +#else + orc_union64 var41; +#endif +#if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__) + volatile orc_union64 var42; +#else + orc_union64 var42; +#endif + orc_union64 var43; + orc_union64 var44; + orc_union32 var45; + orc_union32 var46; + orc_union32 var47; + orc_union32 var48; + orc_union64 var49; + orc_union64 var50; + orc_union64 var51; + orc_union64 var52; + orc_union64 var53; + orc_union64 var54; + orc_union32 var55; + orc_union64 var56; + orc_union64 var57; + orc_union64 var58; + orc_union64 var59; + orc_union32 var60; + + ptr0 = (orc_union64 *) ex->arrays[0]; + ptr1 = (orc_union64 *) ex->arrays[1]; + ptr4 = (orc_union64 *) ex->arrays[4]; + ptr5 = (orc_union64 *) ex->arrays[5]; + + /* 6: loadpl */ + var41.x2[0] = (int) 0x00000003; /* 3 or 1.4822e-323f */ + var41.x2[1] = (int) 0x00000003; /* 3 or 1.4822e-323f */ + /* 9: loadpl */ + var42.x2[0] = (int) 0x00000002; /* 2 or 9.88131e-324f */ + var42.x2[1] = (int) 0x00000002; /* 2 or 9.88131e-324f */ + + for (i = 0; i < n; i++) { + /* 0: loadq */ + var39 = ptr4[i]; + /* 1: splitql */ + { + orc_union64 _src; + _src.i = var39.i; + var45.i = _src.x2[1]; + var46.i = _src.x2[0]; + } + /* 2: loadq */ + var40 = ptr5[i]; + /* 3: splitql */ + { + orc_union64 _src; + _src.i = var40.i; + var47.i = _src.x2[1]; + var48.i = _src.x2[0]; + } + /* 4: convuwl */ + var49.x2[0] = (orc_uint16) var45.x2[0]; + var49.x2[1] = (orc_uint16) var45.x2[1]; + /* 5: convuwl */ + var50.x2[0] = (orc_uint16) var47.x2[0]; + var50.x2[1] = (orc_uint16) var47.x2[1]; + /* 7: mulll */ + var51.x2[0] = + (((orc_uint32) var49.x2[0]) * ((orc_uint32) var41.x2[0])) & 0xffffffff; + var51.x2[1] = + (((orc_uint32) var49.x2[1]) * ((orc_uint32) var41.x2[1])) & 0xffffffff; + /* 8: addl */ + var52.x2[0] = ((orc_uint32) var51.x2[0]) + ((orc_uint32) var50.x2[0]); + var52.x2[1] = ((orc_uint32) var51.x2[1]) + ((orc_uint32) var50.x2[1]); + /* 10: addl */ + var53.x2[0] = ((orc_uint32) var52.x2[0]) + ((orc_uint32) var42.x2[0]); + var53.x2[1] = ((orc_uint32) var52.x2[1]) + ((orc_uint32) var42.x2[1]); + /* 11: shrul */ + var54.x2[0] = ((orc_uint32) var53.x2[0]) >> 2; + var54.x2[1] = ((orc_uint32) var53.x2[1]) >> 2; + /* 12: convlw */ + var55.x2[0] = var54.x2[0]; + var55.x2[1] = var54.x2[1]; + /* 13: mergelq */ + { + orc_union64 _dest; + _dest.x2[0] = var46.i; + _dest.x2[1] = var55.i; + var43.i = _dest.i; + } + /* 14: storeq */ + ptr0[i] = var43; + /* 15: mulll */ + var56.x2[0] = + (((orc_uint32) var50.x2[0]) * ((orc_uint32) var41.x2[0])) & 0xffffffff; + var56.x2[1] = + (((orc_uint32) var50.x2[1]) * ((orc_uint32) var41.x2[1])) & 0xffffffff; + /* 16: addl */ + var57.x2[0] = ((orc_uint32) var56.x2[0]) + ((orc_uint32) var49.x2[0]); + var57.x2[1] = ((orc_uint32) var56.x2[1]) + ((orc_uint32) var49.x2[1]); + /* 17: addl */ + var58.x2[0] = ((orc_uint32) var57.x2[0]) + ((orc_uint32) var42.x2[0]); + var58.x2[1] = ((orc_uint32) var57.x2[1]) + ((orc_uint32) var42.x2[1]); + /* 18: shrul */ + var59.x2[0] = ((orc_uint32) var58.x2[0]) >> 2; + var59.x2[1] = ((orc_uint32) var58.x2[1]) >> 2; + /* 19: convlw */ + var60.x2[0] = var59.x2[0]; + var60.x2[1] = var59.x2[1]; + /* 20: mergelq */ + { + orc_union64 _dest; + _dest.x2[0] = var48.i; + _dest.x2[1] = var60.i; + var44.i = _dest.i; + } + /* 21: storeq */ + ptr1[i] = var44; + } + +} + +void +video_orc_chroma_up_v2_u16 (guint16 * ORC_RESTRICT d1, + guint16 * ORC_RESTRICT d2, const guint16 * ORC_RESTRICT s1, + const guint16 * ORC_RESTRICT s2, int n) +{ + OrcExecutor _ex, *ex = &_ex; + static volatile int p_inited = 0; + static OrcCode *c = 0; + void (*func) (OrcExecutor *); + + if (!p_inited) { + orc_once_mutex_lock (); + if (!p_inited) { + OrcProgram *p; + +#if 1 + static const orc_uint8 bc[] = { + 1, 9, 26, 118, 105, 100, 101, 111, 95, 111, 114, 99, 95, 99, 104, 114, + 111, 109, 97, 95, 117, 112, 95, 118, 50, 95, 117, 49, 54, 11, 8, 8, + 11, 8, 8, 12, 8, 8, 12, 8, 8, 14, 4, 3, 0, 0, 0, 14, + 4, 2, 0, 0, 0, 20, 4, 20, 4, 20, 4, 20, 4, 20, 8, 20, + 8, 20, 8, 197, 34, 32, 4, 197, 35, 33, 5, 21, 1, 154, 36, 34, + 21, 1, 154, 37, 35, 21, 1, 120, 38, 36, 16, 21, 1, 103, 38, 38, + 37, 21, 1, 103, 38, 38, 17, 21, 1, 126, 38, 38, 17, 21, 1, 163, + 34, 38, 194, 0, 32, 34, 21, 1, 120, 38, 37, 16, 21, 1, 103, 38, + 38, 36, 21, 1, 103, 38, 38, 17, 21, 1, 126, 38, 38, 17, 21, 1, + 163, 35, 38, 194, 1, 33, 35, 2, 0, + }; + p = orc_program_new_from_static_bytecode (bc); + orc_program_set_backup_function (p, _backup_video_orc_chroma_up_v2_u16); +#else + p = orc_program_new (); + orc_program_set_name (p, "video_orc_chroma_up_v2_u16"); + orc_program_set_backup_function (p, _backup_video_orc_chroma_up_v2_u16); + orc_program_add_destination (p, 8, "d1"); + orc_program_add_destination (p, 8, "d2"); + orc_program_add_source (p, 8, "s1"); + orc_program_add_source (p, 8, "s2"); + orc_program_add_constant (p, 4, 0x00000003, "c1"); + orc_program_add_constant (p, 4, 0x00000002, "c2"); + orc_program_add_temporary (p, 4, "t1"); + orc_program_add_temporary (p, 4, "t2"); + orc_program_add_temporary (p, 4, "t3"); + orc_program_add_temporary (p, 4, "t4"); + orc_program_add_temporary (p, 8, "t5"); + orc_program_add_temporary (p, 8, "t6"); + orc_program_add_temporary (p, 8, "t7"); + + orc_program_append_2 (p, "splitql", 0, ORC_VAR_T3, ORC_VAR_T1, ORC_VAR_S1, + ORC_VAR_D1); + orc_program_append_2 (p, "splitql", 0, ORC_VAR_T4, ORC_VAR_T2, ORC_VAR_S2, + ORC_VAR_D1); + orc_program_append_2 (p, "convuwl", 1, ORC_VAR_T5, ORC_VAR_T3, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "convuwl", 1, ORC_VAR_T6, ORC_VAR_T4, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "mulll", 1, ORC_VAR_T7, ORC_VAR_T5, ORC_VAR_C1, + ORC_VAR_D1); + orc_program_append_2 (p, "addl", 1, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_T6, + ORC_VAR_D1); + orc_program_append_2 (p, "addl", 1, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_C2, + ORC_VAR_D1); + orc_program_append_2 (p, "shrul", 1, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_C2, + ORC_VAR_D1); + orc_program_append_2 (p, "convlw", 1, ORC_VAR_T3, ORC_VAR_T7, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "mergelq", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_T3, + ORC_VAR_D1); + orc_program_append_2 (p, "mulll", 1, ORC_VAR_T7, ORC_VAR_T6, ORC_VAR_C1, + ORC_VAR_D1); + orc_program_append_2 (p, "addl", 1, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_T5, + ORC_VAR_D1); + orc_program_append_2 (p, "addl", 1, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_C2, + ORC_VAR_D1); + orc_program_append_2 (p, "shrul", 1, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_C2, + ORC_VAR_D1); + orc_program_append_2 (p, "convlw", 1, ORC_VAR_T4, ORC_VAR_T7, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "mergelq", 0, ORC_VAR_D2, ORC_VAR_T2, ORC_VAR_T4, + ORC_VAR_D1); +#endif + + orc_program_compile (p); + c = orc_program_take_code (p); + orc_program_free (p); + } + p_inited = TRUE; + orc_once_mutex_unlock (); + } + ex->arrays[ORC_VAR_A2] = c; + ex->program = 0; + + ex->n = n; + ex->arrays[ORC_VAR_D1] = d1; + ex->arrays[ORC_VAR_D2] = d2; + ex->arrays[ORC_VAR_S1] = (void *) s1; + ex->arrays[ORC_VAR_S2] = (void *) s2; + + func = c->exec; + func (ex); +} +#endif + + /* video_orc_chroma_down_v2_u16 */ #ifdef DISABLE_ORC void diff --git a/gst-libs/gst/video/video-orc-dist.h b/gst-libs/gst/video/video-orc-dist.h index df1e8a5842..befb845760 100644 --- a/gst-libs/gst/video/video-orc-dist.h +++ b/gst-libs/gst/video/video-orc-dist.h @@ -177,6 +177,8 @@ void video_orc_resample_v_multaps_u8_lq (gint32 * ORC_RESTRICT d1, const guint32 void video_orc_resample_v_muladdtaps_u8_lq (gint32 * ORC_RESTRICT d1, const guint32 * ORC_RESTRICT s1, int p1, int n); void video_orc_chroma_down_h2_u8 (guint8 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, int n); void video_orc_chroma_down_v2_u8 (guint8 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2, int n); +void video_orc_chroma_up_v2_u8 (guint8 * ORC_RESTRICT d1, guint8 * ORC_RESTRICT d2, const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2, int n); +void video_orc_chroma_up_v2_u16 (guint16 * ORC_RESTRICT d1, guint16 * ORC_RESTRICT d2, const guint16 * ORC_RESTRICT s1, const guint16 * ORC_RESTRICT s2, int n); void video_orc_chroma_down_v2_u16 (guint16 * ORC_RESTRICT d1, const guint16 * ORC_RESTRICT s1, const guint16 * ORC_RESTRICT s2, int n); void video_orc_chroma_down_v4_u8 (guint8 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2, const guint8 * ORC_RESTRICT s3, const guint8 * ORC_RESTRICT s4, int n); void video_orc_chroma_down_v4_u16 (guint16 * ORC_RESTRICT d1, const guint16 * ORC_RESTRICT s1, const guint16 * ORC_RESTRICT s2, const guint16 * ORC_RESTRICT s3, const guint16 * ORC_RESTRICT s4, int n); diff --git a/gst-libs/gst/video/video-orc.orc b/gst-libs/gst/video/video-orc.orc index cf6c2bd5af..db23dfbe85 100644 --- a/gst-libs/gst/video/video-orc.orc +++ b/gst-libs/gst/video/video-orc.orc @@ -1672,6 +1672,70 @@ select1lw uv2, s2 x2 avgub uv1, uv1, uv2 mergewl d, ay1, uv1 +.function video_orc_chroma_up_v2_u8 +.source 4 s1 guint8 +.source 4 s2 guint8 +.dest 4 d1 guint8 +.dest 4 d2 guint8 +.temp 2 ay1 +.temp 2 ay2 +.temp 2 uv1 +.temp 2 uv2 +.temp 4 uuvv1 +.temp 4 uuvv2 +.temp 4 uuvv3 + +splitlw uv1, ay1, s1 +splitlw uv2, ay2, s2 +x2 convubw uuvv1, uv1 +x2 convubw uuvv2, uv2 + +x2 mullw uuvv3, uuvv1, 3 +x2 addw uuvv3, uuvv3, uuvv2 +x2 addw uuvv3, uuvv3, 2 +x2 shruw uuvv3, uuvv3, 2 +x2 convwb uv1, uuvv3 +mergewl d1, ay1, uv1 + +x2 mullw uuvv3, uuvv2, 3 +x2 addw uuvv3, uuvv3, uuvv1 +x2 addw uuvv3, uuvv3, 2 +x2 shruw uuvv3, uuvv3, 2 +x2 convwb uv2, uuvv3 +mergewl d2, ay2, uv2 + +.function video_orc_chroma_up_v2_u16 +.source 8 s1 guint16 +.source 8 s2 guint16 +.dest 8 d1 guint16 +.dest 8 d2 guint16 +.temp 4 ay1 +.temp 4 ay2 +.temp 4 uv1 +.temp 4 uv2 +.temp 8 uuvv1 +.temp 8 uuvv2 +.temp 8 uuvv3 + +splitql uv1, ay1, s1 +splitql uv2, ay2, s2 +x2 convuwl uuvv1, uv1 +x2 convuwl uuvv2, uv2 + +x2 mulll uuvv3, uuvv1, 3 +x2 addl uuvv3, uuvv3, uuvv2 +x2 addl uuvv3, uuvv3, 2 +x2 shrul uuvv3, uuvv3, 2 +x2 convlw uv1, uuvv3 +mergelq d1, ay1, uv1 + +x2 mulll uuvv3, uuvv2, 3 +x2 addl uuvv3, uuvv3, uuvv1 +x2 addl uuvv3, uuvv3, 2 +x2 shrul uuvv3, uuvv3, 2 +x2 convlw uv2, uuvv3 +mergelq d2, ay2, uv2 + .function video_orc_chroma_down_v2_u16 .source 8 s1 guint16 .source 8 s2 guint16 diff --git a/tests/check/libs/video.c b/tests/check/libs/video.c index 9f6f2f0206..2bb2365420 100644 --- a/tests/check/libs/video.c +++ b/tests/check/libs/video.c @@ -1801,10 +1801,10 @@ GST_START_TEST (test_video_pack_unpack2) GTimer *timer; gint num_formats; -#define WIDTH 1920 -#define HEIGHT 1080 +#define WIDTH 320 +#define HEIGHT 240 /* set to something larger to do benchmarks */ -#define TIME 0.0 +#define TIME 0.01 timer = g_timer_new (); @@ -1880,8 +1880,9 @@ GST_START_TEST (test_video_pack_unpack2) /* compare the frame */ diff = compare_frame (finfo, depth, outpixels, pixels, WIDTH, HEIGHT); - GST_DEBUG ("%f \t %f \t %f \t %f \t %s", pack_sec, unpack_sec, - info.size * pack_sec, info.size * unpack_sec, finfo->name); + GST_DEBUG ("%f \t %f \t %f \t %f \t %s %d/%f", pack_sec, unpack_sec, + info.size * pack_sec, info.size * unpack_sec, finfo->name, count, + elapsed); if (diff != 0) { gst_util_dump_mem (outpixels, 128); @@ -1901,50 +1902,89 @@ GST_END_TEST; #undef HEIGHT #undef TIME -#define WIDTH 1920 -#define HEIGHT 1080 +#define WIDTH 320 +#define HEIGHT 240 #define TIME 0.1 #define GET_LINE(l) (pixels + CLAMP (l, 0, HEIGHT-1) * WIDTH * 4) GST_START_TEST (test_video_chroma) { guint8 *pixels; - GstVideoChromaResample *resample; guint n_lines; - gint i, j, offset, count; - gpointer lines[2]; + gint i, j, k, offset, count; + gpointer lines[10]; GTimer *timer; gdouble elapsed, subsample_sec; + GstVideoChromaSite sites[] = { + GST_VIDEO_CHROMA_SITE_NONE, + GST_VIDEO_CHROMA_SITE_H_COSITED, + }; timer = g_timer_new (); pixels = make_pixels (8, WIDTH, HEIGHT); - resample = gst_video_chroma_resample_new (GST_VIDEO_CHROMA_METHOD_LINEAR, - GST_VIDEO_CHROMA_SITE_NONE, GST_VIDEO_CHROMA_FLAG_NONE, - GST_VIDEO_FORMAT_AYUV, -1, -1); + for (k = 0; k < G_N_ELEMENTS (sites); k++) { + GstVideoChromaResample *resample; - gst_video_chroma_resample_get_info (resample, &n_lines, &offset); - fail_unless (n_lines == 2); - fail_unless (offset == 0); + resample = gst_video_chroma_resample_new (GST_VIDEO_CHROMA_METHOD_LINEAR, + sites[k], GST_VIDEO_CHROMA_FLAG_NONE, GST_VIDEO_FORMAT_AYUV, -1, -1); - count = 0; - g_timer_start (timer); - while (TRUE) { - for (i = 0; i < HEIGHT; i += n_lines) { - for (j = 0; j < n_lines; j++) - lines[j] = GET_LINE (i + offset + j); + gst_video_chroma_resample_get_info (resample, &n_lines, &offset); + fail_unless (n_lines < 10); - gst_video_chroma_resample (resample, lines, WIDTH); + /* warmup */ + for (j = 0; j < n_lines; j++) + lines[j] = GET_LINE (offset + j); + gst_video_chroma_resample (resample, lines, WIDTH); + + count = 0; + g_timer_start (timer); + while (TRUE) { + for (i = 0; i < HEIGHT; i += n_lines) { + for (j = 0; j < n_lines; j++) + lines[j] = GET_LINE (i + offset + j); + + gst_video_chroma_resample (resample, lines, WIDTH); + } + count++; + elapsed = g_timer_elapsed (timer, NULL); + if (elapsed >= TIME) + break; } - count++; - elapsed = g_timer_elapsed (timer, NULL); - if (elapsed >= TIME) - break; + subsample_sec = count / elapsed; + GST_DEBUG ("%f downsamples/sec %d/%f", subsample_sec, count, elapsed); + gst_video_chroma_resample_free (resample); + + resample = gst_video_chroma_resample_new (GST_VIDEO_CHROMA_METHOD_LINEAR, + sites[k], GST_VIDEO_CHROMA_FLAG_NONE, GST_VIDEO_FORMAT_AYUV, 1, 1); + + gst_video_chroma_resample_get_info (resample, &n_lines, &offset); + fail_unless (n_lines < 10); + + /* warmup */ + for (j = 0; j < n_lines; j++) + lines[j] = GET_LINE (offset + j); + gst_video_chroma_resample (resample, lines, WIDTH); + + count = 0; + g_timer_start (timer); + while (TRUE) { + for (i = 0; i < HEIGHT; i += n_lines) { + for (j = 0; j < n_lines; j++) + lines[j] = GET_LINE (i + offset + j); + + gst_video_chroma_resample (resample, lines, WIDTH); + } + count++; + elapsed = g_timer_elapsed (timer, NULL); + if (elapsed >= TIME) + break; + } + subsample_sec = count / elapsed; + GST_DEBUG ("%f upsamples/sec %d/%f", subsample_sec, count, elapsed); + gst_video_chroma_resample_free (resample); } - subsample_sec = count / elapsed; - GST_DEBUG ("%f subsamples/sec", subsample_sec); - - gst_video_chroma_resample_free (resample); + g_free (pixels); g_timer_destroy (timer); } @@ -1968,9 +2008,9 @@ GST_START_TEST (test_video_scaler) GST_END_TEST; -#define WIDTH 192 -#define HEIGHT 108 -#define TIME 0.0 +#define WIDTH 320 +#define HEIGHT 240 +#define TIME 0.01 #define GET_LINE(l) (pixels + CLAMP (l, 0, HEIGHT-1) * WIDTH * 4) typedef struct @@ -2011,6 +2051,7 @@ GST_START_TEST (test_video_color_convert) gst_video_info_set_format (&ininfo, infmt, WIDTH, HEIGHT); inbuffer = gst_buffer_new_and_alloc (ininfo.size); + gst_buffer_memset (inbuffer, 0, 0, -1); gst_video_frame_map (&inframe, &ininfo, inbuffer, GST_MAP_READ); for (outfmt = GST_VIDEO_FORMAT_I420; outfmt < num_formats; outfmt++) { @@ -2027,6 +2068,8 @@ GST_START_TEST (test_video_color_convert) gst_video_frame_map (&outframe, &outinfo, outbuffer, GST_MAP_WRITE); convert = gst_video_converter_new (&ininfo, &outinfo, NULL); + /* warmup */ + gst_video_converter_frame (convert, &inframe, &outframe); count = 0; g_timer_start (timer); @@ -2043,9 +2086,9 @@ GST_START_TEST (test_video_color_convert) res.outfmt = outfmt; res.convert_sec = count / elapsed; - GST_DEBUG ("%f conversions/sec %s->%s", res.convert_sec, + GST_DEBUG ("%f conversions/sec %s->%s, %d/%f", res.convert_sec, gst_video_format_to_string (infmt), - gst_video_format_to_string (outfmt)); + gst_video_format_to_string (outfmt), count, elapsed); g_array_append_val (array, res); @@ -2083,6 +2126,8 @@ video_suite (void) Suite *s = suite_create ("video support library"); TCase *tc_chain = tcase_create ("general"); + tcase_set_timeout (tc_chain, 60 * 60); + suite_add_tcase (s, tc_chain); tcase_add_test (tc_chain, test_video_formats); tcase_add_test (tc_chain, test_video_formats_rgb);