diff --git a/gst-libs/gst/video/video-orc-dist.c b/gst-libs/gst/video/video-orc-dist.c index 152cef2444..47a185c998 100644 --- a/gst-libs/gst/video/video-orc-dist.c +++ b/gst-libs/gst/video/video-orc-dist.c @@ -178,6 +178,8 @@ void video_orc_convert_u16_to_u8 (guint8 * ORC_RESTRICT d1, const guint16 * ORC_RESTRICT s1, int n); void video_orc_convert_u8_to_u16 (guint16 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, int n); +void video_orc_splat_u32 (guint8 * ORC_RESTRICT d1, int p1, int n); +void video_orc_splat_u64 (guint8 * ORC_RESTRICT d1, int p1, int n); void video_orc_convert_I420_UYVY (guint8 * ORC_RESTRICT d1, guint8 * ORC_RESTRICT d2, const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2, const guint8 * ORC_RESTRICT s3, @@ -7889,6 +7891,252 @@ video_orc_convert_u8_to_u16 (guint16 * ORC_RESTRICT d1, #endif +/* video_orc_splat_u32: stores the int parameter p1, as one orc_union32, into each of the n consecutive orc_union32 elements starting at d1. With DISABLE_ORC defined this is a plain C loop; otherwise the exported function runs the code taken from a compiled OrcProgram, and the same C loop is registered as that program's backup function. */ +#ifdef DISABLE_ORC +void +video_orc_splat_u32 (guint8 * ORC_RESTRICT d1, int p1, int n) +{ + int i; + orc_union32 *ORC_RESTRICT ptr0; + orc_union32 var32; + + ptr0 = (orc_union32 *) d1; + + /* 0: loadpl: copy p1 into the orc_union32 temporary */ + var32.i = p1; + + for (i = 0; i < n; i++) { + /* 1: storel: write the temporary to element i of the destination */ + ptr0[i] = var32; + } + +} + +#else +static void +_backup_video_orc_splat_u32 (OrcExecutor * ORC_RESTRICT ex) +{ + int i; + int n = ex->n; + orc_union32 *ORC_RESTRICT ptr0; + orc_union32 var32; + + ptr0 = (orc_union32 *) ex->arrays[0]; /* destination comes from arrays[0] and the value from params[24]; presumably these are the ORC_VAR_D1 and ORC_VAR_P1 slots filled by the wrapper, TODO confirm against the ORC headers */ + + /* 0: loadpl: copy the parameter into the orc_union32 temporary */ + var32.i = ex->params[24]; + + for (i = 0; i < n; i++) { + /* 1: storel: write the temporary to element i of the destination */ + ptr0[i] = var32; + } + +} + +void +video_orc_splat_u32 (guint8 * ORC_RESTRICT d1, int p1, int n) +{ + OrcExecutor _ex, *ex = &_ex; + static volatile int p_inited = 0; + static OrcCode *c = 0; + void (*func) (OrcExecutor *); + + if (!p_inited) { /* setup path: the flag is tested again once orc_once_mutex_lock () has returned, and only then is the program built and compiled */ + orc_once_mutex_lock (); + if (!p_inited) { + OrcProgram *p; + +#if 1 /* always taken, so the program is created from the static bytecode */ + static const orc_uint8 bc[] = { /* after the leading 1, 9 come the length 19 and the 19 ASCII codes of the name video_orc_splat_u32 */ + 1, 9, 19, 118, 105, 100, 101, 111, 95, 111, 114, 99, 95, 115, 112, 108, + 97, 116, 95, 117, 51, 50, 11, 4, 4, 16, 4, 128, 0, 24, 2, 0, + + }; + p = 
orc_program_new_from_static_bytecode (bc); + orc_program_set_backup_function (p, _backup_video_orc_splat_u32); +#else /* never compiled because of the #if 1 above; creates the program through individual orc_program API calls */ + p = orc_program_new (); + orc_program_set_name (p, "video_orc_splat_u32"); + orc_program_set_backup_function (p, _backup_video_orc_splat_u32); + orc_program_add_destination (p, 4, "d1"); + orc_program_add_parameter (p, 4, "p1"); + + orc_program_append_2 (p, "storel", 0, ORC_VAR_D1, ORC_VAR_P1, ORC_VAR_D1, + ORC_VAR_D1); +#endif + + orc_program_compile (p); + c = orc_program_take_code (p); + orc_program_free (p); + } + p_inited = TRUE; /* set before orc_once_mutex_unlock () is called */ + orc_once_mutex_unlock (); + } + ex->arrays[ORC_VAR_A2] = c; /* the OrcCode pointer is stored in the A2 array slot of the executor */ + ex->program = 0; + + ex->n = n; + ex->arrays[ORC_VAR_D1] = d1; + ex->params[ORC_VAR_P1] = p1; + + func = c->exec; + func (ex); /* call the exec entry point of the OrcCode with the prepared executor */ +} +#endif + + +/* video_orc_splat_u64: fills the n consecutive orc_union64 elements starting at d1. p1 is assigned to all four x4 lanes of an orc_union32, each lane is copied into both x2 halves of an orc_union16, and the four results become the x4 lanes of the orc_union64 that is stored. The DISABLE_ORC build is a plain C loop; otherwise the exported function runs the code taken from a compiled OrcProgram, with the C loop registered as its backup function. */ +#ifdef DISABLE_ORC +void +video_orc_splat_u64 (guint8 * ORC_RESTRICT d1, int p1, int n) +{ + int i; + orc_union64 *ORC_RESTRICT ptr0; + orc_union32 var32; + orc_union64 var33; + + ptr0 = (orc_union64 *) d1; + + /* 0: loadpb: assign p1 to each of the four x4 lanes. NOTE(review): the x4 lanes of orc_union32 are presumably 8 bits wide, in which case only the low byte of p1 is kept; confirm against the ORC headers */ + var32.x4[0] = p1; + var32.x4[1] = p1; + var32.x4[2] = p1; + var32.x4[3] = p1; + + for (i = 0; i < n; i++) { + /* 1: mergebw: for each lane k, var32.x4[k] is written to both x2 halves of an orc_union16 whose value becomes var33.x4[k]; the result does not depend on i */ + { + orc_union16 _dest; + _dest.x2[0] = var32.x4[0]; + _dest.x2[1] = var32.x4[0]; + var33.x4[0] = _dest.i; + } + { + orc_union16 _dest; + _dest.x2[0] = var32.x4[1]; + _dest.x2[1] = var32.x4[1]; + var33.x4[1] = _dest.i; + } + { + orc_union16 _dest; + _dest.x2[0] = var32.x4[2]; + _dest.x2[1] = var32.x4[2]; + var33.x4[2] = _dest.i; + } + { + orc_union16 _dest; + _dest.x2[0] = var32.x4[3]; + _dest.x2[1] = var32.x4[3]; + var33.x4[3] = _dest.i; + } + /* 2: storeq: write the assembled orc_union64 to element i of the destination */ + ptr0[i] = var33; + } + +} + +#else +static void +_backup_video_orc_splat_u64 (OrcExecutor * ORC_RESTRICT ex) +{ + int i; + int n = ex->n; + orc_union64 *ORC_RESTRICT ptr0; + orc_union32 var32; + orc_union64 var33; + + ptr0 = (orc_union64 *) ex->arrays[0]; /* destination comes from arrays[0] and the value from params[24]; presumably these are the ORC_VAR_D1 and ORC_VAR_P1 slots filled by the wrapper, TODO confirm against the ORC headers */ + + /* 0: loadpb: assign the parameter to each of the four x4 lanes. NOTE(review): the lanes are presumably 8 bits wide, in which case only the low byte of the parameter is kept; confirm against the ORC headers */ + var32.x4[0] = ex->params[24]; + var32.x4[1] = ex->params[24]; + var32.x4[2] = ex->params[24]; + 
var32.x4[3] = ex->params[24]; + + for (i = 0; i < n; i++) { + /* 1: mergebw: for each lane k, var32.x4[k] is written to both x2 halves of an orc_union16 whose value becomes var33.x4[k]; the result does not depend on i */ + { + orc_union16 _dest; + _dest.x2[0] = var32.x4[0]; + _dest.x2[1] = var32.x4[0]; + var33.x4[0] = _dest.i; + } + { + orc_union16 _dest; + _dest.x2[0] = var32.x4[1]; + _dest.x2[1] = var32.x4[1]; + var33.x4[1] = _dest.i; + } + { + orc_union16 _dest; + _dest.x2[0] = var32.x4[2]; + _dest.x2[1] = var32.x4[2]; + var33.x4[2] = _dest.i; + } + { + orc_union16 _dest; + _dest.x2[0] = var32.x4[3]; + _dest.x2[1] = var32.x4[3]; + var33.x4[3] = _dest.i; + } + /* 2: storeq: write the assembled orc_union64 to element i of the destination */ + ptr0[i] = var33; + } + +} + +void +video_orc_splat_u64 (guint8 * ORC_RESTRICT d1, int p1, int n) +{ + OrcExecutor _ex, *ex = &_ex; + static volatile int p_inited = 0; + static OrcCode *c = 0; + void (*func) (OrcExecutor *); + + if (!p_inited) { /* setup path: the flag is tested again once orc_once_mutex_lock () has returned, and only then is the program built and compiled */ + orc_once_mutex_lock (); + if (!p_inited) { + OrcProgram *p; + +#if 1 /* always taken, so the program is created from the static bytecode */ + static const orc_uint8 bc[] = { /* after the leading 1, 9 come the length 19 and the 19 ASCII codes of the name video_orc_splat_u64 */ + 1, 9, 19, 118, 105, 100, 101, 111, 95, 111, 114, 99, 95, 115, 112, 108, + 97, 116, 95, 117, 54, 52, 11, 8, 8, 16, 4, 21, 2, 196, 0, 24, + 24, 2, 0, + }; + p = orc_program_new_from_static_bytecode (bc); + orc_program_set_backup_function (p, _backup_video_orc_splat_u64); +#else /* never compiled because of the #if 1 above; creates the program through individual orc_program API calls */ + p = orc_program_new (); + orc_program_set_name (p, "video_orc_splat_u64"); + orc_program_set_backup_function (p, _backup_video_orc_splat_u64); + orc_program_add_destination (p, 8, "d1"); + orc_program_add_parameter (p, 4, "p1"); + + orc_program_append_2 (p, "mergebw", 2, ORC_VAR_D1, ORC_VAR_P1, ORC_VAR_P1, + ORC_VAR_D1); +#endif + + orc_program_compile (p); + c = orc_program_take_code (p); + orc_program_free (p); + } + p_inited = TRUE; /* set before orc_once_mutex_unlock () is called */ + orc_once_mutex_unlock (); + } + ex->arrays[ORC_VAR_A2] = c; /* the OrcCode pointer is stored in the A2 array slot of the executor */ + ex->program = 0; + + ex->n = n; + ex->arrays[ORC_VAR_D1] = d1; + ex->params[ORC_VAR_P1] = p1; + + func = c->exec; + func (ex); /* call the exec entry point of the OrcCode with the prepared executor */ +} +#endif + + /* video_orc_convert_I420_UYVY */ #ifdef DISABLE_ORC void diff --git a/gst-libs/gst/video/video-orc-dist.h b/gst-libs/gst/video/video-orc-dist.h index befb845760..5b686ff39d 
100644 --- a/gst-libs/gst/video/video-orc-dist.h +++ b/gst-libs/gst/video/video-orc-dist.h @@ -118,6 +118,8 @@ void video_orc_merge_linear_u8 (orc_uint8 * ORC_RESTRICT d1, const orc_uint8 * O void video_orc_memcpy_2d (guint8 * ORC_RESTRICT d1, int d1_stride, const guint8 * ORC_RESTRICT s1, int s1_stride, int n, int m); void video_orc_convert_u16_to_u8 (guint8 * ORC_RESTRICT d1, const guint16 * ORC_RESTRICT s1, int n); void video_orc_convert_u8_to_u16 (guint16 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, int n); +void video_orc_splat_u32 (guint8 * ORC_RESTRICT d1, int p1, int n); +void video_orc_splat_u64 (guint8 * ORC_RESTRICT d1, int p1, int n); void video_orc_convert_I420_UYVY (guint8 * ORC_RESTRICT d1, guint8 * ORC_RESTRICT d2, const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2, const guint8 * ORC_RESTRICT s3, const guint8 * ORC_RESTRICT s4, int n); void video_orc_convert_I420_YUY2 (guint8 * ORC_RESTRICT d1, guint8 * ORC_RESTRICT d2, const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2, const guint8 * ORC_RESTRICT s3, const guint8 * ORC_RESTRICT s4, int n); void video_orc_convert_I420_AYUV (guint8 * ORC_RESTRICT d1, guint8 * ORC_RESTRICT d2, const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2, const guint8 * ORC_RESTRICT s3, const guint8 * ORC_RESTRICT s4, int n);