From a12f51c3deabb8c6c08b33d2d7a5e7d3f38df77f Mon Sep 17 00:00:00 2001 From: Wim Taymans Date: Fri, 22 Apr 2016 15:07:10 +0200 Subject: [PATCH] video-converter: add more fastpaths for I420 -> RGB Use the I420->BGRA and a new I420->ARGB to speed up any I420 to RGB operation. --- gst-libs/gst/video/video-converter.c | 137 +++++++- gst-libs/gst/video/video-orc-dist.c | 455 +++++++++++++++++++++++++++ gst-libs/gst/video/video-orc-dist.h | 1 + gst-libs/gst/video/video-orc.orc | 57 +++- 4 files changed, 648 insertions(+), 2 deletions(-) diff --git a/gst-libs/gst/video/video-converter.c b/gst-libs/gst/video/video-converter.c index 7835c40cef..3a620d7d3d 100644 --- a/gst-libs/gst/video/video-converter.c +++ b/gst-libs/gst/video/video-converter.c @@ -3599,9 +3599,93 @@ convert_I420_BGRA (GstVideoConverter * convert, const GstVideoFrame * src, sv = FRAME_GET_V_LINE (src, (i + convert->in_y) >> 1); sv += (convert->in_x >> 1); +#if G_BYTE_ORDER == G_LITTLE_ENDIAN video_orc_convert_I420_BGRA (d, sy, su, sv, data->im[0][0], data->im[0][2], data->im[2][1], data->im[1][1], data->im[1][2], width); +#else + video_orc_convert_I420_ARGB (d, sy, su, sv, + data->im[0][0], data->im[0][2], + data->im[2][1], data->im[1][1], data->im[1][2], width); +#endif + } + convert_fill_border (convert, dest); +} + +static void +convert_I420_ARGB (GstVideoConverter * convert, const GstVideoFrame * src, + GstVideoFrame * dest) +{ + int i; + gint width = convert->in_width; + gint height = convert->in_height; + MatrixData *data = &convert->convert_matrix; + + for (i = 0; i < height; i++) { + guint8 *sy, *su, *sv, *d; + + d = FRAME_GET_LINE (dest, i + convert->out_y); + d += (convert->out_x * 4); + sy = FRAME_GET_Y_LINE (src, i + convert->in_y); + sy += convert->in_x; + su = FRAME_GET_U_LINE (src, (i + convert->in_y) >> 1); + su += (convert->in_x >> 1); + sv = FRAME_GET_V_LINE (src, (i + convert->in_y) >> 1); + sv += (convert->in_x >> 1); + +#if G_BYTE_ORDER == G_LITTLE_ENDIAN + video_orc_convert_I420_ARGB (d, sy, su, sv, + data->im[0][0], data->im[0][2], + data->im[2][1], data->im[1][1], data->im[1][2], width); +#else + video_orc_convert_I420_BGRA (d, sy, su, sv, + data->im[0][0], data->im[0][2], + data->im[2][1], data->im[1][1], data->im[1][2], width); +#endif + } + convert_fill_border (convert, dest); +} + +static void +convert_I420_pack_ARGB (GstVideoConverter * convert, const GstVideoFrame * src, + GstVideoFrame * dest) +{ + int i; + gint width = convert->in_width; + gint height = convert->in_height; + MatrixData *data = &convert->convert_matrix; + gpointer tmp = convert->tmpline; + gpointer d[GST_VIDEO_MAX_PLANES]; + gint pstride = GST_VIDEO_FORMAT_INFO_PSTRIDE (dest->info.finfo, 0); + + d[0] = FRAME_GET_LINE (dest, 0); + d[0] = (guint8 *) d[0] + convert->out_x * pstride; + + for (i = 0; i < height; i++) { + guint8 *sy, *su, *sv; + + sy = FRAME_GET_Y_LINE (src, i + convert->in_y); + sy += convert->in_x; + su = FRAME_GET_U_LINE (src, (i + convert->in_y) >> 1); + su += (convert->in_x >> 1); + sv = FRAME_GET_V_LINE (src, (i + convert->in_y) >> 1); + sv += (convert->in_x >> 1); + +#if G_BYTE_ORDER == G_LITTLE_ENDIAN + video_orc_convert_I420_ARGB (tmp, sy, su, sv, + data->im[0][0], data->im[0][2], + data->im[2][1], data->im[1][1], data->im[1][2], width); +#else + video_orc_convert_I420_BGRA (tmp, sy, su, sv, + data->im[0][0], data->im[0][2], + data->im[2][1], data->im[1][1], data->im[1][2], width); +#endif + dest->info.finfo->pack_func (dest->info.finfo, + (GST_VIDEO_FRAME_IS_INTERLACED (dest) ? + GST_VIDEO_PACK_FLAG_INTERLACED : + GST_VIDEO_PACK_FLAG_NONE), + tmp, 0, d, dest->info.stride, + dest->info.chroma_site, i + convert->out_y, width); } convert_fill_border (convert, dest); } @@ -4611,6 +4695,7 @@ static const VideoTransform transforms[] = { FALSE, FALSE, FALSE, 0, 0, convert_AYUV_ABGR}, /* alias */ {GST_VIDEO_FORMAT_AYUV, GST_VIDEO_FORMAT_RGBx, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, FALSE, FALSE, 0, 0, convert_AYUV_RGBA}, /* alias */ +#endif {GST_VIDEO_FORMAT_I420, GST_VIDEO_FORMAT_BGRA, FALSE, TRUE, TRUE, TRUE, TRUE, FALSE, FALSE, FALSE, 0, 0, convert_I420_BGRA}, @@ -4620,7 +4705,57 @@ static const VideoTransform transforms[] = { TRUE, FALSE, FALSE, FALSE, 0, 0, convert_I420_BGRA}, {GST_VIDEO_FORMAT_YV12, GST_VIDEO_FORMAT_BGRx, FALSE, TRUE, TRUE, TRUE, TRUE, FALSE, FALSE, FALSE, 0, 0, convert_I420_BGRA}, -#endif + + {GST_VIDEO_FORMAT_I420, GST_VIDEO_FORMAT_ARGB, FALSE, TRUE, TRUE, TRUE, + TRUE, FALSE, FALSE, FALSE, 0, 0, convert_I420_ARGB}, + {GST_VIDEO_FORMAT_I420, GST_VIDEO_FORMAT_xRGB, FALSE, TRUE, TRUE, TRUE, + TRUE, FALSE, FALSE, FALSE, 0, 0, convert_I420_ARGB}, + {GST_VIDEO_FORMAT_YV12, GST_VIDEO_FORMAT_ARGB, FALSE, TRUE, TRUE, TRUE, + TRUE, FALSE, FALSE, FALSE, 0, 0, convert_I420_ARGB}, + {GST_VIDEO_FORMAT_YV12, GST_VIDEO_FORMAT_xRGB, FALSE, TRUE, TRUE, TRUE, + TRUE, FALSE, FALSE, FALSE, 0, 0, convert_I420_ARGB}, + + {GST_VIDEO_FORMAT_I420, GST_VIDEO_FORMAT_ABGR, FALSE, TRUE, TRUE, TRUE, + TRUE, FALSE, FALSE, FALSE, 0, 0, convert_I420_pack_ARGB}, + {GST_VIDEO_FORMAT_I420, GST_VIDEO_FORMAT_xBGR, FALSE, TRUE, TRUE, TRUE, + TRUE, FALSE, FALSE, FALSE, 0, 0, convert_I420_pack_ARGB}, + {GST_VIDEO_FORMAT_I420, GST_VIDEO_FORMAT_RGBA, FALSE, TRUE, TRUE, TRUE, + TRUE, FALSE, FALSE, FALSE, 0, 0, convert_I420_pack_ARGB}, + {GST_VIDEO_FORMAT_I420, GST_VIDEO_FORMAT_RGBx, FALSE, TRUE, TRUE, TRUE, + TRUE, FALSE, FALSE, FALSE, 0, 0, convert_I420_pack_ARGB}, + {GST_VIDEO_FORMAT_I420, GST_VIDEO_FORMAT_RGB, FALSE, TRUE, TRUE, TRUE, + TRUE, FALSE, FALSE, FALSE, 0, 0, convert_I420_pack_ARGB}, + {GST_VIDEO_FORMAT_I420, GST_VIDEO_FORMAT_BGR, FALSE, TRUE, TRUE, TRUE, + TRUE, FALSE, FALSE, FALSE, 0, 0, convert_I420_pack_ARGB}, + {GST_VIDEO_FORMAT_I420, GST_VIDEO_FORMAT_RGB15, FALSE, TRUE, TRUE, TRUE, + TRUE, FALSE, FALSE, FALSE, 0, 0, convert_I420_pack_ARGB}, + {GST_VIDEO_FORMAT_I420, GST_VIDEO_FORMAT_BGR15, FALSE, TRUE, TRUE, TRUE, + TRUE, FALSE, FALSE, FALSE, 0, 0, convert_I420_pack_ARGB}, + {GST_VIDEO_FORMAT_I420, GST_VIDEO_FORMAT_RGB16, FALSE, TRUE, TRUE, TRUE, + TRUE, FALSE, FALSE, FALSE, 0, 0, convert_I420_pack_ARGB}, + {GST_VIDEO_FORMAT_I420, GST_VIDEO_FORMAT_BGR16, FALSE, TRUE, TRUE, TRUE, + TRUE, FALSE, FALSE, FALSE, 0, 0, convert_I420_pack_ARGB}, + + {GST_VIDEO_FORMAT_YV12, GST_VIDEO_FORMAT_ABGR, FALSE, TRUE, TRUE, TRUE, + TRUE, FALSE, FALSE, FALSE, 0, 0, convert_I420_pack_ARGB}, + {GST_VIDEO_FORMAT_YV12, GST_VIDEO_FORMAT_xBGR, FALSE, TRUE, TRUE, TRUE, + TRUE, FALSE, FALSE, FALSE, 0, 0, convert_I420_pack_ARGB}, + {GST_VIDEO_FORMAT_YV12, GST_VIDEO_FORMAT_RGBA, FALSE, TRUE, TRUE, TRUE, + TRUE, FALSE, FALSE, FALSE, 0, 0, convert_I420_pack_ARGB}, + {GST_VIDEO_FORMAT_YV12, GST_VIDEO_FORMAT_RGBx, FALSE, TRUE, TRUE, TRUE, + TRUE, FALSE, FALSE, FALSE, 0, 0, convert_I420_pack_ARGB}, + {GST_VIDEO_FORMAT_YV12, GST_VIDEO_FORMAT_RGB, FALSE, TRUE, TRUE, TRUE, + TRUE, FALSE, FALSE, FALSE, 0, 0, convert_I420_pack_ARGB}, + {GST_VIDEO_FORMAT_YV12, GST_VIDEO_FORMAT_BGR, FALSE, TRUE, TRUE, TRUE, + TRUE, FALSE, FALSE, FALSE, 0, 0, convert_I420_pack_ARGB}, + {GST_VIDEO_FORMAT_YV12, GST_VIDEO_FORMAT_RGB15, FALSE, TRUE, TRUE, TRUE, + TRUE, FALSE, FALSE, FALSE, 0, 0, convert_I420_pack_ARGB}, + {GST_VIDEO_FORMAT_YV12, GST_VIDEO_FORMAT_BGR15, FALSE, TRUE, TRUE, TRUE, + TRUE, FALSE, FALSE, FALSE, 0, 0, convert_I420_pack_ARGB}, + {GST_VIDEO_FORMAT_YV12, GST_VIDEO_FORMAT_RGB16, FALSE, TRUE, TRUE, TRUE, + TRUE, FALSE, FALSE, FALSE, 0, 0, convert_I420_pack_ARGB}, + {GST_VIDEO_FORMAT_YV12, GST_VIDEO_FORMAT_BGR16, FALSE, TRUE, TRUE, TRUE, + TRUE, FALSE, FALSE, FALSE, 0, 0, convert_I420_pack_ARGB}, /* scalers */ {GST_VIDEO_FORMAT_GBR, GST_VIDEO_FORMAT_GBR, TRUE, FALSE, FALSE, TRUE, diff --git a/gst-libs/gst/video/video-orc-dist.c b/gst-libs/gst/video/video-orc-dist.c index fa57537d75..e59237b8af 100644 --- a/gst-libs/gst/video/video-orc-dist.c +++ b/gst-libs/gst/video/video-orc-dist.c @@ -333,6 +333,10 @@ void video_orc_convert_I420_BGRA (guint8 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2, const guint8 * ORC_RESTRICT s3, int p1, int p2, int p3, int p4, int p5, int n); +void video_orc_convert_I420_ARGB (guint8 * ORC_RESTRICT d1, + const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2, + const guint8 * ORC_RESTRICT s3, int p1, int p2, int p3, int p4, int p5, + int n); void video_orc_matrix8 (guint8 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, orc_int64 p1, orc_int64 p2, orc_int64 p3, orc_int64 p4, int n); @@ -22026,6 +22030,457 @@ video_orc_convert_I420_BGRA (guint8 * ORC_RESTRICT d1, #endif +/* video_orc_convert_I420_ARGB */ +#ifdef DISABLE_ORC +void +video_orc_convert_I420_ARGB (guint8 * ORC_RESTRICT d1, + const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2, + const guint8 * ORC_RESTRICT s3, int p1, int p2, int p3, int p4, int p5, + int n) +{ + int i; + orc_union32 *ORC_RESTRICT ptr0; + const orc_int8 *ORC_RESTRICT ptr4; + const orc_int8 *ORC_RESTRICT ptr5; + const orc_int8 *ORC_RESTRICT ptr6; + orc_int8 var42; +#if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__) + volatile orc_int8 var43; +#else + orc_int8 var43; +#endif + orc_union16 var44; + orc_union16 var45; +#if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__) + volatile orc_int8 var46; +#else + orc_int8 var46; +#endif + orc_union16 var47; + orc_union16 var48; + orc_union16 var49; +#if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__) + volatile orc_union32 var50; +#else + orc_union32 var50; +#endif + orc_union32 var51; + orc_int8 var52; + orc_union16 var53; + orc_int8 var54; + orc_int8 var55; + orc_union16 var56; + orc_int8 var57; + orc_int8 var58; + orc_union16 var59; + orc_union16 var60; + orc_union16 var61; + orc_union16 var62; + orc_int8 var63; + orc_union16 var64; + orc_union16 var65; + orc_union16 var66; + orc_int8 var67; + orc_union16 var68; + orc_union16 var69; + orc_union16 var70; + orc_union16 var71; + orc_int8 var72; + orc_union16 var73; + orc_union32 var74; + + ptr0 = (orc_union32 *) d1; + ptr4 = (orc_int8 *) s1; + ptr5 = (orc_int8 *) s2; + ptr6 = (orc_int8 *) s3; + + /* 1: loadpb */ + var43 = (int) 0x00000080; /* 128 or 6.32404e-322f */ + /* 10: loadpw */ + var44.i = p1; + /* 12: loadpw */ + var45.i = p2; + /* 16: loadpb */ + var46 = (int) 0x0000007f; /* 127 or 6.27463e-322f */ + /* 18: loadpw */ + var47.i = p3; + /* 22: loadpw */ + var48.i = p4; + /* 25: loadpw */ + var49.i = p5; + /* 31: loadpb */ + var50.x4[0] = (int) 0x00000080; /* 128 or 6.32404e-322f */ + var50.x4[1] = (int) 0x00000080; /* 128 or 6.32404e-322f */ + var50.x4[2] = (int) 0x00000080; /* 128 or 6.32404e-322f */ + var50.x4[3] = (int) 0x00000080; /* 128 or 6.32404e-322f */ + + for (i = 0; i < n; i++) { + /* 0: loadb */ + var42 = ptr4[i]; + /* 2: subb */ + var52 = var42 - var43; + /* 3: splatbw */ + var53.i = ((var52 & 0xff) << 8) | (var52 & 0xff); + /* 4: loadupdb */ + var54 = ptr5[i >> 1]; + /* 5: subb */ + var55 = var54 - var43; + /* 6: splatbw */ + var56.i = ((var55 & 0xff) << 8) | (var55 & 0xff); + /* 7: loadupdb */ + var57 = ptr6[i >> 1]; + /* 8: subb */ + var58 = var57 - var43; + /* 9: splatbw */ + var59.i = ((var58 & 0xff) << 8) | (var58 & 0xff); + /* 11: mulhsw */ + var60.i = (var53.i * var44.i) >> 16; + /* 13: mulhsw */ + var61.i = (var59.i * var45.i) >> 16; + /* 14: addw */ + var62.i = var60.i + var61.i; + /* 15: convssswb */ + var63 = ORC_CLAMP_SB (var62.i); + /* 17: mergebw */ + { + orc_union16 _dest; + _dest.x2[0] = var46; + _dest.x2[1] = var63; + var64.i = _dest.i; + } + /* 19: mulhsw */ + var65.i = (var56.i * var47.i) >> 16; + /* 20: addw */ + var66.i = var60.i + var65.i; + /* 21: convssswb */ + var67 = ORC_CLAMP_SB (var66.i); + /* 23: mulhsw */ + var68.i = (var56.i * var48.i) >> 16; + /* 24: addw */ + var69.i = var60.i + var68.i; + /* 26: mulhsw */ + var70.i = (var59.i * var49.i) >> 16; + /* 27: addw */ + var71.i = var69.i + var70.i; + /* 28: convssswb */ + var72 = ORC_CLAMP_SB (var71.i); + /* 29: mergebw */ + { + orc_union16 _dest; + _dest.x2[0] = var72; + _dest.x2[1] = var67; + var73.i = _dest.i; + } + /* 30: mergewl */ + { + orc_union32 _dest; + _dest.x2[0] = var64.i; + _dest.x2[1] = var73.i; + var74.i = _dest.i; + } + /* 32: addb */ + var51.x4[0] = var74.x4[0] + var50.x4[0]; + var51.x4[1] = var74.x4[1] + var50.x4[1]; + var51.x4[2] = var74.x4[2] + var50.x4[2]; + var51.x4[3] = var74.x4[3] + var50.x4[3]; + /* 33: storel */ + ptr0[i] = var51; + } + +} + +#else +static void +_backup_video_orc_convert_I420_ARGB (OrcExecutor * ORC_RESTRICT ex) +{ + int i; + int n = ex->n; + orc_union32 *ORC_RESTRICT ptr0; + const orc_int8 *ORC_RESTRICT ptr4; + const orc_int8 *ORC_RESTRICT ptr5; + const orc_int8 *ORC_RESTRICT ptr6; + orc_int8 var42; +#if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__) + volatile orc_int8 var43; +#else + orc_int8 var43; +#endif + orc_union16 var44; + orc_union16 var45; +#if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__) + volatile orc_int8 var46; +#else + orc_int8 var46; +#endif + orc_union16 var47; + orc_union16 var48; + orc_union16 var49; +#if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__) + volatile orc_union32 var50; +#else + orc_union32 var50; +#endif + orc_union32 var51; + orc_int8 var52; + orc_union16 var53; + orc_int8 var54; + orc_int8 var55; + orc_union16 var56; + orc_int8 var57; + orc_int8 var58; + orc_union16 var59; + orc_union16 var60; + orc_union16 var61; + orc_union16 var62; + orc_int8 var63; + orc_union16 var64; + orc_union16 var65; + orc_union16 var66; + orc_int8 var67; + orc_union16 var68; + orc_union16 var69; + orc_union16 var70; + orc_union16 var71; + orc_int8 var72; + orc_union16 var73; + orc_union32 var74; + + ptr0 = (orc_union32 *) ex->arrays[0]; + ptr4 = (orc_int8 *) ex->arrays[4]; + ptr5 = (orc_int8 *) ex->arrays[5]; + ptr6 = (orc_int8 *) ex->arrays[6]; + + /* 1: loadpb */ + var43 = (int) 0x00000080; /* 128 or 6.32404e-322f */ + /* 10: loadpw */ + var44.i = ex->params[24]; + /* 12: loadpw */ + var45.i = ex->params[25]; + /* 16: loadpb */ + var46 = (int) 0x0000007f; /* 127 or 6.27463e-322f */ + /* 18: loadpw */ + var47.i = ex->params[26]; + /* 22: loadpw */ + var48.i = ex->params[27]; + /* 25: loadpw */ + var49.i = ex->params[28]; + /* 31: loadpb */ + var50.x4[0] = (int) 0x00000080; /* 128 or 6.32404e-322f */ + var50.x4[1] = (int) 0x00000080; /* 128 or 6.32404e-322f */ + var50.x4[2] = (int) 0x00000080; /* 128 or 6.32404e-322f */ + var50.x4[3] = (int) 0x00000080; /* 128 or 6.32404e-322f */ + + for (i = 0; i < n; i++) { + /* 0: loadb */ + var42 = ptr4[i]; + /* 2: subb */ + var52 = var42 - var43; + /* 3: splatbw */ + var53.i = ((var52 & 0xff) << 8) | (var52 & 0xff); + /* 4: loadupdb */ + var54 = ptr5[i >> 1]; + /* 5: subb */ + var55 = var54 - var43; + /* 6: splatbw */ + var56.i = ((var55 & 0xff) << 8) | (var55 & 0xff); + /* 7: loadupdb */ + var57 = ptr6[i >> 1]; + /* 8: subb */ + var58 = var57 - var43; + /* 9: splatbw */ + var59.i = ((var58 & 0xff) << 8) | (var58 & 0xff); + /* 11: mulhsw */ + var60.i = (var53.i * var44.i) >> 16; + /* 13: mulhsw */ + var61.i = (var59.i * var45.i) >> 16; + /* 14: addw */ + var62.i = var60.i + var61.i; + /* 15: convssswb */ + var63 = ORC_CLAMP_SB (var62.i); + /* 17: mergebw */ + { + orc_union16 _dest; + _dest.x2[0] = var46; + _dest.x2[1] = var63; + var64.i = _dest.i; + } + /* 19: mulhsw */ + var65.i = (var56.i * var47.i) >> 16; + /* 20: addw */ + var66.i = var60.i + var65.i; + /* 21: convssswb */ + var67 = ORC_CLAMP_SB (var66.i); + /* 23: mulhsw */ + var68.i = (var56.i * var48.i) >> 16; + /* 24: addw */ + var69.i = var60.i + var68.i; + /* 26: mulhsw */ + var70.i = (var59.i * var49.i) >> 16; + /* 27: addw */ + var71.i = var69.i + var70.i; + /* 28: convssswb */ + var72 = ORC_CLAMP_SB (var71.i); + /* 29: mergebw */ + { + orc_union16 _dest; + _dest.x2[0] = var72; + _dest.x2[1] = var67; + var73.i = _dest.i; + } + /* 30: mergewl */ + { + orc_union32 _dest; + _dest.x2[0] = var64.i; + _dest.x2[1] = var73.i; + var74.i = _dest.i; + } + /* 32: addb */ + var51.x4[0] = var74.x4[0] + var50.x4[0]; + var51.x4[1] = var74.x4[1] + var50.x4[1]; + var51.x4[2] = var74.x4[2] + var50.x4[2]; + var51.x4[3] = var74.x4[3] + var50.x4[3]; + /* 33: storel */ + ptr0[i] = var51; + } + +} + +void +video_orc_convert_I420_ARGB (guint8 * ORC_RESTRICT d1, + const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2, + const guint8 * ORC_RESTRICT s3, int p1, int p2, int p3, int p4, int p5, + int n) +{ + OrcExecutor _ex, *ex = &_ex; + static volatile int p_inited = 0; + static OrcCode *c = 0; + void (*func) (OrcExecutor *); + + if (!p_inited) { + orc_once_mutex_lock (); + if (!p_inited) { + OrcProgram *p; + +#if 1 + static const orc_uint8 bc[] = { + 1, 9, 27, 118, 105, 100, 101, 111, 95, 111, 114, 99, 95, 99, 111, 110, + 118, 101, 114, 116, 95, 73, 52, 50, 48, 95, 65, 82, 71, 66, 11, 4, + 4, 12, 1, 1, 12, 1, 1, 12, 1, 1, 14, 1, 128, 0, 0, 0, + 14, 1, 127, 0, 0, 0, 16, 2, 16, 2, 16, 2, 16, 2, 16, 2, + 20, 2, 20, 2, 20, 2, 20, 2, 20, 2, 20, 2, 20, 1, 20, 1, + 20, 1, 20, 4, 65, 38, 4, 16, 151, 32, 38, 45, 38, 5, 65, 38, + 38, 16, 151, 33, 38, 45, 38, 6, 65, 38, 38, 16, 151, 34, 38, 90, + 32, 32, 24, 90, 35, 34, 25, 70, 35, 32, 35, 159, 38, 35, 196, 35, + 17, 38, 90, 37, 33, 26, 70, 37, 32, 37, 159, 40, 37, 90, 36, 33, + 27, 70, 36, 32, 36, 90, 32, 34, 28, 70, 36, 36, 32, 159, 39, 36, + 196, 37, 39, 40, 195, 41, 35, 37, 21, 2, 33, 0, 41, 16, 2, 0, + + }; + p = orc_program_new_from_static_bytecode (bc); + orc_program_set_backup_function (p, _backup_video_orc_convert_I420_ARGB); +#else + p = orc_program_new (); + orc_program_set_name (p, "video_orc_convert_I420_ARGB"); + orc_program_set_backup_function (p, _backup_video_orc_convert_I420_ARGB); + orc_program_add_destination (p, 4, "d1"); + orc_program_add_source (p, 1, "s1"); + orc_program_add_source (p, 1, "s2"); + orc_program_add_source (p, 1, "s3"); + orc_program_add_constant (p, 1, 0x00000080, "c1"); + orc_program_add_constant (p, 1, 0x0000007f, "c2"); + orc_program_add_parameter (p, 2, "p1"); + orc_program_add_parameter (p, 2, "p2"); + orc_program_add_parameter (p, 2, "p3"); + orc_program_add_parameter (p, 2, "p4"); + orc_program_add_parameter (p, 2, "p5"); + orc_program_add_temporary (p, 2, "t1"); + orc_program_add_temporary (p, 2, "t2"); + orc_program_add_temporary (p, 2, "t3"); + orc_program_add_temporary (p, 2, "t4"); + orc_program_add_temporary (p, 2, "t5"); + orc_program_add_temporary (p, 2, "t6"); + orc_program_add_temporary (p, 1, "t7"); + orc_program_add_temporary (p, 1, "t8"); + orc_program_add_temporary (p, 1, "t9"); + orc_program_add_temporary (p, 4, "t10"); + + orc_program_append_2 (p, "subb", 0, ORC_VAR_T7, ORC_VAR_S1, ORC_VAR_C1, + ORC_VAR_D1); + orc_program_append_2 (p, "splatbw", 0, ORC_VAR_T1, ORC_VAR_T7, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "loadupdb", 0, ORC_VAR_T7, ORC_VAR_S2, + ORC_VAR_D1, ORC_VAR_D1); + orc_program_append_2 (p, "subb", 0, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_C1, + ORC_VAR_D1); + orc_program_append_2 (p, "splatbw", 0, ORC_VAR_T2, ORC_VAR_T7, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "loadupdb", 0, ORC_VAR_T7, ORC_VAR_S3, + ORC_VAR_D1, ORC_VAR_D1); + orc_program_append_2 (p, "subb", 0, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_C1, + ORC_VAR_D1); + orc_program_append_2 (p, "splatbw", 0, ORC_VAR_T3, ORC_VAR_T7, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "mulhsw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P1, + ORC_VAR_D1); + orc_program_append_2 (p, "mulhsw", 0, ORC_VAR_T4, ORC_VAR_T3, ORC_VAR_P2, + ORC_VAR_D1); + orc_program_append_2 (p, "addw", 0, ORC_VAR_T4, ORC_VAR_T1, ORC_VAR_T4, + ORC_VAR_D1); + orc_program_append_2 (p, "convssswb", 0, ORC_VAR_T7, ORC_VAR_T4, + ORC_VAR_D1, ORC_VAR_D1); + orc_program_append_2 (p, "mergebw", 0, ORC_VAR_T4, ORC_VAR_C2, ORC_VAR_T7, + ORC_VAR_D1); + orc_program_append_2 (p, "mulhsw", 0, ORC_VAR_T6, ORC_VAR_T2, ORC_VAR_P3, + ORC_VAR_D1); + orc_program_append_2 (p, "addw", 0, ORC_VAR_T6, ORC_VAR_T1, ORC_VAR_T6, + ORC_VAR_D1); + orc_program_append_2 (p, "convssswb", 0, ORC_VAR_T9, ORC_VAR_T6, + ORC_VAR_D1, ORC_VAR_D1); + orc_program_append_2 (p, "mulhsw", 0, ORC_VAR_T5, ORC_VAR_T2, ORC_VAR_P4, + ORC_VAR_D1); + orc_program_append_2 (p, "addw", 0, ORC_VAR_T5, ORC_VAR_T1, ORC_VAR_T5, + ORC_VAR_D1); + orc_program_append_2 (p, "mulhsw", 0, ORC_VAR_T1, ORC_VAR_T3, ORC_VAR_P5, + ORC_VAR_D1); + orc_program_append_2 (p, "addw", 0, ORC_VAR_T5, ORC_VAR_T5, ORC_VAR_T1, + ORC_VAR_D1); + orc_program_append_2 (p, "convssswb", 0, ORC_VAR_T8, ORC_VAR_T5, + ORC_VAR_D1, ORC_VAR_D1); + orc_program_append_2 (p, "mergebw", 0, ORC_VAR_T6, ORC_VAR_T8, ORC_VAR_T9, + ORC_VAR_D1); + orc_program_append_2 (p, "mergewl", 0, ORC_VAR_T10, ORC_VAR_T4, + ORC_VAR_T6, ORC_VAR_D1); + orc_program_append_2 (p, "addb", 2, ORC_VAR_D1, ORC_VAR_T10, ORC_VAR_C1, + ORC_VAR_D1); +#endif + + orc_program_compile (p); + c = orc_program_take_code (p); + orc_program_free (p); + } + p_inited = TRUE; + orc_once_mutex_unlock (); + } + ex->arrays[ORC_VAR_A2] = c; + ex->program = 0; + + ex->n = n; + ex->arrays[ORC_VAR_D1] = d1; + ex->arrays[ORC_VAR_S1] = (void *) s1; + ex->arrays[ORC_VAR_S2] = (void *) s2; + ex->arrays[ORC_VAR_S3] = (void *) s3; + ex->params[ORC_VAR_P1] = p1; + ex->params[ORC_VAR_P2] = p2; + ex->params[ORC_VAR_P3] = p3; + ex->params[ORC_VAR_P4] = p4; + ex->params[ORC_VAR_P5] = p5; + + func = c->exec; + func (ex); +} +#endif + + /* video_orc_matrix8 */ #ifdef DISABLE_ORC void diff --git a/gst-libs/gst/video/video-orc-dist.h b/gst-libs/gst/video/video-orc-dist.h index c2fef7d7e1..d6a2aa6647 100644 --- a/gst-libs/gst/video/video-orc-dist.h +++ b/gst-libs/gst/video/video-orc-dist.h @@ -177,6 +177,7 @@ void video_orc_convert_AYUV_BGRA (guint8 * ORC_RESTRICT d1, int d1_stride, const void video_orc_convert_AYUV_ABGR (guint8 * ORC_RESTRICT d1, int d1_stride, const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int p2, int p3, int p4, int p5, int n, int m); void video_orc_convert_AYUV_RGBA (guint8 * ORC_RESTRICT d1, int d1_stride, const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int p2, int p3, int p4, int p5, int n, int m); void video_orc_convert_I420_BGRA (guint8 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2, const guint8 * ORC_RESTRICT s3, int p1, int p2, int p3, int p4, int p5, int n); +void video_orc_convert_I420_ARGB (guint8 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2, const guint8 * ORC_RESTRICT s3, int p1, int p2, int p3, int p4, int p5, int n); void video_orc_matrix8 (guint8 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, orc_int64 p1, orc_int64 p2, orc_int64 p3, orc_int64 p4, int n); void _custom_video_orc_matrix8 (guint8 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, orc_int64 p1, orc_int64 p2, orc_int64 p3, orc_int64 p4, int n); void video_orc_resample_h_near_u32_lq (guint32 * ORC_RESTRICT d1, const guint32 * ORC_RESTRICT s1, int p1, int p2, int n); diff --git a/gst-libs/gst/video/video-orc.orc b/gst-libs/gst/video/video-orc.orc index 60d7c95d16..5af6c90ab0 100644 --- a/gst-libs/gst/video/video-orc.orc +++ b/gst-libs/gst/video/video-orc.orc @@ -1663,6 +1663,7 @@ x4 addb argb, x, c128 .temp 1 b .temp 4 x .const 1 c128 128 +.const 4 c4128 128 subb r, y, c128 splatbw wy, r @@ -1693,7 +1694,61 @@ convssswb g, wg mergebw wb, b, g mergewl x, wb, wr -x4 addb argb, x, c128 +x4 addb argb, x, c4128 + +.function video_orc_convert_I420_ARGB +.dest 4 argb guint8 +.source 1 y guint8 +.source 1 u guint8 +.source 1 v guint8 +.param 2 p1 +.param 2 p2 +.param 2 p3 +.param 2 p4 +.param 2 p5 +.temp 2 wy +.temp 2 wu +.temp 2 wv +.temp 2 wr +.temp 2 wg +.temp 2 wb +.temp 1 r +.temp 1 g +.temp 1 b +.temp 4 x +.const 1 c128 128 +.const 4 c4128 128 + +subb r, y, c128 +splatbw wy, r +loadupdb r, u +subb r, r, c128 +splatbw wu, r +loadupdb r, v +subb r, r, c128 +splatbw wv, r + +mulhsw wy, wy, p1 + +mulhsw wr, wv, p2 +addw wr, wy, wr +convssswb r, wr +mergebw wr, 127, r + +mulhsw wb, wu, p3 +addw wb, wy, wb +convssswb b, wb + +mulhsw wg, wu, p4 +addw wg, wy, wg +mulhsw wy, wv, p5 +addw wg, wg, wy + +convssswb g, wg + +mergebw wb, g, b +mergewl x, wr, wb +x4 addb argb, x, c4128 .function video_orc_matrix8 .backup _custom_video_orc_matrix8