video-converter: add more fastpaths for I420 -> RGB

Use the existing I420->BGRA ORC function and a new I420->ARGB ORC function
to speed up any I420 to RGB conversion.
This commit is contained in:
Wim Taymans 2016-04-22 15:07:10 +02:00
parent 1439b61694
commit a12f51c3de
4 changed files with 648 additions and 2 deletions

View file

@ -3599,9 +3599,93 @@ convert_I420_BGRA (GstVideoConverter * convert, const GstVideoFrame * src,
sv = FRAME_GET_V_LINE (src, (i + convert->in_y) >> 1);
sv += (convert->in_x >> 1);
#if G_BYTE_ORDER == G_LITTLE_ENDIAN
video_orc_convert_I420_BGRA (d, sy, su, sv,
data->im[0][0], data->im[0][2],
data->im[2][1], data->im[1][1], data->im[1][2], width);
#else
video_orc_convert_I420_ARGB (d, sy, su, sv,
data->im[0][0], data->im[0][2],
data->im[2][1], data->im[1][1], data->im[1][2], width);
#endif
}
convert_fill_border (convert, dest);
}
/* Fast path: planar I420 source -> packed 4-bytes-per-pixel ARGB destination.
 *
 * Converts in_height lines of in_width pixels, reading the source starting
 * at (in_x, in_y) and writing the destination starting at (out_x, out_y).
 * Once every line has been converted, convert_fill_border() is invoked on
 * @dest.
 */
static void
convert_I420_ARGB (GstVideoConverter * convert, const GstVideoFrame * src,
    GstVideoFrame * dest)
{
  MatrixData *matrix = &convert->convert_matrix;
  const gint n_pixels = convert->in_width;
  const gint n_rows = convert->in_height;
  gint row;

  for (row = 0; row < n_rows; row++) {
    /* chroma is read at half resolution in both directions */
    const gint luma_row = row + convert->in_y;
    const gint chroma_row = luma_row >> 1;
    const gint chroma_x = convert->in_x >> 1;
    guint8 *out, *y_in, *u_in, *v_in;

    out = FRAME_GET_LINE (dest, row + convert->out_y);
    out += convert->out_x * 4;

    y_in = FRAME_GET_Y_LINE (src, luma_row);
    y_in += convert->in_x;

    u_in = FRAME_GET_U_LINE (src, chroma_row);
    u_in += chroma_x;

    v_in = FRAME_GET_V_LINE (src, chroma_row);
    v_in += chroma_x;

    /* The ORC kernels assemble each pixel as one 32-bit value, so the
     * kernel that yields ARGB byte order depends on the host byte order. */
#if G_BYTE_ORDER == G_LITTLE_ENDIAN
    video_orc_convert_I420_ARGB (out, y_in, u_in, v_in,
        matrix->im[0][0], matrix->im[0][2],
        matrix->im[2][1], matrix->im[1][1], matrix->im[1][2], n_pixels);
#else
    video_orc_convert_I420_BGRA (out, y_in, u_in, v_in,
        matrix->im[0][0], matrix->im[0][2],
        matrix->im[2][1], matrix->im[1][1], matrix->im[1][2], n_pixels);
#endif
  }

  convert_fill_border (convert, dest);
}
/* Fast path: planar I420 source -> any destination format, via ARGB.
 *
 * Each source line is first converted into convert->tmpline with the
 * I420 -> ARGB ORC kernel and then handed to the destination format's
 * pack_func, which writes it into @dest.  Once every line has been
 * processed, convert_fill_border() is invoked on @dest.
 */
static void
convert_I420_pack_ARGB (GstVideoConverter * convert, const GstVideoFrame * src,
    GstVideoFrame * dest)
{
  MatrixData *matrix = &convert->convert_matrix;
  const gint n_pixels = convert->in_width;
  const gint n_rows = convert->in_height;
  const gint pixel_stride = GST_VIDEO_FORMAT_INFO_PSTRIDE (dest->info.finfo, 0);
  gpointer argb_line = convert->tmpline;
  gpointer planes[GST_VIDEO_MAX_PLANES];
  gint row;

  /* Only plane 0 is set up: it points at line 0 of the destination, already
   * shifted right by out_x pixels.  The output line number is passed to
   * pack_func separately. */
  planes[0] =
      (guint8 *) (FRAME_GET_LINE (dest, 0)) + convert->out_x * pixel_stride;

  for (row = 0; row < n_rows; row++) {
    /* chroma is read at half resolution in both directions */
    const gint luma_row = row + convert->in_y;
    const gint chroma_row = luma_row >> 1;
    const gint chroma_x = convert->in_x >> 1;
    guint8 *y_in, *u_in, *v_in;

    y_in = FRAME_GET_Y_LINE (src, luma_row);
    y_in += convert->in_x;

    u_in = FRAME_GET_U_LINE (src, chroma_row);
    u_in += chroma_x;

    v_in = FRAME_GET_V_LINE (src, chroma_row);
    v_in += chroma_x;

    /* The ORC kernels assemble each pixel as one 32-bit value, so the
     * kernel that yields ARGB byte order depends on the host byte order. */
#if G_BYTE_ORDER == G_LITTLE_ENDIAN
    video_orc_convert_I420_ARGB (argb_line, y_in, u_in, v_in,
        matrix->im[0][0], matrix->im[0][2],
        matrix->im[2][1], matrix->im[1][1], matrix->im[1][2], n_pixels);
#else
    video_orc_convert_I420_BGRA (argb_line, y_in, u_in, v_in,
        matrix->im[0][0], matrix->im[0][2],
        matrix->im[2][1], matrix->im[1][1], matrix->im[1][2], n_pixels);
#endif

    dest->info.finfo->pack_func (dest->info.finfo,
        (GST_VIDEO_FRAME_IS_INTERLACED (dest) ?
            GST_VIDEO_PACK_FLAG_INTERLACED :
            GST_VIDEO_PACK_FLAG_NONE),
        argb_line, 0, planes, dest->info.stride,
        dest->info.chroma_site, row + convert->out_y, n_pixels);
  }

  convert_fill_border (convert, dest);
}
@ -4611,6 +4695,7 @@ static const VideoTransform transforms[] = {
FALSE, FALSE, FALSE, 0, 0, convert_AYUV_ABGR}, /* alias */
{GST_VIDEO_FORMAT_AYUV, GST_VIDEO_FORMAT_RGBx, TRUE, TRUE, TRUE, TRUE, TRUE,
FALSE, FALSE, FALSE, 0, 0, convert_AYUV_RGBA}, /* alias */
#endif
{GST_VIDEO_FORMAT_I420, GST_VIDEO_FORMAT_BGRA, FALSE, TRUE, TRUE, TRUE,
TRUE, FALSE, FALSE, FALSE, 0, 0, convert_I420_BGRA},
@ -4620,7 +4705,57 @@ static const VideoTransform transforms[] = {
TRUE, FALSE, FALSE, FALSE, 0, 0, convert_I420_BGRA},
{GST_VIDEO_FORMAT_YV12, GST_VIDEO_FORMAT_BGRx, FALSE, TRUE, TRUE, TRUE,
TRUE, FALSE, FALSE, FALSE, 0, 0, convert_I420_BGRA},
#endif
{GST_VIDEO_FORMAT_I420, GST_VIDEO_FORMAT_ARGB, FALSE, TRUE, TRUE, TRUE,
TRUE, FALSE, FALSE, FALSE, 0, 0, convert_I420_ARGB},
{GST_VIDEO_FORMAT_I420, GST_VIDEO_FORMAT_xRGB, FALSE, TRUE, TRUE, TRUE,
TRUE, FALSE, FALSE, FALSE, 0, 0, convert_I420_ARGB},
{GST_VIDEO_FORMAT_YV12, GST_VIDEO_FORMAT_ARGB, FALSE, TRUE, TRUE, TRUE,
TRUE, FALSE, FALSE, FALSE, 0, 0, convert_I420_ARGB},
{GST_VIDEO_FORMAT_YV12, GST_VIDEO_FORMAT_xRGB, FALSE, TRUE, TRUE, TRUE,
TRUE, FALSE, FALSE, FALSE, 0, 0, convert_I420_ARGB},
{GST_VIDEO_FORMAT_I420, GST_VIDEO_FORMAT_ABGR, FALSE, TRUE, TRUE, TRUE,
TRUE, FALSE, FALSE, FALSE, 0, 0, convert_I420_pack_ARGB},
{GST_VIDEO_FORMAT_I420, GST_VIDEO_FORMAT_xBGR, FALSE, TRUE, TRUE, TRUE,
TRUE, FALSE, FALSE, FALSE, 0, 0, convert_I420_pack_ARGB},
{GST_VIDEO_FORMAT_I420, GST_VIDEO_FORMAT_RGBA, FALSE, TRUE, TRUE, TRUE,
TRUE, FALSE, FALSE, FALSE, 0, 0, convert_I420_pack_ARGB},
{GST_VIDEO_FORMAT_I420, GST_VIDEO_FORMAT_RGBx, FALSE, TRUE, TRUE, TRUE,
TRUE, FALSE, FALSE, FALSE, 0, 0, convert_I420_pack_ARGB},
{GST_VIDEO_FORMAT_I420, GST_VIDEO_FORMAT_RGB, FALSE, TRUE, TRUE, TRUE,
TRUE, FALSE, FALSE, FALSE, 0, 0, convert_I420_pack_ARGB},
{GST_VIDEO_FORMAT_I420, GST_VIDEO_FORMAT_BGR, FALSE, TRUE, TRUE, TRUE,
TRUE, FALSE, FALSE, FALSE, 0, 0, convert_I420_pack_ARGB},
{GST_VIDEO_FORMAT_I420, GST_VIDEO_FORMAT_RGB15, FALSE, TRUE, TRUE, TRUE,
TRUE, FALSE, FALSE, FALSE, 0, 0, convert_I420_pack_ARGB},
{GST_VIDEO_FORMAT_I420, GST_VIDEO_FORMAT_BGR15, FALSE, TRUE, TRUE, TRUE,
TRUE, FALSE, FALSE, FALSE, 0, 0, convert_I420_pack_ARGB},
{GST_VIDEO_FORMAT_I420, GST_VIDEO_FORMAT_RGB16, FALSE, TRUE, TRUE, TRUE,
TRUE, FALSE, FALSE, FALSE, 0, 0, convert_I420_pack_ARGB},
{GST_VIDEO_FORMAT_I420, GST_VIDEO_FORMAT_BGR16, FALSE, TRUE, TRUE, TRUE,
TRUE, FALSE, FALSE, FALSE, 0, 0, convert_I420_pack_ARGB},
{GST_VIDEO_FORMAT_YV12, GST_VIDEO_FORMAT_ABGR, FALSE, TRUE, TRUE, TRUE,
TRUE, FALSE, FALSE, FALSE, 0, 0, convert_I420_pack_ARGB},
{GST_VIDEO_FORMAT_YV12, GST_VIDEO_FORMAT_xBGR, FALSE, TRUE, TRUE, TRUE,
TRUE, FALSE, FALSE, FALSE, 0, 0, convert_I420_pack_ARGB},
{GST_VIDEO_FORMAT_YV12, GST_VIDEO_FORMAT_RGBA, FALSE, TRUE, TRUE, TRUE,
TRUE, FALSE, FALSE, FALSE, 0, 0, convert_I420_pack_ARGB},
{GST_VIDEO_FORMAT_YV12, GST_VIDEO_FORMAT_RGBx, FALSE, TRUE, TRUE, TRUE,
TRUE, FALSE, FALSE, FALSE, 0, 0, convert_I420_pack_ARGB},
{GST_VIDEO_FORMAT_YV12, GST_VIDEO_FORMAT_RGB, FALSE, TRUE, TRUE, TRUE,
TRUE, FALSE, FALSE, FALSE, 0, 0, convert_I420_pack_ARGB},
{GST_VIDEO_FORMAT_YV12, GST_VIDEO_FORMAT_BGR, FALSE, TRUE, TRUE, TRUE,
TRUE, FALSE, FALSE, FALSE, 0, 0, convert_I420_pack_ARGB},
{GST_VIDEO_FORMAT_YV12, GST_VIDEO_FORMAT_RGB15, FALSE, TRUE, TRUE, TRUE,
TRUE, FALSE, FALSE, FALSE, 0, 0, convert_I420_pack_ARGB},
{GST_VIDEO_FORMAT_YV12, GST_VIDEO_FORMAT_BGR15, FALSE, TRUE, TRUE, TRUE,
TRUE, FALSE, FALSE, FALSE, 0, 0, convert_I420_pack_ARGB},
{GST_VIDEO_FORMAT_YV12, GST_VIDEO_FORMAT_RGB16, FALSE, TRUE, TRUE, TRUE,
TRUE, FALSE, FALSE, FALSE, 0, 0, convert_I420_pack_ARGB},
{GST_VIDEO_FORMAT_YV12, GST_VIDEO_FORMAT_BGR16, FALSE, TRUE, TRUE, TRUE,
TRUE, FALSE, FALSE, FALSE, 0, 0, convert_I420_pack_ARGB},
/* scalers */
{GST_VIDEO_FORMAT_GBR, GST_VIDEO_FORMAT_GBR, TRUE, FALSE, FALSE, TRUE,

View file

@ -333,6 +333,10 @@ void video_orc_convert_I420_BGRA (guint8 * ORC_RESTRICT d1,
const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2,
const guint8 * ORC_RESTRICT s3, int p1, int p2, int p3, int p4, int p5,
int n);
/* Converts n pixels from three 1-byte-per-sample source arrays (s1, s2, s3)
 * into 4-byte pixels written to d1.  s2 and s3 are read at half the rate of
 * s1.  p1..p5 are the multipliers applied to the source samples. */
void video_orc_convert_I420_ARGB (guint8 * ORC_RESTRICT d1,
const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2,
const guint8 * ORC_RESTRICT s3, int p1, int p2, int p3, int p4, int p5,
int n);
void video_orc_matrix8 (guint8 * ORC_RESTRICT d1,
const guint8 * ORC_RESTRICT s1, orc_int64 p1, orc_int64 p2, orc_int64 p3,
orc_int64 p4, int n);
@ -22026,6 +22030,457 @@ video_orc_convert_I420_BGRA (guint8 * ORC_RESTRICT d1,
#endif
/* video_orc_convert_I420_ARGB */
#ifdef DISABLE_ORC
/* Plain C implementation of video_orc_convert_I420_ARGB, compiled when
 * DISABLE_ORC is defined.
 *
 * d1:         destination; one orc_union32 is stored per pixel
 * s1, s2, s3: source bytes; s1 is read at index i, s2 and s3 at index i >> 1
 * p1..p5:     multipliers; p1 scales s1, p3/p4 scale s2, p2/p5 scale s3
 * n:          number of pixels to produce
 *
 * NOTE(review): the numbered "N: opcode" comments suggest this body is
 * generated by orcc from the .orc program of the same name; confirm before
 * editing it by hand.
 */
void
video_orc_convert_I420_ARGB (guint8 * ORC_RESTRICT d1,
const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2,
const guint8 * ORC_RESTRICT s3, int p1, int p2, int p3, int p4, int p5,
int n)
{
int i;
orc_union32 *ORC_RESTRICT ptr0;
const orc_int8 *ORC_RESTRICT ptr4;
const orc_int8 *ORC_RESTRICT ptr5;
const orc_int8 *ORC_RESTRICT ptr6;
orc_int8 var42;
#if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
volatile orc_int8 var43;
#else
orc_int8 var43;
#endif
orc_union16 var44;
orc_union16 var45;
#if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
volatile orc_int8 var46;
#else
orc_int8 var46;
#endif
orc_union16 var47;
orc_union16 var48;
orc_union16 var49;
#if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
volatile orc_union32 var50;
#else
orc_union32 var50;
#endif
orc_union32 var51;
orc_int8 var52;
orc_union16 var53;
orc_int8 var54;
orc_int8 var55;
orc_union16 var56;
orc_int8 var57;
orc_int8 var58;
orc_union16 var59;
orc_union16 var60;
orc_union16 var61;
orc_union16 var62;
orc_int8 var63;
orc_union16 var64;
orc_union16 var65;
orc_union16 var66;
orc_int8 var67;
orc_union16 var68;
orc_union16 var69;
orc_union16 var70;
orc_union16 var71;
orc_int8 var72;
orc_union16 var73;
orc_union32 var74;
ptr0 = (orc_union32 *) d1;
ptr4 = (orc_int8 *) s1;
ptr5 = (orc_int8 *) s2;
ptr6 = (orc_int8 *) s3;
/* Loop-invariant constants and parameters are loaded once up front. */
/* 1: loadpb */
var43 = (int) 0x00000080; /* 128 or 6.32404e-322f */
/* 10: loadpw */
var44.i = p1;
/* 12: loadpw */
var45.i = p2;
/* 16: loadpb */
var46 = (int) 0x0000007f; /* 127 or 6.27463e-322f */
/* 18: loadpw */
var47.i = p3;
/* 22: loadpw */
var48.i = p4;
/* 25: loadpw */
var49.i = p5;
/* 31: loadpb */
var50.x4[0] = (int) 0x00000080; /* 128 or 6.32404e-322f */
var50.x4[1] = (int) 0x00000080; /* 128 or 6.32404e-322f */
var50.x4[2] = (int) 0x00000080; /* 128 or 6.32404e-322f */
var50.x4[3] = (int) 0x00000080; /* 128 or 6.32404e-322f */
for (i = 0; i < n; i++) {
/* Each source byte has 128 subtracted and is then duplicated into both
 * halves of a 16-bit value before being multiplied. */
/* 0: loadb */
var42 = ptr4[i];
/* 2: subb */
var52 = var42 - var43;
/* 3: splatbw */
var53.i = ((var52 & 0xff) << 8) | (var52 & 0xff);
/* s2 and s3 are indexed with i >> 1: one byte serves two consecutive
 * output pixels. */
/* 4: loadupdb */
var54 = ptr5[i >> 1];
/* 5: subb */
var55 = var54 - var43;
/* 6: splatbw */
var56.i = ((var55 & 0xff) << 8) | (var55 & 0xff);
/* 7: loadupdb */
var57 = ptr6[i >> 1];
/* 8: subb */
var58 = var57 - var43;
/* 9: splatbw */
var59.i = ((var58 & 0xff) << 8) | (var58 & 0xff);
/* var60 (the s1 term scaled by p1) is shared by all three sums below. */
/* 11: mulhsw */
var60.i = (var53.i * var44.i) >> 16;
/* First sum: s1 * p1 + s3 * p2, clamped into var63. */
/* 13: mulhsw */
var61.i = (var59.i * var45.i) >> 16;
/* 14: addw */
var62.i = var60.i + var61.i;
/* 15: convssswb */
var63 = ORC_CLAMP_SB (var62.i);
/* 17: mergebw */
{
orc_union16 _dest;
_dest.x2[0] = var46;
_dest.x2[1] = var63;
var64.i = _dest.i;
}
/* Second sum: s1 * p1 + s2 * p3, clamped into var67. */
/* 19: mulhsw */
var65.i = (var56.i * var47.i) >> 16;
/* 20: addw */
var66.i = var60.i + var65.i;
/* 21: convssswb */
var67 = ORC_CLAMP_SB (var66.i);
/* Third sum: s1 * p1 + s2 * p4 + s3 * p5, clamped into var72. */
/* 23: mulhsw */
var68.i = (var56.i * var48.i) >> 16;
/* 24: addw */
var69.i = var60.i + var68.i;
/* 26: mulhsw */
var70.i = (var59.i * var49.i) >> 16;
/* 27: addw */
var71.i = var69.i + var70.i;
/* 28: convssswb */
var72 = ORC_CLAMP_SB (var71.i);
/* 29: mergebw */
{
orc_union16 _dest;
_dest.x2[0] = var72;
_dest.x2[1] = var67;
var73.i = _dest.i;
}
/* 30: mergewl */
{
orc_union32 _dest;
_dest.x2[0] = var64.i;
_dest.x2[1] = var73.i;
var74.i = _dest.i;
}
/* 0x80 is added to each of the four bytes; the byte that carries the
 * constant 0x7f thereby becomes 0xff (assuming 8-bit x4 elements). */
/* 32: addb */
var51.x4[0] = var74.x4[0] + var50.x4[0];
var51.x4[1] = var74.x4[1] + var50.x4[1];
var51.x4[2] = var74.x4[2] + var50.x4[2];
var51.x4[3] = var74.x4[3] + var50.x4[3];
/* 33: storel */
ptr0[i] = var51;
}
}
#else
/* C fallback for the video_orc_convert_I420_ARGB ORC program; it is
 * registered on the program with orc_program_set_backup_function().
 *
 * All inputs come from the executor: the pixel count from ex->n, the
 * destination from ex->arrays[0], the three sources from ex->arrays[4..6]
 * and the five multipliers from ex->params[24..28].
 *
 * NOTE(review): indices 24..28 are presumably ORC_VAR_P1..ORC_VAR_P5, which
 * is where the public wrapper stores p1..p5 -- confirm against the ORC
 * headers.
 */
static void
_backup_video_orc_convert_I420_ARGB (OrcExecutor * ORC_RESTRICT ex)
{
int i;
int n = ex->n;
orc_union32 *ORC_RESTRICT ptr0;
const orc_int8 *ORC_RESTRICT ptr4;
const orc_int8 *ORC_RESTRICT ptr5;
const orc_int8 *ORC_RESTRICT ptr6;
orc_int8 var42;
#if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
volatile orc_int8 var43;
#else
orc_int8 var43;
#endif
orc_union16 var44;
orc_union16 var45;
#if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
volatile orc_int8 var46;
#else
orc_int8 var46;
#endif
orc_union16 var47;
orc_union16 var48;
orc_union16 var49;
#if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
volatile orc_union32 var50;
#else
orc_union32 var50;
#endif
orc_union32 var51;
orc_int8 var52;
orc_union16 var53;
orc_int8 var54;
orc_int8 var55;
orc_union16 var56;
orc_int8 var57;
orc_int8 var58;
orc_union16 var59;
orc_union16 var60;
orc_union16 var61;
orc_union16 var62;
orc_int8 var63;
orc_union16 var64;
orc_union16 var65;
orc_union16 var66;
orc_int8 var67;
orc_union16 var68;
orc_union16 var69;
orc_union16 var70;
orc_union16 var71;
orc_int8 var72;
orc_union16 var73;
orc_union32 var74;
ptr0 = (orc_union32 *) ex->arrays[0];
ptr4 = (orc_int8 *) ex->arrays[4];
ptr5 = (orc_int8 *) ex->arrays[5];
ptr6 = (orc_int8 *) ex->arrays[6];
/* Loop-invariant constants and parameters are loaded once up front. */
/* 1: loadpb */
var43 = (int) 0x00000080; /* 128 or 6.32404e-322f */
/* 10: loadpw */
var44.i = ex->params[24];
/* 12: loadpw */
var45.i = ex->params[25];
/* 16: loadpb */
var46 = (int) 0x0000007f; /* 127 or 6.27463e-322f */
/* 18: loadpw */
var47.i = ex->params[26];
/* 22: loadpw */
var48.i = ex->params[27];
/* 25: loadpw */
var49.i = ex->params[28];
/* 31: loadpb */
var50.x4[0] = (int) 0x00000080; /* 128 or 6.32404e-322f */
var50.x4[1] = (int) 0x00000080; /* 128 or 6.32404e-322f */
var50.x4[2] = (int) 0x00000080; /* 128 or 6.32404e-322f */
var50.x4[3] = (int) 0x00000080; /* 128 or 6.32404e-322f */
for (i = 0; i < n; i++) {
/* Each source byte has 128 subtracted and is then duplicated into both
 * halves of a 16-bit value before being multiplied. */
/* 0: loadb */
var42 = ptr4[i];
/* 2: subb */
var52 = var42 - var43;
/* 3: splatbw */
var53.i = ((var52 & 0xff) << 8) | (var52 & 0xff);
/* The second and third sources are indexed with i >> 1: one byte serves
 * two consecutive output pixels. */
/* 4: loadupdb */
var54 = ptr5[i >> 1];
/* 5: subb */
var55 = var54 - var43;
/* 6: splatbw */
var56.i = ((var55 & 0xff) << 8) | (var55 & 0xff);
/* 7: loadupdb */
var57 = ptr6[i >> 1];
/* 8: subb */
var58 = var57 - var43;
/* 9: splatbw */
var59.i = ((var58 & 0xff) << 8) | (var58 & 0xff);
/* var60 (the first-source term) is shared by all three sums below. */
/* 11: mulhsw */
var60.i = (var53.i * var44.i) >> 16;
/* 13: mulhsw */
var61.i = (var59.i * var45.i) >> 16;
/* 14: addw */
var62.i = var60.i + var61.i;
/* 15: convssswb */
var63 = ORC_CLAMP_SB (var62.i);
/* 17: mergebw */
{
orc_union16 _dest;
_dest.x2[0] = var46;
_dest.x2[1] = var63;
var64.i = _dest.i;
}
/* 19: mulhsw */
var65.i = (var56.i * var47.i) >> 16;
/* 20: addw */
var66.i = var60.i + var65.i;
/* 21: convssswb */
var67 = ORC_CLAMP_SB (var66.i);
/* 23: mulhsw */
var68.i = (var56.i * var48.i) >> 16;
/* 24: addw */
var69.i = var60.i + var68.i;
/* 26: mulhsw */
var70.i = (var59.i * var49.i) >> 16;
/* 27: addw */
var71.i = var69.i + var70.i;
/* 28: convssswb */
var72 = ORC_CLAMP_SB (var71.i);
/* 29: mergebw */
{
orc_union16 _dest;
_dest.x2[0] = var72;
_dest.x2[1] = var67;
var73.i = _dest.i;
}
/* 30: mergewl */
{
orc_union32 _dest;
_dest.x2[0] = var64.i;
_dest.x2[1] = var73.i;
var74.i = _dest.i;
}
/* 0x80 is added to each of the four bytes; the byte that carries the
 * constant 0x7f thereby becomes 0xff (assuming 8-bit x4 elements). */
/* 32: addb */
var51.x4[0] = var74.x4[0] + var50.x4[0];
var51.x4[1] = var74.x4[1] + var50.x4[1];
var51.x4[2] = var74.x4[2] + var50.x4[2];
var51.x4[3] = var74.x4[3] + var50.x4[3];
/* 33: storel */
ptr0[i] = var51;
}
}
/* ORC-enabled entry point for video_orc_convert_I420_ARGB.
 *
 * On the first call the ORC program is created (from the static bytecode in
 * the "#if 1" branch), given _backup_video_orc_convert_I420_ARGB as its
 * backup function, compiled, and its OrcCode is cached in the static `c`.
 * Every call then fills an OrcExecutor with the destination, the three
 * sources, the five parameters and the pixel count, and runs c->exec.
 *
 * d1:         destination array
 * s1, s2, s3: source arrays
 * p1..p5:     parameters handed to the program as ORC_VAR_P1..ORC_VAR_P5
 * n:          number of elements to process
 */
void
video_orc_convert_I420_ARGB (guint8 * ORC_RESTRICT d1,
const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2,
const guint8 * ORC_RESTRICT s3, int p1, int p2, int p3, int p4, int p5,
int n)
{
OrcExecutor _ex, *ex = &_ex;
static volatile int p_inited = 0;
static OrcCode *c = 0;
void (*func) (OrcExecutor *);
/* One-time setup: p_inited is tested before taking the ORC once-mutex and
 * again while holding it. */
if (!p_inited) {
orc_once_mutex_lock ();
if (!p_inited) {
OrcProgram *p;
#if 1
/* NOTE(review): presumably the serialized form of the program that the
 * "#else" branch builds call by call (it embeds the same program name as
 * ASCII) -- confirm both stay in sync when regenerating. */
static const orc_uint8 bc[] = {
1, 9, 27, 118, 105, 100, 101, 111, 95, 111, 114, 99, 95, 99, 111, 110,
118, 101, 114, 116, 95, 73, 52, 50, 48, 95, 65, 82, 71, 66, 11, 4,
4, 12, 1, 1, 12, 1, 1, 12, 1, 1, 14, 1, 128, 0, 0, 0,
14, 1, 127, 0, 0, 0, 16, 2, 16, 2, 16, 2, 16, 2, 16, 2,
20, 2, 20, 2, 20, 2, 20, 2, 20, 2, 20, 2, 20, 1, 20, 1,
20, 1, 20, 4, 65, 38, 4, 16, 151, 32, 38, 45, 38, 5, 65, 38,
38, 16, 151, 33, 38, 45, 38, 6, 65, 38, 38, 16, 151, 34, 38, 90,
32, 32, 24, 90, 35, 34, 25, 70, 35, 32, 35, 159, 38, 35, 196, 35,
17, 38, 90, 37, 33, 26, 70, 37, 32, 37, 159, 40, 37, 90, 36, 33,
27, 70, 36, 32, 36, 90, 32, 34, 28, 70, 36, 36, 32, 159, 39, 36,
196, 37, 39, 40, 195, 41, 35, 37, 21, 2, 33, 0, 41, 16, 2, 0,
};
p = orc_program_new_from_static_bytecode (bc);
orc_program_set_backup_function (p, _backup_video_orc_convert_I420_ARGB);
#else
p = orc_program_new ();
orc_program_set_name (p, "video_orc_convert_I420_ARGB");
orc_program_set_backup_function (p, _backup_video_orc_convert_I420_ARGB);
orc_program_add_destination (p, 4, "d1");
orc_program_add_source (p, 1, "s1");
orc_program_add_source (p, 1, "s2");
orc_program_add_source (p, 1, "s3");
orc_program_add_constant (p, 1, 0x00000080, "c1");
orc_program_add_constant (p, 1, 0x0000007f, "c2");
orc_program_add_parameter (p, 2, "p1");
orc_program_add_parameter (p, 2, "p2");
orc_program_add_parameter (p, 2, "p3");
orc_program_add_parameter (p, 2, "p4");
orc_program_add_parameter (p, 2, "p5");
orc_program_add_temporary (p, 2, "t1");
orc_program_add_temporary (p, 2, "t2");
orc_program_add_temporary (p, 2, "t3");
orc_program_add_temporary (p, 2, "t4");
orc_program_add_temporary (p, 2, "t5");
orc_program_add_temporary (p, 2, "t6");
orc_program_add_temporary (p, 1, "t7");
orc_program_add_temporary (p, 1, "t8");
orc_program_add_temporary (p, 1, "t9");
orc_program_add_temporary (p, 4, "t10");
orc_program_append_2 (p, "subb", 0, ORC_VAR_T7, ORC_VAR_S1, ORC_VAR_C1,
ORC_VAR_D1);
orc_program_append_2 (p, "splatbw", 0, ORC_VAR_T1, ORC_VAR_T7, ORC_VAR_D1,
ORC_VAR_D1);
orc_program_append_2 (p, "loadupdb", 0, ORC_VAR_T7, ORC_VAR_S2,
ORC_VAR_D1, ORC_VAR_D1);
orc_program_append_2 (p, "subb", 0, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_C1,
ORC_VAR_D1);
orc_program_append_2 (p, "splatbw", 0, ORC_VAR_T2, ORC_VAR_T7, ORC_VAR_D1,
ORC_VAR_D1);
orc_program_append_2 (p, "loadupdb", 0, ORC_VAR_T7, ORC_VAR_S3,
ORC_VAR_D1, ORC_VAR_D1);
orc_program_append_2 (p, "subb", 0, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_C1,
ORC_VAR_D1);
orc_program_append_2 (p, "splatbw", 0, ORC_VAR_T3, ORC_VAR_T7, ORC_VAR_D1,
ORC_VAR_D1);
orc_program_append_2 (p, "mulhsw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P1,
ORC_VAR_D1);
orc_program_append_2 (p, "mulhsw", 0, ORC_VAR_T4, ORC_VAR_T3, ORC_VAR_P2,
ORC_VAR_D1);
orc_program_append_2 (p, "addw", 0, ORC_VAR_T4, ORC_VAR_T1, ORC_VAR_T4,
ORC_VAR_D1);
orc_program_append_2 (p, "convssswb", 0, ORC_VAR_T7, ORC_VAR_T4,
ORC_VAR_D1, ORC_VAR_D1);
orc_program_append_2 (p, "mergebw", 0, ORC_VAR_T4, ORC_VAR_C2, ORC_VAR_T7,
ORC_VAR_D1);
orc_program_append_2 (p, "mulhsw", 0, ORC_VAR_T6, ORC_VAR_T2, ORC_VAR_P3,
ORC_VAR_D1);
orc_program_append_2 (p, "addw", 0, ORC_VAR_T6, ORC_VAR_T1, ORC_VAR_T6,
ORC_VAR_D1);
orc_program_append_2 (p, "convssswb", 0, ORC_VAR_T9, ORC_VAR_T6,
ORC_VAR_D1, ORC_VAR_D1);
orc_program_append_2 (p, "mulhsw", 0, ORC_VAR_T5, ORC_VAR_T2, ORC_VAR_P4,
ORC_VAR_D1);
orc_program_append_2 (p, "addw", 0, ORC_VAR_T5, ORC_VAR_T1, ORC_VAR_T5,
ORC_VAR_D1);
orc_program_append_2 (p, "mulhsw", 0, ORC_VAR_T1, ORC_VAR_T3, ORC_VAR_P5,
ORC_VAR_D1);
orc_program_append_2 (p, "addw", 0, ORC_VAR_T5, ORC_VAR_T5, ORC_VAR_T1,
ORC_VAR_D1);
orc_program_append_2 (p, "convssswb", 0, ORC_VAR_T8, ORC_VAR_T5,
ORC_VAR_D1, ORC_VAR_D1);
orc_program_append_2 (p, "mergebw", 0, ORC_VAR_T6, ORC_VAR_T8, ORC_VAR_T9,
ORC_VAR_D1);
orc_program_append_2 (p, "mergewl", 0, ORC_VAR_T10, ORC_VAR_T4,
ORC_VAR_T6, ORC_VAR_D1);
orc_program_append_2 (p, "addb", 2, ORC_VAR_D1, ORC_VAR_T10, ORC_VAR_C1,
ORC_VAR_D1);
#endif
/* Only the compiled code is kept; the program object itself is freed. */
orc_program_compile (p);
c = orc_program_take_code (p);
orc_program_free (p);
}
p_inited = TRUE;
orc_once_mutex_unlock ();
}
/* Per-call setup: the executor lives on the stack and is refilled each time. */
ex->arrays[ORC_VAR_A2] = c;
ex->program = 0;
ex->n = n;
ex->arrays[ORC_VAR_D1] = d1;
ex->arrays[ORC_VAR_S1] = (void *) s1;
ex->arrays[ORC_VAR_S2] = (void *) s2;
ex->arrays[ORC_VAR_S3] = (void *) s3;
ex->params[ORC_VAR_P1] = p1;
ex->params[ORC_VAR_P2] = p2;
ex->params[ORC_VAR_P3] = p3;
ex->params[ORC_VAR_P4] = p4;
ex->params[ORC_VAR_P5] = p5;
func = c->exec;
func (ex);
}
#endif
/* video_orc_matrix8 */
#ifdef DISABLE_ORC
void

View file

@ -177,6 +177,7 @@ void video_orc_convert_AYUV_BGRA (guint8 * ORC_RESTRICT d1, int d1_stride, const
void video_orc_convert_AYUV_ABGR (guint8 * ORC_RESTRICT d1, int d1_stride, const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int p2, int p3, int p4, int p5, int n, int m);
void video_orc_convert_AYUV_RGBA (guint8 * ORC_RESTRICT d1, int d1_stride, const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int p2, int p3, int p4, int p5, int n, int m);
void video_orc_convert_I420_BGRA (guint8 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2, const guint8 * ORC_RESTRICT s3, int p1, int p2, int p3, int p4, int p5, int n);
/* Same signature as video_orc_convert_I420_BGRA: n pixels from s1/s2/s3 are converted into 4-byte pixels in d1 using multipliers p1..p5. */
void video_orc_convert_I420_ARGB (guint8 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2, const guint8 * ORC_RESTRICT s3, int p1, int p2, int p3, int p4, int p5, int n);
void video_orc_matrix8 (guint8 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, orc_int64 p1, orc_int64 p2, orc_int64 p3, orc_int64 p4, int n);
void _custom_video_orc_matrix8 (guint8 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, orc_int64 p1, orc_int64 p2, orc_int64 p3, orc_int64 p4, int n);
void video_orc_resample_h_near_u32_lq (guint32 * ORC_RESTRICT d1, const guint32 * ORC_RESTRICT s1, int p1, int p2, int n);

View file

@ -1663,6 +1663,7 @@ x4 addb argb, x, c128
.temp 1 b
.temp 4 x
.const 1 c128 128
.const 4 c4128 128
subb r, y, c128
splatbw wy, r
@ -1693,7 +1694,61 @@ convssswb g, wg
mergebw wb, b, g
mergewl x, wb, wr
x4 addb argb, x, c128
x4 addb argb, x, c4128
# Converts one line of y/u/v samples into 4-byte pixels.
# The u and v sources are read with loadupdb; y is read directly.
# p1 scales y, p2 and p5 scale v, p3 and p4 scale u.
.function video_orc_convert_I420_ARGB
.dest 4 argb guint8
.source 1 y guint8
.source 1 u guint8
.source 1 v guint8
.param 2 p1
.param 2 p2
.param 2 p3
.param 2 p4
.param 2 p5
.temp 2 wy
.temp 2 wu
.temp 2 wv
.temp 2 wr
.temp 2 wg
.temp 2 wb
.temp 1 r
.temp 1 g
.temp 1 b
.temp 4 x
.const 1 c128 128
.const 4 c4128 128
# Subtract 128 from each sample and widen it to 16 bits.
subb r, y, c128
splatbw wy, r
loadupdb r, u
subb r, r, c128
splatbw wu, r
loadupdb r, v
subb r, r, c128
splatbw wv, r
mulhsw wy, wy, p1
# wr = wy + wv * p2, narrowed to r and merged with the constant 127.
mulhsw wr, wv, p2
addw wr, wy, wr
convssswb r, wr
mergebw wr, 127, r
# wb = wy + wu * p3, narrowed to b.
mulhsw wb, wu, p3
addw wb, wy, wb
convssswb b, wb
# wg = wy + wu * p4 + wv * p5, narrowed to g (wy is reused as scratch).
mulhsw wg, wu, p4
addw wg, wy, wg
mulhsw wy, wv, p5
addw wg, wg, wy
convssswb g, wg
# Assemble the pixel from (127, r) and (g, b), then add 128 to every byte.
mergebw wb, g, b
mergewl x, wr, wb
x4 addb argb, x, c4128
.function video_orc_matrix8
.backup _custom_video_orc_matrix8