video-converter: Add support for A420 to RGB fast path

Add fast path for A420 -> RGB format conversion

Part-of: <https://gitlab.freedesktop.org/gstreamer/gst-plugins-base/-/merge_requests/1245>
This commit is contained in:
Seungha Yang 2021-08-05 19:59:38 +09:00 committed by GStreamer Marge Bot
parent f5a79ce05f
commit c654f86859
4 changed files with 1228 additions and 4 deletions

View file

@ -5918,6 +5918,175 @@ convert_I420_pack_ARGB (GstVideoConverter * convert, const GstVideoFrame * src,
convert_fill_border (convert, dest);
}
/*
 * Per-thread worker used by convert_A420_pack_ARGB().
 *
 * Processes rows [task->height_0, task->height_1). For every row the Y, U, V
 * and A source lines are located (U and V at half the row index and half the
 * x offset), converted into task->tmpline by the ORC kernel and then handed
 * to the destination format's pack_func, which is asked to write row
 * (row + task->out_y) of the destination.
 */
static void
convert_A420_pack_ARGB_task (FConvertTask * task)
{
  gpointer d[GST_VIDEO_MAX_PLANES];
  gint row;

  /* Only entry 0 is filled in: start of destination line 0, moved right by
   * out_x pixels (pixel stride taken from the destination format info). */
  d[0] = ((guint8 *) (FRAME_GET_LINE (task->dest, 0))) +
      task->out_x * GST_VIDEO_FORMAT_INFO_PSTRIDE (task->dest->info.finfo, 0);

  for (row = task->height_0; row < task->height_1; row++) {
    const gint src_row = row + task->in_y;
    const gint chroma_row = src_row >> 1;
    guint8 *sy =
        ((guint8 *) (FRAME_GET_Y_LINE (task->src, src_row))) + task->in_x;
    guint8 *su =
        ((guint8 *) (FRAME_GET_U_LINE (task->src, chroma_row))) +
        (task->in_x >> 1);
    guint8 *sv =
        ((guint8 *) (FRAME_GET_V_LINE (task->src, chroma_row))) +
        (task->in_x >> 1);
    guint8 *sa =
        ((guint8 *) (FRAME_GET_A_LINE (task->src, src_row))) + task->in_x;

    /* The kernel variant is selected by host byte order. */
#if G_BYTE_ORDER == G_LITTLE_ENDIAN
    video_orc_convert_A420_ARGB (task->tmpline, sy, su, sv, sa,
        task->data->im[0][0], task->data->im[0][2],
        task->data->im[2][1], task->data->im[1][1], task->data->im[1][2],
        task->width);
#else
    video_orc_convert_A420_BGRA (task->tmpline, sy, su, sv, sa,
        task->data->im[0][0], task->data->im[0][2],
        task->data->im[2][1], task->data->im[1][1], task->data->im[1][2],
        task->width);
#endif

    /* Pack the temporary line into the destination's native layout. */
    task->dest->info.finfo->pack_func (task->dest->info.finfo,
        (GST_VIDEO_FRAME_IS_INTERLACED (task->dest) ?
            GST_VIDEO_PACK_FLAG_INTERLACED : GST_VIDEO_PACK_FLAG_NONE),
        task->tmpline, 0, d, task->dest->info.stride,
        task->dest->info.chroma_site, row + task->out_y, task->width);
  }
}
/*
 * A420 -> packed RGB fast path that goes through a temporary line and the
 * destination format's pack function.
 *
 * The in_height rows are divided into n_threads contiguous slices (rounded
 * up, last slices clamped to the height); one FConvertTask is filled per
 * slice and all of them are run through the conversion runner with
 * convert_A420_pack_ARGB_task(). convert_fill_border() is called afterwards.
 */
static void
convert_A420_pack_ARGB (GstVideoConverter * convert, const GstVideoFrame * src,
    GstVideoFrame * dest)
{
  const gint n_threads = convert->conversion_runner->n_threads;
  const gint height = convert->in_height;
  /* ceil (height / n_threads) */
  const gint lines_per_thread = (height + n_threads - 1) / n_threads;
  FConvertTask *tasks;
  FConvertTask **tasks_p;
  gint t;

  /* The task arrays are kept on the converter and resized on every call. */
  convert->tasks[0] = g_renew (FConvertTask, convert->tasks[0], n_threads);
  tasks = convert->tasks[0];
  convert->tasks_p[0] =
      g_renew (FConvertTask *, convert->tasks_p[0], n_threads);
  tasks_p = convert->tasks_p[0];

  for (t = 0; t < n_threads; t++) {
    FConvertTask *task = &tasks[t];

    task->src = src;
    task->dest = dest;

    task->width = convert->in_width;
    task->data = &convert->convert_matrix;
    task->in_x = convert->in_x;
    task->in_y = convert->in_y;
    task->out_x = convert->out_x;
    task->out_y = convert->out_y;
    /* each worker gets its own scratch line */
    task->tmpline = convert->tmpline[t];

    task->height_0 = t * lines_per_thread;
    task->height_1 = MIN (height, task->height_0 + lines_per_thread);

    tasks_p[t] = task;
  }

  gst_parallelized_task_runner_run (convert->conversion_runner,
      (GstParallelizedTaskFunc) convert_A420_pack_ARGB_task,
      (gpointer) tasks_p);

  convert_fill_border (convert, dest);
}
/*
 * Per-thread worker used by convert_A420_BGRA().
 *
 * Processes rows [task->height_0, task->height_1) and writes the converted
 * pixels straight into the destination line (4 bytes per pixel, starting
 * out_x pixels in), without a temporary line or pack step.
 */
static void
convert_A420_BGRA_task (FConvertTask * task)
{
  gint row;

  for (row = task->height_0; row < task->height_1; row++) {
    const gint src_row = row + task->in_y;
    const gint chroma_row = src_row >> 1;
    guint8 *d =
        ((guint8 *) (FRAME_GET_LINE (task->dest, row + task->out_y))) +
        (task->out_x * 4);
    guint8 *sy =
        ((guint8 *) (FRAME_GET_Y_LINE (task->src, src_row))) + task->in_x;
    /* U and V lines are addressed at half the row and half the x offset. */
    guint8 *su =
        ((guint8 *) (FRAME_GET_U_LINE (task->src, chroma_row))) +
        (task->in_x >> 1);
    guint8 *sv =
        ((guint8 *) (FRAME_GET_V_LINE (task->src, chroma_row))) +
        (task->in_x >> 1);
    guint8 *sa =
        ((guint8 *) (FRAME_GET_A_LINE (task->src, src_row))) + task->in_x;

    /* The kernel variant is selected by host byte order. */
#if G_BYTE_ORDER == G_LITTLE_ENDIAN
    video_orc_convert_A420_BGRA (d, sy, su, sv, sa,
        task->data->im[0][0], task->data->im[0][2],
        task->data->im[2][1], task->data->im[1][1], task->data->im[1][2],
        task->width);
#else
    video_orc_convert_A420_ARGB (d, sy, su, sv, sa,
        task->data->im[0][0], task->data->im[0][2],
        task->data->im[2][1], task->data->im[1][1], task->data->im[1][2],
        task->width);
#endif
  }
}
/*
 * A420 -> BGRA fast path writing directly into the destination frame.
 *
 * The in_height rows are divided into n_threads contiguous slices (rounded
 * up, last slices clamped to the height); one FConvertTask is filled per
 * slice and all of them are run through the conversion runner with
 * convert_A420_BGRA_task(). convert_fill_border() is called afterwards.
 * No tmpline is assigned to the tasks here.
 */
static void
convert_A420_BGRA (GstVideoConverter * convert, const GstVideoFrame * src,
    GstVideoFrame * dest)
{
  const gint n_threads = convert->conversion_runner->n_threads;
  const gint height = convert->in_height;
  /* ceil (height / n_threads) */
  const gint lines_per_thread = (height + n_threads - 1) / n_threads;
  FConvertTask *tasks;
  FConvertTask **tasks_p;
  gint t;

  /* The task arrays are kept on the converter and resized on every call. */
  convert->tasks[0] = g_renew (FConvertTask, convert->tasks[0], n_threads);
  tasks = convert->tasks[0];
  convert->tasks_p[0] =
      g_renew (FConvertTask *, convert->tasks_p[0], n_threads);
  tasks_p = convert->tasks_p[0];

  for (t = 0; t < n_threads; t++) {
    FConvertTask *task = &tasks[t];

    task->src = src;
    task->dest = dest;

    task->width = convert->in_width;
    task->data = &convert->convert_matrix;
    task->in_x = convert->in_x;
    task->in_y = convert->in_y;
    task->out_x = convert->out_x;
    task->out_y = convert->out_y;

    task->height_0 = t * lines_per_thread;
    task->height_1 = MIN (height, task->height_0 + lines_per_thread);

    tasks_p[t] = task;
  }

  gst_parallelized_task_runner_run (convert->conversion_runner,
      (GstParallelizedTaskFunc) convert_A420_BGRA_task, (gpointer) tasks_p);

  convert_fill_border (convert, dest);
}
static void
memset_u24 (guint8 * data, guint8 col[3], unsigned int n)
{
@ -7372,6 +7541,28 @@ static const VideoTransform transforms[] = {
{GST_VIDEO_FORMAT_YV12, GST_VIDEO_FORMAT_BGR16, FALSE, TRUE, TRUE, TRUE,
TRUE, FALSE, FALSE, FALSE, 0, 0, convert_I420_pack_ARGB},
{GST_VIDEO_FORMAT_A420, GST_VIDEO_FORMAT_ABGR, FALSE, TRUE, TRUE, TRUE,
TRUE, TRUE, FALSE, FALSE, 0, 0, convert_A420_pack_ARGB},
{GST_VIDEO_FORMAT_A420, GST_VIDEO_FORMAT_RGBA, FALSE, TRUE, TRUE, TRUE,
TRUE, TRUE, FALSE, FALSE, 0, 0, convert_A420_pack_ARGB},
{GST_VIDEO_FORMAT_A420, GST_VIDEO_FORMAT_BGRA, FALSE, TRUE, TRUE, TRUE,
TRUE, TRUE, FALSE, FALSE, 0, 0, convert_A420_BGRA},
/* A420 to non-alpha RGB formats, reuse I420_* method */
{GST_VIDEO_FORMAT_A420, GST_VIDEO_FORMAT_BGRx, FALSE, TRUE, TRUE, TRUE,
TRUE, FALSE, FALSE, FALSE, 0, 0, convert_I420_BGRA},
{GST_VIDEO_FORMAT_A420, GST_VIDEO_FORMAT_xBGR, FALSE, TRUE, TRUE, TRUE,
TRUE, FALSE, FALSE, FALSE, 0, 0, convert_I420_pack_ARGB},
{GST_VIDEO_FORMAT_A420, GST_VIDEO_FORMAT_RGBx, FALSE, TRUE, TRUE, TRUE,
TRUE, FALSE, FALSE, FALSE, 0, 0, convert_I420_pack_ARGB},
{GST_VIDEO_FORMAT_A420, GST_VIDEO_FORMAT_RGB, FALSE, TRUE, TRUE, TRUE,
TRUE, FALSE, FALSE, FALSE, 0, 0, convert_I420_pack_ARGB},
{GST_VIDEO_FORMAT_A420, GST_VIDEO_FORMAT_BGR, FALSE, TRUE, TRUE, TRUE,
TRUE, FALSE, FALSE, FALSE, 0, 0, convert_I420_pack_ARGB},
{GST_VIDEO_FORMAT_A420, GST_VIDEO_FORMAT_RGB15, FALSE, TRUE, TRUE, TRUE,
TRUE, FALSE, FALSE, FALSE, 0, 0, convert_I420_pack_ARGB},
{GST_VIDEO_FORMAT_A420, GST_VIDEO_FORMAT_BGR16, FALSE, TRUE, TRUE, TRUE,
TRUE, FALSE, FALSE, FALSE, 0, 0, convert_I420_pack_ARGB},
/* scalers */
{GST_VIDEO_FORMAT_GBR, GST_VIDEO_FORMAT_GBR, TRUE, FALSE, FALSE, TRUE,
TRUE, FALSE, FALSE, FALSE, 0, 0, convert_scale_planes},

View file

@ -360,6 +360,14 @@ void video_orc_convert_I420_ARGB (guint8 * ORC_RESTRICT d1,
const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2,
const guint8 * ORC_RESTRICT s3, int p1, int p2, int p3, int p4, int p5,
int n);
/* Per-line A420 conversion kernel: s1..s4 are the four input lines, p1..p5
 * the conversion parameters, n the number of pixels; one 4-byte pixel per
 * input sample is written to d1. */
void video_orc_convert_A420_ARGB (guint8 * ORC_RESTRICT d1,
const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2,
const guint8 * ORC_RESTRICT s3, const guint8 * ORC_RESTRICT s4, int p1,
int p2, int p3, int p4, int p5, int n);
/* Same inputs as video_orc_convert_A420_ARGB; the four output bytes of each
 * pixel are assembled in the opposite lane order. */
void video_orc_convert_A420_BGRA (guint8 * ORC_RESTRICT d1,
const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2,
const guint8 * ORC_RESTRICT s3, const guint8 * ORC_RESTRICT s4, int p1,
int p2, int p3, int p4, int p5, int n);
void video_orc_matrix8 (guint8 * ORC_RESTRICT d1,
const guint8 * ORC_RESTRICT s1, orc_int64 p1, orc_int64 p2, orc_int64 p3,
orc_int64 p4, int n);
@ -24295,6 +24303,920 @@ video_orc_convert_I420_ARGB (guint8 * ORC_RESTRICT d1,
#endif
/* video_orc_convert_A420_ARGB */
#ifdef DISABLE_ORC
/*
 * Plain-C version of video_orc_convert_A420_ARGB, compiled when DISABLE_ORC
 * is defined.
 *
 * For each of the n pixels it reads s1[i] and s4[i], plus s2[i >> 1] and
 * s3[i >> 1] (so every s2/s3 byte serves two consecutive pixels), subtracts
 * 128 from each sample, and combines the s1/s2/s3 samples with the 16-bit
 * parameters p1..p5 using the mulhsw/addw/convssswb steps below.
 *
 * Lanes x4[0]..x4[3] of the 4-byte value stored to d1[i] are, in order:
 *   s4 sample,
 *   clamp (s1*p1 + s3*p2),
 *   clamp (s1*p1 + s2*p4 + s3*p5),
 *   clamp (s1*p1 + s2*p3),
 * each with 128 added back. ("x*p" stands for the splatbw + mulhsw pair.)
 */
void
video_orc_convert_A420_ARGB (guint8 * ORC_RESTRICT d1,
const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2,
const guint8 * ORC_RESTRICT s3, const guint8 * ORC_RESTRICT s4, int p1,
int p2, int p3, int p4, int p5, int n)
{
int i;
orc_union32 *ORC_RESTRICT ptr0;
const orc_int8 *ORC_RESTRICT ptr4;
const orc_int8 *ORC_RESTRICT ptr5;
const orc_int8 *ORC_RESTRICT ptr6;
const orc_int8 *ORC_RESTRICT ptr7;
orc_int8 var43;
#if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
volatile orc_int8 var44;
#else
orc_int8 var44;
#endif
orc_int8 var45;
orc_union16 var46;
orc_union16 var47;
orc_union16 var48;
orc_union16 var49;
orc_union16 var50;
#if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
volatile orc_union32 var51;
#else
orc_union32 var51;
#endif
orc_union32 var52;
orc_int8 var53;
orc_union16 var54;
orc_int8 var55;
orc_int8 var56;
orc_union16 var57;
orc_int8 var58;
orc_int8 var59;
orc_union16 var60;
orc_int8 var61;
orc_union16 var62;
orc_union16 var63;
orc_union16 var64;
orc_int8 var65;
orc_union16 var66;
orc_union16 var67;
orc_union16 var68;
orc_int8 var69;
orc_union16 var70;
orc_union16 var71;
orc_union16 var72;
orc_union16 var73;
orc_int8 var74;
orc_union16 var75;
orc_union32 var76;
ptr0 = (orc_union32 *) d1;
ptr4 = (orc_int8 *) s1;
ptr5 = (orc_int8 *) s2;
ptr6 = (orc_int8 *) s3;
ptr7 = (orc_int8 *) s4;
/* Loop-invariant constants and parameters are loaded once, before the loop. */
/* 1: loadpb */
var44 = 0x00000080; /* 128 or 6.32404e-322f */
/* 12: loadpw */
var46.i = p1;
/* 14: loadpw */
var47.i = p2;
/* 19: loadpw */
var48.i = p3;
/* 23: loadpw */
var49.i = p4;
/* 26: loadpw */
var50.i = p5;
/* 32: loadpb */
var51.x4[0] = 0x00000080; /* 128 or 6.32404e-322f */
var51.x4[1] = 0x00000080; /* 128 or 6.32404e-322f */
var51.x4[2] = 0x00000080; /* 128 or 6.32404e-322f */
var51.x4[3] = 0x00000080; /* 128 or 6.32404e-322f */
for (i = 0; i < n; i++) {
/* 0: loadb */
var43 = ptr4[i];
/* 2: subb */
var53 = var43 - var44;
/* 3: splatbw */
var54.i = ((var53 & 0xff) << 8) | (var53 & 0xff);
/* 4: loadupdb */
var55 = ptr5[i >> 1];
/* 5: subb */
var56 = var55 - var44;
/* 6: splatbw */
var57.i = ((var56 & 0xff) << 8) | (var56 & 0xff);
/* 7: loadupdb */
var58 = ptr6[i >> 1];
/* 8: subb */
var59 = var58 - var44;
/* 9: splatbw */
var60.i = ((var59 & 0xff) << 8) | (var59 & 0xff);
/* 10: loadb */
var45 = ptr7[i];
/* 11: subb */
var61 = var45 - var44;
/* 13: mulhsw */
var62.i = (var54.i * var46.i) >> 16;
/* 15: mulhsw */
var63.i = (var60.i * var47.i) >> 16;
/* 16: addw */
var64.i = var62.i + var63.i;
/* 17: convssswb */
var65 = ORC_CLAMP_SB (var64.i);
/* 18: mergebw */
{
orc_union16 _dest;
_dest.x2[0] = var61;
_dest.x2[1] = var65;
var66.i = _dest.i;
}
/* 20: mulhsw */
var67.i = (var57.i * var48.i) >> 16;
/* 21: addw */
var68.i = var62.i + var67.i;
/* 22: convssswb */
var69 = ORC_CLAMP_SB (var68.i);
/* 24: mulhsw */
var70.i = (var57.i * var49.i) >> 16;
/* 25: addw */
var71.i = var62.i + var70.i;
/* 27: mulhsw */
var72.i = (var60.i * var50.i) >> 16;
/* 28: addw */
var73.i = var71.i + var72.i;
/* 29: convssswb */
var74 = ORC_CLAMP_SB (var73.i);
/* 30: mergebw */
{
orc_union16 _dest;
_dest.x2[0] = var74;
_dest.x2[1] = var69;
var75.i = _dest.i;
}
/* 31: mergewl */
{
orc_union32 _dest;
_dest.x2[0] = var66.i;
_dest.x2[1] = var75.i;
var76.i = _dest.i;
}
/* 33: addb */
var52.x4[0] = var76.x4[0] + var51.x4[0];
var52.x4[1] = var76.x4[1] + var51.x4[1];
var52.x4[2] = var76.x4[2] + var51.x4[2];
var52.x4[3] = var76.x4[3] + var51.x4[3];
/* 34: storel */
ptr0[i] = var52;
}
}
#else
/*
 * Plain-C backup for the video_orc_convert_A420_ARGB program; it is
 * registered on the OrcProgram via orc_program_set_backup_function().
 *
 * Inputs come from the executor: ex->n is the pixel count, ex->arrays[0] the
 * destination, ex->arrays[4..7] the four source lines and ex->params[24..28]
 * the five 16-bit parameters. The per-pixel arithmetic is the same sequence
 * of steps as in the DISABLE_ORC variant of the function: lanes
 * x4[0]..x4[3] of each stored value are the arrays[7] sample followed by the
 * three clamped sums, each with 128 added back.
 */
static void
_backup_video_orc_convert_A420_ARGB (OrcExecutor * ORC_RESTRICT ex)
{
int i;
int n = ex->n;
orc_union32 *ORC_RESTRICT ptr0;
const orc_int8 *ORC_RESTRICT ptr4;
const orc_int8 *ORC_RESTRICT ptr5;
const orc_int8 *ORC_RESTRICT ptr6;
const orc_int8 *ORC_RESTRICT ptr7;
orc_int8 var43;
#if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
volatile orc_int8 var44;
#else
orc_int8 var44;
#endif
orc_int8 var45;
orc_union16 var46;
orc_union16 var47;
orc_union16 var48;
orc_union16 var49;
orc_union16 var50;
#if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
volatile orc_union32 var51;
#else
orc_union32 var51;
#endif
orc_union32 var52;
orc_int8 var53;
orc_union16 var54;
orc_int8 var55;
orc_int8 var56;
orc_union16 var57;
orc_int8 var58;
orc_int8 var59;
orc_union16 var60;
orc_int8 var61;
orc_union16 var62;
orc_union16 var63;
orc_union16 var64;
orc_int8 var65;
orc_union16 var66;
orc_union16 var67;
orc_union16 var68;
orc_int8 var69;
orc_union16 var70;
orc_union16 var71;
orc_union16 var72;
orc_union16 var73;
orc_int8 var74;
orc_union16 var75;
orc_union32 var76;
ptr0 = (orc_union32 *) ex->arrays[0];
ptr4 = (orc_int8 *) ex->arrays[4];
ptr5 = (orc_int8 *) ex->arrays[5];
ptr6 = (orc_int8 *) ex->arrays[6];
ptr7 = (orc_int8 *) ex->arrays[7];
/* Loop-invariant constants and parameters are loaded once, before the loop. */
/* 1: loadpb */
var44 = 0x00000080; /* 128 or 6.32404e-322f */
/* 12: loadpw */
var46.i = ex->params[24];
/* 14: loadpw */
var47.i = ex->params[25];
/* 19: loadpw */
var48.i = ex->params[26];
/* 23: loadpw */
var49.i = ex->params[27];
/* 26: loadpw */
var50.i = ex->params[28];
/* 32: loadpb */
var51.x4[0] = 0x00000080; /* 128 or 6.32404e-322f */
var51.x4[1] = 0x00000080; /* 128 or 6.32404e-322f */
var51.x4[2] = 0x00000080; /* 128 or 6.32404e-322f */
var51.x4[3] = 0x00000080; /* 128 or 6.32404e-322f */
for (i = 0; i < n; i++) {
/* 0: loadb */
var43 = ptr4[i];
/* 2: subb */
var53 = var43 - var44;
/* 3: splatbw */
var54.i = ((var53 & 0xff) << 8) | (var53 & 0xff);
/* 4: loadupdb */
var55 = ptr5[i >> 1];
/* 5: subb */
var56 = var55 - var44;
/* 6: splatbw */
var57.i = ((var56 & 0xff) << 8) | (var56 & 0xff);
/* 7: loadupdb */
var58 = ptr6[i >> 1];
/* 8: subb */
var59 = var58 - var44;
/* 9: splatbw */
var60.i = ((var59 & 0xff) << 8) | (var59 & 0xff);
/* 10: loadb */
var45 = ptr7[i];
/* 11: subb */
var61 = var45 - var44;
/* 13: mulhsw */
var62.i = (var54.i * var46.i) >> 16;
/* 15: mulhsw */
var63.i = (var60.i * var47.i) >> 16;
/* 16: addw */
var64.i = var62.i + var63.i;
/* 17: convssswb */
var65 = ORC_CLAMP_SB (var64.i);
/* 18: mergebw */
{
orc_union16 _dest;
_dest.x2[0] = var61;
_dest.x2[1] = var65;
var66.i = _dest.i;
}
/* 20: mulhsw */
var67.i = (var57.i * var48.i) >> 16;
/* 21: addw */
var68.i = var62.i + var67.i;
/* 22: convssswb */
var69 = ORC_CLAMP_SB (var68.i);
/* 24: mulhsw */
var70.i = (var57.i * var49.i) >> 16;
/* 25: addw */
var71.i = var62.i + var70.i;
/* 27: mulhsw */
var72.i = (var60.i * var50.i) >> 16;
/* 28: addw */
var73.i = var71.i + var72.i;
/* 29: convssswb */
var74 = ORC_CLAMP_SB (var73.i);
/* 30: mergebw */
{
orc_union16 _dest;
_dest.x2[0] = var74;
_dest.x2[1] = var69;
var75.i = _dest.i;
}
/* 31: mergewl */
{
orc_union32 _dest;
_dest.x2[0] = var66.i;
_dest.x2[1] = var75.i;
var76.i = _dest.i;
}
/* 33: addb */
var52.x4[0] = var76.x4[0] + var51.x4[0];
var52.x4[1] = var76.x4[1] + var51.x4[1];
var52.x4[2] = var76.x4[2] + var51.x4[2];
var52.x4[3] = var76.x4[3] + var51.x4[3];
/* 34: storel */
ptr0[i] = var52;
}
}
/*
 * ORC-enabled entry point for video_orc_convert_A420_ARGB.
 *
 * On the first call (guarded by p_inited, which is re-checked while holding
 * the orc once-mutex) the program is created from the static bytecode below,
 * the plain-C backup function is attached, the program is compiled and the
 * resulting OrcCode is cached in the static 'c'. Every call then fills a
 * stack OrcExecutor with the destination, the four sources, the five
 * parameters and n, and invokes c->exec on it.
 */
void
video_orc_convert_A420_ARGB (guint8 * ORC_RESTRICT d1,
const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2,
const guint8 * ORC_RESTRICT s3, const guint8 * ORC_RESTRICT s4, int p1,
int p2, int p3, int p4, int p5, int n)
{
OrcExecutor _ex, *ex = &_ex;
static volatile int p_inited = 0;
static OrcCode *c = 0;
void (*func) (OrcExecutor *);
if (!p_inited) {
orc_once_mutex_lock ();
if (!p_inited) {
OrcProgram *p;
/* The "#if 1" branch (static bytecode) is the one that is compiled; the
 * #else branch builds a program through individual orc_program_* calls. */
#if 1
static const orc_uint8 bc[] = {
1, 9, 27, 118, 105, 100, 101, 111, 95, 111, 114, 99, 95, 99, 111, 110,
118, 101, 114, 116, 95, 65, 52, 50, 48, 95, 65, 82, 71, 66, 11, 4,
4, 12, 1, 1, 12, 1, 1, 12, 1, 1, 12, 1, 1, 14, 1, 128,
0, 0, 0, 16, 2, 16, 2, 16, 2, 16, 2, 16, 2, 20, 2, 20,
2, 20, 2, 20, 2, 20, 2, 20, 2, 20, 1, 20, 1, 20, 1, 20,
1, 20, 4, 65, 38, 4, 16, 151, 32, 38, 45, 38, 5, 65, 38, 38,
16, 151, 33, 38, 45, 38, 6, 65, 38, 38, 16, 151, 34, 38, 65, 41,
7, 16, 90, 32, 32, 24, 90, 35, 34, 25, 70, 35, 32, 35, 159, 38,
35, 196, 35, 41, 38, 90, 37, 33, 26, 70, 37, 32, 37, 159, 40, 37,
90, 36, 33, 27, 70, 36, 32, 36, 90, 32, 34, 28, 70, 36, 36, 32,
159, 39, 36, 196, 37, 39, 40, 195, 42, 35, 37, 21, 2, 33, 0, 42,
16, 2, 0,
};
p = orc_program_new_from_static_bytecode (bc);
orc_program_set_backup_function (p, _backup_video_orc_convert_A420_ARGB);
#else
p = orc_program_new ();
orc_program_set_name (p, "video_orc_convert_A420_ARGB");
orc_program_set_backup_function (p, _backup_video_orc_convert_A420_ARGB);
orc_program_add_destination (p, 4, "d1");
orc_program_add_source (p, 1, "s1");
orc_program_add_source (p, 1, "s2");
orc_program_add_source (p, 1, "s3");
orc_program_add_source (p, 1, "s4");
orc_program_add_constant (p, 1, 0x00000080, "c1");
orc_program_add_parameter (p, 2, "p1");
orc_program_add_parameter (p, 2, "p2");
orc_program_add_parameter (p, 2, "p3");
orc_program_add_parameter (p, 2, "p4");
orc_program_add_parameter (p, 2, "p5");
orc_program_add_temporary (p, 2, "t1");
orc_program_add_temporary (p, 2, "t2");
orc_program_add_temporary (p, 2, "t3");
orc_program_add_temporary (p, 2, "t4");
orc_program_add_temporary (p, 2, "t5");
orc_program_add_temporary (p, 2, "t6");
orc_program_add_temporary (p, 1, "t7");
orc_program_add_temporary (p, 1, "t8");
orc_program_add_temporary (p, 1, "t9");
orc_program_add_temporary (p, 1, "t10");
orc_program_add_temporary (p, 4, "t11");
orc_program_append_2 (p, "subb", 0, ORC_VAR_T7, ORC_VAR_S1, ORC_VAR_C1,
ORC_VAR_D1);
orc_program_append_2 (p, "splatbw", 0, ORC_VAR_T1, ORC_VAR_T7, ORC_VAR_D1,
ORC_VAR_D1);
orc_program_append_2 (p, "loadupdb", 0, ORC_VAR_T7, ORC_VAR_S2,
ORC_VAR_D1, ORC_VAR_D1);
orc_program_append_2 (p, "subb", 0, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_C1,
ORC_VAR_D1);
orc_program_append_2 (p, "splatbw", 0, ORC_VAR_T2, ORC_VAR_T7, ORC_VAR_D1,
ORC_VAR_D1);
orc_program_append_2 (p, "loadupdb", 0, ORC_VAR_T7, ORC_VAR_S3,
ORC_VAR_D1, ORC_VAR_D1);
orc_program_append_2 (p, "subb", 0, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_C1,
ORC_VAR_D1);
orc_program_append_2 (p, "splatbw", 0, ORC_VAR_T3, ORC_VAR_T7, ORC_VAR_D1,
ORC_VAR_D1);
orc_program_append_2 (p, "subb", 0, ORC_VAR_T10, ORC_VAR_S4, ORC_VAR_C1,
ORC_VAR_D1);
orc_program_append_2 (p, "mulhsw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P1,
ORC_VAR_D1);
orc_program_append_2 (p, "mulhsw", 0, ORC_VAR_T4, ORC_VAR_T3, ORC_VAR_P2,
ORC_VAR_D1);
orc_program_append_2 (p, "addw", 0, ORC_VAR_T4, ORC_VAR_T1, ORC_VAR_T4,
ORC_VAR_D1);
orc_program_append_2 (p, "convssswb", 0, ORC_VAR_T7, ORC_VAR_T4,
ORC_VAR_D1, ORC_VAR_D1);
orc_program_append_2 (p, "mergebw", 0, ORC_VAR_T4, ORC_VAR_T10,
ORC_VAR_T7, ORC_VAR_D1);
orc_program_append_2 (p, "mulhsw", 0, ORC_VAR_T6, ORC_VAR_T2, ORC_VAR_P3,
ORC_VAR_D1);
orc_program_append_2 (p, "addw", 0, ORC_VAR_T6, ORC_VAR_T1, ORC_VAR_T6,
ORC_VAR_D1);
orc_program_append_2 (p, "convssswb", 0, ORC_VAR_T9, ORC_VAR_T6,
ORC_VAR_D1, ORC_VAR_D1);
orc_program_append_2 (p, "mulhsw", 0, ORC_VAR_T5, ORC_VAR_T2, ORC_VAR_P4,
ORC_VAR_D1);
orc_program_append_2 (p, "addw", 0, ORC_VAR_T5, ORC_VAR_T1, ORC_VAR_T5,
ORC_VAR_D1);
orc_program_append_2 (p, "mulhsw", 0, ORC_VAR_T1, ORC_VAR_T3, ORC_VAR_P5,
ORC_VAR_D1);
orc_program_append_2 (p, "addw", 0, ORC_VAR_T5, ORC_VAR_T5, ORC_VAR_T1,
ORC_VAR_D1);
orc_program_append_2 (p, "convssswb", 0, ORC_VAR_T8, ORC_VAR_T5,
ORC_VAR_D1, ORC_VAR_D1);
orc_program_append_2 (p, "mergebw", 0, ORC_VAR_T6, ORC_VAR_T8, ORC_VAR_T9,
ORC_VAR_D1);
orc_program_append_2 (p, "mergewl", 0, ORC_VAR_T11, ORC_VAR_T4,
ORC_VAR_T6, ORC_VAR_D1);
orc_program_append_2 (p, "addb", 2, ORC_VAR_D1, ORC_VAR_T11, ORC_VAR_C1,
ORC_VAR_D1);
#endif
orc_program_compile (p);
/* Keep only the compiled code; the program object itself is released. */
c = orc_program_take_code (p);
orc_program_free (p);
}
p_inited = TRUE;
orc_once_mutex_unlock ();
}
/* Per-call executor setup, then run the cached code. */
ex->arrays[ORC_VAR_A2] = c;
ex->program = 0;
ex->n = n;
ex->arrays[ORC_VAR_D1] = d1;
ex->arrays[ORC_VAR_S1] = (void *) s1;
ex->arrays[ORC_VAR_S2] = (void *) s2;
ex->arrays[ORC_VAR_S3] = (void *) s3;
ex->arrays[ORC_VAR_S4] = (void *) s4;
ex->params[ORC_VAR_P1] = p1;
ex->params[ORC_VAR_P2] = p2;
ex->params[ORC_VAR_P3] = p3;
ex->params[ORC_VAR_P4] = p4;
ex->params[ORC_VAR_P5] = p5;
func = c->exec;
func (ex);
}
#endif
/* video_orc_convert_A420_BGRA */
#ifdef DISABLE_ORC
/*
 * Plain-C version of video_orc_convert_A420_BGRA, compiled when DISABLE_ORC
 * is defined.
 *
 * For each of the n pixels it reads s1[i] and s4[i], plus s2[i >> 1] and
 * s3[i >> 1] (so every s2/s3 byte serves two consecutive pixels), subtracts
 * 128 from each sample, and combines the s1/s2/s3 samples with the 16-bit
 * parameters p1..p5 using the mulhsw/addw/convssswb steps below.
 *
 * Lanes x4[0]..x4[3] of the 4-byte value stored to d1[i] are, in order:
 *   clamp (s1*p1 + s2*p3),
 *   clamp (s1*p1 + s2*p4 + s3*p5),
 *   clamp (s1*p1 + s3*p2),
 *   s4 sample,
 * each with 128 added back. ("x*p" stands for the splatbw + mulhsw pair.)
 */
void
video_orc_convert_A420_BGRA (guint8 * ORC_RESTRICT d1,
const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2,
const guint8 * ORC_RESTRICT s3, const guint8 * ORC_RESTRICT s4, int p1,
int p2, int p3, int p4, int p5, int n)
{
int i;
orc_union32 *ORC_RESTRICT ptr0;
const orc_int8 *ORC_RESTRICT ptr4;
const orc_int8 *ORC_RESTRICT ptr5;
const orc_int8 *ORC_RESTRICT ptr6;
const orc_int8 *ORC_RESTRICT ptr7;
orc_int8 var43;
#if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
volatile orc_int8 var44;
#else
orc_int8 var44;
#endif
orc_int8 var45;
orc_union16 var46;
orc_union16 var47;
orc_union16 var48;
orc_union16 var49;
orc_union16 var50;
#if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
volatile orc_union32 var51;
#else
orc_union32 var51;
#endif
orc_union32 var52;
orc_int8 var53;
orc_union16 var54;
orc_int8 var55;
orc_int8 var56;
orc_union16 var57;
orc_int8 var58;
orc_int8 var59;
orc_union16 var60;
orc_int8 var61;
orc_union16 var62;
orc_union16 var63;
orc_union16 var64;
orc_int8 var65;
orc_union16 var66;
orc_union16 var67;
orc_union16 var68;
orc_int8 var69;
orc_union16 var70;
orc_union16 var71;
orc_union16 var72;
orc_union16 var73;
orc_int8 var74;
orc_union16 var75;
orc_union32 var76;
ptr0 = (orc_union32 *) d1;
ptr4 = (orc_int8 *) s1;
ptr5 = (orc_int8 *) s2;
ptr6 = (orc_int8 *) s3;
ptr7 = (orc_int8 *) s4;
/* Loop-invariant constants and parameters are loaded once, before the loop. */
/* 1: loadpb */
var44 = 0x00000080; /* 128 or 6.32404e-322f */
/* 12: loadpw */
var46.i = p1;
/* 14: loadpw */
var47.i = p2;
/* 19: loadpw */
var48.i = p3;
/* 23: loadpw */
var49.i = p4;
/* 26: loadpw */
var50.i = p5;
/* 32: loadpb */
var51.x4[0] = 0x00000080; /* 128 or 6.32404e-322f */
var51.x4[1] = 0x00000080; /* 128 or 6.32404e-322f */
var51.x4[2] = 0x00000080; /* 128 or 6.32404e-322f */
var51.x4[3] = 0x00000080; /* 128 or 6.32404e-322f */
for (i = 0; i < n; i++) {
/* 0: loadb */
var43 = ptr4[i];
/* 2: subb */
var53 = var43 - var44;
/* 3: splatbw */
var54.i = ((var53 & 0xff) << 8) | (var53 & 0xff);
/* 4: loadupdb */
var55 = ptr5[i >> 1];
/* 5: subb */
var56 = var55 - var44;
/* 6: splatbw */
var57.i = ((var56 & 0xff) << 8) | (var56 & 0xff);
/* 7: loadupdb */
var58 = ptr6[i >> 1];
/* 8: subb */
var59 = var58 - var44;
/* 9: splatbw */
var60.i = ((var59 & 0xff) << 8) | (var59 & 0xff);
/* 10: loadb */
var45 = ptr7[i];
/* 11: subb */
var61 = var45 - var44;
/* 13: mulhsw */
var62.i = (var54.i * var46.i) >> 16;
/* 15: mulhsw */
var63.i = (var60.i * var47.i) >> 16;
/* 16: addw */
var64.i = var62.i + var63.i;
/* 17: convssswb */
var65 = ORC_CLAMP_SB (var64.i);
/* 18: mergebw */
{
orc_union16 _dest;
_dest.x2[0] = var65;
_dest.x2[1] = var61;
var66.i = _dest.i;
}
/* 20: mulhsw */
var67.i = (var57.i * var48.i) >> 16;
/* 21: addw */
var68.i = var62.i + var67.i;
/* 22: convssswb */
var69 = ORC_CLAMP_SB (var68.i);
/* 24: mulhsw */
var70.i = (var57.i * var49.i) >> 16;
/* 25: addw */
var71.i = var62.i + var70.i;
/* 27: mulhsw */
var72.i = (var60.i * var50.i) >> 16;
/* 28: addw */
var73.i = var71.i + var72.i;
/* 29: convssswb */
var74 = ORC_CLAMP_SB (var73.i);
/* 30: mergebw */
{
orc_union16 _dest;
_dest.x2[0] = var69;
_dest.x2[1] = var74;
var75.i = _dest.i;
}
/* 31: mergewl */
{
orc_union32 _dest;
_dest.x2[0] = var75.i;
_dest.x2[1] = var66.i;
var76.i = _dest.i;
}
/* 33: addb */
var52.x4[0] = var76.x4[0] + var51.x4[0];
var52.x4[1] = var76.x4[1] + var51.x4[1];
var52.x4[2] = var76.x4[2] + var51.x4[2];
var52.x4[3] = var76.x4[3] + var51.x4[3];
/* 34: storel */
ptr0[i] = var52;
}
}
#else
/*
 * Plain-C backup for the video_orc_convert_A420_BGRA program; it is
 * registered on the OrcProgram via orc_program_set_backup_function().
 *
 * Inputs come from the executor: ex->n is the pixel count, ex->arrays[0] the
 * destination, ex->arrays[4..7] the four source lines and ex->params[24..28]
 * the five 16-bit parameters. The per-pixel arithmetic is the same sequence
 * of steps as in the DISABLE_ORC variant of the function: lanes
 * x4[0]..x4[3] of each stored value are the three clamped sums followed by
 * the arrays[7] sample, each with 128 added back.
 */
static void
_backup_video_orc_convert_A420_BGRA (OrcExecutor * ORC_RESTRICT ex)
{
int i;
int n = ex->n;
orc_union32 *ORC_RESTRICT ptr0;
const orc_int8 *ORC_RESTRICT ptr4;
const orc_int8 *ORC_RESTRICT ptr5;
const orc_int8 *ORC_RESTRICT ptr6;
const orc_int8 *ORC_RESTRICT ptr7;
orc_int8 var43;
#if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
volatile orc_int8 var44;
#else
orc_int8 var44;
#endif
orc_int8 var45;
orc_union16 var46;
orc_union16 var47;
orc_union16 var48;
orc_union16 var49;
orc_union16 var50;
#if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
volatile orc_union32 var51;
#else
orc_union32 var51;
#endif
orc_union32 var52;
orc_int8 var53;
orc_union16 var54;
orc_int8 var55;
orc_int8 var56;
orc_union16 var57;
orc_int8 var58;
orc_int8 var59;
orc_union16 var60;
orc_int8 var61;
orc_union16 var62;
orc_union16 var63;
orc_union16 var64;
orc_int8 var65;
orc_union16 var66;
orc_union16 var67;
orc_union16 var68;
orc_int8 var69;
orc_union16 var70;
orc_union16 var71;
orc_union16 var72;
orc_union16 var73;
orc_int8 var74;
orc_union16 var75;
orc_union32 var76;
ptr0 = (orc_union32 *) ex->arrays[0];
ptr4 = (orc_int8 *) ex->arrays[4];
ptr5 = (orc_int8 *) ex->arrays[5];
ptr6 = (orc_int8 *) ex->arrays[6];
ptr7 = (orc_int8 *) ex->arrays[7];
/* Loop-invariant constants and parameters are loaded once, before the loop. */
/* 1: loadpb */
var44 = 0x00000080; /* 128 or 6.32404e-322f */
/* 12: loadpw */
var46.i = ex->params[24];
/* 14: loadpw */
var47.i = ex->params[25];
/* 19: loadpw */
var48.i = ex->params[26];
/* 23: loadpw */
var49.i = ex->params[27];
/* 26: loadpw */
var50.i = ex->params[28];
/* 32: loadpb */
var51.x4[0] = 0x00000080; /* 128 or 6.32404e-322f */
var51.x4[1] = 0x00000080; /* 128 or 6.32404e-322f */
var51.x4[2] = 0x00000080; /* 128 or 6.32404e-322f */
var51.x4[3] = 0x00000080; /* 128 or 6.32404e-322f */
for (i = 0; i < n; i++) {
/* 0: loadb */
var43 = ptr4[i];
/* 2: subb */
var53 = var43 - var44;
/* 3: splatbw */
var54.i = ((var53 & 0xff) << 8) | (var53 & 0xff);
/* 4: loadupdb */
var55 = ptr5[i >> 1];
/* 5: subb */
var56 = var55 - var44;
/* 6: splatbw */
var57.i = ((var56 & 0xff) << 8) | (var56 & 0xff);
/* 7: loadupdb */
var58 = ptr6[i >> 1];
/* 8: subb */
var59 = var58 - var44;
/* 9: splatbw */
var60.i = ((var59 & 0xff) << 8) | (var59 & 0xff);
/* 10: loadb */
var45 = ptr7[i];
/* 11: subb */
var61 = var45 - var44;
/* 13: mulhsw */
var62.i = (var54.i * var46.i) >> 16;
/* 15: mulhsw */
var63.i = (var60.i * var47.i) >> 16;
/* 16: addw */
var64.i = var62.i + var63.i;
/* 17: convssswb */
var65 = ORC_CLAMP_SB (var64.i);
/* 18: mergebw */
{
orc_union16 _dest;
_dest.x2[0] = var65;
_dest.x2[1] = var61;
var66.i = _dest.i;
}
/* 20: mulhsw */
var67.i = (var57.i * var48.i) >> 16;
/* 21: addw */
var68.i = var62.i + var67.i;
/* 22: convssswb */
var69 = ORC_CLAMP_SB (var68.i);
/* 24: mulhsw */
var70.i = (var57.i * var49.i) >> 16;
/* 25: addw */
var71.i = var62.i + var70.i;
/* 27: mulhsw */
var72.i = (var60.i * var50.i) >> 16;
/* 28: addw */
var73.i = var71.i + var72.i;
/* 29: convssswb */
var74 = ORC_CLAMP_SB (var73.i);
/* 30: mergebw */
{
orc_union16 _dest;
_dest.x2[0] = var69;
_dest.x2[1] = var74;
var75.i = _dest.i;
}
/* 31: mergewl */
{
orc_union32 _dest;
_dest.x2[0] = var75.i;
_dest.x2[1] = var66.i;
var76.i = _dest.i;
}
/* 33: addb */
var52.x4[0] = var76.x4[0] + var51.x4[0];
var52.x4[1] = var76.x4[1] + var51.x4[1];
var52.x4[2] = var76.x4[2] + var51.x4[2];
var52.x4[3] = var76.x4[3] + var51.x4[3];
/* 34: storel */
ptr0[i] = var52;
}
}
/*
 * ORC-enabled entry point for video_orc_convert_A420_BGRA.
 *
 * On the first call (guarded by p_inited, which is re-checked while holding
 * the orc once-mutex) the program is created from the static bytecode below,
 * the plain-C backup function is attached, the program is compiled and the
 * resulting OrcCode is cached in the static 'c'. Every call then fills a
 * stack OrcExecutor with the destination, the four sources, the five
 * parameters and n, and invokes c->exec on it.
 */
void
video_orc_convert_A420_BGRA (guint8 * ORC_RESTRICT d1,
const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2,
const guint8 * ORC_RESTRICT s3, const guint8 * ORC_RESTRICT s4, int p1,
int p2, int p3, int p4, int p5, int n)
{
OrcExecutor _ex, *ex = &_ex;
static volatile int p_inited = 0;
static OrcCode *c = 0;
void (*func) (OrcExecutor *);
if (!p_inited) {
orc_once_mutex_lock ();
if (!p_inited) {
OrcProgram *p;
/* The "#if 1" branch (static bytecode) is the one that is compiled; the
 * #else branch builds a program through individual orc_program_* calls. */
#if 1
static const orc_uint8 bc[] = {
1, 9, 27, 118, 105, 100, 101, 111, 95, 111, 114, 99, 95, 99, 111, 110,
118, 101, 114, 116, 95, 65, 52, 50, 48, 95, 66, 71, 82, 65, 11, 4,
4, 12, 1, 1, 12, 1, 1, 12, 1, 1, 12, 1, 1, 14, 1, 128,
0, 0, 0, 16, 2, 16, 2, 16, 2, 16, 2, 16, 2, 20, 2, 20,
2, 20, 2, 20, 2, 20, 2, 20, 2, 20, 1, 20, 1, 20, 1, 20,
1, 20, 4, 65, 38, 4, 16, 151, 32, 38, 45, 38, 5, 65, 38, 38,
16, 151, 33, 38, 45, 38, 6, 65, 38, 38, 16, 151, 34, 38, 65, 41,
7, 16, 90, 32, 32, 24, 90, 35, 34, 25, 70, 35, 32, 35, 159, 38,
35, 196, 35, 38, 41, 90, 37, 33, 26, 70, 37, 32, 37, 159, 40, 37,
90, 36, 33, 27, 70, 36, 32, 36, 90, 32, 34, 28, 70, 36, 36, 32,
159, 39, 36, 196, 37, 40, 39, 195, 42, 37, 35, 21, 2, 33, 0, 42,
16, 2, 0,
};
p = orc_program_new_from_static_bytecode (bc);
orc_program_set_backup_function (p, _backup_video_orc_convert_A420_BGRA);
#else
p = orc_program_new ();
orc_program_set_name (p, "video_orc_convert_A420_BGRA");
orc_program_set_backup_function (p, _backup_video_orc_convert_A420_BGRA);
orc_program_add_destination (p, 4, "d1");
orc_program_add_source (p, 1, "s1");
orc_program_add_source (p, 1, "s2");
orc_program_add_source (p, 1, "s3");
orc_program_add_source (p, 1, "s4");
orc_program_add_constant (p, 1, 0x00000080, "c1");
orc_program_add_parameter (p, 2, "p1");
orc_program_add_parameter (p, 2, "p2");
orc_program_add_parameter (p, 2, "p3");
orc_program_add_parameter (p, 2, "p4");
orc_program_add_parameter (p, 2, "p5");
orc_program_add_temporary (p, 2, "t1");
orc_program_add_temporary (p, 2, "t2");
orc_program_add_temporary (p, 2, "t3");
orc_program_add_temporary (p, 2, "t4");
orc_program_add_temporary (p, 2, "t5");
orc_program_add_temporary (p, 2, "t6");
orc_program_add_temporary (p, 1, "t7");
orc_program_add_temporary (p, 1, "t8");
orc_program_add_temporary (p, 1, "t9");
orc_program_add_temporary (p, 1, "t10");
orc_program_add_temporary (p, 4, "t11");
orc_program_append_2 (p, "subb", 0, ORC_VAR_T7, ORC_VAR_S1, ORC_VAR_C1,
ORC_VAR_D1);
orc_program_append_2 (p, "splatbw", 0, ORC_VAR_T1, ORC_VAR_T7, ORC_VAR_D1,
ORC_VAR_D1);
orc_program_append_2 (p, "loadupdb", 0, ORC_VAR_T7, ORC_VAR_S2,
ORC_VAR_D1, ORC_VAR_D1);
orc_program_append_2 (p, "subb", 0, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_C1,
ORC_VAR_D1);
orc_program_append_2 (p, "splatbw", 0, ORC_VAR_T2, ORC_VAR_T7, ORC_VAR_D1,
ORC_VAR_D1);
orc_program_append_2 (p, "loadupdb", 0, ORC_VAR_T7, ORC_VAR_S3,
ORC_VAR_D1, ORC_VAR_D1);
orc_program_append_2 (p, "subb", 0, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_C1,
ORC_VAR_D1);
orc_program_append_2 (p, "splatbw", 0, ORC_VAR_T3, ORC_VAR_T7, ORC_VAR_D1,
ORC_VAR_D1);
orc_program_append_2 (p, "subb", 0, ORC_VAR_T10, ORC_VAR_S4, ORC_VAR_C1,
ORC_VAR_D1);
orc_program_append_2 (p, "mulhsw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P1,
ORC_VAR_D1);
orc_program_append_2 (p, "mulhsw", 0, ORC_VAR_T4, ORC_VAR_T3, ORC_VAR_P2,
ORC_VAR_D1);
orc_program_append_2 (p, "addw", 0, ORC_VAR_T4, ORC_VAR_T1, ORC_VAR_T4,
ORC_VAR_D1);
orc_program_append_2 (p, "convssswb", 0, ORC_VAR_T7, ORC_VAR_T4,
ORC_VAR_D1, ORC_VAR_D1);
orc_program_append_2 (p, "mergebw", 0, ORC_VAR_T4, ORC_VAR_T7,
ORC_VAR_T10, ORC_VAR_D1);
orc_program_append_2 (p, "mulhsw", 0, ORC_VAR_T6, ORC_VAR_T2, ORC_VAR_P3,
ORC_VAR_D1);
orc_program_append_2 (p, "addw", 0, ORC_VAR_T6, ORC_VAR_T1, ORC_VAR_T6,
ORC_VAR_D1);
orc_program_append_2 (p, "convssswb", 0, ORC_VAR_T9, ORC_VAR_T6,
ORC_VAR_D1, ORC_VAR_D1);
orc_program_append_2 (p, "mulhsw", 0, ORC_VAR_T5, ORC_VAR_T2, ORC_VAR_P4,
ORC_VAR_D1);
orc_program_append_2 (p, "addw", 0, ORC_VAR_T5, ORC_VAR_T1, ORC_VAR_T5,
ORC_VAR_D1);
orc_program_append_2 (p, "mulhsw", 0, ORC_VAR_T1, ORC_VAR_T3, ORC_VAR_P5,
ORC_VAR_D1);
orc_program_append_2 (p, "addw", 0, ORC_VAR_T5, ORC_VAR_T5, ORC_VAR_T1,
ORC_VAR_D1);
orc_program_append_2 (p, "convssswb", 0, ORC_VAR_T8, ORC_VAR_T5,
ORC_VAR_D1, ORC_VAR_D1);
orc_program_append_2 (p, "mergebw", 0, ORC_VAR_T6, ORC_VAR_T9, ORC_VAR_T8,
ORC_VAR_D1);
orc_program_append_2 (p, "mergewl", 0, ORC_VAR_T11, ORC_VAR_T6,
ORC_VAR_T4, ORC_VAR_D1);
orc_program_append_2 (p, "addb", 2, ORC_VAR_D1, ORC_VAR_T11, ORC_VAR_C1,
ORC_VAR_D1);
#endif
orc_program_compile (p);
/* Keep only the compiled code; the program object itself is released. */
c = orc_program_take_code (p);
orc_program_free (p);
}
p_inited = TRUE;
orc_once_mutex_unlock ();
}
/* Per-call executor setup, then run the cached code. */
ex->arrays[ORC_VAR_A2] = c;
ex->program = 0;
ex->n = n;
ex->arrays[ORC_VAR_D1] = d1;
ex->arrays[ORC_VAR_S1] = (void *) s1;
ex->arrays[ORC_VAR_S2] = (void *) s2;
ex->arrays[ORC_VAR_S3] = (void *) s3;
ex->arrays[ORC_VAR_S4] = (void *) s4;
ex->params[ORC_VAR_P1] = p1;
ex->params[ORC_VAR_P2] = p2;
ex->params[ORC_VAR_P3] = p3;
ex->params[ORC_VAR_P4] = p4;
ex->params[ORC_VAR_P5] = p5;
func = c->exec;
func (ex);
}
#endif
/* video_orc_matrix8 */
#ifdef DISABLE_ORC
void

View file

@ -1,8 +1,7 @@
/* autogenerated from video-orc.orc */
#ifndef _VIDEO_ORC_H_
#define _VIDEO_ORC_H_
#pragma once
#include <glib.h>
@ -189,6 +188,8 @@ void video_orc_convert_AYUV_ABGR (guint8 * ORC_RESTRICT d1, int d1_stride, const
void video_orc_convert_AYUV_RGBA (guint8 * ORC_RESTRICT d1, int d1_stride, const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int p2, int p3, int p4, int p5, int n, int m);
void video_orc_convert_I420_BGRA (guint8 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2, const guint8 * ORC_RESTRICT s3, int p1, int p2, int p3, int p4, int p5, int n);
void video_orc_convert_I420_ARGB (guint8 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2, const guint8 * ORC_RESTRICT s3, int p1, int p2, int p3, int p4, int p5, int n);
/* A420 per-line conversion kernels: s1..s4 are the four input lines, p1..p5 the conversion parameters, n the pixel count; d1 receives 4 bytes per pixel. */
void video_orc_convert_A420_ARGB (guint8 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2, const guint8 * ORC_RESTRICT s3, const guint8 * ORC_RESTRICT s4, int p1, int p2, int p3, int p4, int p5, int n);
void video_orc_convert_A420_BGRA (guint8 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2, const guint8 * ORC_RESTRICT s3, const guint8 * ORC_RESTRICT s4, int p1, int p2, int p3, int p4, int p5, int n);
void video_orc_matrix8 (guint8 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, orc_int64 p1, orc_int64 p2, orc_int64 p3, orc_int64 p4, int n);
void _custom_video_orc_matrix8 (guint8 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, orc_int64 p1, orc_int64 p2, orc_int64 p3, orc_int64 p4, int n);
void video_orc_resample_h_near_u32_lq (guint32 * ORC_RESTRICT d1, const guint32 * ORC_RESTRICT s1, int p1, int p2, int n);
@ -242,5 +243,3 @@ void video_orc_convert_UYVY_GRAY8 (guint8 * ORC_RESTRICT d1, int d1_stride, cons
}
#endif
#endif

View file

@ -1876,6 +1876,118 @@ mergebw wb, g, b
mergewl x, wr, wb
x4 addb argb, x, c4128
# Converts one line of planar y/u/v/a samples into 4-byte pixels.
# u and v are read with loadupdb; y and a are read directly.
# Every sample is biased by -128 before the arithmetic and the packed result
# is biased back by +128 in the final x4 addb.
# Lane order produced by the merges below: a, r, g, b.
.function video_orc_convert_A420_ARGB
.dest 4 argb guint8
.source 1 y guint8
.source 1 u guint8
.source 1 v guint8
.source 1 a guint8
.param 2 p1
.param 2 p2
.param 2 p3
.param 2 p4
.param 2 p5
.temp 2 wy
.temp 2 wu
.temp 2 wv
.temp 2 wr
.temp 2 wg
.temp 2 wb
.temp 1 r
.temp 1 g
.temp 1 b
.temp 1 da
.temp 4 x
.const 1 c128 128
# r is reused as a scratch byte while widening y, u and v to 16 bit
subb r, y, c128
splatbw wy, r
loadupdb r, u
subb r, r, c128
splatbw wu, r
loadupdb r, v
subb r, r, c128
splatbw wv, r
subb da, a, c128
mulhsw wy, wy, p1
# r = clamp (wy + wv * p2), merged with alpha into wr
mulhsw wr, wv, p2
addw wr, wy, wr
convssswb r, wr
mergebw wr, da, r
# b = clamp (wy + wu * p3)
mulhsw wb, wu, p3
addw wb, wy, wb
convssswb b, wb
# g = clamp (wy + wu * p4 + wv * p5); wy is overwritten here as scratch
mulhsw wg, wu, p4
addw wg, wy, wg
mulhsw wy, wv, p5
addw wg, wg, wy
convssswb g, wg
mergebw wb, g, b
mergewl x, wr, wb
x4 addb argb, x, c128
# Converts one line of planar y/u/v/a samples into 4-byte pixels.
# u and v are read with loadupdb; y and a are read directly.
# Every sample is biased by -128 before the arithmetic and the packed result
# is biased back by +128 in the final x4 addb.
# Lane order produced by the merges below: b, g, r, a.
.function video_orc_convert_A420_BGRA
.dest 4 argb guint8
.source 1 y guint8
.source 1 u guint8
.source 1 v guint8
.source 1 a guint8
.param 2 p1
.param 2 p2
.param 2 p3
.param 2 p4
.param 2 p5
.temp 2 wy
.temp 2 wu
.temp 2 wv
.temp 2 wr
.temp 2 wg
.temp 2 wb
.temp 1 r
.temp 1 g
.temp 1 b
.temp 1 da
.temp 4 x
.const 1 c128 128
# r is reused as a scratch byte while widening y, u and v to 16 bit
subb r, y, c128
splatbw wy, r
loadupdb r, u
subb r, r, c128
splatbw wu, r
loadupdb r, v
subb r, r, c128
splatbw wv, r
subb da, a, c128
mulhsw wy, wy, p1
# r = clamp (wy + wv * p2), merged with alpha into wr
mulhsw wr, wv, p2
addw wr, wy, wr
convssswb r, wr
mergebw wr, r, da
# b = clamp (wy + wu * p3)
mulhsw wb, wu, p3
addw wb, wy, wb
convssswb b, wb
# g = clamp (wy + wu * p4 + wv * p5); wy is overwritten here as scratch
mulhsw wg, wu, p4
addw wg, wy, wg
mulhsw wy, wv, p5
addw wg, wg, wy
convssswb g, wg
mergebw wb, b, g
mergewl x, wb, wr
x4 addb argb, x, c128
.function video_orc_matrix8
.backup _custom_video_orc_matrix8
.source 4 argb guint8