mirror of
https://gitlab.freedesktop.org/gstreamer/gstreamer.git
synced 2025-02-02 12:32:29 +00:00
video-converter: Add support for A420 to RGB fast path
Add fast path for A420 -> RGB format conversion.
Part-of: <https://gitlab.freedesktop.org/gstreamer/gst-plugins-base/-/merge_requests/1245>
This commit is contained in:
parent
f5a79ce05f
commit
c654f86859
4 changed files with 1228 additions and 4 deletions
|
@ -5918,6 +5918,175 @@ convert_I420_pack_ARGB (GstVideoConverter * convert, const GstVideoFrame * src,
|
|||
convert_fill_border (convert, dest);
|
||||
}
|
||||
|
||||
static void
|
||||
convert_A420_pack_ARGB_task (FConvertTask * task)
|
||||
{
|
||||
gint i;
|
||||
gpointer d[GST_VIDEO_MAX_PLANES];
|
||||
|
||||
d[0] = FRAME_GET_LINE (task->dest, 0);
|
||||
d[0] =
|
||||
(guint8 *) d[0] +
|
||||
task->out_x * GST_VIDEO_FORMAT_INFO_PSTRIDE (task->dest->info.finfo, 0);
|
||||
|
||||
for (i = task->height_0; i < task->height_1; i++) {
|
||||
guint8 *sy, *su, *sv, *sa;
|
||||
|
||||
sy = FRAME_GET_Y_LINE (task->src, i + task->in_y);
|
||||
sy += task->in_x;
|
||||
su = FRAME_GET_U_LINE (task->src, (i + task->in_y) >> 1);
|
||||
su += (task->in_x >> 1);
|
||||
sv = FRAME_GET_V_LINE (task->src, (i + task->in_y) >> 1);
|
||||
sv += (task->in_x >> 1);
|
||||
sa = FRAME_GET_A_LINE (task->src, i + task->in_y);
|
||||
sa += task->in_x;
|
||||
|
||||
#if G_BYTE_ORDER == G_LITTLE_ENDIAN
|
||||
video_orc_convert_A420_ARGB (task->tmpline, sy, su, sv, sa,
|
||||
task->data->im[0][0], task->data->im[0][2],
|
||||
task->data->im[2][1], task->data->im[1][1], task->data->im[1][2],
|
||||
task->width);
|
||||
#else
|
||||
video_orc_convert_A420_BGRA (task->tmpline, sy, su, sv, sa,
|
||||
task->data->im[0][0], task->data->im[0][2],
|
||||
task->data->im[2][1], task->data->im[1][1], task->data->im[1][2],
|
||||
task->width);
|
||||
#endif
|
||||
|
||||
task->dest->info.finfo->pack_func (task->dest->info.finfo,
|
||||
(GST_VIDEO_FRAME_IS_INTERLACED (task->dest) ?
|
||||
GST_VIDEO_PACK_FLAG_INTERLACED :
|
||||
GST_VIDEO_PACK_FLAG_NONE),
|
||||
task->tmpline, 0, d, task->dest->info.stride,
|
||||
task->dest->info.chroma_site, i + task->out_y, task->width);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
convert_A420_pack_ARGB (GstVideoConverter * convert, const GstVideoFrame * src,
|
||||
GstVideoFrame * dest)
|
||||
{
|
||||
int i;
|
||||
gint width = convert->in_width;
|
||||
gint height = convert->in_height;
|
||||
MatrixData *data = &convert->convert_matrix;
|
||||
FConvertTask *tasks;
|
||||
FConvertTask **tasks_p;
|
||||
gint n_threads;
|
||||
gint lines_per_thread;
|
||||
|
||||
n_threads = convert->conversion_runner->n_threads;
|
||||
tasks = convert->tasks[0] =
|
||||
g_renew (FConvertTask, convert->tasks[0], n_threads);
|
||||
tasks_p = convert->tasks_p[0] =
|
||||
g_renew (FConvertTask *, convert->tasks_p[0], n_threads);
|
||||
|
||||
lines_per_thread = (height + n_threads - 1) / n_threads;
|
||||
|
||||
for (i = 0; i < n_threads; i++) {
|
||||
tasks[i].src = src;
|
||||
tasks[i].dest = dest;
|
||||
|
||||
tasks[i].width = width;
|
||||
tasks[i].data = data;
|
||||
tasks[i].in_x = convert->in_x;
|
||||
tasks[i].in_y = convert->in_y;
|
||||
tasks[i].out_x = convert->out_x;
|
||||
tasks[i].out_y = convert->out_y;
|
||||
tasks[i].tmpline = convert->tmpline[i];
|
||||
|
||||
tasks[i].height_0 = i * lines_per_thread;
|
||||
tasks[i].height_1 = tasks[i].height_0 + lines_per_thread;
|
||||
tasks[i].height_1 = MIN (height, tasks[i].height_1);
|
||||
|
||||
tasks_p[i] = &tasks[i];
|
||||
}
|
||||
|
||||
gst_parallelized_task_runner_run (convert->conversion_runner,
|
||||
(GstParallelizedTaskFunc) convert_A420_pack_ARGB_task,
|
||||
(gpointer) tasks_p);
|
||||
|
||||
convert_fill_border (convert, dest);
|
||||
}
|
||||
|
||||
static void
|
||||
convert_A420_BGRA_task (FConvertTask * task)
|
||||
{
|
||||
gint i;
|
||||
|
||||
for (i = task->height_0; i < task->height_1; i++) {
|
||||
guint8 *sy, *su, *sv, *sa, *d;
|
||||
|
||||
d = FRAME_GET_LINE (task->dest, i + task->out_y);
|
||||
d += (task->out_x * 4);
|
||||
sy = FRAME_GET_Y_LINE (task->src, i + task->in_y);
|
||||
sy += task->in_x;
|
||||
su = FRAME_GET_U_LINE (task->src, (i + task->in_y) >> 1);
|
||||
su += (task->in_x >> 1);
|
||||
sv = FRAME_GET_V_LINE (task->src, (i + task->in_y) >> 1);
|
||||
sv += (task->in_x >> 1);
|
||||
sa = FRAME_GET_A_LINE (task->src, i + task->in_y);
|
||||
sa += task->in_x;
|
||||
|
||||
#if G_BYTE_ORDER == G_LITTLE_ENDIAN
|
||||
video_orc_convert_A420_BGRA (d, sy, su, sv, sa,
|
||||
task->data->im[0][0], task->data->im[0][2],
|
||||
task->data->im[2][1], task->data->im[1][1], task->data->im[1][2],
|
||||
task->width);
|
||||
#else
|
||||
video_orc_convert_A420_ARGB (d, sy, su, sv, sa,
|
||||
task->data->im[0][0], task->data->im[0][2],
|
||||
task->data->im[2][1], task->data->im[1][1], task->data->im[1][2],
|
||||
task->width);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
convert_A420_BGRA (GstVideoConverter * convert, const GstVideoFrame * src,
|
||||
GstVideoFrame * dest)
|
||||
{
|
||||
int i;
|
||||
gint width = convert->in_width;
|
||||
gint height = convert->in_height;
|
||||
MatrixData *data = &convert->convert_matrix;
|
||||
FConvertTask *tasks;
|
||||
FConvertTask **tasks_p;
|
||||
gint n_threads;
|
||||
gint lines_per_thread;
|
||||
|
||||
n_threads = convert->conversion_runner->n_threads;
|
||||
tasks = convert->tasks[0] =
|
||||
g_renew (FConvertTask, convert->tasks[0], n_threads);
|
||||
tasks_p = convert->tasks_p[0] =
|
||||
g_renew (FConvertTask *, convert->tasks_p[0], n_threads);
|
||||
|
||||
lines_per_thread = (height + n_threads - 1) / n_threads;
|
||||
|
||||
for (i = 0; i < n_threads; i++) {
|
||||
tasks[i].src = src;
|
||||
tasks[i].dest = dest;
|
||||
|
||||
tasks[i].width = width;
|
||||
tasks[i].data = data;
|
||||
tasks[i].in_x = convert->in_x;
|
||||
tasks[i].in_y = convert->in_y;
|
||||
tasks[i].out_x = convert->out_x;
|
||||
tasks[i].out_y = convert->out_y;
|
||||
|
||||
tasks[i].height_0 = i * lines_per_thread;
|
||||
tasks[i].height_1 = tasks[i].height_0 + lines_per_thread;
|
||||
tasks[i].height_1 = MIN (height, tasks[i].height_1);
|
||||
|
||||
tasks_p[i] = &tasks[i];
|
||||
}
|
||||
|
||||
gst_parallelized_task_runner_run (convert->conversion_runner,
|
||||
(GstParallelizedTaskFunc) convert_A420_BGRA_task, (gpointer) tasks_p);
|
||||
|
||||
convert_fill_border (convert, dest);
|
||||
}
|
||||
|
||||
static void
|
||||
memset_u24 (guint8 * data, guint8 col[3], unsigned int n)
|
||||
{
|
||||
|
@ -7372,6 +7541,28 @@ static const VideoTransform transforms[] = {
|
|||
{GST_VIDEO_FORMAT_YV12, GST_VIDEO_FORMAT_BGR16, FALSE, TRUE, TRUE, TRUE,
|
||||
TRUE, FALSE, FALSE, FALSE, 0, 0, convert_I420_pack_ARGB},
|
||||
|
||||
{GST_VIDEO_FORMAT_A420, GST_VIDEO_FORMAT_ABGR, FALSE, TRUE, TRUE, TRUE,
|
||||
TRUE, TRUE, FALSE, FALSE, 0, 0, convert_A420_pack_ARGB},
|
||||
{GST_VIDEO_FORMAT_A420, GST_VIDEO_FORMAT_RGBA, FALSE, TRUE, TRUE, TRUE,
|
||||
TRUE, TRUE, FALSE, FALSE, 0, 0, convert_A420_pack_ARGB},
|
||||
{GST_VIDEO_FORMAT_A420, GST_VIDEO_FORMAT_BGRA, FALSE, TRUE, TRUE, TRUE,
|
||||
TRUE, TRUE, FALSE, FALSE, 0, 0, convert_A420_BGRA},
|
||||
/* A420 to non-alpha RGB formats, reuse I420_* method */
|
||||
{GST_VIDEO_FORMAT_A420, GST_VIDEO_FORMAT_BGRx, FALSE, TRUE, TRUE, TRUE,
|
||||
TRUE, FALSE, FALSE, FALSE, 0, 0, convert_I420_BGRA},
|
||||
{GST_VIDEO_FORMAT_A420, GST_VIDEO_FORMAT_xBGR, FALSE, TRUE, TRUE, TRUE,
|
||||
TRUE, FALSE, FALSE, FALSE, 0, 0, convert_I420_pack_ARGB},
|
||||
{GST_VIDEO_FORMAT_A420, GST_VIDEO_FORMAT_RGBx, FALSE, TRUE, TRUE, TRUE,
|
||||
TRUE, FALSE, FALSE, FALSE, 0, 0, convert_I420_pack_ARGB},
|
||||
{GST_VIDEO_FORMAT_A420, GST_VIDEO_FORMAT_RGB, FALSE, TRUE, TRUE, TRUE,
|
||||
TRUE, FALSE, FALSE, FALSE, 0, 0, convert_I420_pack_ARGB},
|
||||
{GST_VIDEO_FORMAT_A420, GST_VIDEO_FORMAT_BGR, FALSE, TRUE, TRUE, TRUE,
|
||||
TRUE, FALSE, FALSE, FALSE, 0, 0, convert_I420_pack_ARGB},
|
||||
{GST_VIDEO_FORMAT_A420, GST_VIDEO_FORMAT_RGB15, FALSE, TRUE, TRUE, TRUE,
|
||||
TRUE, FALSE, FALSE, FALSE, 0, 0, convert_I420_pack_ARGB},
|
||||
{GST_VIDEO_FORMAT_A420, GST_VIDEO_FORMAT_BGR16, FALSE, TRUE, TRUE, TRUE,
|
||||
TRUE, FALSE, FALSE, FALSE, 0, 0, convert_I420_pack_ARGB},
|
||||
|
||||
/* scalers */
|
||||
{GST_VIDEO_FORMAT_GBR, GST_VIDEO_FORMAT_GBR, TRUE, FALSE, FALSE, TRUE,
|
||||
TRUE, FALSE, FALSE, FALSE, 0, 0, convert_scale_planes},
|
||||
|
|
|
@ -360,6 +360,14 @@ void video_orc_convert_I420_ARGB (guint8 * ORC_RESTRICT d1,
|
|||
const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2,
|
||||
const guint8 * ORC_RESTRICT s3, int p1, int p2, int p3, int p4, int p5,
|
||||
int n);
|
||||
void video_orc_convert_A420_ARGB (guint8 * ORC_RESTRICT d1,
|
||||
const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2,
|
||||
const guint8 * ORC_RESTRICT s3, const guint8 * ORC_RESTRICT s4, int p1,
|
||||
int p2, int p3, int p4, int p5, int n);
|
||||
void video_orc_convert_A420_BGRA (guint8 * ORC_RESTRICT d1,
|
||||
const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2,
|
||||
const guint8 * ORC_RESTRICT s3, const guint8 * ORC_RESTRICT s4, int p1,
|
||||
int p2, int p3, int p4, int p5, int n);
|
||||
void video_orc_matrix8 (guint8 * ORC_RESTRICT d1,
|
||||
const guint8 * ORC_RESTRICT s1, orc_int64 p1, orc_int64 p2, orc_int64 p3,
|
||||
orc_int64 p4, int n);
|
||||
|
@ -24295,6 +24303,920 @@ video_orc_convert_I420_ARGB (guint8 * ORC_RESTRICT d1,
|
|||
#endif
|
||||
|
||||
|
||||
/* video_orc_convert_A420_ARGB */
|
||||
#ifdef DISABLE_ORC
|
||||
void
|
||||
video_orc_convert_A420_ARGB (guint8 * ORC_RESTRICT d1,
|
||||
const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2,
|
||||
const guint8 * ORC_RESTRICT s3, const guint8 * ORC_RESTRICT s4, int p1,
|
||||
int p2, int p3, int p4, int p5, int n)
|
||||
{
|
||||
int i;
|
||||
orc_union32 *ORC_RESTRICT ptr0;
|
||||
const orc_int8 *ORC_RESTRICT ptr4;
|
||||
const orc_int8 *ORC_RESTRICT ptr5;
|
||||
const orc_int8 *ORC_RESTRICT ptr6;
|
||||
const orc_int8 *ORC_RESTRICT ptr7;
|
||||
orc_int8 var43;
|
||||
#if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
|
||||
volatile orc_int8 var44;
|
||||
#else
|
||||
orc_int8 var44;
|
||||
#endif
|
||||
orc_int8 var45;
|
||||
orc_union16 var46;
|
||||
orc_union16 var47;
|
||||
orc_union16 var48;
|
||||
orc_union16 var49;
|
||||
orc_union16 var50;
|
||||
#if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
|
||||
volatile orc_union32 var51;
|
||||
#else
|
||||
orc_union32 var51;
|
||||
#endif
|
||||
orc_union32 var52;
|
||||
orc_int8 var53;
|
||||
orc_union16 var54;
|
||||
orc_int8 var55;
|
||||
orc_int8 var56;
|
||||
orc_union16 var57;
|
||||
orc_int8 var58;
|
||||
orc_int8 var59;
|
||||
orc_union16 var60;
|
||||
orc_int8 var61;
|
||||
orc_union16 var62;
|
||||
orc_union16 var63;
|
||||
orc_union16 var64;
|
||||
orc_int8 var65;
|
||||
orc_union16 var66;
|
||||
orc_union16 var67;
|
||||
orc_union16 var68;
|
||||
orc_int8 var69;
|
||||
orc_union16 var70;
|
||||
orc_union16 var71;
|
||||
orc_union16 var72;
|
||||
orc_union16 var73;
|
||||
orc_int8 var74;
|
||||
orc_union16 var75;
|
||||
orc_union32 var76;
|
||||
|
||||
ptr0 = (orc_union32 *) d1;
|
||||
ptr4 = (orc_int8 *) s1;
|
||||
ptr5 = (orc_int8 *) s2;
|
||||
ptr6 = (orc_int8 *) s3;
|
||||
ptr7 = (orc_int8 *) s4;
|
||||
|
||||
/* 1: loadpb */
|
||||
var44 = 0x00000080; /* 128 or 6.32404e-322f */
|
||||
/* 12: loadpw */
|
||||
var46.i = p1;
|
||||
/* 14: loadpw */
|
||||
var47.i = p2;
|
||||
/* 19: loadpw */
|
||||
var48.i = p3;
|
||||
/* 23: loadpw */
|
||||
var49.i = p4;
|
||||
/* 26: loadpw */
|
||||
var50.i = p5;
|
||||
/* 32: loadpb */
|
||||
var51.x4[0] = 0x00000080; /* 128 or 6.32404e-322f */
|
||||
var51.x4[1] = 0x00000080; /* 128 or 6.32404e-322f */
|
||||
var51.x4[2] = 0x00000080; /* 128 or 6.32404e-322f */
|
||||
var51.x4[3] = 0x00000080; /* 128 or 6.32404e-322f */
|
||||
|
||||
for (i = 0; i < n; i++) {
|
||||
/* 0: loadb */
|
||||
var43 = ptr4[i];
|
||||
/* 2: subb */
|
||||
var53 = var43 - var44;
|
||||
/* 3: splatbw */
|
||||
var54.i = ((var53 & 0xff) << 8) | (var53 & 0xff);
|
||||
/* 4: loadupdb */
|
||||
var55 = ptr5[i >> 1];
|
||||
/* 5: subb */
|
||||
var56 = var55 - var44;
|
||||
/* 6: splatbw */
|
||||
var57.i = ((var56 & 0xff) << 8) | (var56 & 0xff);
|
||||
/* 7: loadupdb */
|
||||
var58 = ptr6[i >> 1];
|
||||
/* 8: subb */
|
||||
var59 = var58 - var44;
|
||||
/* 9: splatbw */
|
||||
var60.i = ((var59 & 0xff) << 8) | (var59 & 0xff);
|
||||
/* 10: loadb */
|
||||
var45 = ptr7[i];
|
||||
/* 11: subb */
|
||||
var61 = var45 - var44;
|
||||
/* 13: mulhsw */
|
||||
var62.i = (var54.i * var46.i) >> 16;
|
||||
/* 15: mulhsw */
|
||||
var63.i = (var60.i * var47.i) >> 16;
|
||||
/* 16: addw */
|
||||
var64.i = var62.i + var63.i;
|
||||
/* 17: convssswb */
|
||||
var65 = ORC_CLAMP_SB (var64.i);
|
||||
/* 18: mergebw */
|
||||
{
|
||||
orc_union16 _dest;
|
||||
_dest.x2[0] = var61;
|
||||
_dest.x2[1] = var65;
|
||||
var66.i = _dest.i;
|
||||
}
|
||||
/* 20: mulhsw */
|
||||
var67.i = (var57.i * var48.i) >> 16;
|
||||
/* 21: addw */
|
||||
var68.i = var62.i + var67.i;
|
||||
/* 22: convssswb */
|
||||
var69 = ORC_CLAMP_SB (var68.i);
|
||||
/* 24: mulhsw */
|
||||
var70.i = (var57.i * var49.i) >> 16;
|
||||
/* 25: addw */
|
||||
var71.i = var62.i + var70.i;
|
||||
/* 27: mulhsw */
|
||||
var72.i = (var60.i * var50.i) >> 16;
|
||||
/* 28: addw */
|
||||
var73.i = var71.i + var72.i;
|
||||
/* 29: convssswb */
|
||||
var74 = ORC_CLAMP_SB (var73.i);
|
||||
/* 30: mergebw */
|
||||
{
|
||||
orc_union16 _dest;
|
||||
_dest.x2[0] = var74;
|
||||
_dest.x2[1] = var69;
|
||||
var75.i = _dest.i;
|
||||
}
|
||||
/* 31: mergewl */
|
||||
{
|
||||
orc_union32 _dest;
|
||||
_dest.x2[0] = var66.i;
|
||||
_dest.x2[1] = var75.i;
|
||||
var76.i = _dest.i;
|
||||
}
|
||||
/* 33: addb */
|
||||
var52.x4[0] = var76.x4[0] + var51.x4[0];
|
||||
var52.x4[1] = var76.x4[1] + var51.x4[1];
|
||||
var52.x4[2] = var76.x4[2] + var51.x4[2];
|
||||
var52.x4[3] = var76.x4[3] + var51.x4[3];
|
||||
/* 34: storel */
|
||||
ptr0[i] = var52;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#else
|
||||
static void
|
||||
_backup_video_orc_convert_A420_ARGB (OrcExecutor * ORC_RESTRICT ex)
|
||||
{
|
||||
int i;
|
||||
int n = ex->n;
|
||||
orc_union32 *ORC_RESTRICT ptr0;
|
||||
const orc_int8 *ORC_RESTRICT ptr4;
|
||||
const orc_int8 *ORC_RESTRICT ptr5;
|
||||
const orc_int8 *ORC_RESTRICT ptr6;
|
||||
const orc_int8 *ORC_RESTRICT ptr7;
|
||||
orc_int8 var43;
|
||||
#if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
|
||||
volatile orc_int8 var44;
|
||||
#else
|
||||
orc_int8 var44;
|
||||
#endif
|
||||
orc_int8 var45;
|
||||
orc_union16 var46;
|
||||
orc_union16 var47;
|
||||
orc_union16 var48;
|
||||
orc_union16 var49;
|
||||
orc_union16 var50;
|
||||
#if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
|
||||
volatile orc_union32 var51;
|
||||
#else
|
||||
orc_union32 var51;
|
||||
#endif
|
||||
orc_union32 var52;
|
||||
orc_int8 var53;
|
||||
orc_union16 var54;
|
||||
orc_int8 var55;
|
||||
orc_int8 var56;
|
||||
orc_union16 var57;
|
||||
orc_int8 var58;
|
||||
orc_int8 var59;
|
||||
orc_union16 var60;
|
||||
orc_int8 var61;
|
||||
orc_union16 var62;
|
||||
orc_union16 var63;
|
||||
orc_union16 var64;
|
||||
orc_int8 var65;
|
||||
orc_union16 var66;
|
||||
orc_union16 var67;
|
||||
orc_union16 var68;
|
||||
orc_int8 var69;
|
||||
orc_union16 var70;
|
||||
orc_union16 var71;
|
||||
orc_union16 var72;
|
||||
orc_union16 var73;
|
||||
orc_int8 var74;
|
||||
orc_union16 var75;
|
||||
orc_union32 var76;
|
||||
|
||||
ptr0 = (orc_union32 *) ex->arrays[0];
|
||||
ptr4 = (orc_int8 *) ex->arrays[4];
|
||||
ptr5 = (orc_int8 *) ex->arrays[5];
|
||||
ptr6 = (orc_int8 *) ex->arrays[6];
|
||||
ptr7 = (orc_int8 *) ex->arrays[7];
|
||||
|
||||
/* 1: loadpb */
|
||||
var44 = 0x00000080; /* 128 or 6.32404e-322f */
|
||||
/* 12: loadpw */
|
||||
var46.i = ex->params[24];
|
||||
/* 14: loadpw */
|
||||
var47.i = ex->params[25];
|
||||
/* 19: loadpw */
|
||||
var48.i = ex->params[26];
|
||||
/* 23: loadpw */
|
||||
var49.i = ex->params[27];
|
||||
/* 26: loadpw */
|
||||
var50.i = ex->params[28];
|
||||
/* 32: loadpb */
|
||||
var51.x4[0] = 0x00000080; /* 128 or 6.32404e-322f */
|
||||
var51.x4[1] = 0x00000080; /* 128 or 6.32404e-322f */
|
||||
var51.x4[2] = 0x00000080; /* 128 or 6.32404e-322f */
|
||||
var51.x4[3] = 0x00000080; /* 128 or 6.32404e-322f */
|
||||
|
||||
for (i = 0; i < n; i++) {
|
||||
/* 0: loadb */
|
||||
var43 = ptr4[i];
|
||||
/* 2: subb */
|
||||
var53 = var43 - var44;
|
||||
/* 3: splatbw */
|
||||
var54.i = ((var53 & 0xff) << 8) | (var53 & 0xff);
|
||||
/* 4: loadupdb */
|
||||
var55 = ptr5[i >> 1];
|
||||
/* 5: subb */
|
||||
var56 = var55 - var44;
|
||||
/* 6: splatbw */
|
||||
var57.i = ((var56 & 0xff) << 8) | (var56 & 0xff);
|
||||
/* 7: loadupdb */
|
||||
var58 = ptr6[i >> 1];
|
||||
/* 8: subb */
|
||||
var59 = var58 - var44;
|
||||
/* 9: splatbw */
|
||||
var60.i = ((var59 & 0xff) << 8) | (var59 & 0xff);
|
||||
/* 10: loadb */
|
||||
var45 = ptr7[i];
|
||||
/* 11: subb */
|
||||
var61 = var45 - var44;
|
||||
/* 13: mulhsw */
|
||||
var62.i = (var54.i * var46.i) >> 16;
|
||||
/* 15: mulhsw */
|
||||
var63.i = (var60.i * var47.i) >> 16;
|
||||
/* 16: addw */
|
||||
var64.i = var62.i + var63.i;
|
||||
/* 17: convssswb */
|
||||
var65 = ORC_CLAMP_SB (var64.i);
|
||||
/* 18: mergebw */
|
||||
{
|
||||
orc_union16 _dest;
|
||||
_dest.x2[0] = var61;
|
||||
_dest.x2[1] = var65;
|
||||
var66.i = _dest.i;
|
||||
}
|
||||
/* 20: mulhsw */
|
||||
var67.i = (var57.i * var48.i) >> 16;
|
||||
/* 21: addw */
|
||||
var68.i = var62.i + var67.i;
|
||||
/* 22: convssswb */
|
||||
var69 = ORC_CLAMP_SB (var68.i);
|
||||
/* 24: mulhsw */
|
||||
var70.i = (var57.i * var49.i) >> 16;
|
||||
/* 25: addw */
|
||||
var71.i = var62.i + var70.i;
|
||||
/* 27: mulhsw */
|
||||
var72.i = (var60.i * var50.i) >> 16;
|
||||
/* 28: addw */
|
||||
var73.i = var71.i + var72.i;
|
||||
/* 29: convssswb */
|
||||
var74 = ORC_CLAMP_SB (var73.i);
|
||||
/* 30: mergebw */
|
||||
{
|
||||
orc_union16 _dest;
|
||||
_dest.x2[0] = var74;
|
||||
_dest.x2[1] = var69;
|
||||
var75.i = _dest.i;
|
||||
}
|
||||
/* 31: mergewl */
|
||||
{
|
||||
orc_union32 _dest;
|
||||
_dest.x2[0] = var66.i;
|
||||
_dest.x2[1] = var75.i;
|
||||
var76.i = _dest.i;
|
||||
}
|
||||
/* 33: addb */
|
||||
var52.x4[0] = var76.x4[0] + var51.x4[0];
|
||||
var52.x4[1] = var76.x4[1] + var51.x4[1];
|
||||
var52.x4[2] = var76.x4[2] + var51.x4[2];
|
||||
var52.x4[3] = var76.x4[3] + var51.x4[3];
|
||||
/* 34: storel */
|
||||
ptr0[i] = var52;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void
|
||||
video_orc_convert_A420_ARGB (guint8 * ORC_RESTRICT d1,
|
||||
const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2,
|
||||
const guint8 * ORC_RESTRICT s3, const guint8 * ORC_RESTRICT s4, int p1,
|
||||
int p2, int p3, int p4, int p5, int n)
|
||||
{
|
||||
OrcExecutor _ex, *ex = &_ex;
|
||||
static volatile int p_inited = 0;
|
||||
static OrcCode *c = 0;
|
||||
void (*func) (OrcExecutor *);
|
||||
|
||||
if (!p_inited) {
|
||||
orc_once_mutex_lock ();
|
||||
if (!p_inited) {
|
||||
OrcProgram *p;
|
||||
|
||||
#if 1
|
||||
static const orc_uint8 bc[] = {
|
||||
1, 9, 27, 118, 105, 100, 101, 111, 95, 111, 114, 99, 95, 99, 111, 110,
|
||||
118, 101, 114, 116, 95, 65, 52, 50, 48, 95, 65, 82, 71, 66, 11, 4,
|
||||
4, 12, 1, 1, 12, 1, 1, 12, 1, 1, 12, 1, 1, 14, 1, 128,
|
||||
0, 0, 0, 16, 2, 16, 2, 16, 2, 16, 2, 16, 2, 20, 2, 20,
|
||||
2, 20, 2, 20, 2, 20, 2, 20, 2, 20, 1, 20, 1, 20, 1, 20,
|
||||
1, 20, 4, 65, 38, 4, 16, 151, 32, 38, 45, 38, 5, 65, 38, 38,
|
||||
16, 151, 33, 38, 45, 38, 6, 65, 38, 38, 16, 151, 34, 38, 65, 41,
|
||||
7, 16, 90, 32, 32, 24, 90, 35, 34, 25, 70, 35, 32, 35, 159, 38,
|
||||
35, 196, 35, 41, 38, 90, 37, 33, 26, 70, 37, 32, 37, 159, 40, 37,
|
||||
90, 36, 33, 27, 70, 36, 32, 36, 90, 32, 34, 28, 70, 36, 36, 32,
|
||||
159, 39, 36, 196, 37, 39, 40, 195, 42, 35, 37, 21, 2, 33, 0, 42,
|
||||
16, 2, 0,
|
||||
};
|
||||
p = orc_program_new_from_static_bytecode (bc);
|
||||
orc_program_set_backup_function (p, _backup_video_orc_convert_A420_ARGB);
|
||||
#else
|
||||
p = orc_program_new ();
|
||||
orc_program_set_name (p, "video_orc_convert_A420_ARGB");
|
||||
orc_program_set_backup_function (p, _backup_video_orc_convert_A420_ARGB);
|
||||
orc_program_add_destination (p, 4, "d1");
|
||||
orc_program_add_source (p, 1, "s1");
|
||||
orc_program_add_source (p, 1, "s2");
|
||||
orc_program_add_source (p, 1, "s3");
|
||||
orc_program_add_source (p, 1, "s4");
|
||||
orc_program_add_constant (p, 1, 0x00000080, "c1");
|
||||
orc_program_add_parameter (p, 2, "p1");
|
||||
orc_program_add_parameter (p, 2, "p2");
|
||||
orc_program_add_parameter (p, 2, "p3");
|
||||
orc_program_add_parameter (p, 2, "p4");
|
||||
orc_program_add_parameter (p, 2, "p5");
|
||||
orc_program_add_temporary (p, 2, "t1");
|
||||
orc_program_add_temporary (p, 2, "t2");
|
||||
orc_program_add_temporary (p, 2, "t3");
|
||||
orc_program_add_temporary (p, 2, "t4");
|
||||
orc_program_add_temporary (p, 2, "t5");
|
||||
orc_program_add_temporary (p, 2, "t6");
|
||||
orc_program_add_temporary (p, 1, "t7");
|
||||
orc_program_add_temporary (p, 1, "t8");
|
||||
orc_program_add_temporary (p, 1, "t9");
|
||||
orc_program_add_temporary (p, 1, "t10");
|
||||
orc_program_add_temporary (p, 4, "t11");
|
||||
|
||||
orc_program_append_2 (p, "subb", 0, ORC_VAR_T7, ORC_VAR_S1, ORC_VAR_C1,
|
||||
ORC_VAR_D1);
|
||||
orc_program_append_2 (p, "splatbw", 0, ORC_VAR_T1, ORC_VAR_T7, ORC_VAR_D1,
|
||||
ORC_VAR_D1);
|
||||
orc_program_append_2 (p, "loadupdb", 0, ORC_VAR_T7, ORC_VAR_S2,
|
||||
ORC_VAR_D1, ORC_VAR_D1);
|
||||
orc_program_append_2 (p, "subb", 0, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_C1,
|
||||
ORC_VAR_D1);
|
||||
orc_program_append_2 (p, "splatbw", 0, ORC_VAR_T2, ORC_VAR_T7, ORC_VAR_D1,
|
||||
ORC_VAR_D1);
|
||||
orc_program_append_2 (p, "loadupdb", 0, ORC_VAR_T7, ORC_VAR_S3,
|
||||
ORC_VAR_D1, ORC_VAR_D1);
|
||||
orc_program_append_2 (p, "subb", 0, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_C1,
|
||||
ORC_VAR_D1);
|
||||
orc_program_append_2 (p, "splatbw", 0, ORC_VAR_T3, ORC_VAR_T7, ORC_VAR_D1,
|
||||
ORC_VAR_D1);
|
||||
orc_program_append_2 (p, "subb", 0, ORC_VAR_T10, ORC_VAR_S4, ORC_VAR_C1,
|
||||
ORC_VAR_D1);
|
||||
orc_program_append_2 (p, "mulhsw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P1,
|
||||
ORC_VAR_D1);
|
||||
orc_program_append_2 (p, "mulhsw", 0, ORC_VAR_T4, ORC_VAR_T3, ORC_VAR_P2,
|
||||
ORC_VAR_D1);
|
||||
orc_program_append_2 (p, "addw", 0, ORC_VAR_T4, ORC_VAR_T1, ORC_VAR_T4,
|
||||
ORC_VAR_D1);
|
||||
orc_program_append_2 (p, "convssswb", 0, ORC_VAR_T7, ORC_VAR_T4,
|
||||
ORC_VAR_D1, ORC_VAR_D1);
|
||||
orc_program_append_2 (p, "mergebw", 0, ORC_VAR_T4, ORC_VAR_T10,
|
||||
ORC_VAR_T7, ORC_VAR_D1);
|
||||
orc_program_append_2 (p, "mulhsw", 0, ORC_VAR_T6, ORC_VAR_T2, ORC_VAR_P3,
|
||||
ORC_VAR_D1);
|
||||
orc_program_append_2 (p, "addw", 0, ORC_VAR_T6, ORC_VAR_T1, ORC_VAR_T6,
|
||||
ORC_VAR_D1);
|
||||
orc_program_append_2 (p, "convssswb", 0, ORC_VAR_T9, ORC_VAR_T6,
|
||||
ORC_VAR_D1, ORC_VAR_D1);
|
||||
orc_program_append_2 (p, "mulhsw", 0, ORC_VAR_T5, ORC_VAR_T2, ORC_VAR_P4,
|
||||
ORC_VAR_D1);
|
||||
orc_program_append_2 (p, "addw", 0, ORC_VAR_T5, ORC_VAR_T1, ORC_VAR_T5,
|
||||
ORC_VAR_D1);
|
||||
orc_program_append_2 (p, "mulhsw", 0, ORC_VAR_T1, ORC_VAR_T3, ORC_VAR_P5,
|
||||
ORC_VAR_D1);
|
||||
orc_program_append_2 (p, "addw", 0, ORC_VAR_T5, ORC_VAR_T5, ORC_VAR_T1,
|
||||
ORC_VAR_D1);
|
||||
orc_program_append_2 (p, "convssswb", 0, ORC_VAR_T8, ORC_VAR_T5,
|
||||
ORC_VAR_D1, ORC_VAR_D1);
|
||||
orc_program_append_2 (p, "mergebw", 0, ORC_VAR_T6, ORC_VAR_T8, ORC_VAR_T9,
|
||||
ORC_VAR_D1);
|
||||
orc_program_append_2 (p, "mergewl", 0, ORC_VAR_T11, ORC_VAR_T4,
|
||||
ORC_VAR_T6, ORC_VAR_D1);
|
||||
orc_program_append_2 (p, "addb", 2, ORC_VAR_D1, ORC_VAR_T11, ORC_VAR_C1,
|
||||
ORC_VAR_D1);
|
||||
#endif
|
||||
|
||||
orc_program_compile (p);
|
||||
c = orc_program_take_code (p);
|
||||
orc_program_free (p);
|
||||
}
|
||||
p_inited = TRUE;
|
||||
orc_once_mutex_unlock ();
|
||||
}
|
||||
ex->arrays[ORC_VAR_A2] = c;
|
||||
ex->program = 0;
|
||||
|
||||
ex->n = n;
|
||||
ex->arrays[ORC_VAR_D1] = d1;
|
||||
ex->arrays[ORC_VAR_S1] = (void *) s1;
|
||||
ex->arrays[ORC_VAR_S2] = (void *) s2;
|
||||
ex->arrays[ORC_VAR_S3] = (void *) s3;
|
||||
ex->arrays[ORC_VAR_S4] = (void *) s4;
|
||||
ex->params[ORC_VAR_P1] = p1;
|
||||
ex->params[ORC_VAR_P2] = p2;
|
||||
ex->params[ORC_VAR_P3] = p3;
|
||||
ex->params[ORC_VAR_P4] = p4;
|
||||
ex->params[ORC_VAR_P5] = p5;
|
||||
|
||||
func = c->exec;
|
||||
func (ex);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
/* video_orc_convert_A420_BGRA */
|
||||
#ifdef DISABLE_ORC
|
||||
void
|
||||
video_orc_convert_A420_BGRA (guint8 * ORC_RESTRICT d1,
|
||||
const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2,
|
||||
const guint8 * ORC_RESTRICT s3, const guint8 * ORC_RESTRICT s4, int p1,
|
||||
int p2, int p3, int p4, int p5, int n)
|
||||
{
|
||||
int i;
|
||||
orc_union32 *ORC_RESTRICT ptr0;
|
||||
const orc_int8 *ORC_RESTRICT ptr4;
|
||||
const orc_int8 *ORC_RESTRICT ptr5;
|
||||
const orc_int8 *ORC_RESTRICT ptr6;
|
||||
const orc_int8 *ORC_RESTRICT ptr7;
|
||||
orc_int8 var43;
|
||||
#if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
|
||||
volatile orc_int8 var44;
|
||||
#else
|
||||
orc_int8 var44;
|
||||
#endif
|
||||
orc_int8 var45;
|
||||
orc_union16 var46;
|
||||
orc_union16 var47;
|
||||
orc_union16 var48;
|
||||
orc_union16 var49;
|
||||
orc_union16 var50;
|
||||
#if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
|
||||
volatile orc_union32 var51;
|
||||
#else
|
||||
orc_union32 var51;
|
||||
#endif
|
||||
orc_union32 var52;
|
||||
orc_int8 var53;
|
||||
orc_union16 var54;
|
||||
orc_int8 var55;
|
||||
orc_int8 var56;
|
||||
orc_union16 var57;
|
||||
orc_int8 var58;
|
||||
orc_int8 var59;
|
||||
orc_union16 var60;
|
||||
orc_int8 var61;
|
||||
orc_union16 var62;
|
||||
orc_union16 var63;
|
||||
orc_union16 var64;
|
||||
orc_int8 var65;
|
||||
orc_union16 var66;
|
||||
orc_union16 var67;
|
||||
orc_union16 var68;
|
||||
orc_int8 var69;
|
||||
orc_union16 var70;
|
||||
orc_union16 var71;
|
||||
orc_union16 var72;
|
||||
orc_union16 var73;
|
||||
orc_int8 var74;
|
||||
orc_union16 var75;
|
||||
orc_union32 var76;
|
||||
|
||||
ptr0 = (orc_union32 *) d1;
|
||||
ptr4 = (orc_int8 *) s1;
|
||||
ptr5 = (orc_int8 *) s2;
|
||||
ptr6 = (orc_int8 *) s3;
|
||||
ptr7 = (orc_int8 *) s4;
|
||||
|
||||
/* 1: loadpb */
|
||||
var44 = 0x00000080; /* 128 or 6.32404e-322f */
|
||||
/* 12: loadpw */
|
||||
var46.i = p1;
|
||||
/* 14: loadpw */
|
||||
var47.i = p2;
|
||||
/* 19: loadpw */
|
||||
var48.i = p3;
|
||||
/* 23: loadpw */
|
||||
var49.i = p4;
|
||||
/* 26: loadpw */
|
||||
var50.i = p5;
|
||||
/* 32: loadpb */
|
||||
var51.x4[0] = 0x00000080; /* 128 or 6.32404e-322f */
|
||||
var51.x4[1] = 0x00000080; /* 128 or 6.32404e-322f */
|
||||
var51.x4[2] = 0x00000080; /* 128 or 6.32404e-322f */
|
||||
var51.x4[3] = 0x00000080; /* 128 or 6.32404e-322f */
|
||||
|
||||
for (i = 0; i < n; i++) {
|
||||
/* 0: loadb */
|
||||
var43 = ptr4[i];
|
||||
/* 2: subb */
|
||||
var53 = var43 - var44;
|
||||
/* 3: splatbw */
|
||||
var54.i = ((var53 & 0xff) << 8) | (var53 & 0xff);
|
||||
/* 4: loadupdb */
|
||||
var55 = ptr5[i >> 1];
|
||||
/* 5: subb */
|
||||
var56 = var55 - var44;
|
||||
/* 6: splatbw */
|
||||
var57.i = ((var56 & 0xff) << 8) | (var56 & 0xff);
|
||||
/* 7: loadupdb */
|
||||
var58 = ptr6[i >> 1];
|
||||
/* 8: subb */
|
||||
var59 = var58 - var44;
|
||||
/* 9: splatbw */
|
||||
var60.i = ((var59 & 0xff) << 8) | (var59 & 0xff);
|
||||
/* 10: loadb */
|
||||
var45 = ptr7[i];
|
||||
/* 11: subb */
|
||||
var61 = var45 - var44;
|
||||
/* 13: mulhsw */
|
||||
var62.i = (var54.i * var46.i) >> 16;
|
||||
/* 15: mulhsw */
|
||||
var63.i = (var60.i * var47.i) >> 16;
|
||||
/* 16: addw */
|
||||
var64.i = var62.i + var63.i;
|
||||
/* 17: convssswb */
|
||||
var65 = ORC_CLAMP_SB (var64.i);
|
||||
/* 18: mergebw */
|
||||
{
|
||||
orc_union16 _dest;
|
||||
_dest.x2[0] = var65;
|
||||
_dest.x2[1] = var61;
|
||||
var66.i = _dest.i;
|
||||
}
|
||||
/* 20: mulhsw */
|
||||
var67.i = (var57.i * var48.i) >> 16;
|
||||
/* 21: addw */
|
||||
var68.i = var62.i + var67.i;
|
||||
/* 22: convssswb */
|
||||
var69 = ORC_CLAMP_SB (var68.i);
|
||||
/* 24: mulhsw */
|
||||
var70.i = (var57.i * var49.i) >> 16;
|
||||
/* 25: addw */
|
||||
var71.i = var62.i + var70.i;
|
||||
/* 27: mulhsw */
|
||||
var72.i = (var60.i * var50.i) >> 16;
|
||||
/* 28: addw */
|
||||
var73.i = var71.i + var72.i;
|
||||
/* 29: convssswb */
|
||||
var74 = ORC_CLAMP_SB (var73.i);
|
||||
/* 30: mergebw */
|
||||
{
|
||||
orc_union16 _dest;
|
||||
_dest.x2[0] = var69;
|
||||
_dest.x2[1] = var74;
|
||||
var75.i = _dest.i;
|
||||
}
|
||||
/* 31: mergewl */
|
||||
{
|
||||
orc_union32 _dest;
|
||||
_dest.x2[0] = var75.i;
|
||||
_dest.x2[1] = var66.i;
|
||||
var76.i = _dest.i;
|
||||
}
|
||||
/* 33: addb */
|
||||
var52.x4[0] = var76.x4[0] + var51.x4[0];
|
||||
var52.x4[1] = var76.x4[1] + var51.x4[1];
|
||||
var52.x4[2] = var76.x4[2] + var51.x4[2];
|
||||
var52.x4[3] = var76.x4[3] + var51.x4[3];
|
||||
/* 34: storel */
|
||||
ptr0[i] = var52;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#else
|
||||
static void
|
||||
_backup_video_orc_convert_A420_BGRA (OrcExecutor * ORC_RESTRICT ex)
|
||||
{
|
||||
int i;
|
||||
int n = ex->n;
|
||||
orc_union32 *ORC_RESTRICT ptr0;
|
||||
const orc_int8 *ORC_RESTRICT ptr4;
|
||||
const orc_int8 *ORC_RESTRICT ptr5;
|
||||
const orc_int8 *ORC_RESTRICT ptr6;
|
||||
const orc_int8 *ORC_RESTRICT ptr7;
|
||||
orc_int8 var43;
|
||||
#if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
|
||||
volatile orc_int8 var44;
|
||||
#else
|
||||
orc_int8 var44;
|
||||
#endif
|
||||
orc_int8 var45;
|
||||
orc_union16 var46;
|
||||
orc_union16 var47;
|
||||
orc_union16 var48;
|
||||
orc_union16 var49;
|
||||
orc_union16 var50;
|
||||
#if defined(__APPLE__) && __GNUC__ == 4 && __GNUC_MINOR__ == 2 && defined (__i386__)
|
||||
volatile orc_union32 var51;
|
||||
#else
|
||||
orc_union32 var51;
|
||||
#endif
|
||||
orc_union32 var52;
|
||||
orc_int8 var53;
|
||||
orc_union16 var54;
|
||||
orc_int8 var55;
|
||||
orc_int8 var56;
|
||||
orc_union16 var57;
|
||||
orc_int8 var58;
|
||||
orc_int8 var59;
|
||||
orc_union16 var60;
|
||||
orc_int8 var61;
|
||||
orc_union16 var62;
|
||||
orc_union16 var63;
|
||||
orc_union16 var64;
|
||||
orc_int8 var65;
|
||||
orc_union16 var66;
|
||||
orc_union16 var67;
|
||||
orc_union16 var68;
|
||||
orc_int8 var69;
|
||||
orc_union16 var70;
|
||||
orc_union16 var71;
|
||||
orc_union16 var72;
|
||||
orc_union16 var73;
|
||||
orc_int8 var74;
|
||||
orc_union16 var75;
|
||||
orc_union32 var76;
|
||||
|
||||
ptr0 = (orc_union32 *) ex->arrays[0];
|
||||
ptr4 = (orc_int8 *) ex->arrays[4];
|
||||
ptr5 = (orc_int8 *) ex->arrays[5];
|
||||
ptr6 = (orc_int8 *) ex->arrays[6];
|
||||
ptr7 = (orc_int8 *) ex->arrays[7];
|
||||
|
||||
/* 1: loadpb */
|
||||
var44 = 0x00000080; /* 128 or 6.32404e-322f */
|
||||
/* 12: loadpw */
|
||||
var46.i = ex->params[24];
|
||||
/* 14: loadpw */
|
||||
var47.i = ex->params[25];
|
||||
/* 19: loadpw */
|
||||
var48.i = ex->params[26];
|
||||
/* 23: loadpw */
|
||||
var49.i = ex->params[27];
|
||||
/* 26: loadpw */
|
||||
var50.i = ex->params[28];
|
||||
/* 32: loadpb */
|
||||
var51.x4[0] = 0x00000080; /* 128 or 6.32404e-322f */
|
||||
var51.x4[1] = 0x00000080; /* 128 or 6.32404e-322f */
|
||||
var51.x4[2] = 0x00000080; /* 128 or 6.32404e-322f */
|
||||
var51.x4[3] = 0x00000080; /* 128 or 6.32404e-322f */
|
||||
|
||||
for (i = 0; i < n; i++) {
|
||||
/* 0: loadb */
|
||||
var43 = ptr4[i];
|
||||
/* 2: subb */
|
||||
var53 = var43 - var44;
|
||||
/* 3: splatbw */
|
||||
var54.i = ((var53 & 0xff) << 8) | (var53 & 0xff);
|
||||
/* 4: loadupdb */
|
||||
var55 = ptr5[i >> 1];
|
||||
/* 5: subb */
|
||||
var56 = var55 - var44;
|
||||
/* 6: splatbw */
|
||||
var57.i = ((var56 & 0xff) << 8) | (var56 & 0xff);
|
||||
/* 7: loadupdb */
|
||||
var58 = ptr6[i >> 1];
|
||||
/* 8: subb */
|
||||
var59 = var58 - var44;
|
||||
/* 9: splatbw */
|
||||
var60.i = ((var59 & 0xff) << 8) | (var59 & 0xff);
|
||||
/* 10: loadb */
|
||||
var45 = ptr7[i];
|
||||
/* 11: subb */
|
||||
var61 = var45 - var44;
|
||||
/* 13: mulhsw */
|
||||
var62.i = (var54.i * var46.i) >> 16;
|
||||
/* 15: mulhsw */
|
||||
var63.i = (var60.i * var47.i) >> 16;
|
||||
/* 16: addw */
|
||||
var64.i = var62.i + var63.i;
|
||||
/* 17: convssswb */
|
||||
var65 = ORC_CLAMP_SB (var64.i);
|
||||
/* 18: mergebw */
|
||||
{
|
||||
orc_union16 _dest;
|
||||
_dest.x2[0] = var65;
|
||||
_dest.x2[1] = var61;
|
||||
var66.i = _dest.i;
|
||||
}
|
||||
/* 20: mulhsw */
|
||||
var67.i = (var57.i * var48.i) >> 16;
|
||||
/* 21: addw */
|
||||
var68.i = var62.i + var67.i;
|
||||
/* 22: convssswb */
|
||||
var69 = ORC_CLAMP_SB (var68.i);
|
||||
/* 24: mulhsw */
|
||||
var70.i = (var57.i * var49.i) >> 16;
|
||||
/* 25: addw */
|
||||
var71.i = var62.i + var70.i;
|
||||
/* 27: mulhsw */
|
||||
var72.i = (var60.i * var50.i) >> 16;
|
||||
/* 28: addw */
|
||||
var73.i = var71.i + var72.i;
|
||||
/* 29: convssswb */
|
||||
var74 = ORC_CLAMP_SB (var73.i);
|
||||
/* 30: mergebw */
|
||||
{
|
||||
orc_union16 _dest;
|
||||
_dest.x2[0] = var69;
|
||||
_dest.x2[1] = var74;
|
||||
var75.i = _dest.i;
|
||||
}
|
||||
/* 31: mergewl */
|
||||
{
|
||||
orc_union32 _dest;
|
||||
_dest.x2[0] = var75.i;
|
||||
_dest.x2[1] = var66.i;
|
||||
var76.i = _dest.i;
|
||||
}
|
||||
/* 33: addb */
|
||||
var52.x4[0] = var76.x4[0] + var51.x4[0];
|
||||
var52.x4[1] = var76.x4[1] + var51.x4[1];
|
||||
var52.x4[2] = var76.x4[2] + var51.x4[2];
|
||||
var52.x4[3] = var76.x4[3] + var51.x4[3];
|
||||
/* 34: storel */
|
||||
ptr0[i] = var52;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void
|
||||
video_orc_convert_A420_BGRA (guint8 * ORC_RESTRICT d1,
|
||||
const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2,
|
||||
const guint8 * ORC_RESTRICT s3, const guint8 * ORC_RESTRICT s4, int p1,
|
||||
int p2, int p3, int p4, int p5, int n)
|
||||
{
|
||||
OrcExecutor _ex, *ex = &_ex;
|
||||
static volatile int p_inited = 0;
|
||||
static OrcCode *c = 0;
|
||||
void (*func) (OrcExecutor *);
|
||||
|
||||
if (!p_inited) {
|
||||
orc_once_mutex_lock ();
|
||||
if (!p_inited) {
|
||||
OrcProgram *p;
|
||||
|
||||
#if 1
|
||||
static const orc_uint8 bc[] = {
|
||||
1, 9, 27, 118, 105, 100, 101, 111, 95, 111, 114, 99, 95, 99, 111, 110,
|
||||
118, 101, 114, 116, 95, 65, 52, 50, 48, 95, 66, 71, 82, 65, 11, 4,
|
||||
4, 12, 1, 1, 12, 1, 1, 12, 1, 1, 12, 1, 1, 14, 1, 128,
|
||||
0, 0, 0, 16, 2, 16, 2, 16, 2, 16, 2, 16, 2, 20, 2, 20,
|
||||
2, 20, 2, 20, 2, 20, 2, 20, 2, 20, 1, 20, 1, 20, 1, 20,
|
||||
1, 20, 4, 65, 38, 4, 16, 151, 32, 38, 45, 38, 5, 65, 38, 38,
|
||||
16, 151, 33, 38, 45, 38, 6, 65, 38, 38, 16, 151, 34, 38, 65, 41,
|
||||
7, 16, 90, 32, 32, 24, 90, 35, 34, 25, 70, 35, 32, 35, 159, 38,
|
||||
35, 196, 35, 38, 41, 90, 37, 33, 26, 70, 37, 32, 37, 159, 40, 37,
|
||||
90, 36, 33, 27, 70, 36, 32, 36, 90, 32, 34, 28, 70, 36, 36, 32,
|
||||
159, 39, 36, 196, 37, 40, 39, 195, 42, 37, 35, 21, 2, 33, 0, 42,
|
||||
16, 2, 0,
|
||||
};
|
||||
p = orc_program_new_from_static_bytecode (bc);
|
||||
orc_program_set_backup_function (p, _backup_video_orc_convert_A420_BGRA);
|
||||
#else
|
||||
p = orc_program_new ();
|
||||
orc_program_set_name (p, "video_orc_convert_A420_BGRA");
|
||||
orc_program_set_backup_function (p, _backup_video_orc_convert_A420_BGRA);
|
||||
orc_program_add_destination (p, 4, "d1");
|
||||
orc_program_add_source (p, 1, "s1");
|
||||
orc_program_add_source (p, 1, "s2");
|
||||
orc_program_add_source (p, 1, "s3");
|
||||
orc_program_add_source (p, 1, "s4");
|
||||
orc_program_add_constant (p, 1, 0x00000080, "c1");
|
||||
orc_program_add_parameter (p, 2, "p1");
|
||||
orc_program_add_parameter (p, 2, "p2");
|
||||
orc_program_add_parameter (p, 2, "p3");
|
||||
orc_program_add_parameter (p, 2, "p4");
|
||||
orc_program_add_parameter (p, 2, "p5");
|
||||
orc_program_add_temporary (p, 2, "t1");
|
||||
orc_program_add_temporary (p, 2, "t2");
|
||||
orc_program_add_temporary (p, 2, "t3");
|
||||
orc_program_add_temporary (p, 2, "t4");
|
||||
orc_program_add_temporary (p, 2, "t5");
|
||||
orc_program_add_temporary (p, 2, "t6");
|
||||
orc_program_add_temporary (p, 1, "t7");
|
||||
orc_program_add_temporary (p, 1, "t8");
|
||||
orc_program_add_temporary (p, 1, "t9");
|
||||
orc_program_add_temporary (p, 1, "t10");
|
||||
orc_program_add_temporary (p, 4, "t11");
|
||||
|
||||
orc_program_append_2 (p, "subb", 0, ORC_VAR_T7, ORC_VAR_S1, ORC_VAR_C1,
|
||||
ORC_VAR_D1);
|
||||
orc_program_append_2 (p, "splatbw", 0, ORC_VAR_T1, ORC_VAR_T7, ORC_VAR_D1,
|
||||
ORC_VAR_D1);
|
||||
orc_program_append_2 (p, "loadupdb", 0, ORC_VAR_T7, ORC_VAR_S2,
|
||||
ORC_VAR_D1, ORC_VAR_D1);
|
||||
orc_program_append_2 (p, "subb", 0, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_C1,
|
||||
ORC_VAR_D1);
|
||||
orc_program_append_2 (p, "splatbw", 0, ORC_VAR_T2, ORC_VAR_T7, ORC_VAR_D1,
|
||||
ORC_VAR_D1);
|
||||
orc_program_append_2 (p, "loadupdb", 0, ORC_VAR_T7, ORC_VAR_S3,
|
||||
ORC_VAR_D1, ORC_VAR_D1);
|
||||
orc_program_append_2 (p, "subb", 0, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_C1,
|
||||
ORC_VAR_D1);
|
||||
orc_program_append_2 (p, "splatbw", 0, ORC_VAR_T3, ORC_VAR_T7, ORC_VAR_D1,
|
||||
ORC_VAR_D1);
|
||||
orc_program_append_2 (p, "subb", 0, ORC_VAR_T10, ORC_VAR_S4, ORC_VAR_C1,
|
||||
ORC_VAR_D1);
|
||||
orc_program_append_2 (p, "mulhsw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P1,
|
||||
ORC_VAR_D1);
|
||||
orc_program_append_2 (p, "mulhsw", 0, ORC_VAR_T4, ORC_VAR_T3, ORC_VAR_P2,
|
||||
ORC_VAR_D1);
|
||||
orc_program_append_2 (p, "addw", 0, ORC_VAR_T4, ORC_VAR_T1, ORC_VAR_T4,
|
||||
ORC_VAR_D1);
|
||||
orc_program_append_2 (p, "convssswb", 0, ORC_VAR_T7, ORC_VAR_T4,
|
||||
ORC_VAR_D1, ORC_VAR_D1);
|
||||
orc_program_append_2 (p, "mergebw", 0, ORC_VAR_T4, ORC_VAR_T7,
|
||||
ORC_VAR_T10, ORC_VAR_D1);
|
||||
orc_program_append_2 (p, "mulhsw", 0, ORC_VAR_T6, ORC_VAR_T2, ORC_VAR_P3,
|
||||
ORC_VAR_D1);
|
||||
orc_program_append_2 (p, "addw", 0, ORC_VAR_T6, ORC_VAR_T1, ORC_VAR_T6,
|
||||
ORC_VAR_D1);
|
||||
orc_program_append_2 (p, "convssswb", 0, ORC_VAR_T9, ORC_VAR_T6,
|
||||
ORC_VAR_D1, ORC_VAR_D1);
|
||||
orc_program_append_2 (p, "mulhsw", 0, ORC_VAR_T5, ORC_VAR_T2, ORC_VAR_P4,
|
||||
ORC_VAR_D1);
|
||||
orc_program_append_2 (p, "addw", 0, ORC_VAR_T5, ORC_VAR_T1, ORC_VAR_T5,
|
||||
ORC_VAR_D1);
|
||||
orc_program_append_2 (p, "mulhsw", 0, ORC_VAR_T1, ORC_VAR_T3, ORC_VAR_P5,
|
||||
ORC_VAR_D1);
|
||||
orc_program_append_2 (p, "addw", 0, ORC_VAR_T5, ORC_VAR_T5, ORC_VAR_T1,
|
||||
ORC_VAR_D1);
|
||||
orc_program_append_2 (p, "convssswb", 0, ORC_VAR_T8, ORC_VAR_T5,
|
||||
ORC_VAR_D1, ORC_VAR_D1);
|
||||
orc_program_append_2 (p, "mergebw", 0, ORC_VAR_T6, ORC_VAR_T9, ORC_VAR_T8,
|
||||
ORC_VAR_D1);
|
||||
orc_program_append_2 (p, "mergewl", 0, ORC_VAR_T11, ORC_VAR_T6,
|
||||
ORC_VAR_T4, ORC_VAR_D1);
|
||||
orc_program_append_2 (p, "addb", 2, ORC_VAR_D1, ORC_VAR_T11, ORC_VAR_C1,
|
||||
ORC_VAR_D1);
|
||||
#endif
|
||||
|
||||
orc_program_compile (p);
|
||||
c = orc_program_take_code (p);
|
||||
orc_program_free (p);
|
||||
}
|
||||
p_inited = TRUE;
|
||||
orc_once_mutex_unlock ();
|
||||
}
|
||||
ex->arrays[ORC_VAR_A2] = c;
|
||||
ex->program = 0;
|
||||
|
||||
ex->n = n;
|
||||
ex->arrays[ORC_VAR_D1] = d1;
|
||||
ex->arrays[ORC_VAR_S1] = (void *) s1;
|
||||
ex->arrays[ORC_VAR_S2] = (void *) s2;
|
||||
ex->arrays[ORC_VAR_S3] = (void *) s3;
|
||||
ex->arrays[ORC_VAR_S4] = (void *) s4;
|
||||
ex->params[ORC_VAR_P1] = p1;
|
||||
ex->params[ORC_VAR_P2] = p2;
|
||||
ex->params[ORC_VAR_P3] = p3;
|
||||
ex->params[ORC_VAR_P4] = p4;
|
||||
ex->params[ORC_VAR_P5] = p5;
|
||||
|
||||
func = c->exec;
|
||||
func (ex);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
/* video_orc_matrix8 */
|
||||
#ifdef DISABLE_ORC
|
||||
void
|
||||
|
|
|
@ -1,8 +1,7 @@
|
|||
|
||||
/* autogenerated from video-orc.orc */
|
||||
|
||||
#ifndef _VIDEO_ORC_H_
|
||||
#define _VIDEO_ORC_H_
|
||||
#pragma once
|
||||
|
||||
#include <glib.h>
|
||||
|
||||
|
@ -189,6 +188,8 @@ void video_orc_convert_AYUV_ABGR (guint8 * ORC_RESTRICT d1, int d1_stride, const
|
|||
void video_orc_convert_AYUV_RGBA (guint8 * ORC_RESTRICT d1, int d1_stride, const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int p2, int p3, int p4, int p5, int n, int m);
|
||||
void video_orc_convert_I420_BGRA (guint8 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2, const guint8 * ORC_RESTRICT s3, int p1, int p2, int p3, int p4, int p5, int n);
|
||||
void video_orc_convert_I420_ARGB (guint8 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2, const guint8 * ORC_RESTRICT s3, int p1, int p2, int p3, int p4, int p5, int n);
|
||||
void video_orc_convert_A420_ARGB (guint8 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2, const guint8 * ORC_RESTRICT s3, const guint8 * ORC_RESTRICT s4, int p1, int p2, int p3, int p4, int p5, int n);
|
||||
void video_orc_convert_A420_BGRA (guint8 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2, const guint8 * ORC_RESTRICT s3, const guint8 * ORC_RESTRICT s4, int p1, int p2, int p3, int p4, int p5, int n);
|
||||
void video_orc_matrix8 (guint8 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, orc_int64 p1, orc_int64 p2, orc_int64 p3, orc_int64 p4, int n);
|
||||
void _custom_video_orc_matrix8 (guint8 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, orc_int64 p1, orc_int64 p2, orc_int64 p3, orc_int64 p4, int n);
|
||||
void video_orc_resample_h_near_u32_lq (guint32 * ORC_RESTRICT d1, const guint32 * ORC_RESTRICT s1, int p1, int p2, int n);
|
||||
|
@ -242,5 +243,3 @@ void video_orc_convert_UYVY_GRAY8 (guint8 * ORC_RESTRICT d1, int d1_stride, cons
|
|||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
|
|
|
@ -1876,6 +1876,118 @@ mergebw wb, g, b
|
|||
mergewl x, wr, wb
|
||||
x4 addb argb, x, c4128
|
||||
|
||||
# A420 -> packed ARGB, one 4-byte pixel per iteration.
# y and a are read once per pixel; u and v go through loadupdb.
# p1 scales Y, p2 is V->R, p3 is U->B, p4 is U->G, p5 is V->G.
.function video_orc_convert_A420_ARGB
.dest 4 argb guint8
.source 1 y guint8
.source 1 u guint8
.source 1 v guint8
.source 1 a guint8
.param 2 p1
.param 2 p2
.param 2 p3
.param 2 p4
.param 2 p5
.temp 2 ty
.temp 2 tu
.temp 2 tv
.temp 2 tr
.temp 2 tg
.temp 2 tb
.temp 1 r8
.temp 1 g8
.temp 1 b8
.temp 1 a8
.temp 4 pix
.const 1 bias 128

# subtract 128 from every input sample; splat Y, U and V to 16 bits
subb r8, y, bias
splatbw ty, r8
loadupdb r8, u
subb r8, r8, bias
splatbw tu, r8
loadupdb r8, v
subb r8, r8, bias
splatbw tv, r8
subb a8, a, bias

# scaled luma, shared by all three colour channels
mulhsw ty, ty, p1

# red = luma + V * p2, saturated to a byte, paired as {A, R}
mulhsw tr, tv, p2
addw tr, ty, tr
convssswb r8, tr
mergebw tr, a8, r8

# blue = luma + U * p3
mulhsw tb, tu, p3
addw tb, ty, tb
convssswb b8, tb

# green = luma + U * p4 + V * p5 (ty is reused as scratch from here on)
mulhsw tg, tu, p4
addw tg, ty, tg
mulhsw ty, tv, p5
addw tg, tg, ty

convssswb g8, tg

# pair {G, B}, join {A, R} with {G, B}, then add 128 back to each byte
mergebw tb, g8, b8
mergewl pix, tr, tb
x4 addb argb, pix, bias
|
||||
|
||||
# A420 -> packed BGRA, one 4-byte pixel per iteration.
# y and a are read once per pixel; u and v go through loadupdb.
# p1 scales Y, p2 is V->R, p3 is U->B, p4 is U->G, p5 is V->G.
.function video_orc_convert_A420_BGRA
.dest 4 bgra guint8
.source 1 y guint8
.source 1 u guint8
.source 1 v guint8
.source 1 a guint8
.param 2 p1
.param 2 p2
.param 2 p3
.param 2 p4
.param 2 p5
.temp 2 ty
.temp 2 tu
.temp 2 tv
.temp 2 tr
.temp 2 tg
.temp 2 tb
.temp 1 r8
.temp 1 g8
.temp 1 b8
.temp 1 a8
.temp 4 pix
.const 1 bias 128

# subtract 128 from every input sample; splat Y, U and V to 16 bits
subb r8, y, bias
splatbw ty, r8
loadupdb r8, u
subb r8, r8, bias
splatbw tu, r8
loadupdb r8, v
subb r8, r8, bias
splatbw tv, r8
subb a8, a, bias

# scaled luma, shared by all three colour channels
mulhsw ty, ty, p1

# red = luma + V * p2, saturated to a byte, paired as {R, A}
mulhsw tr, tv, p2
addw tr, ty, tr
convssswb r8, tr
mergebw tr, r8, a8

# blue = luma + U * p3
mulhsw tb, tu, p3
addw tb, ty, tb
convssswb b8, tb

# green = luma + U * p4 + V * p5 (ty is reused as scratch from here on)
mulhsw tg, tu, p4
addw tg, ty, tg
mulhsw ty, tv, p5
addw tg, tg, ty

convssswb g8, tg

# pair {B, G}, join {B, G} with {R, A}, then add 128 back to each byte
mergebw tb, b8, g8
mergewl pix, tb, tr
x4 addb bgra, pix, bias
|
||||
|
||||
.function video_orc_matrix8
|
||||
.backup _custom_video_orc_matrix8
|
||||
.source 4 argb guint8
|
||||
|
|
Loading…
Reference in a new issue