video-converter: add orc optimized matrix8 function

Add an ORC implementation of the matrix8 function.
Regenerate video-orc-dist.[ch].
This commit is contained in:
Wim Taymans 2014-09-25 17:32:32 +02:00
parent c47b005197
commit b5f3e5261a
4 changed files with 1512 additions and 989 deletions

View file

@ -66,6 +66,9 @@ struct _GstVideoConverter
gint in_bits;
gint out_bits;
gint cmatrix[4][4];
guint64 orc_p1;
guint64 orc_p2;
guint64 orc_p3;
GstStructure *config;
GstVideoDitherMethod dither;
@ -344,6 +347,12 @@ gst_video_converter_frame (GstVideoConverter * convert,
static void
video_converter_matrix8 (GstVideoConverter * convert, gpointer pixels)
{
#if 1
video_orc_matrix8 (pixels, pixels, convert->orc_p1, convert->orc_p2,
convert->orc_p3, convert->width);
#elif 0
/* FIXME we would like to set this as a backup function, it's faster than the
* orc generated one */
int i;
int r, g, b;
int y, u, v;
@ -365,6 +374,7 @@ video_converter_matrix8 (GstVideoConverter * convert, gpointer pixels)
p[i * 4 + 2] = CLAMP (u, 0, 255);
p[i * 4 + 3] = CLAMP (v, 0, 255);
}
#endif
}
static void
@ -627,6 +637,16 @@ video_converter_compute_matrix (GstVideoConverter * convert)
GST_DEBUG ("[%6d %6d %6d %6d]", convert->cmatrix[3][0],
convert->cmatrix[3][1], convert->cmatrix[3][2], convert->cmatrix[3][3]);
convert->orc_p1 = (((guint64) (guint16) convert->cmatrix[2][0]) << 48) |
(((guint64) (guint16) convert->cmatrix[1][0]) << 32) |
(((guint64) (guint16) convert->cmatrix[0][0]) << 16);
convert->orc_p2 = (((guint64) (guint16) convert->cmatrix[2][1]) << 48) |
(((guint64) (guint16) convert->cmatrix[1][1]) << 32) |
(((guint64) (guint16) convert->cmatrix[0][1]) << 16);
convert->orc_p3 = (((guint64) (guint16) convert->cmatrix[2][2]) << 48) |
(((guint64) (guint16) convert->cmatrix[1][2]) << 32) |
(((guint64) (guint16) convert->cmatrix[0][2]) << 16);
return TRUE;
/* ERRORS */

File diff suppressed because it is too large Load diff

View file

@ -148,6 +148,7 @@ void video_orc_convert_AYUV_BGRA (guint8 * ORC_RESTRICT d1, int d1_stride, const
void video_orc_convert_AYUV_ABGR (guint8 * ORC_RESTRICT d1, int d1_stride, const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int p2, int p3, int p4, int p5, int n, int m);
void video_orc_convert_AYUV_RGBA (guint8 * ORC_RESTRICT d1, int d1_stride, const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int p2, int p3, int p4, int p5, int n, int m);
void video_orc_convert_I420_BGRA (guint8 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2, const guint8 * ORC_RESTRICT s3, int p1, int p2, int p3, int p4, int p5, int n);
void video_orc_matrix8 (guint8 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, orc_int64 p1, orc_int64 p2, orc_int64 p3, int n);
#ifdef __cplusplus
}

View file

@ -1230,3 +1230,57 @@ convssswb g, wg
mergebw wb, b, g
mergewl x, wb, wr
x4 addb argb, x, c128
# video_orc_matrix8
#
# Applies a fixed-point matrix to every 4-byte element of the source array
# and writes one 4-byte element to the destination.
#
# Per element:
#   1. all four bytes are biased by -128 (subb with c128),
#   2. the first byte seeds a 4 x 16-bit accumulator (aq),
#   3. each of the remaining three bytes is broadcast to all four 16-bit
#      lanes, multiplied (signed, high 16 bits kept) by one 64-bit parameter
#      and added to the accumulator with signed saturation,
#   4. the accumulator is narrowed to four signed bytes with saturation and
#      biased back by +128.
#
# Parameters:
#   argb      source, 4 bytes per element (the name suggests A,R,G,B byte
#             order - TODO confirm against the caller's pixel format)
#   ayuv      destination, 4 bytes per element
#   p1,p2,p3  64-bit values, each holding four 16-bit coefficients; pN is
#             multiplied with the (N+1)-th byte of the element. The C caller
#             shown in this change packs cmatrix[0..2][N-1] into bits 16..63
#             and leaves bits 0..15 zero, so the lowest lane receives no
#             contribution from the products.
.function video_orc_matrix8
.source 4 argb guint8
.dest 4 ayuv guint8
.longparam 8 p1
.longparam 8 p2
.longparam 8 p3
.const 1 c128 128
.temp 2 w1
.temp 2 w2
.temp 1 b1
.temp 1 b2
.temp 4 l1
.temp 4 ayuv2
.temp 8 aq
.temp 8 q1
.temp 8 pr1
.temp 8 pr2
.temp 8 pr3
# Load the three 64-bit coefficient parameters into temporaries.
loadpq pr1, p1
loadpq pr2, p2
loadpq pr3, p3
# Bias every byte by -128 so the following signed arithmetic is centred on 0.
x4 subb l1, argb, c128
# Split the 4-byte element: w1 = first two bytes, w2 = last two bytes,
# then b1/b2 = first/second byte of w1.
select0lw w1, l1
select1lw w2, l1
select0wb b1, w1
select1wb b2, w1
# Seed the accumulator with the first byte: replicate it over 64 bits and
# keep only the low 8 bits, i.e. the byte zero-extended in the lowest lane,
# all other lanes 0.
# NOTE(review): b1 was already biased by -128, and the mask zero-extends
# rather than sign-extends it. Inputs below 128 therefore become 128..255 in
# the lane and would saturate to 127 in convssswb (255 after the final
# +128) - confirm this is intended.
splatbl l1, b1
mergelq aq, l1, l1
andq aq, aq, 0xff
# Second byte: broadcast to all lanes (each 16-bit lane holds the byte
# duplicated in both halves), multiply-high by pr1, accumulate.
splatbl l1, b2
mergelq q1, l1, l1
x4 mulhsw q1, q1, pr1
x4 addssw aq, aq, q1
# Third byte (first byte of w2): same sequence with pr2.
select0wb b1, w2
splatbl l1,b1
mergelq q1, l1, l1
x4 mulhsw q1, q1, pr2
x4 addssw aq, aq, q1
# Fourth byte (second byte of w2): same sequence with pr3.
select1wb b2, w2
splatbl l1, b2
mergelq q1, l1, l1
x4 mulhsw q1, q1, pr3
x4 addssw aq, aq, q1
# Narrow the four 16-bit sums to signed bytes with saturation, then undo the
# -128 bias to produce the unsigned output bytes.
x4 convssswb ayuv2, aq
x4 addb ayuv, ayuv2, c128