mirror of
https://gitlab.freedesktop.org/gstreamer/gstreamer.git
synced 2024-12-24 01:00:37 +00:00
video-converter: add ORC-optimized matrix8 function
Add an ORC implementation of the matrix8 function. Regenerate video-orc-dist.[ch].
This commit is contained in:
parent
c47b005197
commit
b5f3e5261a
4 changed files with 1512 additions and 989 deletions
|
@ -66,6 +66,9 @@ struct _GstVideoConverter
|
|||
gint in_bits;
|
||||
gint out_bits;
|
||||
gint cmatrix[4][4];
|
||||
guint64 orc_p1;
|
||||
guint64 orc_p2;
|
||||
guint64 orc_p3;
|
||||
|
||||
GstStructure *config;
|
||||
GstVideoDitherMethod dither;
|
||||
|
@ -344,6 +347,12 @@ gst_video_converter_frame (GstVideoConverter * convert,
|
|||
static void
|
||||
video_converter_matrix8 (GstVideoConverter * convert, gpointer pixels)
|
||||
{
|
||||
#if 1
|
||||
video_orc_matrix8 (pixels, pixels, convert->orc_p1, convert->orc_p2,
|
||||
convert->orc_p3, convert->width);
|
||||
#elif 0
|
||||
/* FIXME: we would like to set this as a backup function; it's faster than the
|
||||
* ORC-generated one */
|
||||
int i;
|
||||
int r, g, b;
|
||||
int y, u, v;
|
||||
|
@ -365,6 +374,7 @@ video_converter_matrix8 (GstVideoConverter * convert, gpointer pixels)
|
|||
p[i * 4 + 2] = CLAMP (u, 0, 255);
|
||||
p[i * 4 + 3] = CLAMP (v, 0, 255);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -627,6 +637,16 @@ video_converter_compute_matrix (GstVideoConverter * convert)
|
|||
GST_DEBUG ("[%6d %6d %6d %6d]", convert->cmatrix[3][0],
|
||||
convert->cmatrix[3][1], convert->cmatrix[3][2], convert->cmatrix[3][3]);
|
||||
|
||||
convert->orc_p1 = (((guint64) (guint16) convert->cmatrix[2][0]) << 48) |
|
||||
(((guint64) (guint16) convert->cmatrix[1][0]) << 32) |
|
||||
(((guint64) (guint16) convert->cmatrix[0][0]) << 16);
|
||||
convert->orc_p2 = (((guint64) (guint16) convert->cmatrix[2][1]) << 48) |
|
||||
(((guint64) (guint16) convert->cmatrix[1][1]) << 32) |
|
||||
(((guint64) (guint16) convert->cmatrix[0][1]) << 16);
|
||||
convert->orc_p3 = (((guint64) (guint16) convert->cmatrix[2][2]) << 48) |
|
||||
(((guint64) (guint16) convert->cmatrix[1][2]) << 32) |
|
||||
(((guint64) (guint16) convert->cmatrix[0][2]) << 16);
|
||||
|
||||
return TRUE;
|
||||
|
||||
/* ERRORS */
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -148,6 +148,7 @@ void video_orc_convert_AYUV_BGRA (guint8 * ORC_RESTRICT d1, int d1_stride, const
|
|||
void video_orc_convert_AYUV_ABGR (guint8 * ORC_RESTRICT d1, int d1_stride, const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int p2, int p3, int p4, int p5, int n, int m);
|
||||
void video_orc_convert_AYUV_RGBA (guint8 * ORC_RESTRICT d1, int d1_stride, const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int p2, int p3, int p4, int p5, int n, int m);
|
||||
void video_orc_convert_I420_BGRA (guint8 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2, const guint8 * ORC_RESTRICT s3, int p1, int p2, int p3, int p4, int p5, int n);
|
||||
void video_orc_matrix8 (guint8 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, orc_int64 p1, orc_int64 p2, orc_int64 p3, int n);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
|
@ -1230,3 +1230,57 @@ convssswb g, wg
|
|||
mergebw wb, b, g
|
||||
mergewl x, wb, wr
|
||||
x4 addb argb, x, c128
|
||||
|
||||
# video_orc_matrix8: transforms 4-byte pixels from the source array into the
# destination array using three 64-bit parameters (p1, p2, p3), each treated
# as four 16-bit lanes by the x4 word operations below.
#
# Per pixel: every byte is biased by -128, the low byte of the low word seeds
# the accumulator, and each of the other three bytes is multiplied against one
# parameter (mulhsw) and accumulated with signed saturation (addssw). The
# accumulator is then saturated to bytes and biased by +128 again.
#
# NOTE(review): the operand names suggest ARGB input and AYUV output with the
# seed byte being alpha -- confirm against the caller's component order.
# NOTE(review): the C caller packs coefficients into bits 16..63 of each
# parameter and leaves the low 16-bit lane zero, presumably so that lane 0
# only carries the seed byte -- confirm.
.function video_orc_matrix8
|
||||
.source 4 argb guint8
|
||||
.dest 4 ayuv guint8
|
||||
.longparam 8 p1
|
||||
.longparam 8 p2
|
||||
.longparam 8 p3
|
||||
.const 1 c128 128
|
||||
.temp 2 w1
|
||||
.temp 2 w2
|
||||
.temp 1 b1
|
||||
.temp 1 b2
|
||||
.temp 4 l1
|
||||
.temp 4 ayuv2
|
||||
.temp 8 aq
|
||||
.temp 8 q1
|
||||
.temp 8 pr1
|
||||
.temp 8 pr2
|
||||
.temp 8 pr3
|
||||
|
||||
# Load the three 64-bit parameters into temporaries.
loadpq pr1, p1
|
||||
loadpq pr2, p2
|
||||
loadpq pr3, p3
|
||||
|
||||
# Subtract 128 from each of the four bytes of the pixel.
x4 subb l1, argb, c128
|
||||
|
||||
# Split the 32-bit pixel into two 16-bit halves, then split the first half
# into its two bytes.
select0lw w1, l1
|
||||
select1lw w2, l1
|
||||
select0wb b1, w1
|
||||
select1wb b2, w1
|
||||
|
||||
# Seed the accumulator: replicate b1 across a quad, then mask with 0xff so
# only the low 8 bits remain.
splatbl l1, b1
|
||||
mergelq aq, l1, l1
|
||||
andq aq, aq, 0xff
|
||||
|
||||
# Second byte of w1: replicate across a quad, multiply lane-wise by pr1
# (mulhsw keeps the high half of the signed product), accumulate into aq.
splatbl l1, b2
|
||||
mergelq q1, l1, l1
|
||||
x4 mulhsw q1, q1, pr1
|
||||
x4 addssw aq, aq, q1
|
||||
|
||||
# First byte of w2: same multiply-accumulate, using pr2.
select0wb b1, w2
|
||||
splatbl l1,b1
|
||||
mergelq q1, l1, l1
|
||||
x4 mulhsw q1, q1, pr2
|
||||
x4 addssw aq, aq, q1
|
||||
|
||||
# Second byte of w2: same multiply-accumulate, using pr3.
select1wb b2, w2
|
||||
splatbl l1, b2
|
||||
mergelq q1, l1, l1
|
||||
x4 mulhsw q1, q1, pr3
|
||||
x4 addssw aq, aq, q1
|
||||
|
||||
# Saturate the four 16-bit sums to signed bytes, then add 128 back to each
# byte and store to the destination.
x4 convssswb ayuv2, aq
|
||||
x4 addb ayuv, ayuv2, c128
|
||||
|
|
Loading…
Reference in a new issue