mirror of
https://gitlab.freedesktop.org/gstreamer/gstreamer.git
synced 2025-01-12 02:15:31 +00:00
video-converter: add orc optimized matrix8 function
Add an ORC implementation of the matrix8 function. Regenerate video-orc-dist.[ch].
This commit is contained in:
parent
c47b005197
commit
b5f3e5261a
4 changed files with 1512 additions and 989 deletions
|
@ -66,6 +66,9 @@ struct _GstVideoConverter
|
||||||
gint in_bits;
|
gint in_bits;
|
||||||
gint out_bits;
|
gint out_bits;
|
||||||
gint cmatrix[4][4];
|
gint cmatrix[4][4];
|
||||||
|
guint64 orc_p1;
|
||||||
|
guint64 orc_p2;
|
||||||
|
guint64 orc_p3;
|
||||||
|
|
||||||
GstStructure *config;
|
GstStructure *config;
|
||||||
GstVideoDitherMethod dither;
|
GstVideoDitherMethod dither;
|
||||||
|
@ -344,6 +347,12 @@ gst_video_converter_frame (GstVideoConverter * convert,
|
||||||
static void
|
static void
|
||||||
video_converter_matrix8 (GstVideoConverter * convert, gpointer pixels)
|
video_converter_matrix8 (GstVideoConverter * convert, gpointer pixels)
|
||||||
{
|
{
|
||||||
|
#if 1
|
||||||
|
video_orc_matrix8 (pixels, pixels, convert->orc_p1, convert->orc_p2,
|
||||||
|
convert->orc_p3, convert->width);
|
||||||
|
#elif 0
|
||||||
|
/* FIXME: we would like to set this as a backup function; it's faster than the
|
||||||
|
* ORC-generated one */
|
||||||
int i;
|
int i;
|
||||||
int r, g, b;
|
int r, g, b;
|
||||||
int y, u, v;
|
int y, u, v;
|
||||||
|
@ -365,6 +374,7 @@ video_converter_matrix8 (GstVideoConverter * convert, gpointer pixels)
|
||||||
p[i * 4 + 2] = CLAMP (u, 0, 255);
|
p[i * 4 + 2] = CLAMP (u, 0, 255);
|
||||||
p[i * 4 + 3] = CLAMP (v, 0, 255);
|
p[i * 4 + 3] = CLAMP (v, 0, 255);
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
|
@ -627,6 +637,16 @@ video_converter_compute_matrix (GstVideoConverter * convert)
|
||||||
GST_DEBUG ("[%6d %6d %6d %6d]", convert->cmatrix[3][0],
|
GST_DEBUG ("[%6d %6d %6d %6d]", convert->cmatrix[3][0],
|
||||||
convert->cmatrix[3][1], convert->cmatrix[3][2], convert->cmatrix[3][3]);
|
convert->cmatrix[3][1], convert->cmatrix[3][2], convert->cmatrix[3][3]);
|
||||||
|
|
||||||
|
convert->orc_p1 = (((guint64) (guint16) convert->cmatrix[2][0]) << 48) |
|
||||||
|
(((guint64) (guint16) convert->cmatrix[1][0]) << 32) |
|
||||||
|
(((guint64) (guint16) convert->cmatrix[0][0]) << 16);
|
||||||
|
convert->orc_p2 = (((guint64) (guint16) convert->cmatrix[2][1]) << 48) |
|
||||||
|
(((guint64) (guint16) convert->cmatrix[1][1]) << 32) |
|
||||||
|
(((guint64) (guint16) convert->cmatrix[0][1]) << 16);
|
||||||
|
convert->orc_p3 = (((guint64) (guint16) convert->cmatrix[2][2]) << 48) |
|
||||||
|
(((guint64) (guint16) convert->cmatrix[1][2]) << 32) |
|
||||||
|
(((guint64) (guint16) convert->cmatrix[0][2]) << 16);
|
||||||
|
|
||||||
return TRUE;
|
return TRUE;
|
||||||
|
|
||||||
/* ERRORS */
|
/* ERRORS */
|
||||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -148,6 +148,7 @@ void video_orc_convert_AYUV_BGRA (guint8 * ORC_RESTRICT d1, int d1_stride, const
|
||||||
void video_orc_convert_AYUV_ABGR (guint8 * ORC_RESTRICT d1, int d1_stride, const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int p2, int p3, int p4, int p5, int n, int m);
|
void video_orc_convert_AYUV_ABGR (guint8 * ORC_RESTRICT d1, int d1_stride, const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int p2, int p3, int p4, int p5, int n, int m);
|
||||||
void video_orc_convert_AYUV_RGBA (guint8 * ORC_RESTRICT d1, int d1_stride, const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int p2, int p3, int p4, int p5, int n, int m);
|
void video_orc_convert_AYUV_RGBA (guint8 * ORC_RESTRICT d1, int d1_stride, const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int p2, int p3, int p4, int p5, int n, int m);
|
||||||
void video_orc_convert_I420_BGRA (guint8 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2, const guint8 * ORC_RESTRICT s3, int p1, int p2, int p3, int p4, int p5, int n);
|
void video_orc_convert_I420_BGRA (guint8 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2, const guint8 * ORC_RESTRICT s3, int p1, int p2, int p3, int p4, int p5, int n);
|
||||||
|
void video_orc_matrix8 (guint8 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, orc_int64 p1, orc_int64 p2, orc_int64 p3, int n);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
|
|
|
@ -1230,3 +1230,57 @@ convssswb g, wg
|
||||||
mergebw wb, b, g
|
mergebw wb, b, g
|
||||||
mergewl x, wb, wr
|
mergewl x, wb, wr
|
||||||
x4 addb argb, x, c128
|
x4 addb argb, x, c128
|
||||||
|
|
||||||
|
.function video_orc_matrix8
|
||||||
|
.source 4 argb guint8
|
||||||
|
.dest 4 ayuv guint8
|
||||||
|
.longparam 8 p1
|
||||||
|
.longparam 8 p2
|
||||||
|
.longparam 8 p3
|
||||||
|
.const 1 c128 128
|
||||||
|
.temp 2 w1
|
||||||
|
.temp 2 w2
|
||||||
|
.temp 1 b1
|
||||||
|
.temp 1 b2
|
||||||
|
.temp 4 l1
|
||||||
|
.temp 4 ayuv2
|
||||||
|
.temp 8 aq
|
||||||
|
.temp 8 q1
|
||||||
|
.temp 8 pr1
|
||||||
|
.temp 8 pr2
|
||||||
|
.temp 8 pr3
|
||||||
|
|
||||||
|
loadpq pr1, p1
|
||||||
|
loadpq pr2, p2
|
||||||
|
loadpq pr3, p3
|
||||||
|
|
||||||
|
x4 subb l1, argb, c128
|
||||||
|
|
||||||
|
select0lw w1, l1
|
||||||
|
select1lw w2, l1
|
||||||
|
select0wb b1, w1
|
||||||
|
select1wb b2, w1
|
||||||
|
|
||||||
|
splatbl l1, b1
|
||||||
|
mergelq aq, l1, l1
|
||||||
|
andq aq, aq, 0xff
|
||||||
|
|
||||||
|
splatbl l1, b2
|
||||||
|
mergelq q1, l1, l1
|
||||||
|
x4 mulhsw q1, q1, pr1
|
||||||
|
x4 addssw aq, aq, q1
|
||||||
|
|
||||||
|
select0wb b1, w2
|
||||||
|
splatbl l1,b1
|
||||||
|
mergelq q1, l1, l1
|
||||||
|
x4 mulhsw q1, q1, pr2
|
||||||
|
x4 addssw aq, aq, q1
|
||||||
|
|
||||||
|
select1wb b2, w2
|
||||||
|
splatbl l1, b2
|
||||||
|
mergelq q1, l1, l1
|
||||||
|
x4 mulhsw q1, q1, pr3
|
||||||
|
x4 addssw aq, aq, q1
|
||||||
|
|
||||||
|
x4 convssswb ayuv2, aq
|
||||||
|
x4 addb ayuv, ayuv2, c128
|
||||||
|
|
Loading…
Reference in a new issue