video-converter: improve matrix8 function

Avoid using a constant.
Avoid doing saturating adds; results are not supposed to overflow here.
Rework the C backup function a little in preparation for custom backup
functions in ORC.

See https://bugzilla.gnome.org/show_bug.cgi?id=741015
This commit is contained in:
Wim Taymans 2014-12-05 12:18:42 +01:00
parent b2413d46ed
commit 04e9a8dcc5
2 changed files with 61 additions and 34 deletions

View file

@ -110,6 +110,7 @@ struct _MatrixData
guint64 orc_p1; guint64 orc_p1;
guint64 orc_p2; guint64 orc_p2;
guint64 orc_p3; guint64 orc_p3;
guint64 orc_p4;
void (*matrix_func) (MatrixData * data, gpointer pixels); void (*matrix_func) (MatrixData * data, gpointer pixels);
}; };
@ -833,38 +834,53 @@ color_matrix_RGB_to_XYZ (MatrixData * dst, double Rx, double Ry, double Gx,
color_matrix_copy (dst, &m); color_matrix_copy (dst, &m);
} }
#if 0
/*
 * Plain C implementation of the 8-bit matrix transform.
 *
 * d1: destination pixels, 4 bytes per pixel.
 * s1: source pixels, 4 bytes per pixel.
 * p1..p4: packed signed 16-bit matrix entries.  For each parameter, bits
 *         16-31 belong to output row 0, bits 32-47 to row 1 and bits 48-63
 *         to row 2.  p1, p2 and p3 carry the multipliers for input bytes
 *         1, 2 and 3 of a pixel; p4 carries the per-row offset that is added
 *         after the SCALE shift.
 * n: number of pixels to process.
 *
 * Only bytes 1..3 of every pixel are read and written; byte 0 is neither
 * read from s1 nor stored to d1.
 */
void
_custom_video_orc_matrix8 (guint8 * ORC_RESTRICT d1,
    const guint8 * ORC_RESTRICT s1, orc_int64 p1, orc_int64 p2, orc_int64 p3,
    orc_int64 p4, int n)
{
  const orc_int64 packed[4] = { p1, p2, p3, p4 };
  gint coeff[3][4];
  gint row, col, px;

  /* Unpack the matrix: row k lives in the 16-bit lane starting at bit
   * 16 * (k + 1) of every packed parameter. */
  for (row = 0; row < 3; row++) {
    for (col = 0; col < 4; col++) {
      coeff[row][col] = (gint16) (packed[col] >> (16 * (row + 1)));
    }
  }

  for (px = 0; px < n; px++) {
    const guint8 *in = s1 + px * 4;
    guint8 *out = d1 + px * 4;
    gint c1, c2, c3;
    gint res[3];

    /* Fetch all three inputs before storing anything so the result is the
     * same when the caller passes the same buffer for d1 and s1. */
    c1 = in[1];
    c2 = in[2];
    c3 = in[3];

    for (row = 0; row < 3; row++) {
      res[row] = ((coeff[row][0] * c1 + coeff[row][1] * c2 +
              coeff[row][2] * c3) >> SCALE) + coeff[row][3];
    }

    out[1] = CLAMP (res[0], 0, 255);
    out[2] = CLAMP (res[1], 0, 255);
    out[3] = CLAMP (res[2], 0, 255);
  }
}
#endif
static void static void
video_converter_matrix8 (MatrixData * data, gpointer pixels) video_converter_matrix8 (MatrixData * data, gpointer pixels)
{ {
#if 1
video_orc_matrix8 (pixels, pixels, data->orc_p1, data->orc_p2, video_orc_matrix8 (pixels, pixels, data->orc_p1, data->orc_p2,
data->orc_p3, data->width); data->orc_p3, data->orc_p4, data->width);
#elif 0
/* FIXME we would like to set this as a backup function, it's faster than the
* orc generated one */
int i;
int r, g, b;
int y, u, v;
guint8 *p = pixels;
gint width = data->width;
for (i = 0; i < width; i++) {
r = p[i * 4 + 1];
g = p[i * 4 + 2];
b = p[i * 4 + 3];
y = (data->im[0][0] * r + data->im[0][1] * g +
data->im[0][2] * b + data->im[0][3]) >> SCALE;
u = (data->im[1][0] * r + data->im[1][1] * g +
data->im[1][2] * b + data->im[1][3]) >> SCALE;
v = (data->im[2][0] * r + data->im[2][1] * g +
data->im[2][2] * b + data->im[2][3]) >> SCALE;
p[i * 4 + 1] = CLAMP (y, 0, 255);
p[i * 4 + 2] = CLAMP (u, 0, 255);
p[i * 4 + 3] = CLAMP (v, 0, 255);
}
#endif
} }
static void static void
@ -929,6 +945,8 @@ prepare_matrix (GstVideoConverter * convert, MatrixData * data)
GST_DEBUG ("use fast AYUV -> RGB matrix"); GST_DEBUG ("use fast AYUV -> RGB matrix");
data->matrix_func = video_converter_matrix8_AYUV_ARGB; data->matrix_func = video_converter_matrix8_AYUV_ARGB;
} else { } else {
gint a03, a13, a23;
GST_DEBUG ("use 8bit matrix"); GST_DEBUG ("use 8bit matrix");
data->matrix_func = video_converter_matrix8; data->matrix_func = video_converter_matrix8;
@ -941,6 +959,13 @@ prepare_matrix (GstVideoConverter * convert, MatrixData * data)
data->orc_p3 = (((guint64) (guint16) data->im[2][2]) << 48) | data->orc_p3 = (((guint64) (guint16) data->im[2][2]) << 48) |
(((guint64) (guint16) data->im[1][2]) << 32) | (((guint64) (guint16) data->im[1][2]) << 32) |
(((guint64) (guint16) data->im[0][2]) << 16); (((guint64) (guint16) data->im[0][2]) << 16);
a03 = data->im[0][3] >> SCALE;
a13 = data->im[1][3] >> SCALE;
a23 = data->im[2][3] >> SCALE;
data->orc_p4 = (((guint64) (guint16) a23) << 48) |
(((guint64) (guint16) a13) << 32) | (((guint64) (guint16) a03) << 16);
} }
} else { } else {
GST_DEBUG ("use 16bit matrix"); GST_DEBUG ("use 16bit matrix");

View file

@ -1274,11 +1274,13 @@ mergewl x, wb, wr
x4 addb argb, x, c128 x4 addb argb, x, c128
.function video_orc_matrix8 .function video_orc_matrix8
#.backup _custom_video_orc_matrix8
.source 4 argb guint8 .source 4 argb guint8
.dest 4 ayuv guint8 .dest 4 ayuv guint8
.longparam 8 p1 .longparam 8 p1
.longparam 8 p2 .longparam 8 p2
.longparam 8 p3 .longparam 8 p3
.longparam 8 p4
.const 1 c128 128 .const 1 c128 128
.temp 2 w1 .temp 2 w1
.temp 2 w2 .temp 2 w2
@ -1303,26 +1305,26 @@ select1lw w2, l1
select0wb b1, w1 select0wb b1, w1
select1wb b2, w1 select1wb b2, w1
splatbl l1, b1 convubw w1, b1
mergelq aq, l1, l1 convuwl l1, w1
andq aq, aq, 0xff x4 mergebw aq, l1, l1
splatbl l1, b2 splatbl l1, b2
mergelq q1, l1, l1 mergelq q1, l1, l1
x4 mulhsw q1, q1, pr1 x4 mulhsw q1, q1, pr1
x4 addssw aq, aq, q1 x4 addw aq, aq, q1
select0wb b1, w2 select0wb b1, w2
splatbl l1,b1 splatbl l1,b1
mergelq q1, l1, l1 mergelq q1, l1, l1
x4 mulhsw q1, q1, pr2 x4 mulhsw q1, q1, pr2
x4 addssw aq, aq, q1 x4 addw aq, aq, q1
select1wb b2, w2 select1wb b2, w2
splatbl l1, b2 splatbl l1, b2
mergelq q1, l1, l1 mergelq q1, l1, l1
x4 mulhsw q1, q1, pr3 x4 mulhsw q1, q1, pr3
x4 addssw aq, aq, q1 x4 addw aq, aq, q1
x4 convssswb ayuv2, aq x4 convssswb ayuv2, aq
x4 addb ayuv, ayuv2, c128 x4 addb ayuv, ayuv2, c128