cog: Add fast paths for colorspace conversion

This commit is contained in:
David Schleef 2010-08-20 17:24:23 -07:00
parent 62d7f1f991
commit 135908dfe1
2 changed files with 460 additions and 12 deletions

View file

@ -756,6 +756,72 @@ convert_Y444_AYUV (CogFrame * dest, CogFrame * src)
src->components[2].stride, dest->width, dest->height);
}
/* Fast path AYUV -> ARGB: hands component 0 of both frames to the ORC
 * kernel in a single call covering dest->width x dest->height pixels. */
static void
convert_AYUV_ARGB (CogFrame * dest, CogFrame * src)
{
  const int width = dest->width;
  const int height = dest->height;

  cogorc_convert_AYUV_ARGB (dest->components[0].data,
      dest->components[0].stride,
      src->components[0].data, src->components[0].stride, width, height);
}
/* Fast path AYUV -> BGRA: hands component 0 of both frames to the ORC
 * kernel in a single call covering dest->width x dest->height pixels. */
static void
convert_AYUV_BGRA (CogFrame * dest, CogFrame * src)
{
  const int width = dest->width;
  const int height = dest->height;

  cogorc_convert_AYUV_BGRA (dest->components[0].data,
      dest->components[0].stride,
      src->components[0].data, src->components[0].stride, width, height);
}
/* Fast path AYUV -> ABGR: hands component 0 of both frames to the ORC
 * kernel in a single call covering dest->width x dest->height pixels. */
static void
convert_AYUV_ABGR (CogFrame * dest, CogFrame * src)
{
  const int width = dest->width;
  const int height = dest->height;

  cogorc_convert_AYUV_ABGR (dest->components[0].data,
      dest->components[0].stride,
      src->components[0].data, src->components[0].stride, width, height);
}
/* Fast path AYUV -> RGBA: hands component 0 of both frames to the ORC
 * kernel in a single call covering dest->width x dest->height pixels. */
static void
convert_AYUV_RGBA (CogFrame * dest, CogFrame * src)
{
  const int width = dest->width;
  const int height = dest->height;

  cogorc_convert_AYUV_RGBA (dest->components[0].data,
      dest->components[0].stride,
      src->components[0].data, src->components[0].stride, width, height);
}
/*
 * Fast path I420 -> BGRA, converted one output row at a time.
 *
 * Output row i is built from row i of component 0 and row (i >> 1) of
 * components 1 and 2.  `quality` is hard-wired to 0, so only the plain
 * per-row kernel runs today.  When quality > 3, odd output rows instead use
 * the _avg kernel, which is handed two neighbouring chroma rows.
 */
static void
convert_I420_BGRA (CogFrame * dest, CogFrame * src)
{
  int i;
  int quality = 0;

  if (quality > 3) {
    /* Index of the last chroma row.  Assumes the chroma planes hold
     * (height + 1) / 2 rows, as is usual for I420 -- TODO confirm against
     * the CogFrame allocation code. */
    const int last_chroma_row = (dest->height - 1) / 2;

    for (i = 0; i < dest->height; i++) {
      if (i & 1) {
        int next_chroma_row = (i >> 1) + 1;

        /* On the final odd row of an even-height frame the "next" chroma
         * row would lie one past the end of the plane; reuse the last row
         * rather than reading out of bounds. */
        if (next_chroma_row > last_chroma_row) {
          next_chroma_row = last_chroma_row;
        }

        cogorc_convert_I420_BGRA_avg (COG_FRAME_DATA_GET_LINE (dest->components
                + 0, i), COG_FRAME_DATA_GET_LINE (src->components + 0, i),
            COG_FRAME_DATA_GET_LINE (src->components + 1, i >> 1),
            COG_FRAME_DATA_GET_LINE (src->components + 1, next_chroma_row),
            COG_FRAME_DATA_GET_LINE (src->components + 2, i >> 1),
            COG_FRAME_DATA_GET_LINE (src->components + 2, next_chroma_row),
            dest->width);
      } else {
        cogorc_convert_I420_BGRA (COG_FRAME_DATA_GET_LINE (dest->components + 0,
                i), COG_FRAME_DATA_GET_LINE (src->components + 0, i),
            COG_FRAME_DATA_GET_LINE (src->components + 1, i >> 1),
            COG_FRAME_DATA_GET_LINE (src->components + 2, i >> 1), dest->width);
      }
    }
  } else {
    for (i = 0; i < dest->height; i++) {
      cogorc_convert_I420_BGRA (COG_FRAME_DATA_GET_LINE (dest->components + 0,
              i), COG_FRAME_DATA_GET_LINE (src->components + 0, i),
          COG_FRAME_DATA_GET_LINE (src->components + 1, i >> 1),
          COG_FRAME_DATA_GET_LINE (src->components + 2, i >> 1), dest->width);
    }
  }
}
@ -801,6 +867,17 @@ static CogColorspaceTransform transforms[] = {
{GST_VIDEO_FORMAT_Y444, GST_VIDEO_FORMAT_UYVY, convert_Y444_UYVY},
{GST_VIDEO_FORMAT_Y444, GST_VIDEO_FORMAT_AYUV, convert_Y444_AYUV},
{GST_VIDEO_FORMAT_Y444, GST_VIDEO_FORMAT_Y42B, convert_Y444_Y42B},
{GST_VIDEO_FORMAT_AYUV, GST_VIDEO_FORMAT_ARGB, convert_AYUV_ARGB},
{GST_VIDEO_FORMAT_AYUV, GST_VIDEO_FORMAT_BGRA, convert_AYUV_BGRA},
{GST_VIDEO_FORMAT_AYUV, GST_VIDEO_FORMAT_xRGB, convert_AYUV_ARGB}, /* alias */
{GST_VIDEO_FORMAT_AYUV, GST_VIDEO_FORMAT_BGRx, convert_AYUV_BGRA}, /* alias */
{GST_VIDEO_FORMAT_AYUV, GST_VIDEO_FORMAT_ABGR, convert_AYUV_ABGR},
{GST_VIDEO_FORMAT_AYUV, GST_VIDEO_FORMAT_RGBA, convert_AYUV_RGBA},
{GST_VIDEO_FORMAT_AYUV, GST_VIDEO_FORMAT_xBGR, convert_AYUV_ABGR}, /* alias */
{GST_VIDEO_FORMAT_AYUV, GST_VIDEO_FORMAT_RGBx, convert_AYUV_RGBA}, /* alias */
{GST_VIDEO_FORMAT_I420, GST_VIDEO_FORMAT_BGRA, convert_I420_BGRA},
};
static GstFlowReturn
@ -860,7 +937,7 @@ gst_cogcolorspace_transform (GstBaseTransform * base_transform,
}
}
GST_ERROR ("no match");
GST_DEBUG ("no fastpath match %d %d", in_format, out_format);
}
switch (out_format) {

View file

@ -1026,30 +1026,401 @@ mergewl ayuv, ay, uv
# cogorc_convert_AYUV_ARGB: converts 4-byte ayuv source pixels into 4-byte
# destination pixels over a 2-D region (.flags 2d).
# Every byte is biased by -128 on entry and by +128 on exit, so the maths in
# between runs on signed values; alpha is carried through untouched.
# NOTE(review): the multipliers below look like 8.8 fixed-point
# approximations of the usual video-range YCbCr -> RGB coefficients
# (1.164, 1.596, 2.018, 0.391, 0.813) -- confirm.
.function cogorc_convert_AYUV_ARGB
.flags 2d
.dest 4 argb
.source 4 ayuv
.temp 2 t1
.temp 2 t2
.temp 1 a
.temp 1 y
.temp 1 u
.temp 1 v
.temp 2 wy
.temp 2 wu
.temp 2 wv
.temp 2 wr
.temp 2 wg
.temp 2 wb
.temp 1 r
.temp 1 g
.temp 1 b
.temp 4 x
.const 1 c8 8
# Bias all four bytes by -128, then split the pixel into a, y, u, v.
x4 subb x, ayuv, 128
splitlw t1, t2, x
splitwb y, a, t2
splitwb v, u, t1
# Widen y, u, v to signed 16-bit words.
convsbw wy, y
convsbw wu, u
convsbw wv, v
# wy = wy + ((wy * 42) >> 8)
mullw t1, wy, 42
shrsw t1, t1, c8
addssw wy, wy, t1
# wr = wy + 2 * wv - ((wv * 103) >> 8)
addssw wr, wy, wv
mullw t1, wv, 103
shrsw t1, t1, c8
subssw wr, wr, t1
addssw wr, wr, wv
# wb = wy + 2 * wu + ((wu * 4) >> 8)
addssw wb, wy, wu
addssw wb, wb, wu
mullw t1, wu, 4
shrsw t1, t1, c8
addssw wb, wb, t1
# wg = wy - ((wu * 100) >> 8) - 2 * ((wv * 104) >> 8)
mullw t1, wu, 100
shrsw t1, t1, c8
subssw wg, wy, t1
mullw t1, wv, 104
shrsw t1, t1, c8
subssw wg, wg, t1
subssw wg, wg, t1
# Saturate the words back to signed bytes.
convssswb r, wr
convssswb g, wg
convssswb b, wb
# Reassemble the pixel in a, r, g, b order and undo the -128 bias.
mergebw t1, a, r
mergebw t2, g, b
mergewl x, t1, t2
x4 addb argb, x, 128
# cogorc_convert_AYUV_BGRA: converts 4-byte ayuv source pixels into 4-byte
# destination pixels over a 2-D region (.flags 2d).  The destination
# variable is named argb, but the bytes are merged in b, g, r, a order.
# Every byte is biased by -128 on entry and by +128 on exit, so the maths in
# between runs on signed values; alpha is carried through untouched.
# NOTE(review): the multipliers below look like 8.8 fixed-point
# approximations of the usual video-range YCbCr -> RGB coefficients
# (1.164, 1.596, 2.018, 0.391, 0.813) -- confirm.
.function cogorc_convert_AYUV_BGRA
.flags 2d
.dest 4 argb
.source 4 ayuv
.temp 2 t1
.temp 2 t2
.temp 1 a
.temp 1 y
.temp 1 u
.temp 1 v
.temp 2 wy
.temp 2 wu
.temp 2 wv
.temp 2 wr
.temp 2 wg
.temp 2 wb
.temp 1 r
.temp 1 g
.temp 1 b
.temp 4 x
.const 1 c8 8
# Bias all four bytes by -128, then split the pixel into a, y, u, v.
x4 subb x, ayuv, 128
splitlw t1, t2, x
splitwb y, a, t2
splitwb v, u, t1
# Widen y, u, v to signed 16-bit words.
convsbw wy, y
convsbw wu, u
convsbw wv, v
# wy = wy + ((wy * 42) >> 8)
mullw t1, wy, 42
shrsw t1, t1, c8
addssw wy, wy, t1
# wr = wy + 2 * wv - ((wv * 103) >> 8)
addssw wr, wy, wv
mullw t1, wv, 103
shrsw t1, t1, c8
subssw wr, wr, t1
addssw wr, wr, wv
# wb = wy + 2 * wu + ((wu * 4) >> 8)
addssw wb, wy, wu
addssw wb, wb, wu
mullw t1, wu, 4
shrsw t1, t1, c8
addssw wb, wb, t1
# wg = wy - ((wu * 100) >> 8) - 2 * ((wv * 104) >> 8)
mullw t1, wu, 100
shrsw t1, t1, c8
subssw wg, wy, t1
mullw t1, wv, 104
shrsw t1, t1, c8
subssw wg, wg, t1
subssw wg, wg, t1
# Saturate the words back to signed bytes.
convssswb r, wr
convssswb g, wg
convssswb b, wb
# Reassemble the pixel in b, g, r, a order and undo the -128 bias.
mergebw t1, b, g
mergebw t2, r, a
mergewl x, t1, t2
x4 addb argb, x, 128
# cogorc_convert_AYUV_ABGR: converts 4-byte ayuv source pixels into 4-byte
# destination pixels over a 2-D region (.flags 2d).  The destination
# variable is named argb, but the bytes are merged in a, b, g, r order.
# Every byte is biased by -128 on entry and by +128 on exit, so the maths in
# between runs on signed values; alpha is carried through untouched.
# NOTE(review): the multipliers below look like 8.8 fixed-point
# approximations of the usual video-range YCbCr -> RGB coefficients
# (1.164, 1.596, 2.018, 0.391, 0.813) -- confirm.
.function cogorc_convert_AYUV_ABGR
.flags 2d
.dest 4 argb
.source 4 ayuv
.temp 2 t1
.temp 2 t2
.temp 1 a
.temp 1 y
.temp 1 u
.temp 1 v
.temp 2 wy
.temp 2 wu
.temp 2 wv
.temp 2 wr
.temp 2 wg
.temp 2 wb
.temp 1 r
.temp 1 g
.temp 1 b
.temp 4 x
.const 1 c8 8
# Bias all four bytes by -128, then split the pixel into a, y, u, v.
x4 subb x, ayuv, 128
splitlw t1, t2, x
splitwb y, a, t2
splitwb v, u, t1
# Widen y, u, v to signed 16-bit words.
convsbw wy, y
convsbw wu, u
convsbw wv, v
# wy = wy + ((wy * 42) >> 8)
mullw t1, wy, 42
shrsw t1, t1, c8
addssw wy, wy, t1
# wr = wy + 2 * wv - ((wv * 103) >> 8)
addssw wr, wy, wv
mullw t1, wv, 103
shrsw t1, t1, c8
subssw wr, wr, t1
addssw wr, wr, wv
# wb = wy + 2 * wu + ((wu * 4) >> 8)
addssw wb, wy, wu
addssw wb, wb, wu
mullw t1, wu, 4
shrsw t1, t1, c8
addssw wb, wb, t1
# wg = wy - ((wu * 100) >> 8) - 2 * ((wv * 104) >> 8)
mullw t1, wu, 100
shrsw t1, t1, c8
subssw wg, wy, t1
mullw t1, wv, 104
shrsw t1, t1, c8
subssw wg, wg, t1
subssw wg, wg, t1
# Saturate the words back to signed bytes.
convssswb r, wr
convssswb g, wg
convssswb b, wb
# Reassemble the pixel in a, b, g, r order and undo the -128 bias.
mergebw t1, a, b
mergebw t2, g, r
mergewl x, t1, t2
x4 addb argb, x, 128
# cogorc_convert_AYUV_RGBA: converts 4-byte ayuv source pixels into 4-byte
# destination pixels over a 2-D region (.flags 2d).  The destination
# variable is named argb, but the bytes are merged in r, g, b, a order.
# Every byte is biased by -128 on entry and by +128 on exit, so the maths in
# between runs on signed values; alpha is carried through untouched.
# NOTE(review): the multipliers below look like 8.8 fixed-point
# approximations of the usual video-range YCbCr -> RGB coefficients
# (1.164, 1.596, 2.018, 0.391, 0.813) -- confirm.
.function cogorc_convert_AYUV_RGBA
.flags 2d
.dest 4 argb
.source 4 ayuv
.temp 2 t1
.temp 2 t2
.temp 1 a
.temp 1 y
.temp 1 u
.temp 1 v
.temp 2 wy
.temp 2 wu
.temp 2 wv
.temp 2 wr
.temp 2 wg
.temp 2 wb
.temp 1 r
.temp 1 g
.temp 1 b
.temp 4 x
.const 1 c8 8
# Bias all four bytes by -128, then split the pixel into a, y, u, v.
x4 subb x, ayuv, 128
splitlw t1, t2, x
splitwb y, a, t2
splitwb v, u, t1
# Widen y, u, v to signed 16-bit words.
convsbw wy, y
convsbw wu, u
convsbw wv, v
# wy = wy + ((wy * 42) >> 8)
mullw t1, wy, 42
shrsw t1, t1, c8
addssw wy, wy, t1
# wr = wy + 2 * wv - ((wv * 103) >> 8)
addssw wr, wy, wv
mullw t1, wv, 103
shrsw t1, t1, c8
subssw wr, wr, t1
addssw wr, wr, wv
# wb = wy + 2 * wu + ((wu * 4) >> 8)
addssw wb, wy, wu
addssw wb, wb, wu
mullw t1, wu, 4
shrsw t1, t1, c8
addssw wb, wb, t1
# wg = wy - ((wu * 100) >> 8) - 2 * ((wv * 104) >> 8)
mullw t1, wu, 100
shrsw t1, t1, c8
subssw wg, wy, t1
mullw t1, wv, 104
shrsw t1, t1, c8
subssw wg, wg, t1
subssw wg, wg, t1
# Saturate the words back to signed bytes.
convssswb r, wr
convssswb g, wg
convssswb b, wb
# Reassemble the pixel in r, g, b, a order and undo the -128 bias.
mergebw t1, r, g
mergebw t2, b, a
mergewl x, t1, t2
x4 addb argb, x, 128
# cogorc_convert_I420_BGRA: converts one row of planar y / u / v bytes into
# 4-byte destination pixels merged in b, g, r, a order (the destination
# variable is named argb regardless).
# y, u and v are biased by -128 before the maths and the packed pixel is
# biased by +128 at the end, so everything in between is signed.
# u and v are read with loadupib, ORC's upsampling load, so one chroma row
# serves a luma row of twice its width.
# NOTE(review): the multipliers below look like 8.8 fixed-point
# approximations of the usual video-range YCbCr -> RGB coefficients
# (1.164, 1.596, 2.018, 0.391, 0.813) -- confirm.
.function cogorc_convert_I420_BGRA
.dest 4 argb
.source 1 y
.source 1 u
.source 1 v
.temp 2 t1
.temp 2 t2
.temp 1 t3
.temp 2 wy
.temp 2 wu
.temp 2 wv
.temp 2 wr
.temp 2 wg
.temp 2 wb
.temp 1 r
.temp 1 g
.temp 1 b
.temp 4 x
.const 1 c8 8
.const 1 c128 128
# Bias y, u, v by -128 and widen them to signed 16-bit words.
subb t3, y, c128
convsbw wy, t3
loadupib t3, u
subb t3, t3, c128
convsbw wu, t3
loadupib t3, v
subb t3, t3, c128
convsbw wv, t3
# wy = wy + ((wy * 42) >> 8)
mullw t1, wy, 42
shrsw t1, t1, c8
addssw wy, wy, t1
# wr = wy + 2 * wv - ((wv * 103) >> 8)
addssw wr, wy, wv
mullw t1, wv, 103
shrsw t1, t1, c8
subssw wr, wr, t1
addssw wr, wr, wv
# wb = wy + 2 * wu + ((wu * 4) >> 8)
addssw wb, wy, wu
addssw wb, wb, wu
mullw t1, wu, 4
shrsw t1, t1, c8
addssw wb, wb, t1
# wg = wy - ((wu * 100) >> 8) - 2 * ((wv * 104) >> 8)
mullw t1, wu, 100
shrsw t1, t1, c8
subssw wg, wy, t1
mullw t1, wv, 104
shrsw t1, t1, c8
subssw wg, wg, t1
subssw wg, wg, t1
# Saturate the words back to signed bytes.
convssswb r, wr
convssswb g, wg
convssswb b, wb
# Reassemble the pixel in b, g, r, a order.  Alpha is merged as 127 because
# the final addb adds 128 to every byte: 127 + 128 = 255 (opaque).  Merging
# 255 here would wrap to 127 and yield half-transparent output.
mergebw t1, b, g
mergebw t2, r, 127
mergewl x, t1, t2
x4 addb argb, x, c128
# cogorc_convert_I420_BGRA_avg: like the plain I420 -> BGRA row kernel, but
# takes two u rows (u1, u2) and two v rows (v1, v2) and averages each pair
# with avgub before converting.  Output pixels are 4 bytes merged in
# b, g, r, a order (the destination variable is named argb regardless).
# y and the averaged u, v are biased by -128 before the maths and the packed
# pixel is biased by +128 at the end, so everything in between is signed.
# Chroma is read with loadupib, ORC's upsampling load, so one chroma row
# serves a luma row of twice its width.
# NOTE(review): the multipliers below look like 8.8 fixed-point
# approximations of the usual video-range YCbCr -> RGB coefficients
# (1.164, 1.596, 2.018, 0.391, 0.813) -- confirm.
.function cogorc_convert_I420_BGRA_avg
.dest 4 argb
.source 1 y
.source 1 u1
.source 1 u2
.source 1 v1
.source 1 v2
.temp 2 t1
.temp 2 t2
.temp 1 t3
.temp 1 t4
.temp 2 wy
.temp 2 wu
.temp 2 wv
.temp 2 wr
.temp 2 wg
.temp 2 wb
.temp 1 r
.temp 1 g
.temp 1 b
.temp 4 x
.const 1 c8 8
.const 1 c128 128
# Bias y by -128 and widen it to a signed 16-bit word.
subb t3, y, c128
convsbw wy, t3
# Average the two u rows (unsigned), then bias and widen.
loadupib t3, u1
loadupib t4, u2
avgub t3, t3, t4
subb t3, t3, c128
convsbw wu, t3
# Average the two v rows (unsigned), then bias and widen.
loadupib t3, v1
loadupib t4, v2
avgub t3, t3, t4
subb t3, t3, c128
convsbw wv, t3
# wy = wy + ((wy * 42) >> 8)
mullw t1, wy, 42
shrsw t1, t1, c8
addssw wy, wy, t1
# wr = wy + 2 * wv - ((wv * 103) >> 8)
addssw wr, wy, wv
mullw t1, wv, 103
shrsw t1, t1, c8
subssw wr, wr, t1
addssw wr, wr, wv
# wb = wy + 2 * wu + ((wu * 4) >> 8)
addssw wb, wy, wu
addssw wb, wb, wu
mullw t1, wu, 4
shrsw t1, t1, c8
addssw wb, wb, t1
# wg = wy - ((wu * 100) >> 8) - 2 * ((wv * 104) >> 8)
mullw t1, wu, 100
shrsw t1, t1, c8
subssw wg, wy, t1
mullw t1, wv, 104
shrsw t1, t1, c8
subssw wg, wg, t1
subssw wg, wg, t1
# Saturate the words back to signed bytes.
convssswb r, wr
convssswb g, wg
convssswb b, wb
# Reassemble the pixel in b, g, r, a order.  Alpha is merged as 127 because
# the final addb adds 128 to every byte: 127 + 128 = 255 (opaque).  Merging
# 255 here would wrap to 127 and yield half-transparent output.
mergebw t1, b, g
mergebw t2, r, 127
mergewl x, t1, t2
x4 addb argb, x, c128