video-orc: make RGB pack/unpack faster

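Avoid all the merging and splitting of individual components: each function now takes the whole 32-bit pixel (via a load or a byte swap) and combines a pair of shifts with an OR.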
This commit is contained in:
Wim Taymans 2014-12-10 16:42:12 +01:00
parent 6e492304dd
commit 903ba19593

View file

@@ -305,80 +305,46 @@ swapl bgra, argb
.function video_orc_pack_RGBA .function video_orc_pack_RGBA
.dest 4 rgba guint8 .dest 4 rgba guint8
.source 4 argb guint8 .source 4 argb guint8
.temp 1 a .temp 4 a
.temp 1 r .temp 4 r
.temp 1 g
.temp 1 b
.temp 2 rg
.temp 2 ba
.temp 2 ar
.temp 2 gb
splitlw gb, ar, argb loadl r, argb
splitwb b, g, gb shrul a, r, 8
splitwb r, a, ar shll r, r, 24
mergebw ba, b, a orl rgba, r, a
mergebw rg, r, g
mergewl rgba, rg, ba
.function video_orc_unpack_RGBA .function video_orc_unpack_RGBA
.dest 4 argb guint8 .dest 4 argb guint8
.source 4 rgba guint8 .source 4 rgba guint8
.temp 1 a .temp 4 a
.temp 1 r .temp 4 r
.temp 1 g
.temp 1 b
.temp 2 rg
.temp 2 ba
.temp 2 ar
.temp 2 gb
splitlw ba, rg, rgba
splitwb g, r, rg
splitwb a, b, ba
mergebw ar, a, r
mergebw gb, g, b
mergewl argb, ar, gb
loadl r, rgba
shll a, r, 8
shrul r, r, 24
orl argb, r, a
.function video_orc_unpack_ABGR .function video_orc_unpack_ABGR
.dest 4 argb guint8 .dest 4 argb guint8
.source 4 abgr guint8 .source 4 abgr guint8
.temp 1 a .temp 4 a
.temp 1 r .temp 4 r
.temp 1 g
.temp 1 b
.temp 2 gr
.temp 2 ab
.temp 2 ar
.temp 2 gb
splitlw gr, ab, abgr
splitwb r, g, gr
splitwb b, a, ab
mergebw ar, a, r
mergebw gb, g, b
mergewl argb, ar, gb
swapl r, abgr
shll a, r, 8
shrul r, r, 24
orl argb, r, a
.function video_orc_pack_ABGR .function video_orc_pack_ABGR
.dest 4 abgr guint8 .dest 4 abgr guint8
.source 4 argb guint8 .source 4 argb guint8
.temp 1 a .temp 4 a
.temp 1 r .temp 4 r
.temp 1 g
.temp 1 b
.temp 2 gr
.temp 2 ab
.temp 2 ar
.temp 2 gb
splitlw gb, ar, argb swapl r, argb
splitwb b, g, gb shll a, r, 8
splitwb r, a, ar shrul r, r, 24
mergebw ab, a, b orl abgr, r, a
mergebw gr, g, r
mergewl abgr, ab, gr
.function video_orc_unpack_NV12 .function video_orc_unpack_NV12
.dest 8 d guint8 .dest 8 d guint8