mirror of
https://gitlab.freedesktop.org/gstreamer/gstreamer.git
synced 2025-01-12 10:25:33 +00:00
effb593a93
Crossfading is a bit more complex than just having two pads with the right keyframes, as the blending is not exactly the same. The difference is in the way we compute the alpha channel: in the case of crossfading, we have to compute an additive operation between the destination and the source (factored by both the input pad's alpha property and the crossfading ratio), so that the crossfade result of two opaque frames is also fully opaque at any time in the crossfading process, avoiding bleed-through in the layer blending. Some rationale can be found in https://phabricator.freedesktop.org/T7773. https://bugzilla.gnome.org/show_bug.cgi?id=784827
365 lines
7.6 KiB
Text
365 lines
7.6 KiB
Text
# compositor_orc_splat_u32
#
# Fill a 1-D array of 32-bit elements with a single 32-bit value.
#   d1: destination array (guint32 elements)
#   p1: value copied into every destination element
.function compositor_orc_splat_u32
.dest 4 d1 guint32
.param 4 p1 guint32

copyl d1, p1

# compositor_orc_memcpy_u32
#
# Copy a 1-D array of 32-bit elements from source to destination.
#   d1: destination array (guint32 elements)
#   s1: source array (guint32 elements)
.function compositor_orc_memcpy_u32
.dest 4 d1 guint32
.source 4 s1 guint32

copyl d1, s1

# compositor_orc_blend_u8
#
# Blend a 2-D plane of bytes from s1 into d1 using a 16-bit weight p1:
#   d1 = ((d1 << 8) + (s1 - d1) * p1) >> 8
# The arithmetic is done in 16-bit words; the result is packed back to a
# byte with saturation.
#   d1: destination plane (guint8), read and written
#   s1: source plane (guint8)
#   p1: blend weight (16-bit); the shift by 8 means a weight of 256 would
#       select the source unchanged
.function compositor_orc_blend_u8
.flags 2d
.dest 1 d1 guint8
.source 1 s1 guint8
.param 2 p1
.temp 2 t1
.temp 2 t2
.const 1 c1 8

# widen both samples to 16 bits
convubw t1, d1
convubw t2, s1

# t2 = (s1 - d1) * p1, keeping the low 16 bits of the product
subw t2, t2, t1
mullw t2, t2, p1

# t2 = ((d1 << 8) + t2) >> 8
shlw t1, t1, c1
addw t2, t1, t2
shruw t2, t2, c1

# pack back to a byte with saturation
convsuswb d1, t2

# compositor_orc_blend_argb
#
# Blend 32-bit source pixels onto destination pixels, where the alpha
# component is the low 8 bits of each pixel word as loaded by loadl.
# Per component:
#   a = src_alpha * alpha / 255
#   d = d + (s - d) * a / 255
# The alpha byte of the stored pixel is then forced to 0xff.
#   d:     destination pixels (4 bytes each), read and written
#   s:     source pixels (4 bytes each)
#   alpha: 16-bit global alpha multiplied into the per-pixel source alpha
.function compositor_orc_blend_argb
.flags 2d
.dest 4 d guint8
.source 4 s guint8
.param 2 alpha
.temp 4 t
.temp 2 tw
.temp 1 tb
.temp 4 a
.temp 8 d_wide
.temp 8 s_wide
.temp 8 a_wide
.const 4 a_alpha 0x000000ff

# extract the source alpha (low byte) and replicate it to all four lanes
loadl t, s
convlw tw, t
convwb tb, tw
splatbl a, tb

# a_wide = src_alpha * alpha / 255, per lane
x4 convubw a_wide, a
x4 mullw a_wide, a_wide, alpha
x4 div255w a_wide, a_wide

# widen the source and destination components to 16 bits
x4 convubw s_wide, t
loadl t, d
x4 convubw d_wide, t

# s_wide = (s - d) * a_wide / 255
x4 subw s_wide, s_wide, d_wide
x4 mullw s_wide, s_wide, a_wide
x4 div255w s_wide, s_wide

# d = d + s_wide, packed back to bytes by truncation
x4 addw d_wide, d_wide, s_wide
x4 convwb t, d_wide

# force the alpha byte of the result to 0xff
orl t, t, a_alpha
storel d, t

# compositor_orc_blend_bgra
#
# Blend 32-bit source pixels onto destination pixels, where the alpha
# component is the high 8 bits of each pixel word as loaded by loadl.
# Per component:
#   a = src_alpha * alpha / 255
#   d = d + (s - d) * a / 255
# The alpha byte of the stored pixel is then forced to 0xff.
#   d:     destination pixels (4 bytes each), read and written
#   s:     source pixels (4 bytes each)
#   alpha: 16-bit global alpha multiplied into the per-pixel source alpha
.function compositor_orc_blend_bgra
.flags 2d
.dest 4 d guint8
.source 4 s guint8
.param 2 alpha
.temp 4 t
.temp 4 t2
.temp 2 tw
.temp 1 tb
.temp 4 a
.temp 8 d_wide
.temp 8 s_wide
.temp 8 a_wide
.const 4 a_alpha 0xff000000

# extract the source alpha (high byte) and replicate it to all four lanes
loadl t, s
shrul t2, t, 24
convlw tw, t2
convwb tb, tw
splatbl a, tb

# a_wide = src_alpha * alpha / 255, per lane
x4 convubw a_wide, a
x4 mullw a_wide, a_wide, alpha
x4 div255w a_wide, a_wide

# widen the source and destination components to 16 bits
x4 convubw s_wide, t
loadl t, d
x4 convubw d_wide, t

# s_wide = (s - d) * a_wide / 255
x4 subw s_wide, s_wide, d_wide
x4 mullw s_wide, s_wide, a_wide
x4 div255w s_wide, s_wide

# d = d + s_wide, packed back to bytes by truncation
x4 addw d_wide, d_wide, s_wide
x4 convwb t, d_wide

# force the alpha byte of the result to 0xff
orl t, t, a_alpha
storel d, t

# compositor_orc_overlay_argb
#
# Composite 32-bit source pixels over destination pixels, producing a new
# destination alpha. The alpha component is the low 8 bits of each pixel
# word as loaded by loadl.
#   alpha_s = src_alpha * alpha / 255
#   alpha_d = dst_alpha * (255 - alpha_s) / 255
#   pix_d   = (pix_s * alpha_s + pix_d * alpha_d) / (alpha_s + alpha_d)
#   dst_alpha = alpha_s + alpha_d
#   d:     destination pixels (4 bytes each), read and written
#   s:     source pixels (4 bytes each)
#   alpha: 16-bit global alpha multiplied into the per-pixel source alpha
.function compositor_orc_overlay_argb
.flags 2d
.dest 4 d guint8
.source 4 s guint8
.param 2 alpha
.temp 4 t
.temp 2 tw
.temp 1 tb
.temp 8 alpha_s
.temp 8 alpha_s_inv
.temp 8 alpha_d
.temp 4 a
.temp 8 d_wide
.temp 8 s_wide
.const 4 xfs 0xffffffff
.const 4 a_alpha 0x000000ff
.const 4 a_alpha_inv 0xffffff00

# calc source alpha as alpha_s = alpha_s * alpha / 255
loadl t, s
convlw tw, t
convwb tb, tw
splatbl a, tb
x4 convubw alpha_s, a
x4 mullw alpha_s, alpha_s, alpha
x4 div255w alpha_s, alpha_s
x4 convubw s_wide, t
x4 mullw s_wide, s_wide, alpha_s

# calc destination alpha as alpha_d = (255-alpha_s) * alpha_d / 255
loadpl a, xfs
x4 convubw alpha_s_inv, a
x4 subw alpha_s_inv, alpha_s_inv, alpha_s
loadl t, d
convlw tw, t
convwb tb, tw
splatbl a, tb
x4 convubw alpha_d, a
x4 mullw alpha_d, alpha_d, alpha_s_inv
x4 div255w alpha_d, alpha_d
x4 convubw d_wide, t
x4 mullw d_wide, d_wide, alpha_d

# calc final pixel as pix_d = pix_s*alpha_s + pix_d*alpha_d*(255-alpha_s)/255
x4 addw d_wide, d_wide, s_wide

# calc the final destination alpha_d = alpha_s + alpha_d * (255-alpha_s)/255
x4 addw alpha_d, alpha_d, alpha_s

# now normalize the pix_d by the final alpha to make it associative
x4 divluw d_wide, d_wide, alpha_d

# pack the new alpha into the correct spot
x4 convwb t, d_wide
andl t, t, a_alpha_inv
x4 convwb a, alpha_d
andl a, a, a_alpha
orl t, t, a
storel d, t

# compositor_orc_overlay_argb_addition
#
# Variant of the ARGB overlay in which the stored destination alpha is the
# sum of the original destination alpha and the scaled source alpha. The
# colour components are computed as in the plain overlay. The alpha
# component is the low 8 bits of each pixel word as loaded by loadl.
#   alpha_s      = src_alpha * alpha / 255
#   alpha_factor = dst_alpha * (255 - alpha_s) / 255
#   pix_d        = (pix_s * alpha_s + pix_d * alpha_factor)
#                  / (alpha_s + alpha_factor)
#   dst_alpha    = dst_alpha + alpha_s
#   d:     destination pixels (4 bytes each), read and written
#   s:     source pixels (4 bytes each)
#   alpha: 16-bit global alpha multiplied into the per-pixel source alpha
.function compositor_orc_overlay_argb_addition
.flags 2d
.dest 4 d guint8
.source 4 s guint8
.param 2 alpha
.temp 4 t
.temp 2 tw
.temp 1 tb
.temp 8 alpha_s
.temp 8 alpha_s_inv
.temp 8 alpha_factor
.temp 8 alpha_d
.temp 4 a
.temp 8 d_wide
.temp 8 s_wide
.const 4 xfs 0xffffffff
.const 4 a_alpha 0x000000ff
.const 4 a_alpha_inv 0xffffff00

# calc source alpha as alpha_s = alpha_s * alpha / 255
loadl t, s
convlw tw, t
convwb tb, tw
splatbl a, tb
x4 convubw alpha_s, a
x4 mullw alpha_s, alpha_s, alpha
x4 div255w alpha_s, alpha_s
x4 convubw s_wide, t
x4 mullw s_wide, s_wide, alpha_s

# calc destination alpha as alpha_factor = (255-alpha_s) * alpha_factor / 255
loadpl a, xfs
x4 convubw alpha_s_inv, a
x4 subw alpha_s_inv, alpha_s_inv, alpha_s
loadl t, d
convlw tw, t
convwb tb, tw
splatbl a, tb
x4 convubw alpha_factor, a
x4 mullw alpha_factor, alpha_factor, alpha_s_inv
x4 div255w alpha_factor, alpha_factor
x4 convubw d_wide, t
x4 mullw d_wide, d_wide, alpha_factor

# calc final pixel as pix_d = pix_s*alpha_s + pix_d*alpha_factor*(255-alpha_s)/255
x4 addw d_wide, d_wide, s_wide

# calc the alpha factor alpha_factor = alpha_s + alpha_factor * (255-alpha_s)/255
x4 addw alpha_factor, alpha_factor, alpha_s

# now normalize the pix_d by the final alpha to make it associative
x4 divluw d_wide, d_wide, alpha_factor

# calc the final global alpha_d = alpha_d + (alpha_s * (alpha / 255))
# NOTE(review): the sum is packed below with convwb, which keeps only the
# low byte; presumably callers keep dst_alpha + alpha_s within 0..255 -
# confirm against the caller.
loadl t, d
convlw tw, t
convwb tb, tw
splatbl a, tb
x4 convubw alpha_d, a
x4 addw alpha_d, alpha_d, alpha_s

# pack the new alpha into the correct spot
x4 convwb t, d_wide
andl t, t, a_alpha_inv
x4 convwb a, alpha_d
andl a, a, a_alpha
orl t, t, a
storel d, t

# compositor_orc_overlay_bgra
#
# Composite 32-bit source pixels over destination pixels, producing a new
# destination alpha. The alpha component is the high 8 bits of each pixel
# word as loaded by loadl.
#   alpha_s = src_alpha * alpha / 255
#   alpha_d = dst_alpha * (255 - alpha_s) / 255
#   pix_d   = (pix_s * alpha_s + pix_d * alpha_d) / (alpha_s + alpha_d)
#   dst_alpha = alpha_s + alpha_d
#   d:     destination pixels (4 bytes each), read and written
#   s:     source pixels (4 bytes each)
#   alpha: 16-bit global alpha multiplied into the per-pixel source alpha
.function compositor_orc_overlay_bgra
.flags 2d
.dest 4 d guint8
.source 4 s guint8
.param 2 alpha
.temp 4 t
.temp 4 t2
.temp 2 tw
.temp 1 tb
.temp 8 alpha_s
.temp 8 alpha_s_inv
.temp 8 alpha_d
.temp 4 a
.temp 8 d_wide
.temp 8 s_wide
.const 4 xfs 0xffffffff
.const 4 a_alpha 0xff000000
.const 4 a_alpha_inv 0x00ffffff

# calc source alpha as alpha_s = alpha_s * alpha / 255
loadl t, s
shrul t2, t, 24
convlw tw, t2
convwb tb, tw
splatbl a, tb
x4 convubw alpha_s, a
x4 mullw alpha_s, alpha_s, alpha
x4 div255w alpha_s, alpha_s
x4 convubw s_wide, t
x4 mullw s_wide, s_wide, alpha_s

# calc destination alpha as alpha_d = (255-alpha_s) * alpha_d / 255
loadpl a, xfs
x4 convubw alpha_s_inv, a
x4 subw alpha_s_inv, alpha_s_inv, alpha_s
loadl t, d
shrul t2, t, 24
convlw tw, t2
convwb tb, tw
splatbl a, tb
x4 convubw alpha_d, a
x4 mullw alpha_d, alpha_d, alpha_s_inv
x4 div255w alpha_d, alpha_d
x4 convubw d_wide, t
x4 mullw d_wide, d_wide, alpha_d

# calc final pixel as pix_d = pix_s*alpha_s + pix_d*alpha_d*(255-alpha_s)/255
x4 addw d_wide, d_wide, s_wide

# calc the final destination alpha_d = alpha_s + alpha_d * (255-alpha_s)/255
x4 addw alpha_d, alpha_d, alpha_s

# now normalize the pix_d by the final alpha to make it associative
x4 divluw d_wide, d_wide, alpha_d

# pack the new alpha into the correct spot
x4 convwb t, d_wide
andl t, t, a_alpha_inv
x4 convwb a, alpha_d
andl a, a, a_alpha
orl t, t, a
storel d, t

# compositor_orc_overlay_bgra_addition
#
# Variant of the BGRA overlay in which the stored destination alpha is the
# sum of the original destination alpha and the scaled source alpha. The
# colour components are computed as in the plain overlay. The alpha
# component is the high 8 bits of each pixel word as loaded by loadl.
#   alpha_s      = src_alpha * alpha / 255
#   alpha_factor = dst_alpha * (255 - alpha_s) / 255
#   pix_d        = (pix_s * alpha_s + pix_d * alpha_factor)
#                  / (alpha_s + alpha_factor)
#   dst_alpha    = dst_alpha + alpha_s
#   d:     destination pixels (4 bytes each), read and written
#   s:     source pixels (4 bytes each)
#   alpha: 16-bit global alpha multiplied into the per-pixel source alpha
.function compositor_orc_overlay_bgra_addition
.flags 2d
.dest 4 d guint8
.source 4 s guint8
.param 2 alpha
.temp 4 t
.temp 4 t2
.temp 2 tw
.temp 1 tb
.temp 8 alpha_s
.temp 8 alpha_s_inv
.temp 8 alpha_factor
.temp 8 alpha_d
.temp 4 a
.temp 8 d_wide
.temp 8 s_wide
.const 4 xfs 0xffffffff
.const 4 a_alpha 0xff000000
.const 4 a_alpha_inv 0x00ffffff

# calc source alpha as alpha_s = alpha_s * alpha / 255
loadl t, s
shrul t2, t, 24
convlw tw, t2
convwb tb, tw
splatbl a, tb
x4 convubw alpha_s, a
x4 mullw alpha_s, alpha_s, alpha
x4 div255w alpha_s, alpha_s
x4 convubw s_wide, t
x4 mullw s_wide, s_wide, alpha_s

# calc destination alpha as alpha_factor = (255-alpha_s) * alpha_factor / 255
loadpl a, xfs
x4 convubw alpha_s_inv, a
x4 subw alpha_s_inv, alpha_s_inv, alpha_s
loadl t, d
shrul t2, t, 24
convlw tw, t2
convwb tb, tw
splatbl a, tb
x4 convubw alpha_factor, a
x4 mullw alpha_factor, alpha_factor, alpha_s_inv
x4 div255w alpha_factor, alpha_factor
x4 convubw d_wide, t
x4 mullw d_wide, d_wide, alpha_factor

# calc final pixel as pix_d = pix_s*alpha_s + pix_d*alpha_factor*(255-alpha_s)/255
x4 addw d_wide, d_wide, s_wide

# calc the final destination alpha_factor = alpha_s + alpha_factor * (255-alpha_s)/255
x4 addw alpha_factor, alpha_factor, alpha_s

# now normalize the pix_d by the final alpha to make it associative
x4 divluw d_wide, d_wide, alpha_factor

# calc the final global alpha_d = alpha_d + (alpha_s * (alpha / 255))
# NOTE(review): the sum is packed below with convwb, which keeps only the
# low byte; presumably callers keep dst_alpha + alpha_s within 0..255 -
# confirm against the caller.
loadl t, d
shrul t2, t, 24
convlw tw, t2
convwb tb, tw
splatbl a, tb
x4 convubw alpha_d, a
x4 addw alpha_d, alpha_d, alpha_s

# pack the new alpha into the correct spot
x4 convwb t, d_wide
andl t, t, a_alpha_inv
x4 convwb a, alpha_d
andl a, a, a_alpha
orl t, t, a
storel d, t