diff --git a/gst/compositor/compositororc.orc b/gst/compositor/compositororc.orc index 5a348b2539..a9368b7343 100644 --- a/gst/compositor/compositororc.orc +++ b/gst/compositor/compositororc.orc @@ -35,6 +35,7 @@ convsuswb d1, t2 .source 4 s guint8 .param 2 alpha .temp 4 t +.temp 4 t2 .temp 2 tw .temp 1 tb .temp 4 a @@ -49,14 +50,26 @@ convwb tb, tw splatbl a, tb x4 convubw a_wide, a x4 mullw a_wide, a_wide, alpha -x4 shruw a_wide, a_wide, 8 +x4 div255w a_wide, a_wide + +# dest pixel into t2 +loadl t2, d + +# t = s - d (as bytes) +x4 subb t, t, t2 + +# s_wide = (uint16)(t) * alpha x4 convubw s_wide, t -loadl t, d -x4 convubw d_wide, t -x4 subw s_wide, s_wide, d_wide x4 mullw s_wide, s_wide, a_wide + +# s_wide /= 255 x4 div255w s_wide, s_wide + +# d_wide = (uint16)(dest) + s_wide +x4 convubw d_wide, t2 x4 addw d_wide, d_wide, s_wide + +# Set output alpha to 0xff and store x4 convwb t, d_wide orl t, t, a_alpha storel d, t @@ -83,13 +96,17 @@ convwb tb, tw splatbl a, tb x4 convubw a_wide, a x4 mullw a_wide, a_wide, alpha -x4 shruw a_wide, a_wide, 8 +x4 div255w a_wide, a_wide + +loadl t2, d +# t = s - d (as bytes) +x4 subb t, t, t2 + x4 convubw s_wide, t -loadl t, d -x4 convubw d_wide, t -x4 subw s_wide, s_wide, d_wide x4 mullw s_wide, s_wide, a_wide x4 div255w s_wide, s_wide + +x4 convubw d_wide, t2 x4 addw d_wide, d_wide, s_wide x4 convwb t, d_wide orl t, t, a_alpha @@ -114,14 +131,14 @@ storel d, t .const 4 a_alpha 0x000000ff .const 4 a_alpha_inv 0xffffff00 -# calc source alpha as alpha_s = alpha_s * alpha / 256 +# calc source alpha as alpha_s = alpha_s * alpha / 255 loadl t, s convlw tw, t convwb tb, tw splatbl a, tb x4 convubw alpha_s, a x4 mullw alpha_s, alpha_s, alpha -x4 shruw alpha_s, alpha_s, 8 +x4 div255w alpha_s, alpha_s x4 convubw s_wide, t x4 mullw s_wide, s_wide, alpha_s @@ -175,7 +192,7 @@ storel d, t .const 4 a_alpha 0xff000000 .const 4 a_alpha_inv 0x00ffffff -# calc source alpha as alpha_s = alpha_s * alpha / 256 +# calc source alpha as alpha_s = alpha_s * alpha / 255 loadl t, s shrul t2, t, 24 convlw tw, t2 @@ -183,7 +200,7 @@ convwb tb, tw splatbl a, tb x4 convubw alpha_s, a x4 mullw alpha_s, alpha_s, alpha -x4 shruw alpha_s, alpha_s, 8 +x4 div255w alpha_s, alpha_s x4 convubw s_wide, t x4 mullw s_wide, s_wide, alpha_s