videoscale: refactor using more Orc code

Convert downsampling to Orc. Convert horizontal linear scaling to Orc. Combine horizontal and vertical scaling into one pass.
2025-06-05 06:58:56 +00:00 · 2010-06-14 15:42:09 -07:00 · 2010-06-14 15:42:09 -07:00 · 0cceeb2035
commit 0cceeb2035
parent 1a75dede56
5 changed files with 1740 additions and 272 deletions
--- a/gst/videoscale/gstvideoscaleorc-dist.c
+++ b/gst/videoscale/gstvideoscaleorc-dist.c
--- a/gst/videoscale/gstvideoscaleorc-dist.h
+++ b/gst/videoscale/gstvideoscaleorc-dist.h
@ -55,10 +55,20 @@ typedef union { orc_int16 i; orc_int8 x2[2]; } orc_union16;
 typedef union { orc_int32 i; float f; orc_int16 x2[2]; orc_int8 x4[4]; } orc_union32;
 typedef union { orc_int64 i; double f; orc_int32 x2[2]; orc_int16 x4[4]; } orc_union64;
 #endif
-void orc_merge_linear_u8 (orc_uint8 * d1, const orc_uint8 * s1, const orc_uint8 * s2, int p1, int p2, int n);
+void orc_merge_linear_u8 (orc_uint8 * d1, const orc_uint8 * s1, const orc_uint8 * s2, int p1, int n);
 void orc_merge_linear_u16 (orc_uint16 * d1, const orc_uint16 * s1, const orc_uint16 * s2, int p1, int p2, int n);
 void orc_splat_u16 (orc_uint16 * d1, int p1, int n);
 void orc_splat_u32 (orc_uint32 * d1, int p1, int n);
+void orc_downsample_u8 (guint8 * d1, const guint8 * s1, int n);
+void orc_downsample_u16 (guint16 * d1, const guint16 * s1, int n);
+void gst_videoscale_orc_downsample_u32 (guint8 * d1, const guint8 * s1, int n);
+void gst_videoscale_orc_downsample_yuyv (guint8 * d1, const guint8 * s1, int n);
+void gst_videoscale_orc_resample_nearest_u8 (guint8 * d1, const guint8 * s1, int p1, int p2, int n);
+void gst_videoscale_orc_resample_bilinear_u8 (guint8 * d1, const guint8 * s1, int p1, int p2, int n);
+void gst_videoscale_orc_resample_nearest_u32 (guint8 * d1, const guint8 * s1, int p1, int p2, int n);
+void gst_videoscale_orc_resample_bilinear_u32 (guint8 * d1, const guint8 * s1, int p1, int p2, int n);
+void gst_videoscale_orc_resample_merge_bilinear_u32 (guint8 * d1, guint8 * d2, const guint8 * s1, const guint8 * s2, int p1, int p2, int p3, int n);
+void gst_videoscale_orc_merge_bicubic_u8 (guint8 * d1, const guint8 * s1, const guint8 * s2, const guint8 * s3, const guint8 * s4, int p1, int p2, int p3, int p4, int n);

 #ifdef __cplusplus
 }
--- a/gst/videoscale/gstvideoscaleorc.orc
+++ b/gst/videoscale/gstvideoscaleorc.orc
@ -6,16 +6,21 @@
 .source 1 s1
 .source 1 s2
 .param 1 p1
-.param 1 p2
 .temp 2 t1
 .temp 2 t2
+.temp 1 a
+.temp 1 t
+
+loadb a, s1
+convubw t1, s1
+convubw t2, s2
+subw t2, t2, t1
+mullw t2, t2, p1
+addw t2, t2, 128
+convhwb t, t2
+addb d1, t, a
+

-mulubw t1, s1, p1
-mulubw t2, s2, p2
-addw t1, t1, t2
-addw t1, t1, 128
-shruw t1, t1, 8
-convwb d1, t1

 .function orc_merge_linear_u16
 .dest 2 d1
@ -26,21 +31,162 @@ convwb d1, t1
 .temp 4 t1
 .temp 4 t2

+# This is slightly different than the u8 case, since muluwl
+# tends to be much faster than mulll
 muluwl t1, s1, p1
 muluwl t2, s2, p2
 addl t1, t1, t2
 shrul t1, t1, 16
 convlw d1, t1

+
 .function orc_splat_u16
 .dest 2 d1
 .param 2 p1

 copyw d1, p1

+
 .function orc_splat_u32
 .dest 4 d1
 .param 4 p1

 copyl d1, p1

+
+.function orc_downsample_u8
+.dest 1 d1 guint8
+.source 2 s1 guint8
+.temp 1 t1
+.temp 1 t2
+
+splitwb t1, t2, s1
+avgub d1, t1, t2
+
+
+.function orc_downsample_u16
+.dest 2 d1 guint16
+.source 4 s1 guint16
+.temp 2 t1
+.temp 2 t2
+
+splitlw t1, t2, s1
+avguw d1, t1, t2
+
+
+.function gst_videoscale_orc_downsample_u32
+.dest 4 d1 guint8
+.source 8 s1 guint8
+.temp 4 t1
+.temp 4 t2
+
+splitql t1, t2, s1
+x4 avgub d1, t1, t2
+
+
+.function gst_videoscale_orc_downsample_yuyv
+.dest 4 d1 guint8
+.source 8 s1 guint8
+.temp 4 yyyy
+.temp 4 uvuv
+.temp 2 t1
+.temp 2 t2
+.temp 2 yy
+.temp 2 uv
+
+x4 splitwb yyyy, uvuv, s1
+x2 splitwb t1, t2, yyyy
+x2 avgub yy, t1, t2
+splitlw t1, t2, uvuv
+x2 avgub uv, t1, t2
+x2 mergebw d1, yy, uv
+
+
+
+.function gst_videoscale_orc_resample_nearest_u8
+.dest 1 d1 guint8
+.source 1 s1 guint8
+.param 4 p1
+.param 4 p2
+
+ldresnearb d1, s1, p1, p2
+
+
+.function gst_videoscale_orc_resample_bilinear_u8
+.dest 1 d1 guint8
+.source 1 s1 guint8
+.param 4 p1
+.param 4 p2
+
+ldreslinb d1, s1, p1, p2
+
+
+.function gst_videoscale_orc_resample_nearest_u32
+.dest 4 d1 guint8
+.source 4 s1 guint8
+.param 4 p1
+.param 4 p2
+
+ldresnearl d1, s1, p1, p2
+
+
+.function gst_videoscale_orc_resample_bilinear_u32
+.dest 4 d1 guint8
+.source 4 s1 guint8
+.param 4 p1
+.param 4 p2
+
+ldreslinl d1, s1, p1, p2
+
+
+.function gst_videoscale_orc_resample_merge_bilinear_u32
+.dest 4 d1 guint8
+.dest 4 d2 guint8
+.source 4 s1 guint8
+.source 4 s2 guint8
+.temp 4 a
+.temp 4 b
+.temp 4 t
+.temp 8 t1
+.temp 8 t2
+.param 4 p1
+.param 4 p2
+.param 4 p3
+
+ldreslinl b, s2, p2, p3
+storel d2, b
+x4 loadb a, s1
+x4 convubw t1, a
+x4 convubw t2, b
+x4 subw t2, t2, t1
+x4 mullw t2, t2, p1
+x4 convhwb t, t2
+x4 addb d1, t, a
+
+
+
+.function gst_videoscale_orc_merge_bicubic_u8
+.dest 1 d1 guint8
+.source 1 s1 guint8
+.source 1 s2 guint8
+.source 1 s3 guint8
+.source 1 s4 guint8
+.param 4 p1
+.param 4 p2
+.param 4 p3
+.param 4 p4
+.temp 2 t1
+.temp 2 t2
+
+mulubw t1, s2, p2
+mulubw t2, s3, p3
+addw t1, t1, t2
+mulubw t2, s1, p1
+subw t1, t1, t2
+mulubw t2, s4, p4
+subw t1, t1, t2
+addw t1, t1, 32
+shrsw t1, t1, 6
+convsuswb d1, t1
+
+
--- a/gst/videoscale/vs_image.c
+++ b/gst/videoscale/vs_image.c
@ -30,6 +30,9 @@
 #include "vs_scanline.h"
 #include "vs_image.h"

+#include "gstvideoscaleorc.h"
+#include <gst/gst.h>
+
 #define ROUND_UP_2(x)  (((x)+1)&~1)
 #define ROUND_UP_4(x)  (((x)+3)&~3)
 #define ROUND_UP_8(x)  (((x)+7)&~7)
@ -43,7 +46,7 @@ vs_image_scale_nearest_RGBA (const VSImage * dest, const VSImage * src,
  int x_increment;
  int i;
  int j;
-  int xacc;
+  int prev_j;

  if (dest->height == 1)
    y_increment = 0;
@ -57,14 +60,19 @@ vs_image_scale_nearest_RGBA (const VSImage * dest, const VSImage * src,


  acc = 0;
+  prev_j = -1;
  for (i = 0; i < dest->height; i++) {
    j = acc >> 16;

-    xacc = 0;
-    vs_scanline_resample_nearest_RGBA (dest->pixels + i * dest->stride,
-        src->pixels + j * src->stride, src->width, dest->width, &xacc,
-        x_increment);
+    if (j == prev_j) {
+      memcpy (dest->pixels + i * dest->stride,
+          dest->pixels + (i - 1) * dest->stride, dest->width * 4);
+    } else {
+      gst_videoscale_orc_resample_nearest_u32 (dest->pixels + i * dest->stride,
+          src->pixels + j * src->stride, 0, x_increment, dest->width);
+    }

+    prev_j = j;
    acc += y_increment;
  }
 }
@ -76,15 +84,12 @@ vs_image_scale_linear_RGBA (const VSImage * dest, const VSImage * src,
  int acc;
  int y_increment;
  int x_increment;
-  uint8_t *tmp1;
-  uint8_t *tmp2;
  int y1;
  int y2;
  int i;
  int j;
  int x;
  int dest_size;
-  int xacc;

  if (dest->height == 1)
    y_increment = 0;
@ -98,64 +103,34 @@ vs_image_scale_linear_RGBA (const VSImage * dest, const VSImage * src,

  dest_size = dest->width * 4;

-  tmp1 = tmpbuf;
-  tmp2 = tmpbuf + dest_size;
+#define LINE(x) ((tmpbuf) + (dest_size)*((x)&1))

  acc = 0;
-  xacc = 0;
  y2 = -1;
-  vs_scanline_resample_linear_RGBA (tmp1, src->pixels, src->width, dest->width,
-      &xacc, x_increment);
+  gst_videoscale_orc_resample_bilinear_u32 (LINE (0), src->pixels,
+      0, x_increment, dest->width);
  y1 = 0;
  for (i = 0; i < dest->height; i++) {
    j = acc >> 16;
    x = acc & 0xffff;

    if (x == 0) {
-      if (j == y1) {
-        memcpy (dest->pixels + i * dest->stride, tmp1, dest_size);
-      } else if (j == y2) {
-        memcpy (dest->pixels + i * dest->stride, tmp2, dest_size);
-      } else {
-        xacc = 0;
-        vs_scanline_resample_linear_RGBA (tmp1, src->pixels + j * src->stride,
-            src->width, dest->width, &xacc, x_increment);
-        y1 = j;
-        memcpy (dest->pixels + i * dest->stride, tmp1, dest_size);
-      }
+      memcpy (dest->pixels + i * dest->stride, LINE (j), dest_size);
    } else {
-      if (j == y1) {
-        if (j + 1 != y2) {
-          xacc = 0;
-          vs_scanline_resample_linear_RGBA (tmp2,
-              src->pixels + (j + 1) * src->stride, src->width, dest->width,
-              &xacc, x_increment);
-          y2 = j + 1;
-        }
-        vs_scanline_merge_linear_RGBA (dest->pixels + i * dest->stride,
-            tmp1, tmp2, dest->width, x);
-      } else if (j == y2) {
-        if (j + 1 != y1) {
-          xacc = 0;
-          vs_scanline_resample_linear_RGBA (tmp1,
-              src->pixels + (j + 1) * src->stride, src->width, dest->width,
-              &xacc, x_increment);
-          y1 = j + 1;
-        }
-        vs_scanline_merge_linear_RGBA (dest->pixels + i * dest->stride,
-            tmp2, tmp1, dest->width, x);
+      if (j > y1) {
+        gst_videoscale_orc_resample_bilinear_u32 (LINE (j),
+            src->pixels + j * src->stride, 0, x_increment, dest->width);
+        y1++;
+      }
+      if (j >= y1) {
+        gst_videoscale_orc_resample_merge_bilinear_u32 (dest->pixels +
+            i * dest->stride, LINE (j + 1), LINE (j),
+            src->pixels + (j + 1) * src->stride, (x >> 8), 0, x_increment,
+            dest->width);
+        y1++;
      } else {
-        xacc = 0;
-        vs_scanline_resample_linear_RGBA (tmp1, src->pixels + j * src->stride,
-            src->width, dest->width, &xacc, x_increment);
-        y1 = j;
-        xacc = 0;
-        vs_scanline_resample_linear_RGBA (tmp2,
-            src->pixels + (j + 1) * src->stride, src->width, dest->width, &xacc,
-            x_increment);
-        y2 = (j + 1);
-        vs_scanline_merge_linear_RGBA (dest->pixels + i * dest->stride,
-            tmp1, tmp2, dest->width, x);
+        orc_merge_linear_u8 (dest->pixels + i * dest->stride,
+            LINE (j), LINE (j + 1), (x >> 8), dest->width * 4);
      }
    }

@ -563,7 +538,6 @@ vs_image_scale_nearest_Y (const VSImage * dest, const VSImage * src,
  int x_increment;
  int i;
  int j;
-  int xacc;

  if (dest->height == 1)
    y_increment = 0;
@ -579,11 +553,8 @@ vs_image_scale_nearest_Y (const VSImage * dest, const VSImage * src,
  for (i = 0; i < dest->height; i++) {
    j = acc >> 16;

-    xacc = 0;
-    vs_scanline_resample_nearest_Y (dest->pixels + i * dest->stride,
-        src->pixels + j * src->stride, src->width, dest->width, &xacc,
-        x_increment);
-
+    gst_videoscale_orc_resample_nearest_u8 (dest->pixels + i * dest->stride,
+        src->pixels + j * src->stride, 0, x_increment, dest->width);
    acc += y_increment;
  }
 }
@ -623,8 +594,8 @@ vs_image_scale_linear_Y (const VSImage * dest, const VSImage * src,
  acc = 0;
  xacc = 0;
  y2 = -1;
-  vs_scanline_resample_linear_Y (tmp1, src->pixels, src->width, dest->width,
-      &xacc, x_increment);
+  gst_videoscale_orc_resample_bilinear_u8 (tmp1, src->pixels,
+      0, x_increment, dest->width);
  y1 = 0;
  for (i = 0; i < dest->height; i++) {
    j = acc >> 16;
@ -637,8 +608,8 @@ vs_image_scale_linear_Y (const VSImage * dest, const VSImage * src,
        memcpy (dest->pixels + i * dest->stride, tmp2, dest_size);
      } else {
        xacc = 0;
-        vs_scanline_resample_linear_Y (tmp1, src->pixels + j * src->stride,
-            src->width, dest->width, &xacc, x_increment);
+        gst_videoscale_orc_resample_bilinear_u8 (tmp1,
+            src->pixels + j * src->stride, 0, x_increment, dest->width);
        y1 = j;
        memcpy (dest->pixels + i * dest->stride, tmp1, dest_size);
      }
@ -646,35 +617,42 @@ vs_image_scale_linear_Y (const VSImage * dest, const VSImage * src,
      if (j == y1) {
        if (j + 1 != y2) {
          xacc = 0;
-          vs_scanline_resample_linear_Y (tmp2,
-              src->pixels + (j + 1) * src->stride, src->width, dest->width,
-              &xacc, x_increment);
+          gst_videoscale_orc_resample_bilinear_u8 (tmp2,
+              src->pixels + (j + 1) * src->stride, 0, x_increment, dest->width);
          y2 = j + 1;
        }
-        vs_scanline_merge_linear_Y (dest->pixels + i * dest->stride,
-            tmp1, tmp2, dest->width, x);
+        if ((x >> 8) == 0) {
+          memcpy (dest->pixels + i * dest->stride, tmp1, dest->width);
+        } else {
+          orc_merge_linear_u8 (dest->pixels + i * dest->stride,
+              tmp1, tmp2, (x >> 8), dest->width);
+        }
      } else if (j == y2) {
        if (j + 1 != y1) {
          xacc = 0;
-          vs_scanline_resample_linear_Y (tmp1,
-              src->pixels + (j + 1) * src->stride, src->width, dest->width,
-              &xacc, x_increment);
+          gst_videoscale_orc_resample_bilinear_u8 (tmp1,
+              src->pixels + (j + 1) * src->stride, 0, x_increment, dest->width);
          y1 = j + 1;
        }
-        vs_scanline_merge_linear_Y (dest->pixels + i * dest->stride,
-            tmp2, tmp1, dest->width, x);
+        if ((x >> 8) == 0) {
+          memcpy (dest->pixels + i * dest->stride, tmp2, dest->width);
+        } else {
+          orc_merge_linear_u8 (dest->pixels + i * dest->stride,
+              tmp2, tmp1, (x >> 8), dest->width);
+        }
      } else {
-        xacc = 0;
-        vs_scanline_resample_linear_Y (tmp1, src->pixels + j * src->stride,
-            src->width, dest->width, &xacc, x_increment);
+        gst_videoscale_orc_resample_bilinear_u8 (tmp1,
+            src->pixels + j * src->stride, 0, x_increment, dest->width);
        y1 = j;
-        xacc = 0;
-        vs_scanline_resample_linear_Y (tmp2,
-            src->pixels + (j + 1) * src->stride, src->width, dest->width, &xacc,
-            x_increment);
+        gst_videoscale_orc_resample_bilinear_u8 (tmp2,
+            src->pixels + (j + 1) * src->stride, 0, x_increment, dest->width);
        y2 = (j + 1);
-        vs_scanline_merge_linear_Y (dest->pixels + i * dest->stride,
-            tmp1, tmp2, dest->width, x);
+        if ((x >> 8) == 0) {
+          memcpy (dest->pixels + i * dest->stride, tmp1, dest->width);
+        } else {
+          orc_merge_linear_u8 (dest->pixels + i * dest->stride,
+              tmp1, tmp2, (x >> 8), dest->width);
+        }
      }
    }

--- a/gst/videoscale/vs_scanline.c
+++ b/gst/videoscale/vs_scanline.c
@ -28,6 +28,7 @@
 #include "vs_scanline.h"

 #include "gstvideoscaleorc.h"
+#include <gst/gst.h>

 #include <string.h>

@ -36,31 +37,17 @@
 void
 vs_scanline_downsample_Y (uint8_t * dest, uint8_t * src, int n)
 {
-  int i;
-
-  for (i = 0; i < n; i++) {
-    dest[i] = (src[i * 2] + src[i * 2 + 1]) / 2;
-  }
+  orc_downsample_u8 (dest, src, n);
 }

 void
 vs_scanline_resample_nearest_Y (uint8_t * dest, uint8_t * src, int src_width,
    int n, int *accumulator, int increment)
 {
-  int acc = *accumulator;
-  int i;
-  int j;
-  int x;
+  gst_videoscale_orc_resample_nearest_u8 (dest, src,
+      *accumulator, increment, n);

-  for (i = 0; i < n; i++) {
-    j = acc >> 16;
-    x = acc & 0xffff;
-    dest[i] = (x < 32768 || j + 1 >= src_width) ? src[j] : src[j + 1];
-
-    acc += increment;
-  }
-
-  *accumulator = acc;
+  *accumulator += n * increment;
 }

 #include <glib.h>
@ -68,24 +55,10 @@ void
 vs_scanline_resample_linear_Y (uint8_t * dest, uint8_t * src, int src_width,
    int n, int *accumulator, int increment)
 {
-  int acc = *accumulator;
-  int i;
-  int j;
-  int x;
+  gst_videoscale_orc_resample_bilinear_u8 (dest, src,
+      *accumulator, increment, n);

-  for (i = 0; i < n; i++) {
-    j = acc >> 16;
-    x = acc & 0xffff;
-
-    if (j + 1 < src_width)
-      dest[i] = (src[j] * (65536 - x) + src[j + 1] * x) >> 16;
-    else
-      dest[i] = src[j];
-
-    acc += increment;
-  }
-
-  *accumulator = acc;
+  *accumulator += n * increment;
 }

 void
@ -97,19 +70,14 @@ vs_scanline_merge_linear_Y (uint8_t * dest, uint8_t * src1, uint8_t * src2,
  if (value == 0) {
    memcpy (dest, src1, n);
  } else {
-    orc_merge_linear_u8 (dest, src1, src2, 256 - value, value, n);
+    orc_merge_linear_u8 (dest, src1, src2, value, n);
  }
 }

 void
 vs_scanline_downsample_Y16 (uint8_t * dest, uint8_t * src, int n)
 {
-  int i;
-  uint16_t *d = (uint16_t *) dest, *s = (uint16_t *) src;
-
-  for (i = 0; i < n; i++) {
-    d[i] = (s[i * 2] + s[i * 2 + 1]) / 2;
-  }
+  orc_downsample_u16 ((uint16_t *) dest, (uint16_t *) src, n);
 }

 void
@ -178,80 +146,27 @@ vs_scanline_merge_linear_Y16 (uint8_t * dest, uint8_t * src1, uint8_t * src2,
 void
 vs_scanline_downsample_RGBA (uint8_t * dest, uint8_t * src, int n)
 {
-  int i;
-
-  for (i = 0; i < n; i++) {
-    dest[i * 4 + 0] = (src[i * 8 + 0] + src[i * 8 + 4]) / 2;
-    dest[i * 4 + 1] = (src[i * 8 + 1] + src[i * 8 + 5]) / 2;
-    dest[i * 4 + 2] = (src[i * 8 + 2] + src[i * 8 + 6]) / 2;
-    dest[i * 4 + 3] = (src[i * 8 + 3] + src[i * 8 + 7]) / 2;
-  }
+  gst_videoscale_orc_downsample_u32 (dest, src, n);
 }

 void
 vs_scanline_resample_nearest_RGBA (uint8_t * dest, uint8_t * src, int src_width,
    int n, int *accumulator, int increment)
 {
-  int acc = *accumulator;
-  int i;
-  int j;
-  int x;
+  gst_videoscale_orc_resample_nearest_u32 (dest, src,
+      *accumulator, increment, n);

-  for (i = 0; i < n; i++) {
-    j = acc >> 16;
-    x = acc & 0xffff;
-
-    if (j + 1 < src_width) {
-      dest[i * 4 + 0] = (x < 32768) ? src[j * 4 + 0] : src[j * 4 + 4];
-      dest[i * 4 + 1] = (x < 32768) ? src[j * 4 + 1] : src[j * 4 + 5];
-      dest[i * 4 + 2] = (x < 32768) ? src[j * 4 + 2] : src[j * 4 + 6];
-      dest[i * 4 + 3] = (x < 32768) ? src[j * 4 + 3] : src[j * 4 + 7];
-    } else {
-      dest[i * 4 + 0] = src[j * 4 + 0];
-      dest[i * 4 + 1] = src[j * 4 + 1];
-      dest[i * 4 + 2] = src[j * 4 + 2];
-      dest[i * 4 + 3] = src[j * 4 + 3];
-    }
-
-    acc += increment;
-  }
-
-  *accumulator = acc;
+  *accumulator += n * increment;
 }

 void
 vs_scanline_resample_linear_RGBA (uint8_t * dest, uint8_t * src, int src_width,
    int n, int *accumulator, int increment)
 {
-  int acc = *accumulator;
-  int i;
-  int j;
-  int x;
+  gst_videoscale_orc_resample_bilinear_u32 (dest, src,
+      *accumulator, increment, n);

-  for (i = 0; i < n; i++) {
-    j = acc >> 16;
-    x = acc & 0xffff;
-
-    if (j + 1 < src_width) {
-      dest[i * 4 + 0] =
-          (src[j * 4 + 0] * (65536 - x) + src[j * 4 + 4] * x) >> 16;
-      dest[i * 4 + 1] =
-          (src[j * 4 + 1] * (65536 - x) + src[j * 4 + 5] * x) >> 16;
-      dest[i * 4 + 2] =
-          (src[j * 4 + 2] * (65536 - x) + src[j * 4 + 6] * x) >> 16;
-      dest[i * 4 + 3] =
-          (src[j * 4 + 3] * (65536 - x) + src[j * 4 + 7] * x) >> 16;
-    } else {
-      dest[i * 4 + 0] = src[j * 4 + 0];
-      dest[i * 4 + 1] = src[j * 4 + 1];
-      dest[i * 4 + 2] = src[j * 4 + 2];
-      dest[i * 4 + 3] = src[j * 4 + 3];
-    }
-
-    acc += increment;
-  }
-
-  *accumulator = acc;
+  *accumulator += n * increment;
 }

 void
@ -263,7 +178,7 @@ vs_scanline_merge_linear_RGBA (uint8_t * dest, uint8_t * src1, uint8_t * src2,
  if (value == 0) {
    memcpy (dest, src1, n * 4);
  } else {
-    orc_merge_linear_u8 (dest, src1, src2, 256 - value, value, n * 4);
+    orc_merge_linear_u8 (dest, src1, src2, value, n * 4);
  }
 }

@ -348,7 +263,7 @@ vs_scanline_merge_linear_RGB (uint8_t * dest, uint8_t * src1, uint8_t * src2,
  if (value == 0) {
    memcpy (dest, src1, n * 3);
  } else {
-    orc_merge_linear_u8 (dest, src1, src2, 256 - value, value, n * 3);
+    orc_merge_linear_u8 (dest, src1, src2, value, n * 3);
  }
 }

@ -361,14 +276,7 @@ vs_scanline_merge_linear_RGB (uint8_t * dest, uint8_t * src1, uint8_t * src2,
 void
 vs_scanline_downsample_YUYV (uint8_t * dest, uint8_t * src, int n)
 {
-  int i;
-
-  for (i = 0; i < n; i++) {
-    dest[i * 4 + 0] = (src[i * 8 + 0] + src[i * 8 + 2]) / 2;
-    dest[i * 4 + 1] = (src[i * 8 + 1] + src[i * 8 + 5]) / 2;
-    dest[i * 4 + 2] = (src[i * 8 + 4] + src[i * 8 + 6]) / 2;
-    dest[i * 4 + 3] = (src[i * 8 + 3] + src[i * 8 + 7]) / 2;
-  }
+  gst_videoscale_orc_downsample_yuyv (dest, src, n);
 }

 void
@ -477,7 +385,7 @@ vs_scanline_merge_linear_YUYV (uint8_t * dest, uint8_t * src1, uint8_t * src2,
  if (value == 0) {
    memcpy (dest, src1, quads * 4);
  } else {
-    orc_merge_linear_u8 (dest, src1, src2, 256 - value, value, quads * 4);
+    orc_merge_linear_u8 (dest, src1, src2, value, quads * 4);
  }
 }

@ -606,7 +514,7 @@ vs_scanline_merge_linear_UYVY (uint8_t * dest, uint8_t * src1,
  if (value == 0) {
    memcpy (dest, src1, quads * 4);
  } else {
-    orc_merge_linear_u8 (dest, src1, src2, 256 - value, value, quads * 4);
+    orc_merge_linear_u8 (dest, src1, src2, value, quads * 4);
  }
 }