gst/videoscale/vs_scanline.c: C-level optimization of the RGBA nearest neighbour function.

Original commit message from CVS:
Reviewed by Edward Hervey  <edward@fluendo.com>
* gst/videoscale/vs_scanline.c: (vs_scanline_resample_nearest_RGBA):
C-level optimization of the RGBA nearest neighbour function.
Eventually this might end up in liboil with vectorized versions.
This commit is contained in:
Edward Hervey 2006-02-16 17:06:46 +00:00
parent 5b788a8a66
commit ced6e8445a
2 changed files with 50 additions and 6 deletions

View file

@ -1,3 +1,11 @@
2006-02-16 Mathieu Garcia <b0nk at free dot fr>
Reviewed by Edward Hervey <edward@fluendo.com>
* gst/videoscale/vs_scanline.c: (vs_scanline_resample_nearest_RGBA):
C-level optimization of the RGBA nearest neighbour function.
Eventually this might end up in liboil with vectorized versions.
2006-02-16 Tim-Philipp Müller <tim at centricular dot net>
* gst-libs/gst/audio/multichannel.c:

View file

@ -29,6 +29,7 @@
#include <liboil/liboil.h>
#include <glib.h>
#include <stdio.h>
/* greyscale, i.e., single component */
@ -114,22 +115,57 @@ void
vs_scanline_resample_nearest_RGBA (guint8 * dest, guint8 * src, int n,
    int *accumulator, int increment)
{
  /*
   * Nearest-neighbour resampling of one scanline of 4-byte pixels.
   *
   * dest:        output buffer; n * 4 bytes are written.
   * src:         input scanline, read 4 bytes per selected pixel.
   * n:           number of output pixels to produce.
   * accumulator: in/out source position. The code treats it as a
   *              fixed-point value: the bits above bit 15 are the source
   *              pixel index, the low 16 bits are the fraction. The
   *              position reached after the last output pixel is stored
   *              back on return.
   * increment:   value added to the position after each output pixel,
   *              in the same fixed-point format.
   *
   * When the fraction is at least one half (32768), the pixel at index + 1
   * is used, so the caller must guarantee that this pixel is readable in
   * src for every position visited.
   */
  const guint8 *pixel;
  int acc = *accumulator;
  int i;
  int j;
  int x;

  for (i = 0; i < n; i++) {
    j = acc >> 16;              /* integer part: source pixel index */
    x = acc & 0xffff;           /* fractional part of the position */

    /* Round to nearest: below one half keep pixel j, otherwise take j + 1. */
    pixel = src + ((x < 32768) ? j : j + 1) * 4;

    dest[0] = pixel[0];
    dest[1] = pixel[1];
    dest[2] = pixel[2];
    dest[3] = pixel[3];
    dest += 4;

    acc += increment;
  }

  *accumulator = acc;
}