mirror of
https://gitlab.freedesktop.org/gstreamer/gstreamer.git
synced 2025-02-08 07:22:32 +00:00
gst/videoscale/vs_scanline.c: C-level optimization of the RGBA nearest neighbour function.
Original commit message from CVS: Reviewed by Edward Hervey <edward@fluendo.com>. * gst/videoscale/vs_scanline.c (vs_scanline_resample_nearest_RGBA): C-level optimization of the RGBA nearest-neighbour function. Eventually this might end up in liboil with vectorized versions.
This commit is contained in:
parent
5b788a8a66
commit
ced6e8445a
2 changed files with 50 additions and 6 deletions
|
@ -1,3 +1,11 @@
|
||||||
|
2006-02-16 Mathieu Garcia <b0nk at free dot fr>
|
||||||
|
|
||||||
|
Reviewed by Edward Hervey <edward@fluendo.com>
|
||||||
|
|
||||||
|
* gst/videoscale/vs_scanline.c: (vs_scanline_resample_nearest_RGBA):
|
||||||
|
C-level optimization of the RGBA nearest neighbour function.
|
||||||
|
Eventually this might end up in liboil with vectorized versions.
|
||||||
|
|
||||||
2006-02-16 Tim-Philipp Müller <tim at centricular dot net>
|
2006-02-16 Tim-Philipp Müller <tim at centricular dot net>
|
||||||
|
|
||||||
* gst-libs/gst/audio/multichannel.c:
|
* gst-libs/gst/audio/multichannel.c:
|
||||||
|
|
|
@ -29,6 +29,7 @@
|
||||||
|
|
||||||
#include <liboil/liboil.h>
|
#include <liboil/liboil.h>
|
||||||
#include <glib.h>
|
#include <glib.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
/* greyscale, i.e., single component */
|
/* greyscale, i.e., single component */
|
||||||
|
|
||||||
|
@ -114,22 +115,57 @@ void
|
||||||
vs_scanline_resample_nearest_RGBA (guint8 * dest, guint8 * src, int n,
|
vs_scanline_resample_nearest_RGBA (guint8 * dest, guint8 * src, int n,
|
||||||
int *accumulator, int increment)
|
int *accumulator, int increment)
|
||||||
{
|
{
|
||||||
|
guint8 *tmpsrc;
|
||||||
int acc = *accumulator;
|
int acc = *accumulator;
|
||||||
int i;
|
int i;
|
||||||
int j;
|
int j;
|
||||||
int x;
|
int x;
|
||||||
|
|
||||||
for (i = 0; i < n; i++) {
|
/* Optimization Pass #1 :
|
||||||
|
* - Unroll loop by 16
|
||||||
|
* - Pointer arithmetic (most CPUs have DAGs!)
|
||||||
|
* - Avoid useless branching
|
||||||
|
*/
|
||||||
|
for (i = 0, tmpsrc = src; i < n; i++) {
|
||||||
j = acc >> 16;
|
j = acc >> 16;
|
||||||
x = acc & 0xffff;
|
x = acc & 0xffff;
|
||||||
dest[i * 4 + 0] = (x < 32768) ? src[j * 4 + 0] : src[j * 4 + 4];
|
|
||||||
dest[i * 4 + 1] = (x < 32768) ? src[j * 4 + 1] : src[j * 4 + 5];
|
|
||||||
dest[i * 4 + 2] = (x < 32768) ? src[j * 4 + 2] : src[j * 4 + 6];
|
|
||||||
dest[i * 4 + 3] = (x < 32768) ? src[j * 4 + 3] : src[j * 4 + 7];
|
|
||||||
|
|
||||||
acc += increment;
|
if (x < 32768) {
|
||||||
|
tmpsrc = src + j * 4;
|
||||||
|
*dest++ = *tmpsrc++;
|
||||||
|
|
||||||
|
/* We do it here to avoid low-level instruction locks */
|
||||||
|
acc += increment;
|
||||||
|
|
||||||
|
*dest++ = *tmpsrc++;
|
||||||
|
*dest++ = *tmpsrc++;
|
||||||
|
*dest++ = *tmpsrc++;
|
||||||
|
} else {
|
||||||
|
tmpsrc = src + (j + 1) * 4;;
|
||||||
|
*dest++ = *tmpsrc++;
|
||||||
|
|
||||||
|
/* We do it here to avoid low-level instruction locks */
|
||||||
|
acc += increment;
|
||||||
|
|
||||||
|
*dest++ = *tmpsrc++;
|
||||||
|
*dest++ = *tmpsrc++;
|
||||||
|
*dest++ = *tmpsrc++;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* --- Unoptimized code BEGIN ---
|
||||||
|
for (i = 0; i < n; i++) {
|
||||||
|
j = acc >> 16;
|
||||||
|
x = acc & 0xffff;
|
||||||
|
dest[i * 4 + 0] = (x < 32768) ? src[j * 4 + 0] : src[j * 4 + 4];
|
||||||
|
dest[i * 4 + 1] = (x < 32768) ? src[j * 4 + 1] : src[j * 4 + 5];
|
||||||
|
dest[i * 4 + 2] = (x < 32768) ? src[j * 4 + 2] : src[j * 4 + 6];
|
||||||
|
dest[i * 4 + 3] = (x < 32768) ? src[j * 4 + 3] : src[j * 4 + 7];
|
||||||
|
|
||||||
|
acc += increment;
|
||||||
|
}
|
||||||
|
--- Unoptimized code END --- */
|
||||||
|
|
||||||
*accumulator = acc;
|
*accumulator = acc;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue