Converted the scaler to all asm

Original commit message from CVS:
Converted the scaler to all asm
This commit is contained in:
Wim Taymans 2001-01-26 22:10:12 +00:00
parent de37358730
commit 70dfc6bb1d
3 changed files with 55 additions and 40 deletions

View file

@ -3,7 +3,7 @@ filterdir = $(libdir)/gst
filter_LTLIBRARIES = libgstvideoscale.la
if HAVE_CPU_I386
SCALER = gstscale_x86.c
SCALER = gstscale_x86.c gstscale_x86_asm.s
else
SCALER =
endif
@ -16,4 +16,4 @@ libgstvideoscaleinclude_HEADERS = gstvideoscale.h
noinst_HEADERS = gstscale_x86.h
#CFLAGS += -S -O1 $(FOMIT_FRAME_POINTER) -funroll-all-loops -finline-functions -ffast-math
CFLAGS = -g $(GLIB_CFLAGS) $(GST_CFLAGS) $(XML_CFLAGS) -O5 -fomit-frame-pointer -ffast-math
CFLAGS = $(GLIB_CFLAGS) $(GST_CFLAGS) $(XML_CFLAGS) -O5 -fomit-frame-pointer -ffast-math

View file

@ -78,41 +78,3 @@ gst_videoscale_generate_rowbytes_x86 (unsigned char *copy_row, int src_w, int ds
GST_DEBUG (0,"scaler start/end %p %p %p\n", copy_row, eip, (void*)(eip-copy_row));
}
/*
 * Nearest-neighbour scale of one plane using the run-time generated row
 * copier stored in scale->copy_row.
 *
 * scale: scaler instance; scale->temp is set to scale->copy_row and is the
 *        address that gets called for every destination row.
 * src:   first source row; advanced by sw bytes per source row consumed.
 * dest:  first destination row; advanced by dw bytes per row produced.
 * sw/sh: source width and height.
 * dw/dh: destination width and height.
 *
 * Vertical stepping uses 16.16 fixed point: pos accumulates inc = sh/dh and
 * the source pointer moves down one row for every whole unit above 1.0.
 * Nothing is scaled when dh <= 0 (this also avoids dividing by zero).
 */
void
gst_videoscale_scale_nearest_x86 (GstVideoScale *scale,
                                  unsigned char *src,
                                  unsigned char *dest,
                                  int sw, int sh, int dw, int dh)
{
  int pos, inc, y;
  int u1, u2;   /* dummy outputs: tell the compiler %edi/%esi are modified */

  scale->temp = scale->copy_row;

  GST_DEBUG (0,"videoscale: scaling nearest %p %p %p %d\n", scale->copy_row, src, dest, dw);

  /* Only compute the step when there are rows to produce; dividing by a
   * zero dh would raise a divide error before the loop is even reached. */
  if (dh > 0) {
    pos = 0x10000;
    inc = (sh<<16)/dh;

    for (y = dh; y > 0; y--) {
      /* skip the source rows that map to no destination row */
      while (pos >0x10000) {
        src += sw;
        pos-=0x10000;
      }

      /* Call the generated row code with %edi = dest and %esi = src.
       * The template itself overwrites %eax, so it must be listed as a
       * clobber; otherwise the compiler may keep a live value in it or
       * pick it for operand %2. */
      __asm__ __volatile__ (
        "movl %2, %%eax\n\t"
        "call *%%eax\n\t"
        : "=&D" (u1), "=&S" (u2)
        : "g" (scale->temp), "0" (dest), "1" (src)
        : "memory", "eax", "cc" );

      dest+= dw;
      pos += inc;
    }
  }

  GST_DEBUG(0,"videoscale: scaling nearest done %p\n", scale->copy_row);
}

View file

@ -0,0 +1,53 @@
/*
 * void gst_videoscale_scale_nearest_x86 (GstVideoScale *scale,
 *                                        unsigned char *src,
 *                                        unsigned char *dest,
 *                                        int sw, int sh, int dw, int dh)
 *
 * Syntax: GNU as, AT&T, 32-bit x86. All arguments are read from the stack.
 *
 * Nearest-neighbour vertical scaler: for each of the dh destination rows it
 * calls the code whose address is stored at SCALE_TEMP(scale), passing
 *   %edi = current destination row, %esi = current source row.
 * Vertical position is tracked in 16.16 fixed point (FIXED_ONE == 1.0).
 *
 * Registers: %ebp = current source row, %ecx = per-row increment
 *            ((sh << 16) / dh). %ebp, %edi and %esi are saved and restored;
 *            %eax, %ecx and %edx are used as scratch.
 * NOTE(review): %ecx is live across the indirect call, so the called row
 *            code must not modify it - confirm against the row generator.
 *
 * Returns immediately (after storing SCALE_TEMP) when dh <= 0, which also
 * keeps idivl from faulting on dh == 0.
 *
 * Frame after the prologue (offsets from %esp):
 *   saved %esi, saved %edi, saved %ebp, LOC_POS, LOC_ROWS, return address,
 *   then the seven arguments starting at ARG_SCALE.
 */
        .text
        .align  4
        .globl  gst_videoscale_scale_nearest_x86
        .type   gst_videoscale_scale_nearest_x86,@function

        /* Byte offsets inside GstVideoScale. They must match the C struct
         * layout - verify against the header whenever the struct changes. */
        .equ    SCALE_COPY_ROW, 28              # address of the row-copy code buffer
        .equ    SCALE_TEMP, 8220                # pointer that is called per row

        .equ    FIXED_ONE, 65536                # 1.0 in 16.16 fixed point

        /* Stack slots, relative to %esp after the prologue. */
        .equ    LOC_POS, 12                     # fixed-point source position
        .equ    LOC_ROWS, 16                    # destination rows still to emit
        .equ    ARG_SCALE, 24
        .equ    ARG_SRC, 28
        .equ    ARG_DEST, 32
        .equ    ARG_SW, 36
        .equ    ARG_SH, 40
        .equ    ARG_DW, 44
        .equ    ARG_DH, 48

gst_videoscale_scale_nearest_x86:
        subl    $8, %esp                        # room for LOC_POS and LOC_ROWS
        pushl   %ebp
        pushl   %edi
        pushl   %esi

        movl    ARG_SRC(%esp), %ebp             # ebp = src
        movl    ARG_SCALE(%esp), %eax
        leal    SCALE_COPY_ROW(%eax), %edx
        movl    %edx, SCALE_TEMP(%eax)          # scale->temp = scale->copy_row

        movl    ARG_DH(%esp), %eax
        testl   %eax, %eax
        jle     .Lnearest_done                  # no rows; must precede idivl
        movl    %eax, LOC_ROWS(%esp)            # rows = dh
        movl    $FIXED_ONE, LOC_POS(%esp)       # pos = 1.0

        movl    ARG_SH(%esp), %eax
        sall    $16, %eax
        cltd                                    # sign-extend eax into edx:eax
        idivl   ARG_DH(%esp)
        movl    %eax, %ecx                      # ecx = inc = (sh << 16) / dh
        jmp     .Lnearest_check_pos

        .p2align 4,,7
.Lnearest_advance_src:
        addl    ARG_SW(%esp), %ebp              # src += sw
        subl    $FIXED_ONE, LOC_POS(%esp)       # pos -= 1.0
.Lnearest_check_pos:
        cmpl    $FIXED_ONE, LOC_POS(%esp)
        jg      .Lnearest_advance_src           # while (pos > 1.0)

        movl    ARG_DEST(%esp), %edi            # edi = dest row
        movl    %ebp, %esi                      # esi = src row
        movl    ARG_SCALE(%esp), %edx
        movl    SCALE_TEMP(%edx), %eax
        call    *%eax                           # run the row code

        movl    ARG_DW(%esp), %eax
        addl    %eax, ARG_DEST(%esp)            # dest += dw
        addl    %ecx, LOC_POS(%esp)             # pos += inc
        decl    LOC_ROWS(%esp)                  # decl sets the flags jg tests
        jg      .Lnearest_check_pos

.Lnearest_done:
        popl    %esi
        popl    %edi
        popl    %ebp
        addl    $8, %esp
        ret
        .size   gst_videoscale_scale_nearest_x86, .-gst_videoscale_scale_nearest_x86