diff --git a/libs/Makefile.am b/libs/Makefile.am index d2e671cd32..d02f8815fa 100644 --- a/libs/Makefile.am +++ b/libs/Makefile.am @@ -1,3 +1,3 @@ -SUBDIRS = riff colorspace getbits putbits videoscale winloader +SUBDIRS = riff colorspace getbits putbits videoscale winloader idct -DIST_SUBDIRS = riff colorspace getbits putbits videoscale winloader +DIST_SUBDIRS = riff colorspace getbits putbits videoscale winloader idct diff --git a/libs/getbits/gstgetbits.c b/libs/getbits/gstgetbits.c index 07ac191b8b..56f1aff456 100644 --- a/libs/getbits/gstgetbits.c +++ b/libs/getbits/gstgetbits.c @@ -200,9 +200,9 @@ void getbits_back_int(gst_getbits_t *gb,unsigned long bits) { } } gb->dword = bswap_32(*((unsigned long *)(gb->ptr))); - fprintf(stderr,"orignal new loaded word is %08x\n",gb->dword); + fprintf(stderr,"orignal new loaded word is %08lx\n",gb->dword); gb->dword <<= (32 - gb->bits); - fprintf(stderr,"shifted (by %lu) word is %08x\n",gb->bits,gb->dword); + fprintf(stderr,"shifted (by %lu) word is %08lx\n",gb->bits,gb->dword); } void getbits_byteback_int(gst_getbits_t *gb,unsigned long bytes) { diff --git a/libs/getbits/gstgetbits_inl.h b/libs/getbits/gstgetbits_inl.h index c1e1d11b73..ca894a241b 100644 --- a/libs/getbits/gstgetbits_inl.h +++ b/libs/getbits/gstgetbits_inl.h @@ -277,7 +277,7 @@ extern unsigned long gst_getbits_nBitMask[]; } -#define gst_flushbits(gb, num) \ +#define gst_flushbitsn(gb, num) \ { \ (gb)->bits += num; \ \ diff --git a/libs/idct/Makefile.am b/libs/idct/Makefile.am new file mode 100644 index 0000000000..a7c6a20e9a --- /dev/null +++ b/libs/idct/Makefile.am @@ -0,0 +1,19 @@ +filterdir = $(libdir)/gst + +filter_LTLIBRARIES = libgstidct.la + +libgstidct_la_SOURCES = fastintidct.c floatidct.c gstidct.c intidct.c mmxidct.S mmx32idct.c + +bin_PROGRAMS = ieeetest + +ieeetest_SOURCES = ieeetest.c +ieeetest_LDADD = libgstidct.la $(GLIB_LIBS) $(GTK_LIBS) $(top_srcdir)/gst/libgst.la +ieeetest_CFLAGS = $(shell gnome-config --cflags gnomeui) -g -Wall 
+ieeetest_LDFLAGS = $(shell gnome-config --libs gnomeui) + +noinst_HEADERS = gstidct.h + +CFLAGS += -Wall -O2 -funroll-all-loops -finline-functions -ffast-math + +INCLUDES = $(GLIB_CFLAGS) $(GTK_CFLAGS) -I$(top_srcdir) -I$(top_srcdir)/include +LDADD = $(GLIB_LIBS) $(GTK_LIBS) $(top_srcdir)/gst/libgst.la diff --git a/libs/idct/README b/libs/idct/README new file mode 100644 index 0000000000..600f3da921 --- /dev/null +++ b/libs/idct/README @@ -0,0 +1,48 @@ +This archive contains a quick & dirty implementation of the IEEE Standard +1180-1990 accuracy test for inverse DCT. It is not guaranteed to be +correct ... but if you find any bugs, please let me know (by email to +tgl@cs.cmu.edu). + +The test harness consists of the C program ieeetest.c and shell script +doieee. For comparison purposes I have also supplied a copy of jrevdct.c, +the inverse DCT routine from release 4 of the Independent JPEG Group's +free JPEG software. (jrevdct.c is slightly modified from the IJG release +so that it will compile without the IJG include files.) jrevdct.c passes +the 1180 test --- or at least, this program thinks so. jrevdct.out is +the output from a test run. + +Note that numerical results may vary somewhat across machines. This appears +to be mostly due to differing results from the cosine function. + + +INSTALLATION: + Check the Makefile, change CC and CFLAGS if needed. Then say "make". +If your C compiler is non-ANSI, you may need to change includes and/or +function headers. + + To test a different IDCT routine, link with that routine instead of +jrevdct.o. You will need to modify dct.h and/or ieeetest.c if your +routine's calling convention is not in-place modification of an array +of 64 "short"s. + + +USAGE: + The standard test procedure is + doieee ieeetest >outputfile +Expect it to take a while (almost 80 minutes on my old 68030 box). +Each of the six passes will emit a row of 100 dots as it runs. 
+ +You can grep the output for the word FAILS if you just want to know +yea or nay. + + +LEGAL MUMBO-JUMBO: + I hereby release the test harness to the public domain. + Thomas G. Lane, 22 Nov 1993 + +IMPORTANT: jrevdct.c is NOT public domain, but is copyrighted free software +(not the same thing at all). It is subject to IJG's distribution terms, which +primarily state that if you incorporate it into a program you must acknowledge +IJG's contribution in your program documentation. For more details and the +complete IJG software, see the IJG FTP archive at ftp.uu.net, in directory +/graphics/jpeg. diff --git a/libs/idct/dct.h b/libs/idct/dct.h new file mode 100644 index 0000000000..dddad743a9 --- /dev/null +++ b/libs/idct/dct.h @@ -0,0 +1,29 @@ +/* define DCT types */ + +/* + * DCTSIZE underlying (1d) transform size + * DCTSIZE2 DCTSIZE squared + */ + +#define DCTSIZE (8) +#define DCTSIZE2 (DCTSIZE*DCTSIZE) + +#define EIGHT_BIT_SAMPLES /* needed in jrevdct.c */ + +typedef short DCTELEM; /* must be at least 16 bits */ + +typedef DCTELEM DCTBLOCK[DCTSIZE2]; + +typedef long INT32; /* must be at least 32 bits */ + +extern void gst_idct_int_idct(); + +extern void gst_idct_init_fast_int_idct (void); +extern void gst_idct_fast_int_idct (short *block); + +extern void gst_idct_mmx_idct (short *block); +extern void gst_idct_mmx32_idct (short *block); + +extern void gst_idct_init_float_idct(void); +extern void gst_idct_float_idct (short *block); + diff --git a/libs/idct/doieee b/libs/idct/doieee new file mode 100755 index 0000000000..1d5ff87775 --- /dev/null +++ b/libs/idct/doieee @@ -0,0 +1,15 @@ +# perform IEEE 1180 test series +# Typical usage: +# doieee >outfile +# where progname is ieeetest or a variant + +for i in 1 2 3 4 5; +do +time ./ieeetest $i -256 255 1 10000 +time ./ieeetest $i -5 5 1 10000 +time ./ieeetest $i -300 300 1 10000 + +time ./ieeetest $i -256 255 -1 10000 +time ./ieeetest $i -5 5 -1 10000 +time ./ieeetest $i -300 300 -1 10000 +done diff --git 
a/libs/idct/fastintidct.c b/libs/idct/fastintidct.c new file mode 100644 index 0000000000..3c9e9bb9ff --- /dev/null +++ b/libs/idct/fastintidct.c @@ -0,0 +1,207 @@ +/* idct.c, inverse fast discrete cosine transform */ + +/* Copyright (C) 1996, MPEG Software Simulation Group. All Rights Reserved. */ + +/* + * Disclaimer of Warranty + * + * These software programs are available to the user without any license fee or + * royalty on an "as is" basis. The MPEG Software Simulation Group disclaims + * any and all warranties, whether express, implied, or statuary, including any + * implied warranties or merchantability or of fitness for a particular + * purpose. In no event shall the copyright-holder be liable for any + * incidental, punitive, or consequential damages of any kind whatsoever + * arising from the use of these programs. + * + * This disclaimer of warranty extends to the user of these programs and user's + * customers, employees, agents, transferees, successors, and assigns. + * + * The MPEG Software Simulation Group does not represent or warrant that the + * programs furnished hereunder are free of infringement of any third-party + * patents. + * + * Commercial implementations of MPEG-1 and MPEG-2 video, including shareware, + * are subject to royalty fees to patent holders. Many of these patents are + * general enough such that they are unavoidable regardless of implementation + * design. + * + */ + +/**********************************************************/ +/* inverse two dimensional DCT, Chen-Wang algorithm */ +/* (cf. IEEE ASSP-32, pp. 803-816, Aug. 
1984) */ +/* 32-bit integer arithmetic (8 bit coefficients) */ +/* 11 mults, 29 adds per DCT */ +/* sE, 18.8.91 */ +/**********************************************************/ +/* coefficients extended to 12 bit for IEEE1180-1990 */ +/* compliance sE, 2.1.94 */ +/**********************************************************/ + +/* this code assumes >> to be a two's-complement arithmetic */ +/* right shift: (-2)>>1 == -1 , (-3)>>1 == -2 */ + +#define W1 2841 /* 2048*sqrt(2)*cos(1*pi/16) */ +#define W2 2676 /* 2048*sqrt(2)*cos(2*pi/16) */ +#define W3 2408 /* 2048*sqrt(2)*cos(3*pi/16) */ +#define W5 1609 /* 2048*sqrt(2)*cos(5*pi/16) */ +#define W6 1108 /* 2048*sqrt(2)*cos(6*pi/16) */ +#define W7 565 /* 2048*sqrt(2)*cos(7*pi/16) */ + +#include "dct.h" + +/* private data */ +static short iclip[1024]; /* clipping table */ +static short *iclp; + +/* private prototypes */ +static void idctrow (short *blk); +static void idctcol (short *blk); + +/* row (horizontal) IDCT + * + * 7 pi 1 + * dst[k] = sum c[l] * src[l] * cos( -- * ( k + - ) * l ) + * l=0 8 2 + * + * where: c[0] = 128 + * c[1..7] = 128*sqrt(2) + */ + +static void idctrow(blk) +short *blk; +{ + int x0, x1, x2, x3, x4, x5, x6, x7, x8; + + /* shortcut */ + if (!((x1 = blk[4]<<11) | (x2 = blk[6]) | (x3 = blk[2]) | + (x4 = blk[1]) | (x5 = blk[7]) | (x6 = blk[5]) | (x7 = blk[3]))) + { + blk[0]=blk[1]=blk[2]=blk[3]=blk[4]=blk[5]=blk[6]=blk[7]=blk[0]<<3; + return; + } + + x0 = (blk[0]<<11) + 128; /* for proper rounding in the fourth stage */ + + /* first stage */ + x8 = W7*(x4+x5); + x4 = x8 + (W1-W7)*x4; + x5 = x8 - (W1+W7)*x5; + x8 = W3*(x6+x7); + x6 = x8 - (W3-W5)*x6; + x7 = x8 - (W3+W5)*x7; + + /* second stage */ + x8 = x0 + x1; + x0 -= x1; + x1 = W6*(x3+x2); + x2 = x1 - (W2+W6)*x2; + x3 = x1 + (W2-W6)*x3; + x1 = x4 + x6; + x4 -= x6; + x6 = x5 + x7; + x5 -= x7; + + /* third stage */ + x7 = x8 + x3; + x8 -= x3; + x3 = x0 + x2; + x0 -= x2; + x2 = (181*(x4+x5)+128)>>8; + x4 = (181*(x4-x5)+128)>>8; + + /* fourth stage */ + 
blk[0] = (x7+x1)>>8; + blk[1] = (x3+x2)>>8; + blk[2] = (x0+x4)>>8; + blk[3] = (x8+x6)>>8; + blk[4] = (x8-x6)>>8; + blk[5] = (x0-x4)>>8; + blk[6] = (x3-x2)>>8; + blk[7] = (x7-x1)>>8; +} + +/* column (vertical) IDCT + * + * 7 pi 1 + * dst[8*k] = sum c[l] * src[8*l] * cos( -- * ( k + - ) * l ) + * l=0 8 2 + * + * where: c[0] = 1/1024 + * c[1..7] = (1/1024)*sqrt(2) + */ +static void idctcol(blk) +short *blk; +{ + int x0, x1, x2, x3, x4, x5, x6, x7, x8; + + /* shortcut */ + if (!((x1 = (blk[8*4]<<8)) | (x2 = blk[8*6]) | (x3 = blk[8*2]) | + (x4 = blk[8*1]) | (x5 = blk[8*7]) | (x6 = blk[8*5]) | (x7 = blk[8*3]))) + { + blk[8*0]=blk[8*1]=blk[8*2]=blk[8*3]=blk[8*4]=blk[8*5]=blk[8*6]=blk[8*7]= + iclp[(blk[8*0]+32)>>6]; + return; + } + + x0 = (blk[8*0]<<8) + 8192; + + /* first stage */ + x8 = W7*(x4+x5) + 4; + x4 = (x8+(W1-W7)*x4)>>3; + x5 = (x8-(W1+W7)*x5)>>3; + x8 = W3*(x6+x7) + 4; + x6 = (x8-(W3-W5)*x6)>>3; + x7 = (x8-(W3+W5)*x7)>>3; + + /* second stage */ + x8 = x0 + x1; + x0 -= x1; + x1 = W6*(x3+x2) + 4; + x2 = (x1-(W2+W6)*x2)>>3; + x3 = (x1+(W2-W6)*x3)>>3; + x1 = x4 + x6; + x4 -= x6; + x6 = x5 + x7; + x5 -= x7; + + /* third stage */ + x7 = x8 + x3; + x8 -= x3; + x3 = x0 + x2; + x0 -= x2; + x2 = (181*(x4+x5)+128)>>8; + x4 = (181*(x4-x5)+128)>>8; + + /* fourth stage */ + blk[8*0] = iclp[(x7+x1)>>14]; + blk[8*1] = iclp[(x3+x2)>>14]; + blk[8*2] = iclp[(x0+x4)>>14]; + blk[8*3] = iclp[(x8+x6)>>14]; + blk[8*4] = iclp[(x8-x6)>>14]; + blk[8*5] = iclp[(x0-x4)>>14]; + blk[8*6] = iclp[(x3-x2)>>14]; + blk[8*7] = iclp[(x7-x1)>>14]; +} + +/* two dimensional inverse discrete cosine transform */ +void gst_idct_fast_int_idct(block) +short *block; +{ + int i; + + for (i=0; i<8; i++) + idctrow(block+8*i); + + for (i=0; i<8; i++) + idctcol(block+i); +} + +void gst_idct_init_fast_int_idct() +{ + int i; + + iclp = iclip+512; + for (i= -512; i<512; i++) + iclp[i] = (i<-256) ? -256 : ((i>255) ? 
255 : i); +} diff --git a/libs/idct/floatidct.c b/libs/idct/floatidct.c new file mode 100644 index 0000000000..520c391335 --- /dev/null +++ b/libs/idct/floatidct.c @@ -0,0 +1,102 @@ +/* Reference_IDCT.c, Inverse Discrete Fourier Transform, double precision */ + +/* Copyright (C) 1996, MPEG Software Simulation Group. All Rights Reserved. */ + +/* + * Disclaimer of Warranty + * + * These software programs are available to the user without any license fee or + * royalty on an "as is" basis. The MPEG Software Simulation Group disclaims + * any and all warranties, whether express, implied, or statuary, including any + * implied warranties or merchantability or of fitness for a particular + * purpose. In no event shall the copyright-holder be liable for any + * incidental, punitive, or consequential damages of any kind whatsoever + * arising from the use of these programs. + * + * This disclaimer of warranty extends to the user of these programs and user's + * customers, employees, agents, transferees, successors, and assigns. + * + * The MPEG Software Simulation Group does not represent or warrant that the + * programs furnished hereunder are free of infringement of any third-party + * patents. + * + * Commercial implementations of MPEG-1 and MPEG-2 video, including shareware, + * are subject to royalty fees to patent holders. Many of these patents are + * general enough such that they are unavoidable regardless of implementation + * design. + * + */ + +/* Perform IEEE 1180 reference (64-bit floating point, separable 8x1 + * direct matrix multiply) Inverse Discrete Cosine Transform +*/ + + +/* Here we use math.h to generate constants. 
Compiler results may
+   vary a little */
+
+#include <math.h>
+
+#ifndef PI
+# ifdef M_PI
+#  define PI M_PI
+# else
+#  define PI 3.14159265358979323846
+# endif
+#endif
+
+/* private data */
+
+/* cosine transform matrix for 8x1 IDCT */
+static double gst_idct_float_c[8][8];
+
+/* initialize DCT coefficient matrix */
+
+void gst_idct_init_float_idct()
+{
+  int freq, time;
+  double scale;
+
+  for (freq=0; freq < 8; freq++)
+  {
+    scale = (freq == 0) ? sqrt(0.125) : 0.5;
+    for (time=0; time<8; time++)
+      gst_idct_float_c[freq][time] = scale*cos((PI/8.0)*freq*(time + 0.5));
+  }
+}
+
+/* perform IDCT matrix multiply for 8x8 coefficient block */
+
+void gst_idct_float_idct(block)
+short *block;
+{
+  int i, j, k, v;
+  double partial_product;
+  double tmp[64];
+
+  for (i=0; i<8; i++)
+    for (j=0; j<8; j++)
+    {
+      partial_product = 0.0;
+
+      for (k=0; k<8; k++)
+        partial_product+= gst_idct_float_c[k][j]*block[8*i+k];
+
+      tmp[8*i+j] = partial_product;
+    }
+
+  /* Transpose operation is integrated into address mapping by switching
+     loop order of i and j */
+
+  for (j=0; j<8; j++)
+    for (i=0; i<8; i++)
+    {
+      partial_product = 0.0;
+
+      for (k=0; k<8; k++)
+        partial_product+= gst_idct_float_c[k][i]*tmp[8*k+j];
+
+      v = (int) floor(partial_product+0.5);
+      block[8*i+j] = (v<-256) ? -256 : ((v>255) ? 255 : v);
+    }
+}
diff --git a/libs/idct/gstidct.c b/libs/idct/gstidct.c
new file mode 100644
index 0000000000..9f7d60f0da
--- /dev/null
+++ b/libs/idct/gstidct.c
@@ -0,0 +1,111 @@
+/* Gnome-Streamer
+ * Copyright (C) <1999> Erik Walthinsen
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+
+#include <config.h>
+
+#include "gstidct.h"
+#include "dct.h"
+
+static void gst_idct_int_sparse_idct(short *data);
+
+GstIDCT *gst_idct_new(GstIDCTMethod method)
+{
+  GstIDCT *new = g_malloc(sizeof(GstIDCT));
+
+  new->need_transpose = FALSE;
+
+  if (method == GST_IDCT_DEFAULT) {
+#ifdef HAVE_LIBMMX
+    method = GST_IDCT_MMX32;
+#else
+    method = GST_IDCT_FAST_INT;
+#endif
+  }
+
+  new->convert_sparse = gst_idct_int_sparse_idct;
+
+  switch (method) {
+    case GST_IDCT_FAST_INT:
+      g_print("GstIDCT: using fast_int_idct\n");
+      gst_idct_init_fast_int_idct();
+      new->convert = gst_idct_fast_int_idct;
+      break;
+    case GST_IDCT_INT:
+      g_print("GstIDCT: using int_idct\n");
+      new->convert = gst_idct_int_idct;
+      break;
+    case GST_IDCT_FLOAT:
+      g_print("GstIDCT: using float_idct\n");
+      gst_idct_init_float_idct();
+      new->convert = gst_idct_float_idct;
+      break;
+    case GST_IDCT_MMX:
+      g_print("GstIDCT: using MMX_idct\n");
+      new->convert = gst_idct_mmx_idct;
+      new->need_transpose = TRUE;
+      break;
+    case GST_IDCT_MMX32:
+      g_print("GstIDCT: using MMX32_idct\n");
+      new->convert = gst_idct_mmx32_idct;
+      new->need_transpose = TRUE;
+      break;
+    default:
+      g_print("GstIDCT: method not supported\n");
+      g_free(new);
+      return NULL;
+  }
+  return new;
+}
+
+static void gst_idct_int_sparse_idct(short *data)
+{
+  short val;
+  gint32 v, *dp = (gint32 *)data;   /* block is written as 32 words, two coefficients each */
+
+  v = *data;
+
+  if (v < 0) {
+    val = -v;
+    val += (8 >> 1);
+    val /= 8;
+    val = -val;
+  }
+  else {
+    val = (v + (8 >> 1)) / 8;
+  }
+  v = (gint32) (((guint32) val & 0xffff) | ((guint32) val << 16));   /* copy val into both halves; unsigned so a negative val is never left-shifted */
+
+  dp[0] = v; dp[1] = v; dp[2] = v; dp[3] = v;
+  dp[4] = v; dp[5] = v; dp[6] = v; dp[7] = v;
+  dp[8] = v; dp[9] = v; dp[10] = v; dp[11] = v;
+  dp[12] = v; dp[13] = v; dp[14] = v; dp[15] =
v; + dp[16] = v; dp[17] = v; dp[18] = v; dp[19] = v; + dp[20] = v; dp[21] = v; dp[22] = v; dp[23] = v; + dp[24] = v; dp[25] = v; dp[26] = v; dp[27] = v; + dp[28] = v; dp[29] = v; dp[30] = v; dp[31] = v; +} + +void gst_idct_destroy(GstIDCT *idct) +{ + g_return_if_fail(idct != NULL); + + g_free(idct); +} + diff --git a/libs/idct/gstidct.h b/libs/idct/gstidct.h new file mode 100644 index 0000000000..1d26bf4fa1 --- /dev/null +++ b/libs/idct/gstidct.h @@ -0,0 +1,53 @@ +/* Gnome-Streamer + * Copyright (C) <1999> Erik Walthinsen + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. 
+ */
+
+
+#ifndef __GST_IDCT_H__
+#define __GST_IDCT_H__
+
+#include <glib.h>
+
+typedef enum {
+  GST_IDCT_DEFAULT,    // default
+  GST_IDCT_INT,        // integer IDCT
+  GST_IDCT_FAST_INT,   // fastest integer
+  GST_IDCT_FLOAT,      // accurate float version
+  GST_IDCT_MMX,        // fast MMX (not accurate)
+  GST_IDCT_MMX32,      // accurate MMX
+} GstIDCTMethod;
+
+typedef struct _GstIDCT GstIDCT;
+typedef void (*GstIDCTFunction) (gshort *block);
+
+#define GST_IDCT_TRANSPOSE(idct) ((idct)->need_transpose)
+
+struct _GstIDCT {
+  /* private */
+  GstIDCTFunction convert;
+  GstIDCTFunction convert_sparse;
+  gboolean need_transpose;
+};
+
+
+GstIDCT *gst_idct_new(GstIDCTMethod method);
+#define gst_idct_convert(idct, blocks) (idct)->convert((blocks))
+#define gst_idct_convert_sparse(idct, blocks) (idct)->convert_sparse((blocks))
+void gst_idct_destroy(GstIDCT *idct);
+
+#endif /* __GST_IDCT_H__ */
diff --git a/libs/idct/ieeetest.c b/libs/idct/ieeetest.c
new file mode 100644
index 0000000000..8751dbf18e
--- /dev/null
+++ b/libs/idct/ieeetest.c
@@ -0,0 +1,339 @@
+/*
+ * ieeetest.c --- test IDCT code against the IEEE Std 1180-1990 spec
+ *
+ * Note that this does only one pass of the test.
+ * Six invocations of ieeetest are needed to complete the entire spec.
+ * The shell script "doieee" performs the complete test.
+ *
+ * Written by Tom Lane (tgl@cs.cmu.edu).
+ * Released to public domain 11/22/93.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+
+#include <gst/gst.h>
+#include "gstidct.h"
+#include "dct.h"
+
+
+/* prototypes */
+
+void usage (char *msg);
+long ieeerand (long L, long H);
+void dct_init(void);
+void ref_fdct(DCTELEM block[8][8]);
+void ref_idct(DCTELEM block[8][8]);
+
+/* error stat accumulators -- assume initialized to 0 */
+
+long sumerrs[DCTSIZE2];
+long sumsqerrs[DCTSIZE2];
+int maxerr[DCTSIZE2];
+
+
+char * meets (double val, double limit)
+{
+  return ((fabs(val) <= limit) ? "meets" : "FAILS");
+}
+
+__inline__ void read_tsc(guint64 *dst) {
+  /* rdtsc returns the counter in EDX:EAX.  Both registers are already
+   * declared as outputs, so they must not be repeated as clobbers. */
+  __asm__ __volatile__
+    ("rdtsc"
+     : "=a" (*(guint32 *)dst), "=d" (*(((guint32 *)dst) + 1)));
+}
+
+
+int
+main(int argc, char **argv)
+{
+  long minpix, maxpix, sign;
+  long curiter, niters;
+  int i, j;
+  double max, total;
+  int method;
+  DCTELEM block[DCTSIZE2];     /* random source data */
+  DCTELEM refcoefs[DCTSIZE2];  /* coefs from reference FDCT */
+  DCTELEM refout[DCTSIZE2];    /* output from reference IDCT */
+  DCTELEM testout[DCTSIZE2];   /* output from test IDCT */
+  GstIDCT *idct;
+  guint64 tscstart, tscmin = ~0, tscmax = 0;
+  guint64 tscstop;
+
+  /* Argument parsing --- not very bulletproof at all */
+
+  if (argc != 6) usage(NULL);
+
+  method = atoi(argv[1]);
+  minpix = atoi(argv[2]);
+  maxpix = atoi(argv[3]);
+  sign = atoi(argv[4]);
+  niters = atol(argv[5]);
+
+  gst_library_load("gstidct");
+
+  idct = gst_idct_new(method);
+  if (idct == NULL) usage("unsupported IDCT method");  /* gst_idct_new returns NULL for unknown methods */
+  dct_init();
+
+  /* Loop once per generated random-data block */
+
+  for (curiter = 0; curiter < niters; curiter++) {
+
+    /* generate a pseudo-random block of data */
+    for (i = 0; i < DCTSIZE2; i++)
+      block[i] = (DCTELEM) (ieeerand(-minpix,maxpix) * sign);
+
+    /* perform reference FDCT */
+    memcpy(refcoefs, block, sizeof(DCTELEM)*DCTSIZE2);
+    ref_fdct(refcoefs);
+    /* clip */
+    for (i = 0; i < DCTSIZE2; i++) {
+      if (refcoefs[i] < -2048) refcoefs[i] = -2048;
+      else if (refcoefs[i] > 2047) refcoefs[i] = 2047;
+    }
+
+    /* perform reference IDCT */
+    memcpy(refout, refcoefs, sizeof(DCTELEM)*DCTSIZE2);
+    ref_idct(refout);
+    /* clip */
+    for (i = 0; i < DCTSIZE2; i++) {
+      if (refout[i] < -256) refout[i] = -256;
+      else if (refout[i] > 255) refout[i] = 255;
+    }
+
+    /* perform test IDCT */
+    if (GST_IDCT_TRANSPOSE(idct)) {
+      for (j = 0; j < DCTSIZE; j++) {
+        for (i = 0; i < DCTSIZE; i++) {
+          testout[i*DCTSIZE+j] = refcoefs[j*DCTSIZE+i];
+        }
+      }
+    }
+    else {
+      memcpy(testout, refcoefs, sizeof(DCTELEM)*DCTSIZE2);
+    }
+
+    read_tsc(&tscstart);
+
gst_idct_convert(idct, testout);
+    read_tsc(&tscstop);
+    //printf("time %llu, %llu %lld\n", tscstart, tscstop, tscstop-tscstart);
+    if (tscstop - tscstart < tscmin) tscmin = tscstop-tscstart;
+    if (tscstop - tscstart > tscmax) tscmax = tscstop-tscstart;
+
+    /* clip */
+    for (i = 0; i < DCTSIZE2; i++) {
+      if (testout[i] < -256) testout[i] = -256;
+      else if (testout[i] > 255) testout[i] = 255;
+    }
+
+    /* accumulate error stats */
+    for (i = 0; i < DCTSIZE2; i++) {
+      register int err = testout[i] - refout[i];
+      sumerrs[i] += err;
+      sumsqerrs[i] += err * err;
+      if (err < 0) err = -err;
+      if (maxerr[i] < err) maxerr[i] = err;
+    }
+
+    if (curiter % 100 == 99) {
+      fprintf(stderr, ".");
+      fflush(stderr);
+    }
+  }
+  fprintf(stderr, "\n");
+
+  /* print results */
+
+  printf("IEEE test conditions: -L = %ld, +H = %ld, sign = %ld, #iters = %ld\n",
+         minpix, maxpix, sign, niters);
+
+  printf("Speed, min time %llu, max %llu\n", (unsigned long long) tscmin, (unsigned long long) tscmax);  /* cycle counts are unsigned 64-bit */
+
+  printf("Peak absolute values of errors:\n");
+  for (i = 0, j = 0; i < DCTSIZE2; i++) {
+    if (j < maxerr[i]) j = maxerr[i];
+    printf("%4d", maxerr[i]);
+    if ((i%DCTSIZE) == DCTSIZE-1) printf("\n");
+  }
+  printf("Worst peak error = %d (%s spec limit 1)\n\n", j,
+         meets((double) j, 1.0));
+
+  printf("Mean square errors:\n");
+  max = total = 0.0;
+  for (i = 0; i < DCTSIZE2; i++) {
+    double err = (double) sumsqerrs[i] / ((double) niters);
+    total += (double) sumsqerrs[i];
+    if (max < err) max = err;
+    printf(" %8.4f", err);
+    if ((i%DCTSIZE) == DCTSIZE-1) printf("\n");
+  }
+  printf("Worst pmse = %.6f (%s spec limit 0.06)\n", max, meets(max, 0.06));
+  total /= (double) (64*niters);
+  printf("Overall mse = %.6f (%s spec limit 0.02)\n\n", total,
+         meets(total, 0.02));
+
+  printf("Mean errors:\n");
+  max = total = 0.0;
+  for (i = 0; i < DCTSIZE2; i++) {
+    double err = (double) sumerrs[i] / ((double) niters);
+    total += (double) sumerrs[i];
+    printf(" %8.4f", err);
+    if (err < 0.0) err = -err;
+    if (max < err) max = err;
+    if ((i%DCTSIZE) ==
DCTSIZE-1) printf("\n");
+  }
+  printf("Worst mean error = %.6f (%s spec limit 0.015)\n", max,
+         meets(max, 0.015));
+  total /= (double) (64*niters);
+  printf("Overall mean error = %.6f (%s spec limit 0.0015)\n\n", total,
+         meets(total, 0.0015));
+
+  /* test for 0 input giving 0 output */
+  memset(testout, 0, sizeof(DCTELEM)*DCTSIZE2);
+  gst_idct_convert(idct, testout);
+  for (i = 0, j=0; i < DCTSIZE2; i++) {
+    if (testout[i]) {
+      printf("Position %d of IDCT(0) = %d (FAILS)\n", i, testout[i]);
+      j++;
+    }
+  }
+  printf("%d elements of IDCT(0) were not zero\n\n\n", j);
+
+  exit(0);
+  return 0;
+}
+
+
+void usage (char *msg)
+{
+  if (msg != NULL)
+    fprintf(stderr, "\nerror: %s\n", msg);
+
+  fprintf(stderr, "\n");
+  fprintf(stderr, "usage: ieeetest test minpix maxpix sign niters\n");  /* main requires all five arguments */
+  fprintf(stderr, "\n");
+  fprintf(stderr, " test = 1 - 5\n");
+  fprintf(stderr, " minpix = -L value per IEEE spec\n");
+  fprintf(stderr, " maxpix = H value per IEEE spec\n");
+  fprintf(stderr, " sign = +1 for normal, -1 to run negated test\n");
+  fprintf(stderr, " niters = # iterations (10000 for full test)\n");
+  fprintf(stderr, "\n");
+
+  exit(1);
+}
+
+
+/* Pseudo-random generator specified by IEEE 1180 */
+
+long ieeerand (long L, long H)
+{
+  static unsigned long randx = 1;   /* generator state, kept to 32 bits below */
+  static double z = (double) 0x7fffffff;
+
+  long i,j;
+  double x;
+
+  randx = (randx * 1103515245UL + 12345UL) & 0xffffffffUL;  /* unsigned: wraps without signed-overflow UB, same sequence on 32- and 64-bit long */
+  i = (long) (randx & 0x7ffffffeUL);
+  x = ((double) i) / z;
+  x *= (L+H+1);
+  j = x;
+  return j-L;
+}
+
+
+/* Reference double-precision FDCT and IDCT */
+
+
+/* The cosine lookup table */
+/* coslu[a][b] = C(b)/2 * cos[(2a+1)b*pi/16] */
+double coslu[8][8];
+
+
+/* Routine to initialise the cosine lookup table */
+void dct_init(void)
+{
+  int a,b;
+  double tmp;
+
+  for(a=0;a<8;a++)
+    for(b=0;b<8;b++) {
+      tmp = cos((double)((a+a+1)*b) * (3.14159265358979323846 / 16.0));
+      if(b==0)
+        tmp /= sqrt(2.0);
+      coslu[a][b] = tmp * 0.5;
+    }
+}
+
+
+void ref_fdct (DCTELEM block[8][8])
+{
+  int x,y,u,v;
+  double tmp, tmp2;
+  double res[8][8];
+
+ for (v=0; v<8; v++) { + for (u=0; u<8; u++) { + tmp = 0.0; + for (y=0; y<8; y++) { + tmp2 = 0.0; + for (x=0; x<8; x++) { + tmp2 += (double) block[y][x] * coslu[x][u]; + } + tmp += coslu[y][v] * tmp2; + } + res[v][u] = tmp; + } + } + + for (v=0; v<8; v++) { + for (u=0; u<8; u++) { + tmp = res[v][u]; + if (tmp < 0.0) { + x = - ((int) (0.5 - tmp)); + } else { + x = (int) (tmp + 0.5); + } + block[v][u] = (DCTELEM) x; + } + } +} + + +void ref_idct (DCTELEM block[8][8]) +{ + int x,y,u,v; + double tmp, tmp2; + double res[8][8]; + + for (y=0; y<8; y++) { + for (x=0; x<8; x++) { + tmp = 0.0; + for (v=0; v<8; v++) { + tmp2 = 0.0; + for (u=0; u<8; u++) { + tmp2 += (double) block[v][u] * coslu[x][u]; + } + tmp += coslu[y][v] * tmp2; + } + res[y][x] = tmp; + } + } + + for (v=0; v<8; v++) { + for (u=0; u<8; u++) { + tmp = res[v][u]; + if (tmp < 0.0) { + x = - ((int) (0.5 - tmp)); + } else { + x = (int) (tmp + 0.5); + } + block[v][u] = (DCTELEM) x; + } + } +} diff --git a/libs/idct/intidct.c b/libs/idct/intidct.c new file mode 100644 index 0000000000..119b7fd14b --- /dev/null +++ b/libs/idct/intidct.c @@ -0,0 +1,391 @@ +/* + * jrevdct.c + * + * Copyright (C) 1991, 1992, Thomas G. Lane. + * This file is part of the Independent JPEG Group's software. + * For conditions of distribution and use, see the accompanying README file. + * + * This file contains the basic inverse-DCT transformation subroutine. + * + * This implementation is based on an algorithm described in + * C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT + * Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics, + * Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991. + * The primary algorithm described there uses 11 multiplies and 29 adds. + * We use their alternate method with 12 multiplies and 32 adds. 
+ * The advantage of this method is that no data path contains more than one + * multiplication; this allows a very simple and accurate implementation in + * scaled fixed-point arithmetic, with a minimal number of shifts. + */ + +#include "dct.h" + +/* We assume that right shift corresponds to signed division by 2 with + * rounding towards minus infinity. This is correct for typical "arithmetic + * shift" instructions that shift in copies of the sign bit. But some + * C compilers implement >> with an unsigned shift. For these machines you + * must define RIGHT_SHIFT_IS_UNSIGNED. + * RIGHT_SHIFT provides a proper signed right shift of an INT32 quantity. + * It is only applied with constant shift counts. SHIFT_TEMPS must be + * included in the variables of any routine using RIGHT_SHIFT. + */ + +#ifdef RIGHT_SHIFT_IS_UNSIGNED +#define SHIFT_TEMPS INT32 shift_temp; +#define RIGHT_SHIFT(x,shft) \ + ((shift_temp = (x)) < 0 ? \ + (shift_temp >> (shft)) | ((~((INT32) 0)) << (32-(shft))) : \ + (shift_temp >> (shft))) +#else +#define SHIFT_TEMPS +#define RIGHT_SHIFT(x,shft) ((x) >> (shft)) +#endif + + +/* + * This routine is specialized to the case DCTSIZE = 8. + */ + +#if DCTSIZE != 8 + Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */ +#endif + + +/* + * A 2-D IDCT can be done by 1-D IDCT on each row followed by 1-D IDCT + * on each column. Direct algorithms are also available, but they are + * much more complex and seem not to be any faster when reduced to code. + * + * The poop on this scaling stuff is as follows: + * + * Each 1-D IDCT step produces outputs which are a factor of sqrt(N) + * larger than the true IDCT outputs. The final outputs are therefore + * a factor of N larger than desired; since N=8 this can be cured by + * a simple right shift at the end of the algorithm. The advantage of + * this arrangement is that we save two multiplications per 1-D IDCT, + * because the y0 and y4 inputs need not be divided by sqrt(N). 
+ * + * We have to do addition and subtraction of the integer inputs, which + * is no problem, and multiplication by fractional constants, which is + * a problem to do in integer arithmetic. We multiply all the constants + * by CONST_SCALE and convert them to integer constants (thus retaining + * CONST_BITS bits of precision in the constants). After doing a + * multiplication we have to divide the product by CONST_SCALE, with proper + * rounding, to produce the correct output. This division can be done + * cheaply as a right shift of CONST_BITS bits. We postpone shifting + * as long as possible so that partial sums can be added together with + * full fractional precision. + * + * The outputs of the first pass are scaled up by PASS1_BITS bits so that + * they are represented to better-than-integral precision. These outputs + * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word + * with the recommended scaling. (To scale up 12-bit sample data further, an + * intermediate INT32 array would be needed.) + * + * To avoid overflow of the 32-bit intermediate results in pass 2, we must + * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26. Error analysis + * shows that the values given below are the most effective. + */ + +#ifdef EIGHT_BIT_SAMPLES +#define CONST_BITS 13 +#define PASS1_BITS 2 +#else +#define CONST_BITS 13 +#define PASS1_BITS 1 /* lose a little precision to avoid overflow */ +#endif + +#define ONE ((INT32) 1) + +#define CONST_SCALE (ONE << CONST_BITS) + +/* Convert a positive real constant to an integer scaled by CONST_SCALE. */ + +#define FIX(x) ((INT32) ((x) * CONST_SCALE + 0.5)) + +/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus + * causing a lot of useless floating-point operations at run time. + * To get around this we use the following pre-calculated constants. + * If you change CONST_BITS you may want to add appropriate values. + * (With a reasonable C compiler, you can just rely on the FIX() macro...) 
+ */ + +#if CONST_BITS == 13 +#define FIX_0_298631336 ((INT32) 2446) /* FIX(0.298631336) */ +#define FIX_0_390180644 ((INT32) 3196) /* FIX(0.390180644) */ +#define FIX_0_541196100 ((INT32) 4433) /* FIX(0.541196100) */ +#define FIX_0_765366865 ((INT32) 6270) /* FIX(0.765366865) */ +#define FIX_0_899976223 ((INT32) 7373) /* FIX(0.899976223) */ +#define FIX_1_175875602 ((INT32) 9633) /* FIX(1.175875602) */ +#define FIX_1_501321110 ((INT32) 12299) /* FIX(1.501321110) */ +#define FIX_1_847759065 ((INT32) 15137) /* FIX(1.847759065) */ +#define FIX_1_961570560 ((INT32) 16069) /* FIX(1.961570560) */ +#define FIX_2_053119869 ((INT32) 16819) /* FIX(2.053119869) */ +#define FIX_2_562915447 ((INT32) 20995) /* FIX(2.562915447) */ +#define FIX_3_072711026 ((INT32) 25172) /* FIX(3.072711026) */ +#else +#define FIX_0_298631336 FIX(0.298631336) +#define FIX_0_390180644 FIX(0.390180644) +#define FIX_0_541196100 FIX(0.541196100) +#define FIX_0_765366865 FIX(0.765366865) +#define FIX_0_899976223 FIX(0.899976223) +#define FIX_1_175875602 FIX(1.175875602) +#define FIX_1_501321110 FIX(1.501321110) +#define FIX_1_847759065 FIX(1.847759065) +#define FIX_1_961570560 FIX(1.961570560) +#define FIX_2_053119869 FIX(2.053119869) +#define FIX_2_562915447 FIX(2.562915447) +#define FIX_3_072711026 FIX(3.072711026) +#endif + + +/* Descale and correctly round an INT32 value that's scaled by N bits. + * We assume RIGHT_SHIFT rounds towards minus infinity, so adding + * the fudge factor is correct for either sign of X. + */ + +#define DESCALE(x,n) RIGHT_SHIFT((x) + (ONE << ((n)-1)), n) + +/* Multiply an INT32 variable by an INT32 constant to yield an INT32 result. + * For 8-bit samples with the recommended scaling, all the variable + * and constant values involved are no more than 16 bits wide, so a + * 16x16->32 bit multiply can be used instead of a full 32x32 multiply; + * this provides a useful speedup on many machines. 
+ * There is no way to specify a 16x16->32 multiply in portable C, but
+ * some C compilers will do the right thing if you provide the correct
+ * combination of casts.
+ * NB: for 12-bit samples, a full 32-bit multiplication will be needed.
+ */
+
+#ifdef EIGHT_BIT_SAMPLES
+#ifdef SHORTxSHORT_32 /* may work if 'int' is 32 bits */
+#define MULTIPLY(var,const) (((INT16) (var)) * ((INT16) (const)))
+#endif
+#ifdef SHORTxLCONST_32 /* known to work with Microsoft C 6.0 */
+#define MULTIPLY(var,const) (((INT16) (var)) * ((INT32) (const)))
+#endif
+#endif
+
+#ifndef MULTIPLY /* default definition */
+#define MULTIPLY(var,const) ((var) * (const))
+#endif
+
+
+/*
+ * Perform the inverse DCT on one block of coefficients, in place: pass 1 transforms each row of `data`, pass 2 each column, and the results overwrite the input coefficients.
+ */
+
+void
+gst_idct_int_idct (DCTBLOCK data)
+{
+  INT32 tmp0, tmp1, tmp2, tmp3;
+  INT32 tmp10, tmp11, tmp12, tmp13;
+  INT32 z1, z2, z3, z4, z5;
+  register DCTELEM *dataptr;
+  int rowctr;
+  SHIFT_TEMPS
+
+  /* Pass 1: process rows. */
+  /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
+  /* furthermore, we scale the results by 2**PASS1_BITS. */
+
+  dataptr = data;
+  for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) {
+    /* Due to quantization, we will usually find that many of the input
+     * coefficients are zero, especially the AC terms. We can exploit this
+     * by short-circuiting the IDCT calculation for any row in which all
+     * the AC terms are zero. In that case each output is equal to the
+     * DC coefficient (with scale factor as needed).
+     * With typical images and quantization tables, half or more of the
+     * row DCT calculations can be simplified this way.
+     */
+
+    if ((dataptr[1] | dataptr[2] | dataptr[3] | dataptr[4] |
+         dataptr[5] | dataptr[6] | dataptr[7]) == 0) {
+      /* AC terms all zero */
+      DCTELEM dcval = (DCTELEM) (dataptr[0] * (1 << PASS1_BITS)); /* '*' rather than '<<': left-shifting a negative DC term is undefined behaviour */
+
+      dataptr[0] = dcval;
+      dataptr[1] = dcval;
+      dataptr[2] = dcval;
+      dataptr[3] = dcval;
+      dataptr[4] = dcval;
+      dataptr[5] = dcval;
+      dataptr[6] = dcval;
+      dataptr[7] = dcval;
+
+      dataptr += DCTSIZE; /* advance pointer to next row */
+      continue;
+    }
+
+    /* Even part: reverse the even part of the forward DCT. */
+    /* The rotator is sqrt(2)*c(-6). */
+
+    z2 = (INT32) dataptr[2];
+    z3 = (INT32) dataptr[6];
+
+    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
+    tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065);
+    tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865);
+
+    tmp0 = ((INT32) dataptr[0] + (INT32) dataptr[4]) * CONST_SCALE; /* scale by 2**CONST_BITS; '*' avoids the undefined '<<' of a negative sum */
+    tmp1 = ((INT32) dataptr[0] - (INT32) dataptr[4]) * CONST_SCALE; /* likewise for the difference */
+
+    tmp10 = tmp0 + tmp3;
+    tmp13 = tmp0 - tmp3;
+    tmp11 = tmp1 + tmp2;
+    tmp12 = tmp1 - tmp2;
+
+    /* Odd part per figure 8; the matrix is unitary and hence its
+     * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
+     */
+
+    tmp0 = (INT32) dataptr[7];
+    tmp1 = (INT32) dataptr[5];
+    tmp2 = (INT32) dataptr[3];
+    tmp3 = (INT32) dataptr[1];
+
+    z1 = tmp0 + tmp3;
+    z2 = tmp1 + tmp2;
+    z3 = tmp0 + tmp2;
+    z4 = tmp1 + tmp3;
+    z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
+
+    tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
+    tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
+    tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
+    tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
+    z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
+    z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
+    z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
+    z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
+
+    z3 += z5;
+    z4 += z5;
+
+    tmp0 += z1 + z3;
+    tmp1 += z2 + z4;
+    tmp2 += z2 + z3;
+    tmp3 += z1 + z4;
+
+    /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
+
+    dataptr[0] = (DCTELEM) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
+    dataptr[7] = (DCTELEM) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
+    dataptr[1] = (DCTELEM) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
+    dataptr[6] = (DCTELEM) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
+    dataptr[2] = (DCTELEM) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
+    dataptr[5] = (DCTELEM) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
+    dataptr[3] = (DCTELEM) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
+    dataptr[4] = (DCTELEM) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
+
+    dataptr += DCTSIZE; /* advance pointer to next row */
+  }
+
+  /* Pass 2: process columns. */
+  /* Note that we must descale the results by a factor of 8 == 2**3, */
+  /* and also undo the PASS1_BITS scaling. */
+
+  dataptr = data;
+  for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) { /* despite its name, rowctr counts columns in this pass */
+    /* Columns of zeroes can be exploited in the same way as we did with rows.
+     * However, the row calculation has created many nonzero AC terms, so the
+     * simplification applies less often (typically 5% to 10% of the time).
+     * On machines with very fast multiplication, it's possible that the
+     * test takes more time than it's worth. In that case this section
+     * may be commented out.
+     */
+
+#ifndef NO_ZERO_COLUMN_TEST
+    if ((dataptr[DCTSIZE*1] | dataptr[DCTSIZE*2] | dataptr[DCTSIZE*3] |
+         dataptr[DCTSIZE*4] | dataptr[DCTSIZE*5] | dataptr[DCTSIZE*6] |
+         dataptr[DCTSIZE*7]) == 0) {
+      /* AC terms all zero */
+      DCTELEM dcval = (DCTELEM) DESCALE((INT32) dataptr[0], PASS1_BITS+3);
+
+      dataptr[DCTSIZE*0] = dcval;
+      dataptr[DCTSIZE*1] = dcval;
+      dataptr[DCTSIZE*2] = dcval;
+      dataptr[DCTSIZE*3] = dcval;
+      dataptr[DCTSIZE*4] = dcval;
+      dataptr[DCTSIZE*5] = dcval;
+      dataptr[DCTSIZE*6] = dcval;
+      dataptr[DCTSIZE*7] = dcval;
+
+      dataptr++; /* advance pointer to next column */
+      continue;
+    }
+#endif
+
+    /* Even part: reverse the even part of the forward DCT. */
+    /* The rotator is sqrt(2)*c(-6). */
+
+    z2 = (INT32) dataptr[DCTSIZE*2];
+    z3 = (INT32) dataptr[DCTSIZE*6];
+
+    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
+    tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065);
+    tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865);
+
+    tmp0 = ((INT32) dataptr[DCTSIZE*0] + (INT32) dataptr[DCTSIZE*4]) * CONST_SCALE; /* scale by 2**CONST_BITS; '*' avoids the undefined '<<' of a negative sum */
+    tmp1 = ((INT32) dataptr[DCTSIZE*0] - (INT32) dataptr[DCTSIZE*4]) * CONST_SCALE; /* likewise for the difference */
+
+    tmp10 = tmp0 + tmp3;
+    tmp13 = tmp0 - tmp3;
+    tmp11 = tmp1 + tmp2;
+    tmp12 = tmp1 - tmp2;
+
+    /* Odd part per figure 8; the matrix is unitary and hence its
+     * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
+     */
+
+    tmp0 = (INT32) dataptr[DCTSIZE*7];
+    tmp1 = (INT32) dataptr[DCTSIZE*5];
+    tmp2 = (INT32) dataptr[DCTSIZE*3];
+    tmp3 = (INT32) dataptr[DCTSIZE*1];
+
+    z1 = tmp0 + tmp3;
+    z2 = tmp1 + tmp2;
+    z3 = tmp0 + tmp2;
+    z4 = tmp1 + tmp3;
+    z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
+
+    tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
+    tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
+    tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
+    tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
+    z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
+    z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
+    z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
+    z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
+
+    z3 += z5;
+    z4 += z5;
+
+    tmp0 += z1 + z3;
+    tmp1 += z2 + z4;
+    tmp2 += z2 + z3;
+    tmp3 += z1 + z4;
+
+    /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
+
+    dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp3,
+                                           CONST_BITS+PASS1_BITS+3);
+    dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp10 - tmp3,
+                                           CONST_BITS+PASS1_BITS+3);
+    dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp11 + tmp2,
+                                           CONST_BITS+PASS1_BITS+3);
+    dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(tmp11 - tmp2,
+                                           CONST_BITS+PASS1_BITS+3);
+    dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(tmp12 + tmp1,
+                                           CONST_BITS+PASS1_BITS+3);
+    dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12 - tmp1,
+                                           CONST_BITS+PASS1_BITS+3);
+    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp13 + tmp0,
+                                           CONST_BITS+PASS1_BITS+3);
+    dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp13 - tmp0,
+                                           CONST_BITS+PASS1_BITS+3);
+
+    dataptr++; /* advance pointer to next column */
+  }
+}
diff --git a/libs/idct/mmx32idct.c b/libs/idct/mmx32idct.c
new file mode 100644
index 0000000000..78bf45bf25
--- /dev/null
+++ b/libs/idct/mmx32idct.c
@@ -0,0 +1,783 @@
+/*
+ * idctmmx32.cpp
+ *
+ * Copyright (C) Alberto Vigata - January 2000 -
ultraflask@yahoo.com + * + * This file is part of FlasKMPEG, a free MPEG to MPEG/AVI converter + * + * FlasKMPEG is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * FlasKMPEG is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + + +// MMX32 iDCT algorithm (IEEE-1180 compliant) :: idct_mmx32() +// +// MPEG2AVI +// -------- +// v0.16B33 initial release +// +// This was one of the harder pieces of work to code. +// Intel's app-note focuses on the numerical issues of the algorithm, but +// assumes the programmer is familiar with IDCT mathematics, leaving the +// form of the complete function up to the programmer's imagination. +// +// ALGORITHM OVERVIEW +// ------------------ +// I played around with the code for quite a few hours. I came up +// with *A* working IDCT algorithm, however I'm not sure whether my routine +// is "the correct one." But rest assured, my code passes all six IEEE +// accuracy tests with plenty of margin. +// +// My IDCT algorithm consists of 4 steps: +// +// 1) IDCT-row transformation (using the IDCT-row function) on all 8 rows +// This yields an intermediate 8x8 matrix. +// +// 2) intermediate matrix transpose (mandatory) +// +// 3) IDCT-row transformation (2nd time) on all 8 rows of the intermediate +// matrix. The output is the final-result, in transposed form. 
+// +// 4) post-transformation matrix transpose +// (not necessary if the input-data is already transposed, this could +// be done during the MPEG "zig-zag" scan, but since my algorithm +// requires at least one transpose operation, why not re-use the +// transpose-code.) +// +// Although the (1st) and (3rd) steps use the SAME row-transform operation, +// the (3rd) step uses different shift&round constants (explained later.) +// +// Also note that the intermediate transpose (2) would not be necessary, +// if the subsequent operation were an iDCT-column transformation. Since +// we only have the iDCT-row transform, we transpose the intermediate +// matrix and use the iDCT-row transform a 2nd time. +// +// I had to change some constants/variables for my method to work: +// +// As given by Intel, the #defines for SHIFT_INV_COL and RND_INV_COL are +// wrong. Not surprising since I'm not using a true column-transform +// operation, but the row-transform operation (as mentioned earlier.) +// round_inv_col[], which is given as "4 short" values, should have the +// same dimensions as round_inv_row[]. The corrected variables are +// shown. +// +// Intel's code defines a different table for each row operation. +// The tables given are 0/4, 1/7, 2/6, and 5/3. My code only uses row#0. +// Using the other rows messes up the overall transform. +// +// IMPLEMENTATION DETAILS +// ---------------------- +// +// I divided the algorithm's work into two subroutines, +// 1) idct_mmx32_rows() - transforms 8 rows, then transpose +// 2) idct_mmx32_cols() - transforms 8 rows, then transpose +// yields final result ("drop-in" direct replacement for INT32 IDCT) +// +// The 2nd function is a clone of the 1st, with changes made only to the +// shift&rounding instructions.
+//
+// In the 1st function (rows), the shift & round instructions use
+// SHIFT_INV_ROW & round_inv_row[] (renamed to r_inv_row[])
+//
+// In the 2nd function (cols), the shift & round instructions use
+// SHIFT_INV_COL & round_inv_col[] (renamed to r_inv_col[])
+//
+// Each function contains an integrated transpose-operator, which comes
+// AFTER the primary transformation operation. In the future, I'll optimize
+// the code to do more of the transpose-work "in-place". Right now, I've
+// left the code as two subroutines and a main calling function, so other
+// people can read the code more easily.
+//
+// liaor@umcc.ais.org http://members.tripod.com/~liaor
+//
+
+
+//;=============================================================================
+//;
+//; AP-922 http://developer.intel.com/vtune/cbts/strmsimd
+//; These examples contain code fragments for first stage iDCT 8x8
+//; (for rows) and first stage DCT 8x8 (for columns)
+//;
+//;=============================================================================
+/*
+mword typedef qword
+qword ptr equ mword ptr */
+
+#include
+
+#define BITS_INV_ACC 4 //; 4 or 5 for IEEE
+ // 5 yields higher accuracy, but lessens dynamic range on the input matrix
+#define SHIFT_INV_ROW (16 - BITS_INV_ACC)
+#define SHIFT_INV_COL (1 + BITS_INV_ACC +14 ) // changed from Intel's val)
+//#define SHIFT_INV_COL (1 + BITS_INV_ACC )
+
+#define RND_INV_ROW (1 << (SHIFT_INV_ROW-1))
+#define RND_INV_COL (1 << (SHIFT_INV_COL-1))
+#define RND_INV_CORR (RND_INV_COL - 1) //; correction -1.0 and round
+//#define RND_INV_ROW (1024 * (6 - BITS_INV_ACC)) //; 1 << (SHIFT_INV_ROW-1)
+//#define RND_INV_COL (16 * (BITS_INV_ACC - 3)) //; 1 << (SHIFT_INV_COL-1)
+
+
+//.data -- each rounder table is fetched by paddd as ONE 64-bit MMX operand holding two packed 32-bit lanes,
+//Align 16 -- so its elements must be exactly 32 bits wide: 'int', not 'long' (64 bits on LP64, which would leave the high lane's rounder 0)
+static const int r_inv_row[2] = { RND_INV_ROW, RND_INV_ROW};
+static const int r_inv_col[2] = {RND_INV_COL, RND_INV_COL};
+static const int r_inv_corr[2] = {RND_INV_CORR, RND_INV_CORR };
+
+//const static short r_inv_col[4] =
+// {RND_INV_COL, RND_INV_COL, RND_INV_COL, RND_INV_COL};
+//const static short r_inv_corr[4] = +// {RND_INV_CORR, RND_INV_CORR, RND_INV_CORR, RND_INV_CORR}; + +/* constants for the forward DCT + +//#define BITS_FRW_ACC 3 //; 2 or 3 for accuracy +//#define SHIFT_FRW_COL BITS_FRW_ACC +//#define SHIFT_FRW_ROW (BITS_FRW_ACC + 17) +//#define RND_FRW_ROW (262144 * (BITS_FRW_ACC - 1)) //; 1 << (SHIFT_FRW_ROW-1) + +const static __int64 one_corr = 0x0001000100010001; +const static long r_frw_row[2] = {RND_FRW_ROW, RND_FRW_ROW }; + +//const static short tg_1_16[4] = {13036, 13036, 13036, 13036 }; //tg * (2<<16) + 0.5 +//const static short tg_2_16[4] = {27146, 27146, 27146, 27146 }; //tg * (2<<16) + 0.5 +//const static short tg_3_16[4] = {-21746, -21746, -21746, -21746 }; //tg * (2<<16) + 0.5 +//const static short cos_4_16[4] = {-19195, -19195, -19195, -19195 }; //cos * (2<<16) + 0.5 +//const static short ocos_4_16[4] = {23170, 23170, 23170, 23170 }; //cos * (2<<15) + 0.5 + +//concatenated table, for forward DCT transformation +const static short tg_all_16[] = { + 13036, 13036, 13036, 13036, // tg * (2<<16) + 0.5 + 27146, 27146, 27146, 27146, //tg * (2<<16) + 0.5 + -21746, -21746, -21746, -21746, // tg * (2<<16) + 0.5 + -19195, -19195, -19195, -19195, //cos * (2<<16) + 0.5 + 23170, 23170, 23170, 23170 }; //cos * (2<<15) + 0.5 + +#define tg_1_16 (tg_all_16 + 0) +#define tg_2_16 (tg_all_16 + 8) +#define tg_3_16 (tg_all_16 + 16) +#define cos_4_16 (tg_all_16 + 24) +#define ocos_4_16 (tg_all_16 + 32) +*/ +/* +;============================================================================= +; +; The first stage iDCT 8x8 - inverse DCTs of rows +; +;----------------------------------------------------------------------------- +; The 8-point inverse DCT direct algorithm +;----------------------------------------------------------------------------- +; +; static const short w[32] = { +; FIX(cos_4_16), FIX(cos_2_16), FIX(cos_4_16), FIX(cos_6_16), +; FIX(cos_4_16), FIX(cos_6_16), -FIX(cos_4_16), -FIX(cos_2_16), +; FIX(cos_4_16), -FIX(cos_6_16), 
-FIX(cos_4_16), FIX(cos_2_16), +; FIX(cos_4_16), -FIX(cos_2_16), FIX(cos_4_16), -FIX(cos_6_16), +; FIX(cos_1_16), FIX(cos_3_16), FIX(cos_5_16), FIX(cos_7_16), +; FIX(cos_3_16), -FIX(cos_7_16), -FIX(cos_1_16), -FIX(cos_5_16), +; FIX(cos_5_16), -FIX(cos_1_16), FIX(cos_7_16), FIX(cos_3_16), +; FIX(cos_7_16), -FIX(cos_5_16), FIX(cos_3_16), -FIX(cos_1_16) }; +; +; #define DCT_8_INV_ROW(x, y) + +;{ +; int a0, a1, a2, a3, b0, b1, b2, b3; +; +; a0 =x[0]*w[0]+x[2]*w[1]+x[4]*w[2]+x[6]*w[3]; +; a1 =x[0]*w[4]+x[2]*w[5]+x[4]*w[6]+x[6]*w[7]; +; a2 = x[0] * w[ 8] + x[2] * w[ 9] + x[4] * w[10] + x[6] * w[11]; +; a3 = x[0] * w[12] + x[2] * w[13] + x[4] * w[14] + x[6] * w[15]; +; b0 = x[1] * w[16] + x[3] * w[17] + x[5] * w[18] + x[7] * w[19]; +; b1 = x[1] * w[20] + x[3] * w[21] + x[5] * w[22] + x[7] * w[23]; +; b2 = x[1] * w[24] + x[3] * w[25] + x[5] * w[26] + x[7] * w[27]; +; b3 = x[1] * w[28] + x[3] * w[29] + x[5] * w[30] + x[7] * w[31]; +; +; y[0] = SHIFT_ROUND ( a0 + b0 ); +; y[1] = SHIFT_ROUND ( a1 + b1 ); +; y[2] = SHIFT_ROUND ( a2 + b2 ); +; y[3] = SHIFT_ROUND ( a3 + b3 ); +; y[4] = SHIFT_ROUND ( a3 - b3 ); +; y[5] = SHIFT_ROUND ( a2 - b2 ); +; y[6] = SHIFT_ROUND ( a1 - b1 ); +; y[7] = SHIFT_ROUND ( a0 - b0 ); +;} +; +;----------------------------------------------------------------------------- +; +; In this implementation the outputs of the iDCT-1D are multiplied +; for rows 0,4 - by cos_4_16, +; for rows 1,7 - by cos_1_16, +; for rows 2,6 - by cos_2_16, +; for rows 3,5 - by cos_3_16 +; and are shifted to the left for better accuracy +; +; For the constants used, +; FIX(float_const) = (short) (float_const * (1<<15) + 0.5) +; +;============================================================================= +;============================================================================= +IF _MMX ; MMX code +;============================================================================= + +//; Table for rows 0,4 - constants are multiplied by cos_4_16 +const short tab_i_04[] = { + 
16384, 16384, 16384, -16384, // ; movq-> w06 w04 w02 w00 + 21407, 8867, 8867, -21407, // w07 w05 w03 w01 + 16384, -16384, 16384, 16384, //; w14 w12 w10 w08 + -8867, 21407, -21407, -8867, //; w15 w13 w11 w09 + 22725, 12873, 19266, -22725, //; w22 w20 w18 w16 + 19266, 4520, -4520, -12873, //; w23 w21 w19 w17 + 12873, 4520, 4520, 19266, //; w30 w28 w26 w24 + -22725, 19266, -12873, -22725 };//w31 w29 w27 w25 + +//; Table for rows 1,7 - constants are multiplied by cos_1_16 +const short tab_i_17[] = { + 22725, 22725, 22725, -22725, // ; movq-> w06 w04 w02 w00 + 29692, 12299, 12299, -29692, // ; w07 w05 w03 w01 + 22725, -22725, 22725, 22725, //; w14 w12 w10 w08 + -12299, 29692, -29692, -12299, //; w15 w13 w11 w09 + 31521, 17855, 26722, -31521, //; w22 w20 w18 w16 + 26722, 6270, -6270, -17855, //; w23 w21 w19 w17 + 17855, 6270, 6270, 26722, //; w30 w28 w26 w24 + -31521, 26722, -17855, -31521}; // w31 w29 w27 w25 + +//; Table for rows 2,6 - constants are multiplied by cos_2_16 +const short tab_i_26[] = { + 21407, 21407, 21407, -21407, // ; movq-> w06 w04 w02 w00 + 27969, 11585, 11585, -27969, // ; w07 w05 w03 w01 + 21407, -21407, 21407, 21407, // ; w14 w12 w10 w08 + -11585, 27969, -27969, -11585, // ;w15 w13 w11 w09 + 29692, 16819, 25172, -29692, // ;w22 w20 w18 w16 + 25172, 5906, -5906, -16819, // ;w23 w21 w19 w17 + 16819, 5906, 5906, 25172, // ;w30 w28 w26 w24 + -29692, 25172, -16819, -29692}; // ;w31 w29 w27 w25 + + +//; Table for rows 3,5 - constants are multiplied by cos_3_16 +const short tab_i_35[] = { + 19266, 19266, 19266, -19266, //; movq-> w06 w04 w02 w00 + 25172, 10426, 10426, -25172, //; w07 w05 w03 w01 + 19266, -19266, 19266, 19266, //; w14 w12 w10 w08 + -10426, 25172, -25172, -10426, //; w15 w13 w11 w09 + 26722, 15137, 22654, -26722, //; w22 w20 w18 w16 + 22654, 5315, -5315, -15137, //; w23 w21 w19 w17 + 15137, 5315, 5315, 22654, //; w30 w28 w26 w24 + -26722, 22654, -15137, -26722}; //; w31 w29 w27 w25 +*/ + +// CONCATENATED TABLE, rows 0,1,2,3,4,5,6,7 (in 
order )
+//
+// In our implementation, however, we only use row0 !
+//
+static const short tab_i_01234567[] = {
+  //row0, this row is required
+  16384, 16384, 16384, -16384, // ; movq-> w06 w04 w02 w00
+  21407, 8867, 8867, -21407, // w07 w05 w03 w01
+  16384, -16384, 16384, 16384, //; w14 w12 w10 w08
+  -8867, 21407, -21407, -8867, //; w15 w13 w11 w09
+  22725, 12873, 19266, -22725, //; w22 w20 w18 w16
+  19266, 4520, -4520, -12873, //; w23 w21 w19 w17
+  12873, 4520, 4520, 19266, //; w30 w28 w26 w24
+  -22725, 19266, -12873, -22725, //w31 w29 w27 w25
+
+  // the rest of these rows (1-7), aren't used !
+
+  //row1
+  22725, 22725, 22725, -22725, // ; movq-> w06 w04 w02 w00
+  29692, 12299, 12299, -29692, // ; w07 w05 w03 w01
+  22725, -22725, 22725, 22725, //; w14 w12 w10 w08
+  -12299, 29692, -29692, -12299, //; w15 w13 w11 w09
+  31521, 17855, 26722, -31521, //; w22 w20 w18 w16
+  26722, 6270, -6270, -17855, //; w23 w21 w19 w17
+  17855, 6270, 6270, 26722, //; w30 w28 w26 w24
+  -31521, 26722, -17855, -31521, // w31 w29 w27 w25
+
+  //row2
+  21407, 21407, 21407, -21407, // ; movq-> w06 w04 w02 w00
+  27969, 11585, 11585, -27969, // ; w07 w05 w03 w01
+  21407, -21407, 21407, 21407, // ; w14 w12 w10 w08
+  -11585, 27969, -27969, -11585, // ;w15 w13 w11 w09
+  29692, 16819, 25172, -29692, // ;w22 w20 w18 w16
+  25172, 5906, -5906, -16819, // ;w23 w21 w19 w17
+  16819, 5906, 5906, 25172, // ;w30 w28 w26 w24
+  -29692, 25172, -16819, -29692, // ;w31 w29 w27 w25
+
+  //row3
+  19266, 19266, 19266, -19266, //; movq-> w06 w04 w02 w00
+  25172, 10426, 10426, -25172, //; w07 w05 w03 w01
+  19266, -19266, 19266, 19266, //; w14 w12 w10 w08
+  -10426, 25172, -25172, -10426, //; w15 w13 w11 w09
+  26722, 15137, 22654, -26722, //; w22 w20 w18 w16
+  22654, 5315, -5315, -15137, //; w23 w21 w19 w17
+  15137, 5315, 5315, 22654, //; w30 w28 w26 w24
+  -26722, 22654, -15137, -26722, //; w31 w29 w27 w25
+
+  //row4
+  16384, 16384, 16384, -16384, // ; movq-> w06 w04 w02 w00
+  21407, 8867, 8867, -21407, // w07 w05 w03 w01
+  16384, -16384, 16384, 16384, //; w14 w12 w10 w08
+  -8867, 21407, -21407, -8867, //; w15 w13 w11 w09
+  22725, 12873, 19266, -22725, //; w22 w20 w18 w16
+  19266, 4520, -4520, -12873, //; w23 w21 w19 w17
+  12873, 4520, 4520, 19266, //; w30 w28 w26 w24
+  -22725, 19266, -12873, -22725, //w31 w29 w27 w25
+
+  //row5
+  19266, 19266, 19266, -19266, //; movq-> w06 w04 w02 w00
+  25172, 10426, 10426, -25172, //; w07 w05 w03 w01
+  19266, -19266, 19266, 19266, //; w14 w12 w10 w08
+  -10426, 25172, -25172, -10426, //; w15 w13 w11 w09
+  26722, 15137, 22654, -26722, //; w22 w20 w18 w16
+  22654, 5315, -5315, -15137, //; w23 w21 w19 w17
+  15137, 5315, 5315, 22654, //; w30 w28 w26 w24
+  -26722, 22654, -15137, -26722, //; w31 w29 w27 w25
+
+  //row6
+  21407, 21407, 21407, -21407, // ; movq-> w06 w04 w02 w00
+  27969, 11585, 11585, -27969, // ; w07 w05 w03 w01
+  21407, -21407, 21407, 21407, // ; w14 w12 w10 w08
+  -11585, 27969, -27969, -11585, // ;w15 w13 w11 w09
+  29692, 16819, 25172, -29692, // ;w22 w20 w18 w16
+  25172, 5906, -5906, -16819, // ;w23 w21 w19 w17
+  16819, 5906, 5906, 25172, // ;w30 w28 w26 w24
+  -29692, 25172, -16819, -29692, // ;w31 w29 w27 w25
+
+  //row7
+  22725, 22725, 22725, -22725, // ; movq-> w06 w04 w02 w00
+  29692, 12299, 12299, -29692, // ; w07 w05 w03 w01
+  22725, -22725, 22725, 22725, //; w14 w12 w10 w08
+  -12299, 29692, -29692, -12299, //; w15 w13 w11 w09
+  31521, 17855, 26722, -31521, //; w22 w20 w18 w16
+  26722, 6270, -6270, -17855, //; w23 w21 w19 w17
+  17855, 6270, 6270, 26722, //; w30 w28 w26 w24
+  -31521, 26722, -17855, -31521}; // w31 w29 w27 w25
+
+
+#define INP eax // pointer to (short *blk)
+#define OUT ecx // pointer to output (temporary store space qwTemp[])
+#define TABLE ebx // pointer to tab_i_01234567[]
+#define round_inv_row edx
+#define round_inv_col edx // NOTE(review): these five register aliases look like leftovers from an inline-asm version; the C code visible here never references them -- confirm before removing
+
+#define ROW_STRIDE 8 // for 8x8 matrix transposer
+
+// private variables and functions
+
+//temporary storage space, 8x8 of shorts (now the local qwTemp[] inside idct_mmx32_rows)
+
+__inline static void idct_mmx32_rows( short *blk ); // transform rows
+__inline static void idct_mmx32_cols( short *blk ); // transform "columns"
+ // the "column" transform actually transforms rows, it is
+ // identical to the row-transform except for the ROUNDING
+ // and SHIFTING coefficients.
+
+
+static void
+idct_mmx32_rows( short *blk ) // transform all 8 rows of 8x8 iDCT block into qwTemp[], then write the transpose of qwTemp[] back into blk
+{
+  int x;
+  short qwTemp[64];
+  short *out = &qwTemp[0];
+  short *inptr = blk;
+  // this subroutine performs two operations
+  // 1) iDCT row transform
+  // for( i = 0; i < 8; ++ i)
+  // DCT_8_INV_ROW_1( blk[i*8], qwTemp[i] );
+  //
+  // 2) transpose the matrix (which was stored in qwTemp[])
+  // qwTemp[] -> [8x8 matrix transpose] -> blk[]
+
+  for (x=0; x<8; x++) { // transform one row per iteration
+    movq_m2r(*(inptr), mm0); // 0 ; x3 x2 x1 x0
+
+    movq_m2r(*(inptr+4), mm1); // 1 ; x7 x6 x5 x4
+    movq_r2r(mm0, mm2); // 2 ; x3 x2 x1 x0
+
+    movq_m2r(*(tab_i_01234567), mm3); // 3 ; w06 w04 w02 w00
+    punpcklwd_r2r(mm1, mm0); // x5 x1 x4 x0
+
+    // ----------
+    movq_r2r(mm0, mm5); // 5 ; x5 x1 x4 x0
+    punpckldq_r2r(mm0, mm0); // x4 x0 x4 x0
+
+    movq_m2r(*(tab_i_01234567+4), mm4); // 4 ; w07 w05 w03 w01
+    punpckhwd_r2r(mm1, mm2); // 1 ; x7 x3 x6 x2
+
+    pmaddwd_r2r(mm0, mm3); // x4*w06+x0*w04 x4*w02+x0*w00
+    movq_r2r(mm2, mm6); // 6 ; x7 x3 x6 x2
+
+    movq_m2r(*(tab_i_01234567+16), mm1);// 1 ; w22 w20 w18 w16
+    punpckldq_r2r(mm2, mm2); // x6 x2 x6 x2
+
+    pmaddwd_r2r(mm2, mm4); // x6*w07+x2*w05 x6*w03+x2*w01
+    punpckhdq_r2r(mm5, mm5); // x5 x1 x5 x1
+
+    pmaddwd_m2r(*(tab_i_01234567+8), mm0);// x4*w14+x0*w12 x4*w10+x0*w08
+    punpckhdq_r2r(mm6, mm6); // x7 x3 x7 x3
+
+    movq_m2r(*(tab_i_01234567+20), mm7);// 7 ; w23 w21 w19 w17
+    pmaddwd_r2r(mm5, mm1); // x5*w22+x1*w20 x5*w18+x1*w16
+
+    paddd_m2r(*(r_inv_row), mm3);// +rounder
+    pmaddwd_r2r(mm6, mm7); // x7*w23+x3*w21 x7*w19+x3*w17
+
+    pmaddwd_m2r(*(tab_i_01234567+12), mm2);// x6*w15+x2*w13 x6*w11+x2*w09
+    paddd_r2r(mm4, mm3); // 4 ; a1=sum(even1) a0=sum(even0)
+
+    pmaddwd_m2r(*(tab_i_01234567+24), mm5);// x5*w30+x1*w28 x5*w26+x1*w24
+    movq_r2r(mm3, mm4); // 4 ; a1 a0
+
+    pmaddwd_m2r(*(tab_i_01234567+28), mm6);// x7*w31+x3*w29 x7*w27+x3*w25
+    paddd_r2r(mm7, mm1); // 7 ; b1=sum(odd1) b0=sum(odd0)
+
+    paddd_m2r(*(r_inv_row), mm0);// +rounder
+    psubd_r2r(mm1, mm3); // a1-b1 a0-b0
+
+    psrad_i2r(SHIFT_INV_ROW, mm3); // y6=a1-b1 y7=a0-b0
+    paddd_r2r(mm4, mm1); // 4 ; a1+b1 a0+b0
+
+    paddd_r2r(mm2, mm0); // 2 ; a3=sum(even3) a2=sum(even2)
+    psrad_i2r(SHIFT_INV_ROW, mm1); // y1=a1+b1 y0=a0+b0
+
+    paddd_r2r(mm6, mm5); // 6 ; b3=sum(odd3) b2=sum(odd2)
+    movq_r2r(mm0, mm4); // 4 ; a3 a2
+
+    paddd_r2r(mm5, mm0); // a3+b3 a2+b2
+    psubd_r2r(mm5, mm4); // 5 ; a3-b3 a2-b2
+
+    psrad_i2r(SHIFT_INV_ROW, mm4); // y4=a3-b3 y5=a2-b2
+    psrad_i2r(SHIFT_INV_ROW, mm0); // y3=a3+b3 y2=a2+b2
+
+    packssdw_r2r(mm3, mm4); // 3 ; y6 y7 y4 y5
+
+    packssdw_r2r(mm0, mm1); // 0 ; y3 y2 y1 y0
+    movq_r2r(mm4, mm7); // 7 ; y6 y7 y4 y5
+
+    psrld_i2r(16, mm4); // 0 y6 0 y4
+
+    movq_r2m(mm1, *(out)); // 1 ; save y3 y2 y1 y0
+    pslld_i2r(16, mm7); // y7 0 y5 0
+
+    por_r2r(mm4, mm7); // 4 ; y7 y6 y5 y4
+
+    // store the upper half of this row (y7..y4); the pointer bumps below then move on to the next row
+    movq_r2m(mm7, *(out+4)); // 7 ; save y7 y6 y5 y4
+
+    inptr += 8;
+    out += 8;
+  }
+
+
+  // done with the iDCT row-transformation
+
+  // now we have to transpose the output 8x8 matrix
+  // 8x8 (OUT) -> 8x8't' (IN)
+  // the transposition is implemented as 4 sub-operations.
+  // 1) transpose upper-left quad
+  // 2) transpose lower-right quad
+  // 3) transpose lower-left quad
+  // 4) transpose upper-right quad
+
+
+  // mm0 = 1st row [ A B C D ] row1
+  // mm1 = 2nd row [ E F G H ] 2
+  // mm2 = 3rd row [ I J K L ] 3
+  // mm3 = 4th row [ M N O P ] 4
+
+  // 1) transpose upper-left quad (qwTemp rows 0-3, cols 0-3 -> blk rows 0-3, cols 0-3)
+  out = &qwTemp[0];
+
+  movq_m2r(*(out + ROW_STRIDE * 0), mm0);
+
+  movq_m2r(*(out + ROW_STRIDE * 1), mm1);
+  movq_r2r(mm0, mm4); // mm4 = copy of row1[A B C D]
+
+  movq_m2r(*(out + ROW_STRIDE * 2), mm2);
+  punpcklwd_r2r(mm1, mm0); // mm0 = [ 0 4 1 5]
+
+  movq_m2r(*(out + ROW_STRIDE * 3), mm3);
+  punpckhwd_r2r(mm1, mm4); // mm4 = [ 2 6 3 7]
+
+  movq_r2r(mm2, mm6);
+  punpcklwd_r2r(mm3, mm2); // mm2 = [ 8 12 9 13]
+
+  punpckhwd_r2r(mm3, mm6); // mm6 = [10 14 11 15]
+  movq_r2r(mm0, mm1); // mm1 = [ 0 4 1 5]
+
+  inptr = blk;
+
+  punpckldq_r2r(mm2, mm0); // final result mm0 = row1 [0 4 8 12]
+
+  movq_r2r(mm4, mm3); // mm3 = [ 2 6 3 7]
+  punpckhdq_r2r(mm2, mm1); // mm1 = final result mm1 = row2 [1 5 9 13]
+
+  movq_r2m(mm0, *(inptr + ROW_STRIDE * 0)); // store row 1
+  punpckldq_r2r(mm6, mm4); // final result mm4 = row3 [2 6 10 14]
+
+// begin reading next quadrant (lower-right)
+  movq_m2r(*(out + ROW_STRIDE*4 + 4), mm0);
+  punpckhdq_r2r(mm6, mm3); // final result mm3 = row4 [3 7 11 15]
+
+  movq_r2m(mm4, *(inptr + ROW_STRIDE * 2)); // store row 3
+  movq_r2r(mm0, mm4); // mm4 = copy of row1[A B C D]
+
+  movq_r2m(mm1, *(inptr + ROW_STRIDE * 1)); // store row 2
+
+  movq_m2r(*(out + ROW_STRIDE*5 + 4), mm1);
+
+  movq_r2m(mm3, *(inptr + ROW_STRIDE * 3)); // store row 4
+  punpcklwd_r2r(mm1, mm0); // mm0 = [ 0 4 1 5]
+
+  // 2) transpose lower-right quadrant (qwTemp rows 4-7, cols 4-7 -> blk rows 4-7, cols 4-7)
+
+// movq mm0, qword ptr [OUT + ROW_STRIDE*4 + 8]
+
+// movq mm1, qword ptr [OUT + ROW_STRIDE*5 + 8]
+// movq mm4, mm0; // mm4 = copy of row1[A B C D]
+
+  movq_m2r(*(out + ROW_STRIDE*6 + 4), mm2);
+// punpcklwd mm0, mm1; // mm0 = [ 0 4 1 5]
+  punpckhwd_r2r(mm1, mm4); // mm4 = [ 2 6 3 7]
+
+  movq_m2r(*(out + ROW_STRIDE*7 + 4), mm3);
+  movq_r2r(mm2, mm6);
+
+  punpcklwd_r2r(mm3, mm2); // mm2 = [ 8 12 9 13]
+  movq_r2r(mm0, mm1); // mm1 = [ 0 4 1 5]
+
+  punpckhwd_r2r(mm3, mm6); // mm6 = [10 14 11 15]
+  movq_r2r(mm4, mm3); // mm3 = [ 2 6 3 7]
+
+  punpckldq_r2r(mm2, mm0); // final result mm0 = row1 [0 4 8 12]
+
+  punpckhdq_r2r(mm2, mm1); // mm1 = final result mm1 = row2 [1 5 9 13]
+  ; // slot (empty statement; presumably marks an unfilled instruction-pairing slot)
+
+  movq_r2m(mm0, *(inptr + ROW_STRIDE*4 + 4)); // store row 1
+  punpckldq_r2r(mm6, mm4); // final result mm4 = row3 [2 6 10 14]
+
+  movq_m2r(*(out + ROW_STRIDE * 4 ), mm0);
+  punpckhdq_r2r(mm6, mm3); // final result mm3 = row4 [3 7 11 15]
+
+  movq_r2m(mm4, *(inptr + ROW_STRIDE*6 + 4)); // store row 3
+  movq_r2r(mm0, mm4); // mm4 = copy of row1[A B C D]
+
+  movq_r2m(mm1, *(inptr + ROW_STRIDE*5 + 4)); // store row 2
+  ; // slot
+
+  movq_m2r(*(out + ROW_STRIDE * 5 ), mm1);
+  ; // slot
+
+  movq_r2m(mm3, *(inptr + ROW_STRIDE*7 + 4)); // store row 4
+  punpcklwd_r2r(mm1, mm0); // mm0 = [ 0 4 1 5]
+
+  // 3) transpose lower-left (qwTemp rows 4-7, cols 0-3 -> blk rows 0-3, cols 4-7)
+// movq mm0, qword ptr [OUT + ROW_STRIDE * 4 ]
+
+// movq mm1, qword ptr [OUT + ROW_STRIDE * 5 ]
+// movq mm4, mm0; // mm4 = copy of row1[A B C D]
+
+  movq_m2r(*(out + ROW_STRIDE * 6 ), mm2);
+// punpcklwd mm0, mm1; // mm0 = [ 0 4 1 5]
+  punpckhwd_r2r(mm1, mm4); // mm4 = [ 2 6 3 7]
+
+  movq_m2r(*(out + ROW_STRIDE * 7 ), mm3);
+  movq_r2r(mm2, mm6);
+
+  punpcklwd_r2r(mm3, mm2); // mm2 = [ 8 12 9 13]
+  movq_r2r(mm0, mm1); // mm1 = [ 0 4 1 5]
+
+  punpckhwd_r2r(mm3, mm6); // mm6 = [10 14 11 15]
+  movq_r2r(mm4, mm3); // mm3 = [ 2 6 3 7]
+
+  punpckldq_r2r(mm2, mm0); // final result mm0 = row1 [0 4 8 12]
+
+  punpckhdq_r2r(mm2, mm1); // mm1 = final result mm1 = row2 [1 5 9 13]
+  ;//slot
+
+  movq_r2m(mm0, *(inptr + ROW_STRIDE * 0 + 4 )); // store row 1
+  punpckldq_r2r(mm6, mm4); // final result mm4 = row3 [2 6 10 14]
+
+// begin reading next quadrant (upper-right)
+  movq_m2r(*(out + ROW_STRIDE*0 + 4), mm0);
+  punpckhdq_r2r(mm6, mm3); // final result mm3 = row4 [3 7 11 15]
+
+  movq_r2m(mm4, *(inptr + ROW_STRIDE * 2 + 4)); // store row 3
+  movq_r2r(mm0, mm4); // mm4 = copy of row1[A B C D]
+
+  movq_r2m(mm1, *(inptr + ROW_STRIDE * 1 + 4)); // store row 2
+  movq_m2r(*(out + ROW_STRIDE*1 + 4), mm1);
+
+  movq_r2m(mm3, *(inptr + ROW_STRIDE * 3 + 4)); // store row 4
+  punpcklwd_r2r(mm1, mm0); // mm0 = [ 0 4 1 5]
+
+
+  // 4) transpose upper-right quadrant (qwTemp rows 0-3, cols 4-7 -> blk rows 4-7, cols 0-3)
+
+// movq mm0, qword ptr [OUT + ROW_STRIDE*4 + 8]   (stale: copied from section 2; the live loads above read rows 0 and 1)
+
+// movq mm1, qword ptr [OUT + ROW_STRIDE*5 + 8]   (stale, as above)
+// movq mm4, mm0; // mm4 = copy of row1[A B C D]
+
+  movq_m2r(*(out + ROW_STRIDE*2 + 4), mm2);
+// punpcklwd mm0, mm1; // mm0 = [ 0 4 1 5]
+  punpckhwd_r2r(mm1, mm4); // mm4 = [ 2 6 3 7]
+
+  movq_m2r(*(out + ROW_STRIDE*3 + 4), mm3);
+  movq_r2r(mm2, mm6);
+
+  punpcklwd_r2r(mm3, mm2); // mm2 = [ 8 12 9 13]
+  movq_r2r(mm0, mm1); // mm1 = [ 0 4 1 5]
+
+  punpckhwd_r2r(mm3, mm6); // mm6 = [10 14 11 15]
+  movq_r2r(mm4, mm3); // mm3 = [ 2 6 3 7]
+
+  punpckldq_r2r(mm2, mm0); // final result mm0 = row1 [0 4 8 12]
+
+  punpckhdq_r2r(mm2, mm1); // mm1 = final result mm1 = row2 [1 5 9 13]
+  ; // slot
+
+  movq_r2m(mm0, *(inptr + ROW_STRIDE*4)); // store row 1
+  punpckldq_r2r(mm6, mm4); // final result mm4 = row3 [2 6 10 14]
+
+  movq_r2m(mm1, *(inptr + ROW_STRIDE*5)); // store row 2
+  punpckhdq_r2r(mm6, mm3); // final result mm3 = row4 [3 7 11 15]
+
+  movq_r2m(mm4, *(inptr + ROW_STRIDE*6)); // store row 3
+  ; // slot
+
+  movq_r2m(mm3, *(inptr + ROW_STRIDE*7)); // store row 4
+  ; // slot -- NOTE(review): no emms is issued in this function; presumably the caller does it -- confirm
+
+}
+
+
+static void
+idct_mmx32_cols( short *blk ) // transform all 8 cols of 8x8 iDCT block
+{
+  int x;
+  short *inptr = blk;
+
+  // Despite the function's name, the matrix is transformed
+  // row by row. This function is identical to idct_mmx32_rows(),
+  // except for the SHIFT amount and ROUND_INV amount.
+ + // this subroutine performs two operations + // 1) iDCT row transform + // for( i = 0; i < 8; ++ i) + // DCT_8_INV_ROW_1( blk[i*8], qwTemp[i] ); + // + // 2) transpose the matrix (which was stored in qwTemp[]) + // qwTemp[] -> [8x8 matrix transpose] -> blk[] + + + for (x=0; x<8; x++) { // transform one row per iteration + + movq_m2r(*(inptr), mm0); // 0 ; x3 x2 x1 x0 + + movq_m2r(*(inptr+4), mm1); // 1 ; x7 x6 x5 x4 + movq_r2r(mm0, mm2); // 2 ; x3 x2 x1 x0 + + movq_m2r(*(tab_i_01234567), mm3); // 3 ; w06 w04 w02 w00 + punpcklwd_r2r(mm1, mm0); // x5 x1 x4 x0 + +// ---------- + movq_r2r(mm0, mm5); // 5 ; x5 x1 x4 x0 + punpckldq_r2r(mm0, mm0); // x4 x0 x4 x0 + + movq_m2r(*(tab_i_01234567+4), mm4); // 4 ; w07 w05 w03 w01 + punpckhwd_r2r(mm1, mm2); // 1 ; x7 x3 x6 x2 + + pmaddwd_r2r(mm0, mm3); // x4*w06+x0*w04 x4*w02+x0*w00 + movq_r2r(mm2, mm6); // 6 ; x7 x3 x6 x2 + + movq_m2r(*(tab_i_01234567+16), mm1);// 1 ; w22 w20 w18 w16 + punpckldq_r2r(mm2, mm2); // x6 x2 x6 x2 + + pmaddwd_r2r(mm2, mm4); // x6*w07+x2*w05 x6*w03+x2*w01 + punpckhdq_r2r(mm5, mm5); // x5 x1 x5 x1 + + pmaddwd_m2r(*(tab_i_01234567+8), mm0);// x4*w14+x0*w12 x4*w10+x0*w08 + punpckhdq_r2r(mm6, mm6); // x7 x3 x7 x3 + + movq_m2r(*(tab_i_01234567+20), mm7);// 7 ; w23 w21 w19 w17 + pmaddwd_r2r(mm5, mm1); // x5*w22+x1*w20 x5*w18+x1*w16 + + paddd_m2r(*(r_inv_col), mm3);// +rounder + pmaddwd_r2r(mm6, mm7); // x7*w23+x3*w21 x7*w19+x3*w17 + + pmaddwd_m2r(*(tab_i_01234567+12), mm2);// x6*w15+x2*w13 x6*w11+x2*w09 + paddd_r2r(mm4, mm3); // 4 ; a1=sum(even1) a0=sum(even0) + + pmaddwd_m2r(*(tab_i_01234567+24), mm5);// x5*w30+x1*w28 x5*w26+x1*w24 + movq_r2r(mm3, mm4); // 4 ; a1 a0 + + pmaddwd_m2r(*(tab_i_01234567+28), mm6);// x7*w31+x3*w29 x7*w27+x3*w25 + paddd_r2r(mm7, mm1); // 7 ; b1=sum(odd1) b0=sum(odd0) + + paddd_m2r(*(r_inv_col), mm0);// +rounder + psubd_r2r(mm1, mm3); // a1-b1 a0-b0 + + psrad_i2r(SHIFT_INV_COL, mm3); // y6=a1-b1 y7=a0-b0 + paddd_r2r(mm4, mm1); // 4 ; a1+b1 a0+b0 + + paddd_r2r(mm2, mm0); // 2 ; 
a3=sum(even3) a2=sum(even2) + psrad_i2r(SHIFT_INV_COL, mm1); // y1=a1+b1 y0=a0+b0 + + paddd_r2r(mm6, mm5); // 6 ; b3=sum(odd3) b2=sum(odd2) + movq_r2r(mm0, mm4); // 4 ; a3 a2 + + paddd_r2r(mm5, mm0); // a3+b3 a2+b2 + psubd_r2r(mm5, mm4); // 5 ; a3-b3 a2-b2 + + + psrad_i2r(SHIFT_INV_COL, mm4); // y4=a3-b3 y5=a2-b2 + psrad_i2r(SHIFT_INV_COL, mm0); // y3=a3+b3 y2=a2+b2 + + packssdw_r2r(mm3, mm4); // 3 ; y6 y7 y4 y5 + + packssdw_r2r(mm0, mm1); // 0 ; y3 y2 y1 y0 + movq_r2r(mm4, mm7); // 7 ; y6 y7 y4 y5 + + psrld_i2r(16, mm4); // 0 y6 0 y4 + + movq_r2m(mm1, *(inptr)); // 1 ; save y3 y2 y1 y0 + pslld_i2r(16, mm7); // y7 0 y5 0 + + por_r2r(mm4, mm7); // 4 ; y7 y6 y5 y4 + + // begin processing row 1 + movq_r2m(mm7, *(inptr+4)); // 7 ; save y7 y6 y5 y4 + + inptr += 8; // step to the next row of 8 shorts + } + // done with the iDCT column-transformation +} + +// +// public interface to MMX32 IDCT 8x8 operation +// runs both passes in order on blk (results are stored back through blk), +// then executes emms() so the caller does not have to +// +void +gst_idct_mmx32_idct( short *blk ) +{ + // 1) iDCT row transformation + idct_mmx32_rows( blk ); // 1) transform iDCT row, and transpose + + // 2) iDCT column transformation + idct_mmx32_cols( blk ); // 2) second pass, again row by row over the data left by step 1; this pass stores each row in place and does not transpose + + emms(); // restore processor state + // all done +} diff --git a/libs/idct/mmxidct.S b/libs/idct/mmxidct.S new file mode 100644 index 0000000000..df43cddf06 --- /dev/null +++ b/libs/idct/mmxidct.S @@ -0,0 +1,725 @@ +/* + * the input data is transposed and each 16 bit element in the 8x8 matrix + * is left aligned: + * for example in 11...1110000 format + * If the iDCT is of I macroblock then 0.5 needs to be added to the DC component + * (element[0][0] of the matrix) + */ + +/* extrn re_matrix */ + +.data + .align 16 + .type preSC,@object +preSC: .short 16384,22725,21407,19266,16384,12873,8867,4520 + .short 22725,31521,29692,26722,22725,17855,12299,6270 + .short 21407,29692,27969,25172,21407,16819,11585,5906 + .short 19266,26722,25172,22654,19266,15137,10426,5315 + .short 16384,22725,21407,19266,16384,12873,8867,4520 + .short 
12873,17855,16819,15137,25746,20228,13933,7103 + .short 17734,24598,23170,20853,17734,13933,9597,4892 + .short 18081,25080,23624,21261,18081,14206,9785,4988 + .size preSC,128 + .align 8 + .type x0005000200010001,@object + .size x0005000200010001,8 +x0005000200010001: + .long 0x00010001,0x00050002 + .align 8 + .type x0040000000000000,@object + .size x0040000000000000,8 +x0040000000000000: + .long 0, 0x00400000 + .align 8 + .type x5a825a825a825a82,@object + .size x5a825a825a825a82,8 +x5a825a825a825a82: + .long 0x5a825a82, 0x5a825a82 + .align 8 + .type x539f539f539f539f,@object + .size x539f539f539f539f,8 +x539f539f539f539f: + .long 0x539f539f,0x539f539f + .align 8 + .type x4546454645464546,@object + .size x4546454645464546,8 +x4546454645464546: + .long 0x45464546,0x45464546 + .align 8 + .type x61f861f861f861f8,@object + .size x61f861f861f861f8,8 +x61f861f861f861f8: + .long 0x61f861f8,0x61f861f8 + .type x0004000000000000,@object + .size x0004000000000000,8 +x0004000000000000: + .long 0x00000000,0x00040000 + .type x0000000000000004,@object + .size x0000000000000004,8 +x0000000000000004: + .long 0x00000004,0x00000000 + .align 8 + .type scratch1,@object + .size scratch1,8 +scratch1: + .long 0,0 + .align 8 + .type scratch3,@object + .size scratch3,8 +scratch3: + .long 0,0 + .align 8 + .type scratch5,@object + .size scratch5,8 +scratch5: + .long 0,0 + .align 8 + .type scratch7,@object + .size scratch7,8 +scratch7: + .long 0,0 + .type x0,@object + .size x0,8 +x0: + .long 0,0 + .align 8 +.text + .align 4 +.globl gst_idct_mmx_idct + .type gst_idct_mmx_idct,@function +gst_idct_mmx_idct: + pushl %ebp + movl %esp,%ebp + pushl %ebx + pushl %ecx + pushl %edx + pushl %esi + pushl %edi + movl 8(%ebp),%esi /* source matrix */ + movq (%esi), %mm0 + paddw x0000000000000004, %mm0 + movq 8(%esi), %mm1 + psllw $4, %mm0 + movq 16(%esi), %mm2 + psllw $4, %mm1 + movq 24(%esi), %mm3 + psllw $4, %mm2 + movq 32(%esi), %mm4 + psllw $4, %mm3 + movq 40(%esi), %mm5 + psllw $4, %mm4 + movq 48(%esi), 
%mm6 + psllw $4, %mm5 + movq 56(%esi), %mm7 + psllw $4, %mm6 + psllw $4, %mm7 + movq %mm0, (%esi) + movq %mm1, 8(%esi) + movq %mm2,16(%esi) + movq %mm3,24(%esi) + movq %mm4,32(%esi) + movq %mm5,40(%esi) + movq %mm6,48(%esi) + movq %mm7,56(%esi) + movq 64(%esi), %mm0 + movq 72(%esi), %mm1 + psllw $4, %mm0 + movq 80(%esi), %mm2 + psllw $4, %mm1 + movq 88(%esi), %mm3 + psllw $4, %mm2 + movq 96(%esi), %mm4 + psllw $4, %mm3 + movq 104(%esi), %mm5 + psllw $4, %mm4 + movq 112(%esi), %mm6 + psllw $4, %mm5 + movq 120(%esi), %mm7 + psllw $4, %mm6 + psllw $4, %mm7 + movq %mm0,64(%esi) + movq %mm1,72(%esi) + movq %mm2,80(%esi) + movq %mm3,88(%esi) + movq %mm4,96(%esi) + movq %mm5,104(%esi) + movq %mm6,112(%esi) + movq %mm7,120(%esi) + leal preSC, %ecx +/* column 0: even part + * use V4, V12, V0, V8 to produce V22..V25 + */ + movq 8*12(%ecx), %mm0 /* maybe the first mul can be done together */ + /* with the dequantization in iHuff module */ + pmulhw 8*12(%esi), %mm0 /* V12 */ + movq 8*4(%ecx), %mm1 + pmulhw 8*4(%esi), %mm1 /* V4 */ + movq (%ecx), %mm3 + psraw $1, %mm0 /* t64=t66 */ + pmulhw (%esi), %mm3 /* V0 */ + movq 8*8(%ecx), %mm5 /* duplicate V4 */ + movq %mm1, %mm2 /* added 11/1/96 */ + pmulhw 8*8(%esi),%mm5 /* V8 */ + psubsw %mm0, %mm1 /* V16 */ + pmulhw x5a825a825a825a82, %mm1 /* 23170 ->V18 */ + paddsw %mm0, %mm2 /* V17 */ + movq %mm2, %mm0 /* duplicate V17 */ + psraw $1, %mm2 /* t75=t82 */ + psraw $2, %mm0 /* t72 */ + movq %mm3, %mm4 /* duplicate V0 */ + paddsw %mm5, %mm3 /* V19 */ + psubsw %mm5, %mm4 /* V20 ;mm5 free */ +/* moved from the block below */ + movq 8*10(%ecx), %mm7 + psraw $1, %mm3 /* t74=t81 */ + movq %mm3, %mm6 /* duplicate t74=t81 */ + psraw $2, %mm4 /* t77=t79 */ + psubsw %mm0, %mm1 /* V21 ; mm0 free */ + paddsw %mm2, %mm3 /* V22 */ + movq %mm1, %mm5 /* duplicate V21 */ + paddsw %mm4, %mm1 /* V23 */ + movq %mm3, 8*4(%esi) /* V22 */ + psubsw %mm5, %mm4 /* V24; mm5 free */ + movq %mm1, 8*12(%esi) /* V23 */ + psubsw %mm2, %mm6 /* V25; mm2 free */ + movq 
%mm4, (%esi) /* V24 */ +/* keep mm6 alive all along the next block */ + /* movq %mm6, 8*8(%esi) V25 */ +/* column 0: odd part + * use V2, V6, V10, V14 to produce V31, V39, V40, V41 + */ +/* moved above: movq 8*10(%ecx), %mm7 */ + + pmulhw 8*10(%esi), %mm7 /* V10 */ + movq 8*6(%ecx), %mm0 + pmulhw 8*6(%esi), %mm0 /* V6 */ + movq 8*2(%ecx), %mm5 + movq %mm7, %mm3 /* duplicate V10 */ + pmulhw 8*2(%esi), %mm5 /* V2 */ + movq 8*14(%ecx), %mm4 + psubsw %mm0, %mm7 /* V26 */ + pmulhw 8*14(%esi), %mm4 /* V14 */ + paddsw %mm0, %mm3 /* V29 ; free mm0 */ + movq %mm7, %mm1 /* duplicate V26 */ + psraw $1, %mm3 /* t91=t94 */ + pmulhw x539f539f539f539f,%mm7 /* V33 */ + psraw $1, %mm1 /* t96 */ + movq %mm5, %mm0 /* duplicate V2 */ + psraw $2, %mm4 /* t85=t87 */ + paddsw %mm4,%mm5 /* V27 */ + psubsw %mm4, %mm0 /* V28 ; free mm4 */ + movq %mm0, %mm2 /* duplicate V28 */ + psraw $1, %mm5 /* t90=t93 */ + pmulhw x4546454645464546,%mm0 /* V35 */ + psraw $1, %mm2 /* t97 */ + movq %mm5, %mm4 /* duplicate t90=t93 */ + psubsw %mm2, %mm1 /* V32 ; free mm2 */ + pmulhw x61f861f861f861f8,%mm1 /* V36 */ + psllw $1, %mm7 /* t107 */ + paddsw %mm3, %mm5 /* V31 */ + psubsw %mm3, %mm4 /* V30 ; free mm3 */ + pmulhw x5a825a825a825a82,%mm4 /* V34 */ + nop + psubsw %mm1, %mm0 /* V38 */ + psubsw %mm7, %mm1 /* V37 ; free mm7 */ + psllw $1, %mm1 /* t114 */ +/* move from the next block */ + movq %mm6, %mm3 /* duplicate V25 */ +/* move from the next block */ + movq 8*4(%esi), %mm7 /* V22 */ + psllw $1, %mm0 /* t110 */ + psubsw %mm5, %mm0 /* V39 (mm5 needed for next block) */ + psllw $2, %mm4 /* t112 */ +/* moved from the next block */ + movq 8*12(%esi), %mm2 /* V23 */ + psubsw %mm0, %mm4 /* V40 */ + paddsw %mm4, %mm1 /* V41; free mm0 */ +/* moved from the next block */ + psllw $1, %mm2 /* t117=t125 */ +/* column 0: output butterfly */ +/* moved above: + * movq %mm6, %mm3 duplicate V25 + * movq 8*4(%esi), %mm7 V22 + * movq 8*12(%esi), %mm2 V23 + * psllw $1, %mm2 t117=t125 + */ + psubsw %mm1, %mm6 /* tm6 */ + 
paddsw %mm1, %mm3 /* tm8; free mm1 */ + movq %mm7, %mm1 /* duplicate V22 */ + paddsw %mm5, %mm7 /* tm0 */ + movq %mm3, 8*8(%esi) /* tm8; free mm3 */ + psubsw %mm5, %mm1 /* tm14; free mm5 */ + movq %mm6, 8*6(%esi) /* tm6; free mm6 */ + movq %mm2, %mm3 /* duplicate t117=t125 */ + movq (%esi), %mm6 /* V24 */ + paddsw %mm0, %mm2 /* tm2 */ + movq %mm7, (%esi) /* tm0; free mm7 */ + psubsw %mm0, %mm3 /* tm12; free mm0 */ + movq %mm1, 8*14(%esi) /* tm14; free mm1 */ + psllw $1, %mm6 /* t119=t123 */ + movq %mm2, 8*2(%esi) /* tm2; free mm2 */ + movq %mm6, %mm0 /* duplicate t119=t123 */ + movq %mm3, 8*12(%esi) /* tm12; free mm3 */ + paddsw %mm4, %mm6 /* tm4 */ +/* moved from next block */ + movq 8*5(%ecx), %mm1 + psubsw %mm4, %mm0 /* tm10; free mm4 */ +/* moved from next block */ + pmulhw 8*5(%esi), %mm1 /* V5 */ + movq %mm6, 8*4(%esi) /* tm4; free mm6 */ + movq %mm0, 8*10(%esi) /* tm10; free mm0 */ +/* column 1: even part + * use V5, V13, V1, V9 to produce V56..V59 + */ +/* moved to prev block: + * movq 8*5(%ecx), %mm1 + * pmulhw 8*5(%esi), %mm1 V5 + */ + movq 8*13(%ecx), %mm7 + psllw $1, %mm1 /* t128=t130 */ + pmulhw 8*13(%esi), %mm7 /* V13 */ + movq %mm1, %mm2 /* duplicate t128=t130 */ + movq 8(%ecx), %mm3 + pmulhw 8(%esi), %mm3 /* V1 */ + movq 8*9(%ecx), %mm5 + psubsw %mm7, %mm1 /* V50 */ + pmulhw 8*9(%esi), %mm5 /* V9 */ + paddsw %mm7, %mm2 /* V51 */ + pmulhw x5a825a825a825a82, %mm1 /* 23170 ->V52 */ + movq %mm2, %mm6 /* duplicate V51 */ + psraw $1, %mm2 /* t138=t144 */ + movq %mm3, %mm4 /* duplicate V1 */ + psraw $2, %mm6 /* t136 */ + paddsw %mm5, %mm3 /* V53 */ + psubsw %mm5, %mm4 /* V54 ;mm5 free */ + movq %mm3, %mm7 /* duplicate V53 */ +/* moved from next block */ + movq 8*11(%ecx), %mm0 + psraw $1, %mm4 /* t140=t142 */ + psubsw %mm6, %mm1 /* V55 ; mm6 free */ + paddsw %mm2, %mm3 /* V56 */ + movq %mm4, %mm5 /* duplicate t140=t142 */ + paddsw %mm1, %mm4 /* V57 */ + movq %mm3, 8*5(%esi) /* V56 */ + psubsw %mm1, %mm5 /* V58; mm1 free */ + movq %mm4, 8*13(%esi) /* V57 */ 
+ psubsw %mm2, %mm7 /* V59; mm2 free */ + movq %mm5, 8*9(%esi) /* V58 */ +/* keep mm7 alive all along the next block + * movq %mm7, 8(%esi) V59 + * moved above + * movq 8*11(%ecx), %mm0 + */ + pmulhw 8*11(%esi), %mm0 /* V11 */ + movq 8*7(%ecx), %mm6 + pmulhw 8*7(%esi), %mm6 /* V7 */ + movq 8*15(%ecx), %mm4 + movq %mm0, %mm3 /* duplicate V11 */ + pmulhw 8*15(%esi), %mm4 /* V15 */ + movq 8*3(%ecx), %mm5 + psllw $1, %mm6 /* t146=t152 */ + pmulhw 8*3(%esi), %mm5 /* V3 */ + paddsw %mm6, %mm0 /* V63 */ +/* note that V15 computation has a correction step: + * this is a 'magic' constant that rebiases the results to be closer to the + * expected result. this magic constant can be refined to reduce the error + * even more by doing the correction step in a later stage when the number + * is actually multiplied by 16 + */ + paddw x0005000200010001, %mm4 + psubsw %mm6, %mm3 /* V60 ; free mm6 */ + psraw $1, %mm0 /* t154=t156 */ + movq %mm3, %mm1 /* duplicate V60 */ + pmulhw x539f539f539f539f, %mm1 /* V67 */ + movq %mm5, %mm6 /* duplicate V3 */ + psraw $2, %mm4 /* t148=t150 */ + paddsw %mm4, %mm5 /* V61 */ + psubsw %mm4, %mm6 /* V62 ; free mm4 */ + movq %mm5, %mm4 /* duplicate V61 */ + psllw $1, %mm1 /* t169 */ + paddsw %mm0, %mm5 /* V65 -> result */ + psubsw %mm0, %mm4 /* V64 ; free mm0 */ + pmulhw x5a825a825a825a82, %mm4 /* V68 */ + psraw $1, %mm3 /* t158 */ + psubsw %mm6, %mm3 /* V66 */ + movq %mm5, %mm2 /* duplicate V65 */ + pmulhw x61f861f861f861f8, %mm3 /* V70 */ + psllw $1, %mm6 /* t165 */ + pmulhw x4546454645464546, %mm6 /* V69 */ + psraw $1, %mm2 /* t172 */ +/* moved from next block */ + movq 8*5(%esi), %mm0 /* V56 */ + psllw $1, %mm4 /* t174 */ +/* moved from next block */ + psraw $1, %mm0 /* t177=t188 */ + nop + psubsw %mm3, %mm6 /* V72 */ + psubsw %mm1, %mm3 /* V71 ; free mm1 */ + psubsw %mm2, %mm6 /* V73 ; free mm2 */ +/* moved from next block */ + psraw $1, %mm5 /* t178=t189 */ + psubsw %mm6, %mm4 /* V74 */ +/* moved from next block */ + movq %mm0, %mm1 /* duplicate 
t177=t188 */ + paddsw %mm4, %mm3 /* V75 */ +/* moved from next block */ + paddsw %mm5, %mm0 /* tm1 */ +/* location + * 5 - V56 + * 13 - V57 + * 9 - V58 + * X - V59, mm7 + * X - V65, mm5 + * X - V73, mm6 + * X - V74, mm4 + * X - V75, mm3 + * free mm0, mm1 & mm2 + * moved above + * movq 8*5(%esi), %mm0 V56 + * psllw $1, %mm0 t177=t188 ! new !! + * psllw $1, %mm5 t178=t189 ! new !! + * movq %mm0, %mm1 duplicate t177=t188 + * paddsw %mm5, %mm0 tm1 + */ + movq 8*13(%esi), %mm2 /* V57 */ + psubsw %mm5, %mm1 /* tm15; free mm5 */ + movq %mm0, 8(%esi) /* tm1; free mm0 */ + psraw $1, %mm7 /* t182=t184 ! new !! */ +/* save the store as used directly in the transpose + * movq %mm1, 120(%esi) tm15; free mm1 + */ + movq %mm7, %mm5 /* duplicate t182=t184 */ + psubsw %mm3, %mm7 /* tm7 */ + paddsw %mm3, %mm5 /* tm9; free mm3 */ + movq 8*9(%esi), %mm0 /* V58 */ + movq %mm2, %mm3 /* duplicate V57 */ + movq %mm7, 8*7(%esi) /* tm7; free mm7 */ + psubsw %mm6, %mm3 /* tm13 */ + paddsw %mm6, %mm2 /* tm3 ; free mm6 */ +/* moved up from the transpose */ + movq %mm3, %mm7 +/* moved up from the transpose */ + punpcklwd %mm1, %mm3 + movq %mm0, %mm6 /* duplicate V58 */ + movq %mm2, 8*3(%esi) /* tm3; free mm2 */ + paddsw %mm4, %mm0 /* tm5 */ + psubsw %mm4, %mm6 /* tm11; free mm4 */ +/* moved up from the transpose */ + punpckhwd %mm1, %mm7 + movq %mm0, 8*5(%esi) /* tm5; free mm0 */ +/* moved up from the transpose */ + movq %mm5, %mm2 +/* transpose - M4 part + * --------- --------- + * | M1 | M2 | | M1'| M3'| + * --------- --> --------- + * | M3 | M4 | | M2'| M4'| + * --------- --------- + * Two alternatives: use full mmword approach so the following code can be + * scheduled before the transpose is done without stores, or use the faster + * half mmword stores (when possible) + */ + movd %mm3, 8*9+4(%esi) /* MS part of tmt9 */ + punpcklwd %mm6, %mm5 + movd %mm7, 8*13+4(%esi) /* MS part of tmt13 */ + punpckhwd %mm6, %mm2 + movd %mm5, 8*9(%esi) /* LS part of tmt9 */ + punpckhdq %mm3, %mm5 /* free 
mm3 */ + movd %mm2, 8*13(%esi) /* LS part of tmt13 */ + punpckhdq %mm7, %mm2 /* free mm7 */ +/* moved up from the M3 transpose */ + movq 8*8(%esi), %mm0 +/* moved up from the M3 transpose */ + movq 8*10(%esi), %mm1 +/* moved up from the M3 transpose */ + movq %mm0, %mm3 +/* shuffle the rest of the data, and write it with 2 mmword writes */ + movq %mm5, 8*11(%esi) /* tmt11 */ +/* moved up from the M3 transpose */ + punpcklwd %mm1, %mm0 + movq %mm2, 8*15(%esi) /* tmt15 */ +/* moved up from the M3 transpose */ + punpckhwd %mm1, %mm3 +/* transpose - M3 part + * moved up to previous code section + * movq 8*8(%esi), %mm0 + * movq 8*10(%esi), %mm1 + * movq %mm0, %mm3 + * punpcklwd %mm1, %mm0 + * punpckhwd %mm1, %mm3 + */ + movq 8*12(%esi), %mm6 + movq 8*14(%esi), %mm4 + movq %mm6, %mm2 +/* shuffle the data and write the lower parts of the transposed in 4 dwords */ + punpcklwd %mm4, %mm6 + movq %mm0, %mm1 + punpckhdq %mm6, %mm1 + movq %mm3, %mm7 + punpckhwd %mm4, %mm2 /* free mm4 */ + punpckldq %mm6, %mm0 /* free mm6 */ +/* moved from next block */ + movq 8*13(%esi), %mm4 /* tmt13 */ + punpckldq %mm2, %mm3 + punpckhdq %mm2, %mm7 /* free mm2 */ +/* moved from next block */ + movq %mm3, %mm5 /* duplicate tmt5 */ +/* column 1: even part (after transpose) +* moved above +* movq %mm3, %mm5 duplicate tmt5 +* movq 8*13(%esi), %mm4 tmt13 +*/ + psubsw %mm4, %mm3 /* V134 */ + pmulhw x5a825a825a825a82, %mm3 /* 23170 ->V136 */ + movq 8*9(%esi), %mm6 /* tmt9 */ + paddsw %mm4, %mm5 /* V135 ; mm4 free */ + movq %mm0, %mm4 /* duplicate tmt1 */ + paddsw %mm6, %mm0 /* V137 */ + psubsw %mm6, %mm4 /* V138 ; mm6 free */ + psllw $2, %mm3 /* t290 */ + psubsw %mm5, %mm3 /* V139 */ + movq %mm0, %mm6 /* duplicate V137 */ + paddsw %mm5, %mm0 /* V140 */ + movq %mm4, %mm2 /* duplicate V138 */ + paddsw %mm3, %mm2 /* V141 */ + psubsw %mm3, %mm4 /* V142 ; mm3 free */ + movq %mm0, 8*9(%esi) /* V140 */ + psubsw %mm5, %mm6 /* V143 ; mm5 free */ +/* moved from next block */ + movq 8*11(%esi), %mm0 /* tmt11 
*/ + movq %mm2, 8*13(%esi) /* V141 */ +/* moved from next block */ + movq %mm0, %mm2 /* duplicate tmt11 */ +/* column 1: odd part (after transpose) */ +/* moved up to the prev block + * movq 8*11(%esi), %mm0 tmt11 + * movq %mm0, %mm2 duplicate tmt11 + */ + movq 8*15(%esi), %mm5 /* tmt15 */ + psubsw %mm7, %mm0 /* V144 */ + movq %mm0, %mm3 /* duplicate V144 */ + paddsw %mm7, %mm2 /* V147 ; free mm7 */ + pmulhw x539f539f539f539f, %mm0 /* 21407-> V151 */ + movq %mm1, %mm7 /* duplicate tmt3 */ + paddsw %mm5, %mm7 /* V145 */ + psubsw %mm5, %mm1 /* V146 ; free mm5 */ + psubsw %mm1, %mm3 /* V150 */ + movq %mm7, %mm5 /* duplicate V145 */ + pmulhw x4546454645464546, %mm1 /* 17734-> V153 */ + psubsw %mm2, %mm5 /* V148 */ + pmulhw x61f861f861f861f8, %mm3 /* 25080-> V154 */ + psllw $2, %mm0 /* t311 */ + pmulhw x5a825a825a825a82, %mm5 /* 23170-> V152 */ + paddsw %mm2, %mm7 /* V149 ; free mm2 */ + psllw $1, %mm1 /* t313 */ + nop /* without the nop - freeze here for one clock */ + movq %mm3, %mm2 /* duplicate V154 */ + psubsw %mm0, %mm3 /* V155 ; free mm0 */ + psubsw %mm2, %mm1 /* V156 ; free mm2 */ +/* moved from the next block */ + movq %mm6, %mm2 /* duplicate V143 */ +/* moved from the next block */ + movq 8*13(%esi), %mm0 /* V141 */ + psllw $1, %mm1 /* t315 */ + psubsw %mm7, %mm1 /* V157 (keep V149) */ + psllw $2, %mm5 /* t317 */ + psubsw %mm1, %mm5 /* V158 */ + psllw $1, %mm3 /* t319 */ + paddsw %mm5, %mm3 /* V159 */ +/* column 1: output butterfly (after transform) + * moved to the prev block + * movq %mm6, %mm2 duplicate V143 + * movq 8*13(%esi), %mm0 V141 + */ + psubsw %mm3, %mm2 /* V163 */ + paddsw %mm3, %mm6 /* V164 ; free mm3 */ + movq %mm4, %mm3 /* duplicate V142 */ + psubsw %mm5, %mm4 /* V165 ; free mm5 */ + movq %mm2, scratch7 /* out7 */ + psraw $4, %mm6 + psraw $4, %mm4 + paddsw %mm5, %mm3 /* V162 */ + movq 8*9(%esi), %mm2 /* V140 */ + movq %mm0, %mm5 /* duplicate V141 */ +/* in order not to perculate this line up, + * we read 72(%esi) very near to this location + */ 
+ movq %mm6, 8*9(%esi) /* out9 */ + paddsw %mm1, %mm0 /* V161 */ + movq %mm3, scratch5 /* out5 */ + psubsw %mm1, %mm5 /* V166 ; free mm1 */ + movq %mm4, 8*11(%esi) /* out11 */ + psraw $4, %mm5 + movq %mm0, scratch3 /* out3 */ + movq %mm2, %mm4 /* duplicate V140 */ + movq %mm5, 8*13(%esi) /* out13 */ + paddsw %mm7, %mm2 /* V160 */ +/* moved from the next block */ + movq 8(%esi), %mm0 + psubsw %mm7, %mm4 /* V167 ; free mm7 */ +/* moved from the next block */ + movq 8*3(%esi), %mm7 + psraw $4, %mm4 + movq %mm2, scratch1 /* out1 */ +/* moved from the next block */ + movq %mm0, %mm1 + movq %mm4, 8*15(%esi) /* out15 */ +/* moved from the next block */ + punpcklwd %mm7, %mm0 +/* transpose - M2 parts + * moved up to the prev block + * movq 8(%esi), %mm0 + * movq 8*3(%esi), %mm7 + * movq %mm0, %mm1 + * punpcklwd %mm7, %mm0 + */ + movq 8*5(%esi), %mm5 + punpckhwd %mm7, %mm1 + movq 8*7(%esi), %mm4 + movq %mm5, %mm3 +/* shuffle the data and write the lower parts of the trasposed in 4 dwords */ + movd %mm0, 8*8(%esi) /* LS part of tmt8 */ + punpcklwd %mm4, %mm5 + movd %mm1, 8*12(%esi) /* LS part of tmt12 */ + punpckhwd %mm4, %mm3 + movd %mm5, 8*8+4(%esi) /* MS part of tmt8 */ + punpckhdq %mm5, %mm0 /* tmt10 */ + movd %mm3, 8*12+4(%esi) /* MS part of tmt12 */ + punpckhdq %mm3, %mm1 /* tmt14 */ +/* transpose - M1 parts */ + movq (%esi), %mm7 + movq 8*2(%esi), %mm2 + movq %mm7, %mm6 + movq 8*4(%esi), %mm5 + punpcklwd %mm2, %mm7 + movq 8*6(%esi), %mm4 + punpckhwd %mm2, %mm6 /* free mm2 */ + movq %mm5, %mm3 + punpcklwd %mm4, %mm5 + punpckhwd %mm4, %mm3 /* free mm4 */ + movq %mm7, %mm2 + movq %mm6, %mm4 + punpckldq %mm5, %mm7 /* tmt0 */ + punpckhdq %mm5, %mm2 /* tmt2 ; free mm5 */ +/* shuffle the rest of the data, and write it with 2 mmword writes */ + punpckldq %mm3, %mm6 /* tmt4 */ +/* moved from next block */ + movq %mm2, %mm5 /* duplicate tmt2 */ + punpckhdq %mm3, %mm4 /* tmt6 ; free mm3 */ +/* moved from next block */ + movq %mm0, %mm3 /* duplicate tmt10 */ +/* column 0: odd 
part (after transpose) + *moved up to prev block + * movq %mm0, %mm3 duplicate tmt10 + * movq %mm2, %mm5 duplicate tmt2 + */ + psubsw %mm4, %mm0 /* V110 */ + paddsw %mm4, %mm3 /* V113 ; free mm4 */ + movq %mm0, %mm4 /* duplicate V110 */ + paddsw %mm1, %mm2 /* V111 */ + pmulhw x539f539f539f539f, %mm0 /* 21407-> V117 */ + psubsw %mm1, %mm5 /* V112 ; free mm1 */ + psubsw %mm5, %mm4 /* V116 */ + movq %mm2, %mm1 /* duplicate V111 */ + pmulhw x4546454645464546, %mm5 /* 17734-> V119 */ + psubsw %mm3, %mm2 /* V114 */ + pmulhw x61f861f861f861f8, %mm4 /* 25080-> V120 */ + paddsw %mm3, %mm1 /* V115 ; free mm3 */ + pmulhw x5a825a825a825a82, %mm2 /* 23170-> V118 */ + psllw $2, %mm0 /* t266 */ + movq %mm1, (%esi) /* save V115 */ + psllw $1, %mm5 /* t268 */ + psubsw %mm4, %mm5 /* V122 */ + psubsw %mm0, %mm4 /* V121 ; free mm0 */ + psllw $1, %mm5 /* t270 */ + psubsw %mm1, %mm5 /* V123 ; free mm1 */ + psllw $2, %mm2 /* t272 */ + psubsw %mm5, %mm2 /* V124 (keep V123) */ + psllw $1, %mm4 /* t274 */ + movq %mm5, 8*2(%esi) /* save V123 ; free mm5 */ + paddsw %mm2, %mm4 /* V125 (keep V124) */ +/* column 0: even part (after transpose) */ + movq 8*12(%esi), %mm0 /* tmt12 */ + movq %mm6, %mm3 /* duplicate tmt4 */ + psubsw %mm0, %mm6 /* V100 */ + paddsw %mm0, %mm3 /* V101 ; free mm0 */ + pmulhw x5a825a825a825a82, %mm6 /* 23170 ->V102 */ + movq %mm7, %mm5 /* duplicate tmt0 */ + movq 8*8(%esi), %mm1 /* tmt8 */ + paddsw %mm1, %mm7 /* V103 */ + psubsw %mm1, %mm5 /* V104 ; free mm1 */ + movq %mm7, %mm0 /* duplicate V103 */ + psllw $2, %mm6 /* t245 */ + paddsw %mm3, %mm7 /* V106 */ + movq %mm5, %mm1 /* duplicate V104 */ + psubsw %mm3, %mm6 /* V105 */ + psubsw %mm3, %mm0 /* V109; free mm3 */ + paddsw %mm6, %mm5 /* V107 */ + psubsw %mm6, %mm1 /* V108 ; free mm6 */ +/* column 0: output butterfly (after transform) */ + movq %mm1, %mm3 /* duplicate V108 */ + paddsw %mm2, %mm1 /* out4 */ + psraw $4, %mm1 + psubsw %mm2, %mm3 /* out10 ; free mm2 */ + psraw $4, %mm3 + movq %mm0, %mm6 /* duplicate V109 */ 
+ movq %mm1, 8*4(%esi) /* out4 ; free mm1 */ + psubsw %mm4, %mm0 /* out6 */ + movq %mm3, 8*10(%esi) /* out10 ; free mm3 */ + psraw $4, %mm0 + paddsw %mm4, %mm6 /* out8 ; free mm4 */ + movq %mm7, %mm1 /* duplicate V106 */ + movq %mm0, 8*6(%esi) /* out6 ; free mm0 */ + psraw $4, %mm6 + movq (%esi), %mm4 /* V115 */ + movq %mm6, 8*8(%esi) /* out8 ; free mm6 */ + movq %mm5, %mm2 /* duplicate V107 */ + movq 8*2(%esi), %mm3 /* V123 */ + paddsw %mm4, %mm7 /* out0 */ +/* moved up from next block */ + movq scratch3, %mm0 + psraw $4, %mm7 +/* moved up from next block */ + movq scratch5, %mm6 + psubsw %mm4, %mm1 /* out14 ; free mm4 */ + paddsw %mm3, %mm5 /* out2 */ + psraw $4, %mm1 + movq %mm7, (%esi) /* out0 ; free mm7 */ + psraw $4, %mm5 + movq %mm1, 8*14(%esi) /* out14 ; free mm1 */ + psubsw %mm3, %mm2 /* out12 ; free mm3 */ + movq %mm5, 8*2(%esi) /* out2 ; free mm5 */ + psraw $4, %mm2 +/* moved up to the prev block */ + movq scratch7, %mm4 +/* moved up to the prev block */ + psraw $4, %mm0 + movq %mm2, 8*12(%esi) /* out12 ; free mm2 */ +/* moved up to the prev block */ + psraw $4, %mm6 +/* move back the data to its correct place +* moved up to the prev block + * movq scratch3, %mm0 + * movq scratch5, %mm6 + * movq scratch7, %mm4 + * psraw $4, %mm0 + * psraw $4, %mm6 +*/ + movq scratch1, %mm1 + psraw $4, %mm4 + movq %mm0, 8*3(%esi) /* out3 */ + psraw $4, %mm1 + movq %mm6, 8*5(%esi) /* out5 */ + movq %mm4, 8*7(%esi) /* out7 */ + movq %mm1, 8(%esi) /* out1 */ + emms + popl %edi + popl %esi + popl %edx + popl %ecx + popl %ebx + movl %ebp,%esp + popl %ebp + ret +.Lfe1: + .size gst_idct_mmx_idct,.Lfe1-gst_idct_mmx_idct diff --git a/libs/putbits/gstputbits.c b/libs/putbits/gstputbits.c index 76bff88687..9ec9750bb5 100644 --- a/libs/putbits/gstputbits.c +++ b/libs/putbits/gstputbits.c @@ -63,7 +63,7 @@ void gst_putbits(gst_putbits_t *pb, int val, int n) int i; unsigned int mask; - //printf("putbits: %d %d %ld\n", val, n, pb->outcnt); + //printf("putbits: %d %d %ld %ld\n", val, n, 
pb->outcnt, pb->newlen); mask = 1 << (n-1); /* selects first (leftmost) bit */ for (i=0; i #include #define GST_RIFF_ENCODER_BUF_SIZE 1024 -//#define debug(format,args...) g_print(format,##args) -#define debug(format,args...) - #define ADD_CHUNK(riffenc, chunkid, chunksize) \ { \ gst_riff_chunk *chunk;\ chunk = (gst_riff_chunk *)(riffenc->dataleft + riffenc->nextlikely);\ chunk->id = chunkid; \ chunk->size = chunksize; \ - riffenc->nextlikely += sizeof(gst_riff_chunk); \ + riffenc->nextlikely += sizeof(gst_riff_chunk) + (chunksize&1); \ } #define ADD_LIST(riffenc, listsize, listtype) \ @@ -49,7 +48,7 @@ GstRiff *gst_riff_encoder_new(guint32 type) { GstRiff *riff; gst_riff_list *list; - debug("gst_riff_encoder: making %4.4s encoder\n", (char *)&type); + DEBUG("gst_riff_encoder: making %4.4s encoder\n", (char *)&type); riff = (GstRiff *)g_malloc(sizeof(GstRiff)); g_return_val_if_fail(riff != NULL, NULL); @@ -77,7 +76,7 @@ gint gst_riff_encoder_avih(GstRiff *riff, gst_riff_avih *head, gulong size) { g_return_val_if_fail(riff->state == GST_RIFF_STATE_INITIAL, GST_RIFF_EINVAL); - debug("gst_riff_encoder: add avih\n"); + DEBUG("gst_riff_encoder: add avih\n"); ADD_LIST(riff, 0xB8, GST_RIFF_LIST_hdrl); @@ -97,7 +96,7 @@ gint gst_riff_encoder_strh(GstRiff *riff, guint32 fcc_type, gst_riff_strh *head, g_return_val_if_fail(riff->state == GST_RIFF_STATE_HASAVIH || riff->state == GST_RIFF_STATE_HASSTRF, GST_RIFF_EINVAL); - debug("gst_riff_encoder: add strh type %08x (%4.4s)\n", fcc_type, (char *)&fcc_type); + DEBUG("gst_riff_encoder: add strh type %08x (%4.4s)\n", fcc_type, (char *)&fcc_type); ADD_LIST(riff, 108, GST_RIFF_LIST_strl); @@ -118,7 +117,7 @@ gint gst_riff_encoder_strf(GstRiff *riff, void *format, gulong size) { g_return_val_if_fail(riff->state == GST_RIFF_STATE_HASSTRH, GST_RIFF_EINVAL); - debug("gst_riff_encoder: add strf\n"); + DEBUG("gst_riff_encoder: add strf\n"); ADD_CHUNK(riff, GST_RIFF_TAG_strf, size); @@ -141,14 +140,14 @@ gint gst_riff_encoder_chunk(GstRiff 
*riff, guint32 chunk_type, void *chunkdata, riff->state = GST_RIFF_STATE_MOVI; } - debug("gst_riff_encoder: add chunk type %08x (%4.4s)\n", chunk_type, (char *)&chunk_type); + DEBUG("gst_riff_encoder: add chunk type %08x (%4.4s)\n", chunk_type, (char *)&chunk_type); ADD_CHUNK(riff, chunk_type, size); if (chunkdata != NULL) { chunk = (gst_riff_chunk *)(riff->dataleft + riff->nextlikely); memcpy(chunk, chunkdata, size); - riff->nextlikely += size; + riff->nextlikely += size + (size&1); } return GST_RIFF_OK; diff --git a/libs/winloader/Makefile.am b/libs/winloader/Makefile.am index 8582a1715b..d9b4099ace 100644 --- a/libs/winloader/Makefile.am +++ b/libs/winloader/Makefile.am @@ -2,7 +2,7 @@ filterdir = $(libdir)/gst filter_LTLIBRARIES = libwinloader.la -libwinloader_la_SOURCES = driver.c elfdll.c ext.c externals.c module.c pe_image.c pe_resource.c registry.c resource.c stubs.s vfl.c +libwinloader_la_SOURCES = driver.c elfdll.c ext.c externals.c module.c pe_image.c pe_resource.c registry.c resource.c stubs.s vfl.c afl.c libwinloaderincludedir = $(includedir)/gst/libs/winloader.h libwinloaderinclude_HEADERS = diff --git a/libs/winloader/afl.c b/libs/winloader/afl.c new file mode 100644 index 0000000000..72546f3b49 --- /dev/null +++ b/libs/winloader/afl.c @@ -0,0 +1,758 @@ +/************************************************************************** + + + This file will contain an interface to ACM drivers. + Its content will be based mainly on wine/dlls/msacm32 + actually, for audio decompression only the following functions + are needed: + + acmStreamOpen ( takes formats of src and dest, returns stream handle ) + acmStreamPrepareHeader ( takes stream handler and info on data ) + acmStreamConvert ( the same as PrepareHeader ) + acmStreamUnprepareHeader + acmStreamClose + acmStreamSize + maybe acmStreamReset + + In future I'll also add functions for format enumeration, + but not right now. 
+ + +***************************************************************************/ +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "wineacm.h" +#pragma pack(1) +#define OpenDriverA DrvOpen +extern HDRVR VFWAPI DrvOpen(long); +#define CloseDriver DrvClose +extern HDRVR VFWAPI DrvClose(long); + +/* Reinterprets a stream handle as the stream object pointer; a plain cast, the handle is not validated. */ +static PWINE_ACMSTREAM ACM_GetStream(HACMSTREAM has) +{ + return (PWINE_ACMSTREAM)has; +} + +/*********************************************************************** + * acmDriverAddA (MSACM32.2) + * + * Registers hinstModule as a new driver ID and stores the ID in *phadid. + * Returns MMSYSERR_INVALPARAM if phadid is NULL and MMSYSERR_INVALFLAG if + * fdwAdd carries unknown or mutually exclusive flags. + */ +MMRESULT WINAPI acmDriverAddA(PHACMDRIVERID phadid, HINSTANCE hinstModule, + LPARAM lParam, DWORD dwPriority, DWORD fdwAdd) +{ + if (!phadid) + return MMSYSERR_INVALPARAM; + + /* Check if any unknown flags */ + if (fdwAdd & + ~(ACM_DRIVERADDF_FUNCTION|ACM_DRIVERADDF_NOTIFYHWND| + ACM_DRIVERADDF_GLOBAL)) + return MMSYSERR_INVALFLAG; + + /* Check if any incompatible flags */ + if ((fdwAdd & ACM_DRIVERADDF_FUNCTION) && + (fdwAdd & ACM_DRIVERADDF_NOTIFYHWND)) + return MMSYSERR_INVALFLAG; + + /* FIXME: in fact, should GetModuleFileName(hinstModule) and do a + * LoadDriver on it, to be sure we can call SendDriverMessage on the + * hDrvr handle. 
+ */ + *phadid = (HACMDRIVERID) MSACM_RegisterDriver(NULL, NULL, hinstModule); + + /* FIXME: lParam, dwPriority and fdwAdd ignored */ + + return MMSYSERR_NOERROR; +} + +/*********************************************************************** + * acmDriverClose (MSACM32.4) + * + * Unlinks the driver instance behind 'had' from the instance list of its + * driver ID, closes the underlying driver handle once that list is empty, + * and frees the instance. + * + * Returns MMSYSERR_INVALFLAG if fdwClose is non-zero, MMSYSERR_INVALHANDLE + * if 'had' does not resolve to an instance, MMSYSERR_NOERROR otherwise. + */ +MMRESULT WINAPI acmDriverClose(HACMDRIVER had, DWORD fdwClose) +{ + PWINE_ACMDRIVER p; + PWINE_ACMDRIVER* tp; + + if (fdwClose) + return MMSYSERR_INVALFLAG; + + p = MSACM_GetDriver(had); + if (!p) + return MMSYSERR_INVALHANDLE; + + /* tp points at the link field that refers to the current node; it has to + * move on to the next link field. Assigning through *tp in the increment + * would overwrite the list head and drop every instance ahead of p. */ + for (tp = &(p->obj.pACMDriverID->pACMDriverList); *tp; tp = &((*tp)->pNextACMDriver)) { + if (*tp == p) { + *tp = (*tp)->pNextACMDriver; + break; + } + } + + if (p->hDrvr && !p->obj.pACMDriverID->pACMDriverList) + CloseDriver(p->hDrvr); + + HeapFree(MSACM_hHeap, 0, p); + + return MMSYSERR_NOERROR; +} + +/*********************************************************************** + * acmDriverEnum (MSACM32.7) + * + * Calls fnCallback once for every registered driver ID, passing dwInstance + * through unchanged. Disabled drivers are skipped unless + * ACM_DRIVERENUMF_DISABLED is set, in which case they are reported with + * ACMDRIVERDETAILS_SUPPORTF_DISABLED. The callback's return value is not + * inspected. + * + * Returns MMSYSERR_INVALPARAM if fnCallback is NULL, MMSYSERR_INVALFLAG if + * fdwEnum has bits outside the two supported flags, MMSYSERR_NOERROR + * otherwise. + */ +MMRESULT WINAPI acmDriverEnum(ACMDRIVERENUMCB fnCallback, DWORD dwInstance, DWORD fdwEnum) +{ + PWINE_ACMDRIVERID p; + DWORD fdwSupport; + + if (!fnCallback) { + return MMSYSERR_INVALPARAM; + } + + /* bitwise test: reject only bits outside the supported set */ + if (fdwEnum & ~(ACM_DRIVERENUMF_NOLOCAL|ACM_DRIVERENUMF_DISABLED)) { + return MMSYSERR_INVALFLAG; + } + + for (p = MSACM_pFirstACMDriverID; p; p = p->pNextACMDriverID) { + fdwSupport = ACMDRIVERDETAILS_SUPPORTF_CODEC; + if (!p->bEnabled) { + if (fdwEnum & ACM_DRIVERENUMF_DISABLED) + fdwSupport |= ACMDRIVERDETAILS_SUPPORTF_DISABLED; + else + continue; + } + (*fnCallback)((HACMDRIVERID) p, dwInstance, fdwSupport); + } + + return MMSYSERR_NOERROR; +} + +/*********************************************************************** + * acmDriverID (MSACM32.8) + */ +MMRESULT WINAPI acmDriverID(HACMOBJ hao, PHACMDRIVERID phadid, DWORD fdwDriverID) +{ + PWINE_ACMOBJ pao; + + pao = MSACM_GetObj(hao); + if (!pao) + return MMSYSERR_INVALHANDLE; + + if (!phadid) + return MMSYSERR_INVALPARAM; + + if (fdwDriverID) + return MMSYSERR_INVALFLAG; + + *phadid = (HACMDRIVERID) 
pao->pACMDriverID;
+
+    return MMSYSERR_NOERROR;
+}
+
+/***********************************************************************
+ *           acmDriverMessage (MSACM32.9)
+ *
+ * Forwards uMsg with both parameters to the driver behind had through
+ * SendDriverMessage().  A zero result from the driver is reported as
+ * MMSYSERR_NOTSUPPORTED, any other result as MMSYSERR_NOERROR.
+ */
+LRESULT WINAPI acmDriverMessage(HACMDRIVER had, UINT uMsg, LPARAM lParam1, LPARAM lParam2)
+{
+    PWINE_ACMDRIVER instance;
+
+    instance = MSACM_GetDriver(had);
+    if (instance == NULL)
+        return MMSYSERR_INVALPARAM;
+
+    /* FIXME: Check if uMsg legal */
+
+    if (SendDriverMessage(instance->hDrvr, uMsg, lParam1, lParam2))
+        return MMSYSERR_NOERROR;
+
+    return MMSYSERR_NOTSUPPORTED;
+}
+
+
+/***********************************************************************
+ *           acmDriverOpen (MSACM32.10)
+ *
+ * Creates a new instance of the driver identified by hadid, pushes it on
+ * the front of that driver ID's instance list and returns it in *phad.
+ * When the driver ID carries no module handle the driver is opened via
+ * OpenDriverA() with an 'audc' ICOPEN request whose fccHandler is the
+ * value stored in pszFileName.
+ *
+ * Returns MMSYSERR_INVALPARAM (NULL phad), MMSYSERR_INVALHANDLE (bad
+ * hadid), MMSYSERR_INVALFLAG (non-zero fdwOpen), MMSYSERR_NOMEM,
+ * MMSYSERR_ERROR (no driver handle obtained) or MMSYSERR_NOERROR.
+ */
+MMRESULT WINAPI acmDriverOpen(PHACMDRIVER phad, HACMDRIVERID hadid, DWORD fdwOpen)
+{
+    PWINE_ACMDRIVERID driver_id;
+    PWINE_ACMDRIVER   instance;
+    ICOPEN            open_req;
+
+    TRACE("(%p, %x, %08lu)\n", phad, hadid, fdwOpen);
+
+    if (!phad)
+        return MMSYSERR_INVALPARAM;
+
+    driver_id = MSACM_GetDriverID(hadid);
+    if (!driver_id)
+        return MMSYSERR_INVALHANDLE;
+
+    if (fdwOpen)
+        return MMSYSERR_INVALFLAG;
+
+    instance = HeapAlloc(MSACM_hHeap, 0, sizeof(WINE_ACMDRIVER));
+    if (!instance)
+        return MMSYSERR_NOMEM;
+
+    instance->obj.pACMDriverID = driver_id;
+
+    if (driver_id->hInstModule) {
+        /* a module handle was supplied at registration time: use it as is */
+        instance->hDrvr = driver_id->hInstModule;
+    } else {
+        /* the open request is only needed on this path */
+        open_req.fccType    = mmioFOURCC('a', 'u', 'd', 'c');
+        open_req.fccHandler = (long)driver_id->pszFileName;
+        open_req.dwSize     = sizeof(ICOPEN);
+        open_req.dwFlags    = 0;
+        instance->hDrvr = OpenDriverA((long)&open_req);
+    }
+
+    if (!instance->hDrvr) {
+        HeapFree(MSACM_hHeap, 0, instance);
+        return MMSYSERR_ERROR;
+    }
+
+    instance->pfnDriverProc = GetProcAddress(instance->hDrvr, "DriverProc");
+
+    /* insert new instance at beg of list */
+    instance->pNextACMDriver = driver_id->pACMDriverList;
+    driver_id->pACMDriverList = instance;
+
+    /* FIXME: Create a WINE_ACMDRIVER32 */
+    *phad = (HACMDRIVER)instance;
+
+    return MMSYSERR_NOERROR;
+}
+
+/***********************************************************************
+ *           acmDriverRemove (MSACM32.12)
+ */
+MMRESULT WINAPI acmDriverRemove(HACMDRIVERID hadid,
DWORD fdwRemove)
+{
+    PWINE_ACMDRIVERID padid;
+
+    padid = MSACM_GetDriverID(hadid);
+    if (!padid)
+        return MMSYSERR_INVALHANDLE;
+
+    if (fdwRemove)
+        return MMSYSERR_INVALFLAG;
+
+    MSACM_UnregisterDriver(padid);
+
+    return MMSYSERR_NOERROR;
+}
+
+
+
+/**********************************************************************/
+
+HANDLE MSACM_hHeap = (HANDLE) NULL;
+PWINE_ACMDRIVERID MSACM_pFirstACMDriverID = NULL;
+PWINE_ACMDRIVERID MSACM_pLastACMDriverID = NULL;
+
+/***********************************************************************
+ *           MSACM_RegisterDriver32()
+ *
+ * Allocates a driver ID record, stores a private copy of pszDriverAlias
+ * (or NULL when no alias is given), and appends the record to the global
+ * list delimited by MSACM_pFirstACMDriverID / MSACM_pLastACMDriverID.
+ *
+ * File names are stored in driver.c. pszFileName is reused to carry the
+ * driver ID instead: if it is < 0x10000, it is the primary codec for the
+ * corresponding format.  The value is stored as passed, never copied.
+ *
+ * Returns the new record, or NULL when an allocation fails.
+ */
+PWINE_ACMDRIVERID MSACM_RegisterDriver(LPSTR pszDriverAlias, LPSTR pszFileName,
+                                       HINSTANCE hinstModule)
+{
+    PWINE_ACMDRIVERID padid;
+
+    /* acmDriverAddA() registers with a NULL alias; never hand NULL to %s */
+    TRACE("('%s', '%x', 0x%08x)\n", pszDriverAlias ? pszDriverAlias : "(null)", pszFileName, hinstModule);
+
+    padid = (PWINE_ACMDRIVERID) HeapAlloc(MSACM_hHeap, 0, sizeof(WINE_ACMDRIVERID));
+    if (!padid)
+        return NULL;
+
+    if (pszDriverAlias) {
+        padid->pszDriverAlias = (char*)malloc(strlen(pszDriverAlias)+1);
+        if (!padid->pszDriverAlias) {
+            HeapFree(MSACM_hHeap, 0, padid);
+            return NULL;
+        }
+        strcpy(padid->pszDriverAlias, pszDriverAlias);
+    } else {
+        /* MSACM_UnregisterDriver() already tolerates a NULL alias */
+        padid->pszDriverAlias = NULL;
+    }
+
+    padid->pszFileName = pszFileName;
+    padid->hInstModule = hinstModule;
+    padid->bEnabled = TRUE;
+    padid->pACMDriverList = NULL;
+
+    /* append at the tail of the doubly linked list */
+    padid->pNextACMDriverID = NULL;
+    padid->pPrevACMDriverID = MSACM_pLastACMDriverID;
+    if (MSACM_pLastACMDriverID)
+        MSACM_pLastACMDriverID->pNextACMDriverID = padid;
+    MSACM_pLastACMDriverID = padid;
+    if (!MSACM_pFirstACMDriverID)
+        MSACM_pFirstACMDriverID = padid;
+
+    return padid;
+}
+
+/***********************************************************************
+ *           MSACM_RegisterAllDrivers32()
+ *
+ * Registers the two built-in codecs once; does nothing when the driver
+ * list is already populated.  The second argument is the format tag that
+ * MSACM_RegisterDriver() stores in place of a file name.
+ */
+void MSACM_RegisterAllDrivers(void)
+{
+    if (MSACM_pFirstACMDriverID)
+        return;
+
+    MSACM_RegisterDriver("divxa32", (LPSTR)0x161, 0);
+    MSACM_RegisterDriver("msadp32", (LPSTR)0x2, 0);
+}
+
+/***********************************************************************
+ *           MSACM_UnregisterDriver32()
+ *
+ * Closes every instance still open on driver ID p, releases its alias,
+ * unlinks p from the global driver ID list and frees it.
+ *
+ * Returns the record that followed p in the list (NULL if p was last), so
+ * callers can keep iterating.
+ */
+PWINE_ACMDRIVERID MSACM_UnregisterDriver(PWINE_ACMDRIVERID p)
+{
+    PWINE_ACMDRIVERID pNextACMDriverID;
+
+    /* acmDriverClose() unlinks the instance it is given, so the list
+     * shrinks on every pass */
+    while (p->pACMDriverList)
+        acmDriverClose((HACMDRIVER) p->pACMDriverList, 0);
+
+    /* the alias comes from malloc() in MSACM_RegisterDriver(), so it is
+     * released with free(); free(NULL) is a no-op */
+    free(p->pszDriverAlias);
+
+    /* pszFileName is not owned by the record (it carries a codec ID, see
+     * MSACM_RegisterDriver()), so it is deliberately not freed */
+
+    if (p == MSACM_pFirstACMDriverID)
+        MSACM_pFirstACMDriverID = p->pNextACMDriverID;
+    if (p == MSACM_pLastACMDriverID)
+        MSACM_pLastACMDriverID = p->pPrevACMDriverID;
+
+    if (p->pPrevACMDriverID)
+        p->pPrevACMDriverID->pNextACMDriverID = p->pNextACMDriverID;
+    if (p->pNextACMDriverID)
+        p->pNextACMDriverID->pPrevACMDriverID = p->pPrevACMDriverID;
+
+    /* remember the successor before p goes away */
+    pNextACMDriverID = p->pNextACMDriverID;
+
+    HeapFree(MSACM_hHeap, 0, p);
+
+    return pNextACMDriverID;
+}
+
+/***********************************************************************
+ *           MSACM_UnregisterAllDrivers32()
+ * FIXME
+ *   Where should this function be called?
+ */
+void MSACM_UnregisterAllDrivers(void)
+{
+    PWINE_ACMDRIVERID p;
+
+    /* MSACM_UnregisterDriver() returns the successor of the record it frees */
+    for (p = MSACM_pFirstACMDriverID; p; p = MSACM_UnregisterDriver(p));
+}
+
+/***********************************************************************
+ *           MSACM_GetDriverID32()
+ *
+ * Handles are plain pointers here: the handle is cast back, unchecked.
+ */
+PWINE_ACMDRIVERID MSACM_GetDriverID(HACMDRIVERID hDriverID)
+{
+    return (PWINE_ACMDRIVERID)hDriverID;
+}
+
+/***********************************************************************
+ *           MSACM_GetDriver32()
+ *
+ * Handles are plain pointers here: the handle is cast back, unchecked.
+ */
+PWINE_ACMDRIVER MSACM_GetDriver(HACMDRIVER hDriver)
+{
+    return (PWINE_ACMDRIVER)hDriver;
+}
+
+/***********************************************************************
+ *           MSACM_GetObj32()
+ *
+ * Handles are plain pointers here: the handle is cast back, unchecked.
+ */
+PWINE_ACMOBJ MSACM_GetObj(HACMOBJ hObj)
+{
+    return (PWINE_ACMOBJ)hObj;
+}
+
+
+
+/***********************************************************************
+ *           acmStreamOpen (MSACM32.40)
+ *
+ * Builds a stream object that owns private copies of the source and
+ * destination formats (and of the filter, when given) and asks a driver
+ * to accept it with ACMDM_STREAM_OPEN.  With a non-zero had only that
+ * driver instance is asked; otherwise every registered driver ID is
+ * opened and tried in list order until one accepts.
+ *
+ * With ACM_STREAMOPENF_QUERY the stream is only probed: it is freed again
+ * and *phas is set to 0 even on success.
+ *
+ * Returns MMSYSERR_NOERROR on success, MMSYSERR_INVALPARAM for a NULL
+ * format pointer or an unusable had, MMSYSERR_NOMEM, ACMERR_NOTPOSSIBLE
+ * when no registered driver accepts the conversion, or the error returned
+ * by the explicitly supplied driver.
+ */
+MMRESULT WINAPI acmStreamOpen(PHACMSTREAM phas, HACMDRIVER had, PWAVEFORMATEX pwfxSrc,
+                              PWAVEFORMATEX pwfxDst, PWAVEFILTER pwfltr, DWORD dwCallback,
+                              DWORD dwInstance, DWORD fdwOpen)
+{
+    PWINE_ACMSTREAM was;
+    PWINE_ACMDRIVER wad;
+    MMRESULT ret;
+    int wfxSrcSize;
+    int wfxDstSize;
+
+    TRACE("(%p, 0x%08x, %p, %p, %p, %ld, %ld, %ld)\n",
+          phas, had, pwfxSrc, pwfxDst, pwfltr, dwCallback, dwInstance, fdwOpen);
+
+    /* both formats are dereferenced from here on (traces, size
+     * computation, copies), so reject NULL before touching them */
+    if (!pwfxSrc || !pwfxDst) {
+        if (phas)
+            *phas = (HACMSTREAM)0;
+        return MMSYSERR_INVALPARAM;
+    }
+
+    TRACE("src [wFormatTag=%u, nChannels=%u, nSamplesPerSec=%lu, nAvgBytesPerSec=%lu, nBlockAlign=%u, wBitsPerSample=%u, cbSize=%u]\n",
+          pwfxSrc->wFormatTag, pwfxSrc->nChannels, pwfxSrc->nSamplesPerSec, pwfxSrc->nAvgBytesPerSec,
+          pwfxSrc->nBlockAlign, pwfxSrc->wBitsPerSample, pwfxSrc->cbSize);
+
+    TRACE("dst [wFormatTag=%u, nChannels=%u, nSamplesPerSec=%lu, nAvgBytesPerSec=%lu, nBlockAlign=%u, wBitsPerSample=%u, cbSize=%u]\n",
+          pwfxDst->wFormatTag, pwfxDst->nChannels, pwfxDst->nSamplesPerSec, pwfxDst->nAvgBytesPerSec,
+          pwfxDst->nBlockAlign, pwfxDst->wBitsPerSample, pwfxDst->cbSize);
+
+    /* cbSize is ignored for PCM formats: only the fixed header is copied */
+#define SIZEOF_WFX(wfx) (sizeof(WAVEFORMATEX) + (((wfx)->wFormatTag == WAVE_FORMAT_PCM) ? 0 : (wfx)->cbSize))
+    wfxSrcSize = SIZEOF_WFX(pwfxSrc);
+    wfxDstSize = SIZEOF_WFX(pwfxDst);
+#undef SIZEOF_WFX
+
+    /* one allocation: stream object, then src format, dst format, filter */
+    was = HeapAlloc(MSACM_hHeap, 0, sizeof(*was) + wfxSrcSize + wfxDstSize + ((pwfltr) ? sizeof(WAVEFILTER) : 0));
+    if (was == NULL)
+        return MMSYSERR_NOMEM;
+
+    was->drvInst.cbStruct = sizeof(was->drvInst);
+    was->drvInst.pwfxSrc = (PWAVEFORMATEX)((LPSTR)was + sizeof(*was));
+    memcpy(was->drvInst.pwfxSrc, pwfxSrc, wfxSrcSize);
+    was->drvInst.pwfxDst = (PWAVEFORMATEX)((LPSTR)was + sizeof(*was) + wfxSrcSize);
+    memcpy(was->drvInst.pwfxDst, pwfxDst, wfxDstSize);
+    if (pwfltr) {
+        was->drvInst.pwfltr = (PWAVEFILTER)((LPSTR)was + sizeof(*was) + wfxSrcSize + wfxDstSize);
+        memcpy(was->drvInst.pwfltr, pwfltr, sizeof(WAVEFILTER));
+    } else {
+        was->drvInst.pwfltr = NULL;
+    }
+    was->drvInst.dwCallback = dwCallback;
+    was->drvInst.dwInstance = dwInstance;
+    was->drvInst.fdwOpen = fdwOpen;
+    was->drvInst.fdwDriver = 0L;
+    was->drvInst.dwDriver = 0L;
+    was->drvInst.has = (HACMSTREAM)was;
+
+    if (had) {
+        if (!(wad = MSACM_GetDriver(had))) {
+            ret = MMSYSERR_INVALPARAM;
+            goto errCleanUp;
+        }
+
+        was->obj.pACMDriverID = wad->obj.pACMDriverID;
+        was->pDrv = wad;
+        was->hAcmDriver = 0; /* not to close it in acmStreamClose */
+
+        ret = SendDriverMessage(wad->hDrvr, ACMDM_STREAM_OPEN, (DWORD)&was->drvInst, 0L);
+        if (ret != MMSYSERR_NOERROR)
+            goto errCleanUp;
+    } else {
+        PWINE_ACMDRIVERID wadi;
+
+        ret = ACMERR_NOTPOSSIBLE;
+        if(MSACM_pFirstACMDriverID==NULL)
+            MSACM_RegisterAllDrivers();
+
+        for (wadi = MSACM_pFirstACMDriverID; wadi; wadi = wadi->pNextACMDriverID) {
+            ret = acmDriverOpen(&had, (HACMDRIVERID)wadi, 0L);
+            if (ret == MMSYSERR_NOERROR) {
+                if ((wad = MSACM_GetDriver(had)) != 0) {
+                    was->obj.pACMDriverID = wad->obj.pACMDriverID;
+                    was->pDrv = wad;
+                    was->hAcmDriver = had; /* acmStreamClose closes it */
+
+                    ret = SendDriverMessage(wad->hDrvr, ACMDM_STREAM_OPEN, (DWORD)&was->drvInst, 0L);
+                    if (ret == MMSYSERR_NOERROR) {
+                        /* a query keeps nothing open */
+                        if (fdwOpen & ACM_STREAMOPENF_QUERY) {
+                            acmDriverClose(had, 0L);
+                        }
+                        break;
+                    }
+                }
+                /* no match, close this acm driver and try next one */
+                acmDriverClose(had, 0L);
+            }
+        }
+        if (ret != MMSYSERR_NOERROR) {
+            ret = ACMERR_NOTPOSSIBLE;
+            goto errCleanUp;
+        }
+    }
+    ret = MMSYSERR_NOERROR;
+    if (!(fdwOpen & ACM_STREAMOPENF_QUERY)) {
+        if (phas)
+            *phas = (HACMSTREAM)was;
+        TRACE("=> (%d)\n", ret);
+        return ret;
+    }
+    /* a successful query falls through: the probe stream is discarded */
+errCleanUp:
+    if (phas)
+        *phas = (HACMSTREAM)0;
+    HeapFree(MSACM_hHeap, 0, was);
+    TRACE("=> (%d)\n", ret);
+    return ret;
+}
+
+
+/***********************************************************************
+ *           acmStreamClose
+ *
+ * Sends ACMDM_STREAM_CLOSE to the stream's driver.  Only when the driver
+ * reports success is the stream freed, together with the driver instance
+ * that acmStreamOpen() opened on its behalf (if any).
+ *
+ * Returns MMSYSERR_INVALHANDLE for a NULL stream, else the driver result.
+ */
+MMRESULT WINAPI acmStreamClose(HACMSTREAM has, DWORD fdwClose)
+{
+    PWINE_ACMSTREAM was;
+    MMRESULT ret;
+
+    TRACE("(0x%08x, %ld)\n", has, fdwClose);
+
+    if ((was = ACM_GetStream(has)) == NULL) {
+        return MMSYSERR_INVALHANDLE;
+    }
+    ret = SendDriverMessage(was->pDrv->hDrvr, ACMDM_STREAM_CLOSE, (DWORD)&was->drvInst, 0);
+    if (ret == MMSYSERR_NOERROR) {
+        if (was->hAcmDriver)
+            acmDriverClose(was->hAcmDriver, 0L);
+        HeapFree(MSACM_hHeap, 0, was);
+    }
+    TRACE("=> (%d)\n", ret);
+    return ret;
+}
+
+/***********************************************************************
+ *           acmStreamConvert (MSACM32.38)
+ */
+MMRESULT WINAPI acmStreamConvert(HACMSTREAM has, PACMSTREAMHEADER pash,
+                                 DWORD fdwConvert)
+{
+    PWINE_ACMSTREAM was;
+    MMRESULT ret = MMSYSERR_NOERROR;
+    PACMDRVSTREAMHEADER padsh;
+
+    TRACE("(0x%08x, %p, %ld)\n", has, pash, fdwConvert);
+
+    if ((was =
ACM_GetStream(has)) == NULL)
+        return MMSYSERR_INVALHANDLE;
+    if (!pash || pash->cbStruct < sizeof(ACMSTREAMHEADER))
+        return MMSYSERR_INVALPARAM;
+
+    if (!(pash->fdwStatus & ACMSTREAMHEADER_STATUSF_PREPARED))
+        return ACMERR_UNPREPARED;
+
+    /* Note: the ACMSTREAMHEADER and ACMDRVSTREAMHEADER structs are of same
+     * size. some fields are private to msacm internals, and are exposed
+     * in ACMSTREAMHEADER in the dwReservedDriver array
+     */
+    padsh = (PACMDRVSTREAMHEADER)pash;
+
+    /* check that pointers have not been modified */
+    if (padsh->pbPreparedSrc != padsh->pbSrc ||
+        padsh->cbPreparedSrcLength < padsh->cbSrcLength ||
+        padsh->pbPreparedDst != padsh->pbDst ||
+        padsh->cbPreparedDstLength < padsh->cbDstLength) {
+        return MMSYSERR_INVALPARAM;
+    }
+
+    padsh->fdwConvert = fdwConvert;
+
+    ret = SendDriverMessage(was->pDrv->hDrvr, ACMDM_STREAM_CONVERT, (DWORD)&was->drvInst, (DWORD)padsh);
+    if (ret == MMSYSERR_NOERROR) {
+        /* the driver converted the buffer: flag the header as done */
+        padsh->fdwStatus |= ACMSTREAMHEADER_STATUSF_DONE;
+    }
+    TRACE("=> (%d)\n", ret);
+    return ret;
+}
+
+
+/***********************************************************************
+ *           acmStreamPrepareHeader (MSACM32.41)
+ *
+ * Validates the stream handle and header, then lets the driver prepare
+ * the header with ACMDM_STREAM_PREPARE.
+ *
+ * Returns MMSYSERR_INVALHANDLE for a NULL stream, MMSYSERR_INVALPARAM for
+ * a missing or undersized header, MMSYSERR_INVALFLAG when fdwPrepare is
+ * non-zero, MMSYSERR_NOERROR straight away when the header already has
+ * ACMSTREAMHEADER_STATUSF_DONE set, otherwise the outcome of the driver
+ * call.
+ */
+MMRESULT WINAPI acmStreamPrepareHeader(HACMSTREAM has, PACMSTREAMHEADER pash,
+                                       DWORD fdwPrepare)
+{
+    PWINE_ACMSTREAM was;
+    MMRESULT ret = MMSYSERR_NOERROR;
+    PACMDRVSTREAMHEADER padsh;
+
+    TRACE("(0x%08x, %p, %ld)\n", has, pash, fdwPrepare);
+
+    if ((was = ACM_GetStream(has)) == NULL)
+        return MMSYSERR_INVALHANDLE;
+    if (!pash || pash->cbStruct < sizeof(ACMSTREAMHEADER))
+        return MMSYSERR_INVALPARAM;
+    /* must bail out here: merely assigning the error to ret had no effect,
+     * because ret is overwritten by the driver call further down */
+    if (fdwPrepare)
+        return MMSYSERR_INVALFLAG;
+
+    if (pash->fdwStatus & ACMSTREAMHEADER_STATUSF_DONE)
+        return MMSYSERR_NOERROR;
+
+    /* Note: the ACMSTREAMHEADER and ACMDRVSTREAMHEADER structs are of same
+     * size.
some fields are private to msacm internals, and are exposed
+     * in ACMSTREAMHEADER in the dwReservedDriver array
+     */
+    padsh = (PACMDRVSTREAMHEADER)pash;
+
+    padsh->fdwConvert = fdwPrepare;
+    padsh->padshNext = NULL;
+    padsh->fdwDriver = padsh->dwDriver = 0L;
+
+    /* start from a clean "prepared" snapshot before the driver sees it */
+    padsh->fdwPrepared = 0;
+    padsh->dwPrepared = 0;
+    padsh->pbPreparedSrc = 0;
+    padsh->cbPreparedSrcLength = 0;
+    padsh->pbPreparedDst = 0;
+    padsh->cbPreparedDstLength = 0;
+
+    ret = SendDriverMessage(was->pDrv->hDrvr, ACMDM_STREAM_PREPARE, (DWORD)&was->drvInst, (DWORD)padsh);
+    if (ret != MMSYSERR_NOERROR && ret != MMSYSERR_NOTSUPPORTED) {
+        /* real failure: wipe the snapshot again */
+        padsh->fdwPrepared = 0;
+        padsh->dwPrepared = 0;
+        padsh->pbPreparedSrc = 0;
+        padsh->cbPreparedSrcLength = 0;
+        padsh->pbPreparedDst = 0;
+        padsh->cbPreparedDstLength = 0;
+    } else {
+        /* "not supported" counts as success: record what was prepared so
+         * that convert/unprepare can detect later tampering */
+        ret = MMSYSERR_NOERROR;
+        padsh->fdwStatus &= ~(ACMSTREAMHEADER_STATUSF_DONE|ACMSTREAMHEADER_STATUSF_INQUEUE);
+        padsh->fdwStatus |= ACMSTREAMHEADER_STATUSF_PREPARED;
+        padsh->fdwPrepared = padsh->fdwStatus;
+        padsh->dwPrepared = 0;
+        padsh->pbPreparedSrc = padsh->pbSrc;
+        padsh->cbPreparedSrcLength = padsh->cbSrcLength;
+        padsh->pbPreparedDst = padsh->pbDst;
+        padsh->cbPreparedDstLength = padsh->cbDstLength;
+    }
+    TRACE("=> (%d)\n", ret);
+    return ret;
+}
+
+/***********************************************************************
+ *           acmStreamReset (MSACM32.42)
+ *
+ * Sends ACMDM_STREAM_RESET to the driver, but only for streams that were
+ * opened with ACM_STREAMOPENF_ASYNC; for other streams it is a no-op.
+ *
+ * Returns MMSYSERR_INVALFLAG when fdwReset is non-zero (checked before
+ * the handle), MMSYSERR_INVALHANDLE for a NULL stream, else
+ * MMSYSERR_NOERROR or the driver result.
+ */
+MMRESULT WINAPI acmStreamReset(HACMSTREAM has, DWORD fdwReset)
+{
+    PWINE_ACMSTREAM stream;
+    MMRESULT status = MMSYSERR_NOERROR;
+
+    TRACE("(0x%08x, %ld)\n", has, fdwReset);
+
+    if (fdwReset) {
+        status = MMSYSERR_INVALFLAG;
+    } else {
+        stream = ACM_GetStream(has);
+        if (stream == NULL)
+            return MMSYSERR_INVALHANDLE; /* this path emits no exit trace */
+        if (stream->drvInst.fdwOpen & ACM_STREAMOPENF_ASYNC)
+            status = SendDriverMessage(stream->pDrv->hDrvr, ACMDM_STREAM_RESET, (DWORD)&stream->drvInst, 0);
+    }
+    TRACE("=> (%d)\n", status);
+    return status;
+}
+
+/***********************************************************************
+ *           acmStreamSize (MSACM32.43)
+ */
+MMRESULT WINAPI
acmStreamSize(HACMSTREAM has, DWORD cbInput,
+                              LPDWORD pdwOutputBytes, DWORD fdwSize)
+{
+    /* Asks the stream's driver (ACMDM_STREAM_SIZE) how large the buffer on
+     * the other side of the conversion has to be.  With
+     * ACM_STREAMSIZEF_SOURCE cbInput is a source size and the destination
+     * size is returned; with ACM_STREAMSIZEF_DESTINATION it is the other
+     * way round.  *pdwOutputBytes is 0 unless the driver succeeds.
+     *
+     * Returns MMSYSERR_INVALHANDLE for a NULL stream, MMSYSERR_INVALFLAG
+     * for unknown flag bits or an unknown query type,
+     * MMSYSERR_INVALPARAM for a NULL pdwOutputBytes, else the driver
+     * result. */
+    PWINE_ACMSTREAM was;
+    ACMDRVSTREAMSIZE adss;
+    MMRESULT ret;
+
+    TRACE("(0x%08x, %ld, %p, %ld)\n", has, cbInput, pdwOutputBytes, fdwSize);
+
+    if ((was = ACM_GetStream(has)) == NULL) {
+        return MMSYSERR_INVALHANDLE;
+    }
+    if ((fdwSize & ~ACM_STREAMSIZEF_QUERYMASK) != 0) {
+        return MMSYSERR_INVALFLAG;
+    }
+    /* the result pointer is written unconditionally below */
+    if (!pdwOutputBytes) {
+        return MMSYSERR_INVALPARAM;
+    }
+
+    *pdwOutputBytes = 0L;
+
+    switch (fdwSize & ACM_STREAMSIZEF_QUERYMASK) {
+    case ACM_STREAMSIZEF_DESTINATION:
+        adss.cbDstLength = cbInput;
+        adss.cbSrcLength = 0;
+        break;
+    case ACM_STREAMSIZEF_SOURCE:
+        adss.cbSrcLength = cbInput;
+        adss.cbDstLength = 0;
+        break;
+    default:
+        return MMSYSERR_INVALFLAG;
+    }
+
+    adss.cbStruct = sizeof(adss);
+    adss.fdwSize = fdwSize;
+    ret = SendDriverMessage(was->pDrv->hDrvr, ACMDM_STREAM_SIZE,
+                            (DWORD)&was->drvInst, (DWORD)&adss);
+    if (ret == MMSYSERR_NOERROR) {
+        /* hand back the length the driver filled in: the opposite side of
+         * the one the caller supplied */
+        switch (fdwSize & ACM_STREAMSIZEF_QUERYMASK) {
+        case ACM_STREAMSIZEF_DESTINATION:
+            *pdwOutputBytes = adss.cbSrcLength;
+            break;
+        case ACM_STREAMSIZEF_SOURCE:
+            *pdwOutputBytes = adss.cbDstLength;
+            break;
+        }
+    }
+    TRACE("=> (%d) [%lu]\n", ret, *pdwOutputBytes);
+    return ret;
+}
+
+/***********************************************************************
+ *           acmStreamUnprepareHeader (MSACM32.44)
+ */
+MMRESULT WINAPI acmStreamUnprepareHeader(HACMSTREAM has, PACMSTREAMHEADER pash,
+                                         DWORD fdwUnprepare)
+{
+    PWINE_ACMSTREAM was;
+    MMRESULT ret = MMSYSERR_NOERROR;
+    PACMDRVSTREAMHEADER padsh;
+
+    TRACE("(0x%08x, %p, %ld)\n", has, pash, fdwUnprepare);
+
+    if ((was = ACM_GetStream(has)) == NULL)
+        return MMSYSERR_INVALHANDLE;
+    if (!pash || pash->cbStruct < sizeof(ACMSTREAMHEADER))
+        return MMSYSERR_INVALPARAM;
+
+    if (!(pash->fdwStatus & ACMSTREAMHEADER_STATUSF_PREPARED))
+        return ACMERR_UNPREPARED;
+
+    /* Note: the ACMSTREAMHEADER and ACMDRVSTREAMHEADER structs are of same
+     * size.
some fields are private to msacm internals, and are exposed
+     * in ACMSTREAMHEADER in the dwReservedDriver array
+     */
+    padsh = (PACMDRVSTREAMHEADER)pash;
+
+    /* the source buffer must still be the one recorded at prepare time,
+     * and must not have grown beyond the prepared length */
+    if (padsh->pbPreparedSrc != padsh->pbSrc ||
+        padsh->cbPreparedSrcLength < padsh->cbSrcLength)
+        return MMSYSERR_INVALPARAM;
+
+    /* same requirement for the destination buffer */
+    if (padsh->pbPreparedDst != padsh->pbDst ||
+        padsh->cbPreparedDstLength < padsh->cbDstLength)
+        return MMSYSERR_INVALPARAM;
+
+    padsh->fdwConvert = fdwUnprepare;
+
+    ret = SendDriverMessage(was->pDrv->hDrvr, ACMDM_STREAM_UNPREPARE, (DWORD)&was->drvInst, (DWORD)padsh);
+    switch (ret) {
+    case MMSYSERR_NOERROR:
+    case MMSYSERR_NOTSUPPORTED:
+        /* "not supported" counts as success: drop all status bits that
+         * mark the header as prepared, queued or done */
+        ret = MMSYSERR_NOERROR;
+        padsh->fdwStatus &= ~(ACMSTREAMHEADER_STATUSF_DONE|ACMSTREAMHEADER_STATUSF_INQUEUE|ACMSTREAMHEADER_STATUSF_PREPARED);
+        break;
+    default:
+        /* driver error: header left untouched, error passed through */
+        break;
+    }
+    TRACE("=> (%d)\n", ret);
+    return ret;
+}
diff --git a/libs/winloader/driver.c b/libs/winloader/driver.c index 7e319b0b7b..cdf009e563 100644 --- a/libs/winloader/driver.c +++ b/libs/winloader/driver.c @@ -6,8 +6,6 @@ #include #include -#include - #define STORE_ALL \ __asm__ ( \ "push %%ebx\n\t" \ @@ -39,6 +37,26 @@ typedef DRVR *LPDRVR; static DWORD dwDrvID = 0; +LRESULT WINAPI SendDriverMessage( HDRVR hDriver, UINT message, + LPARAM lParam1, LPARAM lParam2 ) +{ + DRVR* module=(DRVR*)hDriver; + int result; +#ifdef DETAILED_OUT + printf("SendDriverMessage: driver %X, message %X, arg1 %X, arg2 %X\n", hDriver, message, lParam1, lParam2); +#endif + if(module==0)return -1; + if(module->hDriverModule==0)return -1; + if(module->DriverProc==0)return -1; + STORE_ALL; + result=module->DriverProc(module->dwDriverID,1,message,lParam1,lParam2); + REST_ALL; +#ifdef DETAILED_OUT + printf("\t\tResult: %X\n", result); +#endif + return result; +} + static NPDRVR DrvAlloc(HDRVR*lpDriver, LPUINT lpDrvResult) { NPDRVR npDriver; @@ -66,28 +84,44 @@ typedef struct int usage; }codec_t; -static codec_t codecs[3]={ - {0, PLUGINS_SRCDIR "/win32/divxc32.dll", 0}, - {0, PLUGINS_SRCDIR "/win32/ir50_32.dll", 0},
-// {0, "./mpg4c32.dll", 0}, - {0, PLUGINS_SRCDIR "/win32/libvideodll.so", 0}, +#define Win32Path "/usr/lib/win32/" +static codec_t avi_codecs[]={ + {0, Win32Path"divxc32.dll", 0}, //0 + {0, Win32Path"ir50_32.dll", 0}, + {0, Win32Path"ir41_32.dll", 0}, + {0, Win32Path"ir32_32.dll", 0}, + {0, Win32Path"mpg4c32.dll", 0}, + {0, Win32Path"iccvid.dll", 0}, //5 + {0, Win32Path"libvideodll.so", 0}, + {0, Win32Path"divxa32.acm", 0}, //7 + {0, Win32Path"msadp32.acm", 0}, }; static void DrvFree(HDRVR hDriver) { int i; - FreeLibrary(((DRVR*)hDriver)->hDriverModule); if(hDriver) - for(i=0; ihDriverModule) + if(((DRVR*)hDriver)->hDriverModule) + if(((DRVR*)hDriver)->DriverProc) + (((DRVR*)hDriver)->DriverProc)(((DRVR*)hDriver)->dwDriverID, hDriver, DRV_CLOSE, 0, 0); + if(hDriver) + for(i=0; ihDriverModule) { - codecs[i].handle=0; - codecs[i].usage--; - if (hDriver) - free((NPDRVR)hDriver); - return; - } + avi_codecs[i].usage--; + if(avi_codecs[i].usage==0) + { + avi_codecs[i].handle=0; + if(((DRVR*)hDriver)->hDriverModule) + if(((DRVR*)hDriver)->DriverProc) + (((DRVR*)hDriver)->DriverProc)(0, hDriver, DRV_FREE, 0, 0); + FreeLibrary(((DRVR*)hDriver)->hDriverModule); + if (hDriver) + free((NPDRVR)hDriver); + return; + } + } } void DrvClose(HDRVR hdrvr) @@ -110,46 +144,85 @@ DrvOpen(LPARAM lParam2) int regs[10]; int fccHandler=*((int*)lParam2+2); - switch(fccHandler) - { - case mmioFOURCC('D', 'I', 'V', '3'): - case mmioFOURCC('D', 'I', 'V', '4'): - case mmioFOURCC('d', 'i', 'v', '3'): - case mmioFOURCC('d', 'i', 'v', '4'): - drv_id=0; - break; - case mmioFOURCC('I', 'V', '5', '0'): - case mmioFOURCC('i', 'v', '5', '0'): - drv_id=1; - break; - case mmioFOURCC('m', 'p', '4', '3'): - case mmioFOURCC('M', 'P', 'G', '4'): - drv_id=2; - break; - default: - printf("Unknown codec %X='%c%c%c%c'\n", fccHandler, - fccHandler&0xFF, (fccHandler&0xFF00)>>8, - (fccHandler&0xFF0000)>>16, (fccHandler&0xFF000000)>>24); - return (HDRVR)0; - } - + int fccType=*((int*)lParam2+1); + 
if(fccType==0x63646976)//vidc + switch(fccHandler) + { + case mmioFOURCC('D', 'I', 'V', '3'): + case mmioFOURCC('D', 'I', 'V', '4'): + case mmioFOURCC('d', 'i', 'v', '3'): + case mmioFOURCC('d', 'i', 'v', '4'): + printf("Video in DivX ;-) format\n"); + drv_id=0; + break; + case mmioFOURCC('I', 'V', '5', '0'): + case mmioFOURCC('i', 'v', '5', '0'): + printf("Video in Indeo Video 5 format\n"); + drv_id=1; + break; + case mmioFOURCC('I', 'V', '4', '1'): + case mmioFOURCC('i', 'v', '4', '1'): + printf("Video in Indeo Video 4.1 format\n"); + drv_id=2; + break; + case mmioFOURCC('I', 'V', '3', '2'): + case mmioFOURCC('i', 'v', '3', '2'): + printf("Video in Indeo Video 3.2 format\n"); + drv_id=3; + break; + + case mmioFOURCC('m', 'p', '4', '1'): + case mmioFOURCC('m', 'p', '4', '2'): + case mmioFOURCC('m', 'p', '4', '3'): + case mmioFOURCC('M', 'P', 'G', '4'): + case mmioFOURCC('M', 'P', '4', '1'): + case mmioFOURCC('M', 'P', '4', '2'): + case mmioFOURCC('M', 'P', '4', '3'): + printf("Video in Microsoft MPEG-4 format\n"); + drv_id=4; + break; + case mmioFOURCC('c', 'v', 'i', 'd'): + printf("Video in Cinepak format\n"); + drv_id=5; + break; + default: + printf("Unknown codec %X='%c%c%c%c'\n", fccHandler, + fccHandler&0xFF, (fccHandler&0xFF00)>>8, + (fccHandler&0xFF0000)>>16, (fccHandler&0xFF000000)>>24); + return (HDRVR)0; + } + else + switch(fccHandler) + { + case 0x160://DivX audio + case 0x161://DivX audio + drv_id=7; + break; + case 0x2://MS ADPCM + drv_id=8; + break; + default: + printf("Unknown ACM codec 0x%X\n", fccHandler); + return (HDRVR)0; + } if (!(npDriver = DrvAlloc(&hDriver, &uDrvResult))) return ((HDRVR) 0); - if(codecs[drv_id].handle==0) + if(avi_codecs[drv_id].handle==0) { - if (!(codecs[drv_id].handle=npDriver->hDriverModule = LoadLibraryA(codecs[drv_id].name))) + if (!(avi_codecs[drv_id].handle=npDriver->hDriverModule = LoadLibraryA(avi_codecs[drv_id].name))) { + printf("Can't open library %s\n", avi_codecs[drv_id].name); DrvFree(hDriver); return 
((HDRVR) 0); } - else codecs[drv_id].usage=1; + else avi_codecs[drv_id].usage=1; } else { - npDriver->hDriverModule=codecs[drv_id].handle; - codecs[drv_id].usage++; + npDriver->hDriverModule=avi_codecs[drv_id].handle; + avi_codecs[drv_id].usage++; } // 14c0 @@ -173,28 +246,29 @@ DrvOpen(LPARAM lParam2) no_reg: ; } - + if (!(npDriver->DriverProc = (DRIVERPROC) GetProcAddress(npDriver->hDriverModule, "DriverProc"))) { + printf("Library %s is not a valid codec\n", avi_codecs[drv_id].name); FreeLibrary(npDriver->hDriverModule); DrvFree(hDriver); return ((HDRVR) 0); } - //printf("DriverProc == %X\n", npDriver->DriverProc); + TRACE("DriverProc == %X\n", npDriver->DriverProc); npDriver->dwDriverID = ++dwDrvID; - if (codecs[drv_id].usage==1) + if (avi_codecs[drv_id].usage==1) { STORE_ALL; (npDriver->DriverProc)(0, hDriver, DRV_LOAD, 0, 0); REST_ALL; - //printf("DRV_LOAD Ok!\n"); + TRACE("DRV_LOAD Ok!\n"); STORE_ALL; (npDriver->DriverProc)(0, hDriver, DRV_ENABLE, 0, 0); REST_ALL; - //printf("DRV_ENABLE Ok!\n"); + TRACE("DRV_ENABLE Ok!\n"); } // open driver @@ -203,7 +277,7 @@ DrvOpen(LPARAM lParam2) (LPARAM) (LPSTR) unknown, lParam2); REST_ALL; - //printf("DRV_OPEN Ok!(%X)\n", npDriver->dwDriverID); + TRACE("DRV_OPEN Ok!(%X)\n", npDriver->dwDriverID); if (uDrvResult) { diff --git a/libs/winloader/elfdll.c b/libs/winloader/elfdll.c index 87846c03e8..b65e2287c2 100644 --- a/libs/winloader/elfdll.c +++ b/libs/winloader/elfdll.c @@ -3,6 +3,9 @@ * * Copyright 1999 Bertho A. 
Stultiens */ +#include + +#ifdef HAVE_LIBDL #include #include @@ -42,7 +45,7 @@ extern DWORD fixup_imports(WINE_MODREF *wm); extern void dump_exports(HMODULE hModule); /*---------------- END HACKS ---------------*/ -char *extra_ld_library_path = NULL; /* The extra search-path set in wine.conf */ +char *extra_ld_library_path = "/usr/lib/win32"; struct elfdll_image { @@ -201,7 +204,8 @@ static WINE_MODREF *ELFDLL_CreateModref(HMODULE hModule, LPCSTR path) // wm->binfmt.pe.pe_resource = (PIMAGE_RESOURCE_DIRECTORY)RVA(hModule, dir->VirtualAddress); - wm->filename = strdup( path ); + wm->filename = malloc(strlen(path)+1); + strcpy(wm->filename, path); wm->modname = strrchr( wm->filename, '\\' ); if (!wm->modname) wm->modname = wm->filename; else wm->modname++; @@ -275,7 +279,7 @@ WINE_MODREF *ELFDLL_LoadLibraryExA(LPCSTR path, DWORD flags) } */ - wm = ELFDLL_CreateModref(dlhandle, path); + wm = ELFDLL_CreateModref((int)dlhandle, path); if(!wm) { ERR("Could not create WINE_MODREF for %s\n", path); @@ -297,4 +301,4 @@ void ELFDLL_UnloadLibrary(WINE_MODREF *wm) { } - +#endif /*HAVE_LIBDL*/ diff --git a/libs/winloader/ext.c b/libs/winloader/ext.c index 603ae75465..e4a042ef4e 100644 --- a/libs/winloader/ext.c +++ b/libs/winloader/ext.c @@ -11,8 +11,9 @@ #include #include #include - +#include #include +//#include int dbg_header_err( const char *dbg_channel, const char *func ) { return 0; @@ -35,6 +36,12 @@ int dbg_vprintf( const char *format, ... ) } int __vprintf( const char *format, ... 
) { +#ifdef DETAILED_OUT + va_list va; + va_start(va, format); + vprintf(format, va); + va_end(va); +#endif return 0; } @@ -56,17 +63,7 @@ int HeapFree(int heap, int flags, void* mem) free(mem); return 1; } -/* -void EnterCriticalSection(void* q) -{ - return; -} -void LeaveCriticalSection(void* q) -{ - return; -} -*/ static int last_error; int GetLastError() @@ -155,7 +152,10 @@ int IsBadReadPtr(void* data, int size) } char* HEAP_strdupA(const char* string) { - return strdup(string); +// return strdup(string); + char* answ=malloc(strlen(string)+1); + strcpy(answ, string); + return answ; } short* HEAP_strdupAtoW(void* heap, void* hz, const char* string) { @@ -337,8 +337,7 @@ DWORD flProtect, DWORD dwMaxHigh, DWORD dwMaxLow, const char* name) lseek(hFile, 0, SEEK_SET); } else len=dwMaxLow; -// len=min(len, dwMaxLow); -#warning fixme - should analyze flProtect + if(flProtect & PAGE_READONLY) mmap_access |=PROT_READ; else @@ -363,7 +362,10 @@ DWORD flProtect, DWORD dwMaxHigh, DWORD dwMaxLow, const char* name) fm->next=NULL; fm->handle=answer; if(name) - fm->name=strdup(name); + { + fm->name=malloc(strlen(name)+1); + strcpy(fm->name, name); + } else fm->name=NULL; fm->mapping_size=len; @@ -397,16 +399,61 @@ int UnmapViewOfFile(HANDLE handle) } return 0; } -static int va_size=0; +//static int va_size=0; +struct virt_alloc_s; +typedef struct virt_alloc_s +{ + int mapping_size; + char* address; + struct virt_alloc_s* next; + struct virt_alloc_s* prev; + int state; +}virt_alloc; +static virt_alloc* vm=0; +#define MEM_COMMIT 0x00001000 +#define MEM_RESERVE 0x00002000 + void* VirtualAlloc(void* address, DWORD size, DWORD type, DWORD protection) { void* answer; int fd=open("/dev/zero", O_RDWR); size=(size+0xffff)&(~0xffff); // printf("VirtualAlloc(0x%08X, %d)\n", address + if(address!=0) + { + //check whether we can allow to allocate this + virt_alloc* str=vm; + while(str) + { + if((unsigned)address>=(unsigned)str->address+str->mapping_size) + { + str=str->prev; + continue; + 
} + if((unsigned)address+size<(unsigned)str->address) + { + str=str->prev; + continue; + } + if(str->state==0) + { +#warning FIXME + if(((unsigned)address+size<(unsigned)str->address+str->mapping_size) && (type & MEM_COMMIT)) + { + close(fd); + return address; //returning previously reserved memory + } + close(fd); return NULL; /* every exit must release the /dev/zero descriptor opened above */ + } + close(fd); + return NULL; + } + answer=mmap(address, size, PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_FIXED | MAP_PRIVATE, fd, 0); + } + else answer=mmap(address, size, PROT_READ | PROT_WRITE | PROT_EXEC, -// ((address!=NULL) ? MAP_FIXED : MAP_SHARED), fd, 0); - MAP_PRIVATE, fd, 0); + MAP_PRIVATE, fd, 0); // answer=FILE_dommap(-1, address, 0, size, 0, 0, // PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE); close(fd); @@ -418,31 +465,60 @@ void* VirtualAlloc(void* address, DWORD size, DWORD type, DWORD protection) } else { - if(va_size!=0) - printf("Multiple VirtualAlloc!\n"); + virt_alloc *new_vm=malloc(sizeof(virt_alloc)); + new_vm->mapping_size=size; + new_vm->address=answer; + new_vm->prev=vm; + if(type & MEM_RESERVE) + new_vm->state=0; + else + new_vm->state=1; + if(vm) + vm->next=new_vm; + vm=new_vm; + vm->next=0; +// if(va_size!=0) +// printf("Multiple VirtualAlloc!\n"); // printf("answer=0x%08x\n", answer); - va_size=size; return answer; } } int VirtualFree(void* address, int t1, int t2)//not sure { - int answer=munmap(address, va_size); - va_size=0; - return answer; + virt_alloc* str=vm; + int answer; + while(str) + { + if(address!=str->address) + { + str=str->prev; + continue; + } + answer=munmap(str->address, str->mapping_size); + if(str->next)str->next->prev=str->prev; + if(str->prev)str->prev->next=str->next; + if(vm==str)vm=str->prev; /* vm is the newest record; step back instead of dropping all older mappings */ + free(str); + return 0; + } + return -1; } int WideCharToMultiByte(unsigned int codepage, long flags, const short* src, int srclen,char* dest, int destlen, const char* defch, int* used_defch) { -#warning FIXME int i; -// printf("WCh2MB: Src string "); -// for(i=0; i<=srclen; i++)printf(" %04X", src[i]); 
if(src==0) return 0; if(dest==0) - return 0; + { + for(i=0; i #include #include -#include +#include +#include +#include #include #include @@ -10,11 +12,23 @@ #include #include #include +#include #include +static unsigned int localcount() +{ + int a; + __asm__ __volatile__("rdtsc\n\t" + :"=a"(a) + : + :"edx"); + return a; +} + void dbgprintf(char* fmt, ...) { +#ifdef DETAILED_OUT va_list va; FILE* f; va_start(va, fmt); @@ -22,6 +36,7 @@ void dbgprintf(char* fmt, ...) vfprintf(f, fmt, va); fsync(f); fclose(f); +#endif } char export_names[500][30]={ "name1", @@ -64,6 +79,11 @@ void* my_mreq(int size, int to_zero) heap=malloc(20000000); memset(heap, 0xCC,20000000); } + if(heap==0) + { + printf("No enough memory\n"); + return 0; + } if(heap_counter+size>20000000) { printf("No enough memory\n"); @@ -82,6 +102,11 @@ void* my_mreq(int size, int to_zero) int my_release(char* memory) { test_heap(); + if(memory==NULL) + { + printf("ERROR: free(0)\n"); + return 0; + } if(*(int*)(memory-8)!=0x433476) { printf("MEMORY CORRUPTION !!!!!!!!!!!!!!!!!!!\n"); @@ -118,6 +143,7 @@ int WINAPI ext_unknown() } int WINAPI expIsBadWritePtr(void* ptr, unsigned int count) { + dbgprintf("IsBadWritePtr(%x, %x)\n", ptr, count); if(count==0) return 0; if(ptr==0) @@ -126,6 +152,7 @@ int WINAPI expIsBadWritePtr(void* ptr, unsigned int count) } int WINAPI expIsBadReadPtr(void* ptr, unsigned int count) { + dbgprintf("IsBadReadPtr(%x, %x)\n", ptr, count); if(count==0) return 0; if(ptr==0) @@ -136,11 +163,19 @@ void* CDECL expmalloc(int size) { //printf("malloc"); // return malloc(size); - return my_mreq(size,0); + void* result=my_mreq(size,0); + dbgprintf("malloc(%x)\n", size); + if(result==0) + { + dbgprintf("returns 0\n"); + printf("WARNING: malloc() failed\n"); + } + return result; } void CDECL expfree(void* mem) { // return free(mem); + dbgprintf("free(%x)\n", mem); my_release(mem); } void* CDECL expnew(int size) @@ -149,15 +184,25 @@ void* CDECL expnew(int size) // printf("%08x %08x %08x %08x\n", 
// size, *(1+(int*)&size), // *(2+(int*)&size),*(3+(int*)&size)); - return malloc(size); + void* result=expmalloc(size); + dbgprintf("new(%x)\n", size); + if(result==0) + { + dbgprintf("returns 0\n"); + printf("WARNING: malloc() failed\n"); + } + return result; + } int CDECL expdelete(void* memory) { - free(memory); + dbgprintf("delete(%x)\n", memory); + expfree(memory); return 0; } int WINAPI expDisableThreadLibraryCalls(int module) { + dbgprintf("DisableThreadLibraryCalls(%x)\n", module); return 0; } int CDECL exp_initterm(int v1, int v2) @@ -174,8 +219,18 @@ typedef struct { void* WINAPI expGetDriverModuleHandle(DRVR* pdrv) { + dbgprintf("GetDriverModuleHandle(%x)\n", pdrv); return pdrv->hDriverModule; } + +void* WINAPI expGetModuleHandleA(const char* name) +{ + WINE_MODREF* wm; + dbgprintf("GetModuleHandleA(%s)\n", name); + wm=MODULE_FindModule(name); + if(wm==0)return 0; + return (void*)(wm->module); +} struct th_list_t; typedef struct th_list_t{ int id; @@ -233,7 +288,6 @@ void* WINAPI expCreateEventA(void* pSecAttr, char bManualReset, { #warning ManualReset pthread_mutex_t *pm; -// printf("CreateEvent:"); dbgprintf("CreateEvent\n"); if(mlist!=NULL) { @@ -243,7 +297,7 @@ void* WINAPI expCreateEventA(void* pSecAttr, char bManualReset, { if(strcmp(pp->name, name)==0) return pp->pm; - }while(pp=pp->next); + }while(pp=pp->prev); } pm=my_mreq(sizeof(pthread_mutex_t), 0); pthread_mutex_init(pm, NULL); @@ -298,6 +352,7 @@ void WINAPI expGetSystemInfo(SYSTEM_INFO* si) static int cache = 0; static SYSTEM_INFO cachedsi; HKEY xhkey=0,hkey; + dbgprintf("GetSystemInfo()\n"); if (cache) { memcpy(si,&cachedsi,sizeof(*si)); @@ -463,7 +518,15 @@ long WINAPI expHeapDestroy(void* heap) dbgprintf("HeapDestroy(%X)\n", heap); my_release(heap); return 1; -} +} + +long WINAPI expHeapFree(int arg1, int arg2, void* ptr) +{ + dbgprintf("HeapFree(%X, %X, %X)\n", arg1, arg2, ptr); + my_release(ptr); + return 1; +} + void* WINAPI expVirtualAlloc(void* v1, long v2, long v3, long v4) { 
void* z; @@ -478,28 +541,48 @@ int WINAPI expVirtualFree(void* v1, int v2, int v3) dbgprintf("VirtualFree(%X %X %X) \n",v1,v2,v3); return VirtualFree(v1,v2,v3); } +struct CRITSECT +{ + pthread_t id; + pthread_mutex_t mutex; + int locked; +}; void WINAPI expInitializeCriticalSection(CRITICAL_SECTION* c) { + struct CRITSECT cs; dbgprintf("InitCriticalSection(%X) \n", c); - if(sizeof(pthread_mutex_t)>sizeof(CRITICAL_SECTION)) +/* if(sizeof(pthread_mutex_t)>sizeof(CRITICAL_SECTION)) { printf(" ERROR:::: sizeof(pthread_mutex_t) is %d, expected <=%d!\n", sizeof(pthread_mutex_t), sizeof(CRITICAL_SECTION)); return; - } - pthread_mutex_init((pthread_mutex_t*)c, NULL); + }*/ +/* pthread_mutex_init((pthread_mutex_t*)c, NULL); */ + pthread_mutex_init(&cs.mutex, NULL); + cs.locked=0; + *(void**)c=malloc(sizeof cs); + memcpy(*(void**)c, &cs, sizeof cs); return; } void WINAPI expEnterCriticalSection(CRITICAL_SECTION* c) { + struct CRITSECT* cs=(struct CRITSECT*)c; dbgprintf("EnterCriticalSection(%X) \n",c); - pthread_mutex_lock((pthread_mutex_t*)c); +// cs.id=pthread_self(); + if(cs->locked) + if(cs->id==pthread_self()) + return; + pthread_mutex_lock(&(cs->mutex)); + cs->locked=1; + cs->id=pthread_self(); return; } void WINAPI expLeaveCriticalSection(CRITICAL_SECTION* c) { + struct CRITSECT* cs=(struct CRITSECT*)c; dbgprintf("LeaveCriticalSection(%X) \n",c); - pthread_mutex_unlock((pthread_mutex_t*)c); + cs->locked=0; + pthread_mutex_unlock(&(cs->mutex)); return; } void WINAPI expDeleteCriticalSection(CRITICAL_SECTION *c) @@ -613,9 +696,9 @@ int WINAPI expLoadStringA(long instance, long id, void* buf, long size) long WINAPI expMultiByteToWideChar(long v1, long v2, char* s1, long siz1, char* s2, int siz2) { -#warning fixme +#warning FIXME dbgprintf("MB2WCh\n"); -// printf("WARNING: Unsupported call: MBToWCh %s\n", s1); + printf("WARNING: Unsupported call: MBToWCh %s\n", s1); if(s2==0) return 1; s2[0]=s2[1]=0; @@ -636,12 +719,32 @@ long WINAPI expGetVersionExA(OSVERSIONINFOA* c) 
strcpy(c->szCSDVersion, "Win98"); return 1; } +#include +#include +#include HANDLE WINAPI expCreateSemaphoreA(char* v1, long init_count, long max_count, char* name) { -#warning fixme +#warning FIXME +/* struct sembuf buf[1]; + int sem=semget(IPC_PRIVATE,1,IPC_CREAT); + if(sem==-1) + { + printf("semget() failed\n"); + return (HANDLE)-1; + } + buf[0].sem_num=0; + printf("%s\n", name); + printf("Init count %d, max count %d\n", init_count, max_count); + buf[0].sem_op=-max_count+init_count; + buf[0].sem_flg=0; + if(semop(sem, &buf, 1)<0) + { + printf("semop() failed\n"); + } + return sem; +*/ void* z; dbgprintf("CreateSemaphoreA\n"); -// printf("CreateSemaphore:"); z=my_mreq(24, 0); pthread_mutex_init(z, NULL); return (HANDLE)z; @@ -649,47 +752,66 @@ HANDLE WINAPI expCreateSemaphoreA(char* v1, long init_count, long max_count, cha long WINAPI expReleaseSemaphore(long hsem, long increment, long* prev_count) { -// The state of a semaphore object is signaled when its count -// is greater than zero and nonsignaled when its count is equal to zero -// Each time a waiting thread is released because of the semaphore's signaled -// state, the count of the semaphore is decreased by one. +// The state of a semaphore object is signaled when its count +// is greater than zero and nonsignaled when its count is equal to zero +// Each time a waiting thread is released because of the semaphore's signaled +// state, the count of the semaphore is decreased by one. 
+ struct sembuf buf[1]; dbgprintf("ReleaseSemaphore\n"); printf("WARNING: Unsupported call: ReleaseSemaphoreA\n"); +/* if(hsem==-1)return 0; + buf[0].sem_num=0; + buf[0].sem_op=-1; + buf[0].sem_flg=0; + if(semop(hsem, &buf, 1)<0) + { + printf("ReleaseSemaphore: semop() failed\n"); + }*/ + return 1;//zero on error } -long WINAPI expRegOpenKeyExA(long key, char* subkey, long reserved, long access, long* newkey) -{ +long WINAPI expRegOpenKeyExA(long key, const char* subkey, long reserved, long access, int* newkey) +{ + dbgprintf("RegOpenKeyExA(%d,%s)\n", key, subkey); return RegOpenKeyExA(key, subkey, reserved, access, newkey); } long WINAPI expRegCloseKey(long key) -{ +{ + dbgprintf("RegCloseKey()\n"); return RegCloseKey(key); } -long WINAPI expRegQueryValueExA(long key, char* value, int* reserved, int* type, int* data, int* count) +long WINAPI expRegQueryValueExA(long key, const char* value, int* reserved, int* type, int* data, int* count) { + dbgprintf("RegQueryValueExA()\n"); return RegQueryValueExA(key, value, reserved, type, data, count); } -long WINAPI expRegCreateKeyExA(long key, char* name, long reserved, - void* classs, long options, long security, +long WINAPI expRegCreateKeyExA(long key, const char* name, long reserved, + void* classs, long options, long security, void* sec_attr, int* newkey, int* status) -{ +{ + dbgprintf("RegCreateKeyExA()\n"); return RegCreateKeyExA(key, name, reserved, classs, options, security, sec_attr, newkey, status); } -long WINAPI expRegSetValueExA(long key, char* name, long v1, long v2, void* data, long size) +long WINAPI expRegSetValueExA(long key, const char* name, long v1, long v2, void* data, long size) { + dbgprintf("RegSetValueExA()\n"); return RegSetValueExA(key, name, v1, v2, data, size); } -long WINAPI expRegOpenKeyA ( long hKey, LPCSTR lpSubKey, long* phkResult ) -{ +long WINAPI expRegOpenKeyA ( +long hKey, + LPCSTR lpSubKey, + int* phkResult +){ return RegOpenKeyExA(hKey, lpSubKey, 0, 0, phkResult); -} +} long WINAPI 
expQueryPerformanceCounter(long long* z) { - __asm__( + dbgprintf("QueryPerformanceCounter()\n"); + __asm__ __volatile__( "rdtsc\n\t" "movl %%eax, 0(%0)\n\t" "movl %%edx, 4(%0)\n\t" @@ -697,17 +819,69 @@ long WINAPI expQueryPerformanceCounter(long long* z) return 1; } +static double old_freq() +{ + int i=time(NULL); + int x,y; + while(i==time(NULL)); + x=localcount(); + i++; + while(i==time(NULL)); + y=localcount(); + return (double)(y-x)/1000.; +} +static double CPU_Freq() +{ + FILE *f = fopen ("/proc/cpuinfo", "r"); + char line[200]; + char model[200]="unknown"; + char flags[500]=""; + char *s,*value; + double freq=-1; + + if (!f) + { + printf("Can't open /proc/cpuinfo for reading\n"); + return old_freq(); + } + while (fgets(line,200,f)!=NULL) + { + /* NOTE: the ':' is the only character we can rely on */ + if (!(value = strchr(line,':'))) + continue; + /* terminate the valuename */ + *value++ = '\0'; + /* skip any leading spaces */ + while (*value==' ') value++; + if ((s=strchr(value,'\n'))) + *s='\0'; + + if (!strncasecmp(line, "cpu MHz",strlen("cpu MHz"))) + { + sscanf(value, "%lf", &freq); + freq*=1000; + break; + } + continue; + + } + fclose(f); + if(freq<0)return old_freq(); + return freq; +} + long WINAPI expQueryPerformanceFrequency(long long* z) { -#warning fixme - *z=(long long)550000000; + dbgprintf("QueryPerformanceFrequency()\n"); + *z=(long long)CPU_Freq(); return 1; } long WINAPI exptimeGetTime() -{ - struct timeb t; - ftime(&t); - return 1000*t.time+t.millitm; +{ + struct timeval t; + dbgprintf("timeGetTime()\n"); + gettimeofday(&t, 0); + return 1000*t.tv_sec+t.tv_usec/1000; } void* WINAPI expLocalHandle(void* v) { @@ -723,7 +897,7 @@ int WINAPI expGlobalUnlock(void* v) { dbgprintf("GlobalUnlock\n"); return 1; -} +} // void* WINAPI expGlobalFree(void* v) { @@ -736,7 +910,7 @@ int WINAPI expLocalUnlock(void* v) { dbgprintf("LocalUnlock\n"); return 1; -} +} // void* WINAPI expLocalFree(void* v) { @@ -744,14 +918,12 @@ void* WINAPI expLocalFree(void* 
v) my_release(v); return 0; } - -// HRSRC fun(HMODULE module, char* name, char* type) + HRSRC WINAPI expFindResourceA(HMODULE module, char* name, char* type) { dbgprintf("FindResourceA\n"); return FindResourceA(module, name, type); -} -//HGLOBAL fun(HMODULE module, HRSRC res) +} HGLOBAL WINAPI expLoadResource(HMODULE module, HRSRC res) { dbgprintf("LoadResource\n"); @@ -762,12 +934,12 @@ void* WINAPI expLockResource(long res) dbgprintf("LockResource\n"); return LockResource(res); } -int /*bool*/ WINAPI expFreeResource(long res) +int WINAPI expFreeResource(long res) { dbgprintf("FreeResource\n"); return FreeResource(res); } -//bool fun(HANDLE) +//bool fun(HANDLE) //!0 on success int WINAPI expCloseHandle(long v1) { @@ -778,11 +950,11 @@ int WINAPI expCloseHandle(long v1) const char* WINAPI expGetCommandLineA() { dbgprintf("GetCommandLine\n"); - return "aviplay"; + return "c:\\aviplay.exe"; } LPWSTR WINAPI expGetEnvironmentStringsW() { - static short envs[]={0}; + static short envs[]={'p', 'a', 't', 'h', ' ', 'c', ':', '\\', 0}; dbgprintf("GetEnvStringsW\n"); return envs; } @@ -827,15 +999,27 @@ int WINAPI expGetACP() printf("WARNING: Unsupported call: GetACP\n"); return 0; } +extern WINE_MODREF *MODULE32_LookupHMODULE(HMODULE m); int WINAPI expGetModuleFileNameA(int module, char* s, int len) { + WINE_MODREF *mr; dbgprintf("GetModuleFileNameA\n"); printf("File name of module %X requested\n", module); if(s==0) return 0; - if(len<10) + if(len<35) return 0; - strcpy(s, "aviplay"); + strcpy(s, "c:\\windows\\system\\"); + mr=MODULE32_LookupHMODULE(module); + if(mr==0)//oops + { + strcat(s, "aviplay.dll"); + return 1; + } + if(strrchr(mr->filename, '/')==NULL) + strcat(s, mr->filename); + else + strcat(s, strrchr(mr->filename, '/')+1); return 1; } @@ -856,7 +1040,6 @@ int WINAPI expFreeLibrary(int module) { dbgprintf("FreeLibrary\n"); return FreeLibrary(module); -// return 0; } void* WINAPI expGetProcAddress(HMODULE mod, char* name) { @@ -873,10 +1056,8 @@ long WINAPI 
expCreateFileMappingA(int hFile, void* lpAttr, long WINAPI expOpenFileMappingA(long hFile, long hz, const char* name) { -#warning fixme -// dbgprintf("OpenFileMappingA\n"); + dbgprintf("OpenFileMappingA\n"); return OpenFileMappingA(hFile, hz, name); -// return 0; } void* WINAPI expMapViewOfFile(HANDLE file, DWORD mode, DWORD offHigh, DWORD offLow, DWORD size) @@ -886,36 +1067,129 @@ void* WINAPI expMapViewOfFile(HANDLE file, DWORD mode, DWORD offHigh, DWORD offL return (char*)file+offLow; } +void* WINAPI expUnmapViewOfFile(void* view) +{ + dbgprintf("UnmapViewOfFile()\n"); + return 0; +} + void* WINAPI expSleep(int time) { dbgprintf("Sleep(%d)\n", time); usleep(time); return 0; +} + // why does IV32 codec want to call this? I don't know ... +void* WINAPI expCreateCompatibleDC(int hdc) +{ + dbgprintf("CreateCompatibleDC(%d)\n", hdc); + return (void*)129; +} + +int WINAPI expGetDeviceCaps(int hdc, int unk) +{ + dbgprintf("GetDeviceCaps(%d, %d)\n", hdc, unk); + return 0; +} + +WIN_BOOL WINAPI expDeleteDC(int hdc) +{ + dbgprintf("DeleteDC(%d)\n", hdc); + return 0; +} + +int expwsprintfA(char* string, char* format, ...) 
+{ + va_list va; + va_start(va, format); + dbgprintf("wsprintfA\n"); + return vsprintf(string, format, va); +} + +int WINAPI expGetPrivateProfileIntA(char* appname, char* keyname, int default_value, char* filename) +{ + int size=4; + char* fullname; + dbgprintf("GetPrivateProfileIntA(%s, %s, %s)\n", appname, keyname, filename ); + if(!(appname && keyname && filename) ) return default_value; + fullname=(char*)malloc(50+strlen(appname)+strlen(keyname)+strlen(filename)); + strcpy(fullname, "Software\\IniFileMapping\\"); + strcat(fullname, appname); + strcat(fullname, "\\"); + strcat(fullname, keyname); + strcat(fullname, "\\"); + strcat(fullname, filename); + RegQueryValueExA(HKEY_LOCAL_MACHINE, fullname, NULL, NULL, &default_value, &size); + free(fullname); + return default_value; +} + +int WINAPI expDefDriverProc(int _private, int id, int msg, int arg1, int arg2) +{ + printf("Called DefDriverProc(%X)\n", msg); + return 0; +} + +int WINAPI expSizeofResource(int v1, int v2) +{ + dbgprintf("SizeofResource()\n"); + return SizeofResource(v1, v2); +} + +int WINAPI expGetLastError() +{ + dbgprintf("GetLastError()\n"); + return GetLastError(); +} + +void WINAPI expSetLastError(int error) +{ + dbgprintf("SetLastError()\n"); + SetLastError(error); } +char* expstrrchr(char* string, int value) +{ + return strrchr(string, value); +} +char* expstrchr(char* string, int value) +{ + return strchr(string, value); +} +int WINAPI expGetFileVersionInfoSizeA(const char* name, int* lpHandle) +{ + printf("GetFileVersionInfoSizeA(%s,0x%X)\n", name, lpHandle); + return 0; +} +int WINAPI expIsBadStringPtrW(const short* string, int nchars) +{ + if(string==0)return 1; + return 0; +} +extern long WINAPI InterlockedExchangeAdd( long* dest, long incr ) +{ + long ret; + __asm__ __volatile__( "lock; xaddl %0,(%1)" + : "=r" (ret) : "r" (dest), "0" (incr) : "memory" ); + return ret; +} +extern long WINAPI expInterlockedIncrement( long* dest ) +{ + return InterlockedExchangeAdd( dest, 1 ) + 1; +} 
+extern long WINAPI expInterlockedDecrement( long* dest ) +{ + return InterlockedExchangeAdd( dest, -1 ) - 1; +} - - - - - - - - - - - - - - - - - - +extern void WINAPI expOutputDebugStringA( const char* string ) +{ + fprintf(stderr, "DEBUG: %s\n", string); +} struct exports { @@ -936,6 +1210,7 @@ struct libs struct exports exp_kernel32[]={ FF(IsBadWritePtr, 357) FF(IsBadReadPtr, 354) +FF(IsBadStringPtrW, -1) FF(DisableThreadLibraryCalls, -1) FF(CreateThread, -1) FF(CreateEventA, -1) @@ -947,10 +1222,11 @@ FF(GetVersion, 332) FF(HeapCreate, 461) FF(HeapAlloc, -1) FF(HeapDestroy, -1) +FF(HeapFree, -1) FF(VirtualAlloc, -1) FF(VirtualFree, -1) FF(InitializeCriticalSection, -1) -FF(EnterCriticalSection, -1) +FF(EnterCriticalSection, -1) FF(LeaveCriticalSection, -1) FF(DeleteCriticalSection, -1) FF(TlsAlloc, -1) @@ -960,7 +1236,7 @@ FF(TlsSetValue, -1) FF(GetCurrentThreadId, -1) FF(LocalAlloc, -1) FF(LocalLock, -1) -FF(GlobalAlloc, -1) +FF(GlobalAlloc, -1) FF(GlobalLock, -1) FF(MultiByteToWideChar, 427) FF(WideCharToMultiByte, -1) @@ -979,10 +1255,11 @@ FF(ReleaseSemaphore, -1) FF(FindResourceA, -1) FF(LockResource, -1) FF(FreeResource, -1) +FF(SizeofResource, -1) FF(CloseHandle, -1) FF(GetCommandLineA, -1) -FF(GetEnvironmentStringsW, -1) -FF(FreeEnvironmentStringsW, -1) +FF(GetEnvironmentStringsW, -1) +FF(FreeEnvironmentStringsW, -1) FF(GetEnvironmentStrings, -1) FF(GetStartupInfoA, -1) FF(GetStdHandle, -1) @@ -997,7 +1274,15 @@ FF(FreeLibrary, -1) FF(CreateFileMappingA, -1) FF(OpenFileMappingA, -1) FF(MapViewOfFile, -1) +FF(UnmapViewOfFile, -1) FF(Sleep, -1) +FF(GetModuleHandleA, -1) +FF(GetPrivateProfileIntA, -1) +FF(GetLastError, -1) +FF(SetLastError, -1) +FF(InterlockedIncrement, -1) +FF(InterlockedDecrement, -1) +FF(OutputDebugStringA, -1) }; struct exports exp_msvcrt[]={ @@ -1006,13 +1291,17 @@ FF(_initterm, -1) FF(free, -1) {"??3@YAXPAX@Z", -1, expdelete}, {"??2@YAPAXI@Z", -1, expnew}, +FF(strrchr, -1) +FF(strchr, -1) }; struct exports exp_winmm[]={ 
FF(GetDriverModuleHandle, -1) -FF(timeGetTime, -1) +FF(timeGetTime, -1) +FF(DefDriverProc, -1) }; struct exports exp_user32[]={ FF(LoadStringA, -1) +FF(wsprintfA, -1) }; struct exports exp_advapi32[]={ FF(RegOpenKeyA, -1) @@ -1022,6 +1311,14 @@ FF(RegQueryValueExA, -1) FF(RegSetValueExA, -1) FF(RegCloseKey, -1) }; +struct exports exp_gdi32[]={ +FF(CreateCompatibleDC, -1) +FF(GetDeviceCaps, -1) +FF(DeleteDC, -1) +}; +struct exports exp_version[]={ +FF(GetFileVersionInfoSizeA, -1) +}; #define LL(X) \ {#X".dll", sizeof(exp_##X)/sizeof(struct exports), exp_##X}, @@ -1031,6 +1328,8 @@ LL(msvcrt) LL(winmm) LL(user32) LL(advapi32) +LL(gdi32) +LL(version) }; void* LookupExternal(const char* library, int ordinal) @@ -1070,6 +1369,7 @@ void* LookupExternalByName(const char* library, const char* name) { char* answ; int i,j; +// return (void*)ext_unknown; if(library==0) { printf("ERROR: library=0\n"); diff --git a/libs/winloader/module.c b/libs/winloader/module.c index 52fb78ee1d..eda45d13ea 100644 --- a/libs/winloader/module.c +++ b/libs/winloader/module.c @@ -5,12 +5,42 @@ */ #include +#include #include -#include #include +#include #include -#include #include + +#include +#include + +#ifdef __linux__ +#include +#include +#else +#define LDT_ENTRIES 8192 +#define LDT_ENTRY_SIZE 8 + +struct modify_ldt_ldt_s { + unsigned int entry_number; + unsigned long base_addr; + unsigned int limit; + unsigned int seg_32bit:1; + unsigned int contents:2; + unsigned int read_exec_only:1; + unsigned int limit_in_pages:1; + unsigned int seg_not_present:1; + unsigned int useable:1; +}; + +#define MODIFY_LDT_CONTENTS_DATA 0 +#define MODIFY_LDT_CONTENTS_STACK 1 +#define MODIFY_LDT_CONTENTS_CODE 2 +#define __NR_modify_ldt 123 +#endif + + #include #include #include @@ -29,6 +59,132 @@ typedef struct modref_list_t modref_list; +/*********************************************************************** + * LDT_EntryToBytes + * + * Convert an ldt_entry structure to the raw bytes of the descriptor. 
+ */ +static void LDT_EntryToBytes( unsigned long *buffer, const struct modify_ldt_ldt_s *content ) +{ + *buffer++ = ((content->base_addr & 0x0000ffff) << 16) | + (content->limit & 0x0ffff); + *buffer = (content->base_addr & 0xff000000) | + ((content->base_addr & 0x00ff0000)>>16) | + (content->limit & 0xf0000) | + (content->contents << 10) | + ((content->read_exec_only == 0) << 9) | + ((content->seg_32bit != 0) << 22) | + ((content->limit_in_pages != 0) << 23) | + 0xf000; +} + + +// +// funcs: +// +// 0 read LDT +// 1 write old mode +// 0x11 write +// +static int modify_ldt( int func, struct modify_ldt_ldt_s *ptr, + unsigned long count ) +{ + int res; +#ifdef __PIC__ + __asm__ __volatile__( "pushl %%ebx\n\t" + "movl %2,%%ebx\n\t" + "int $0x80\n\t" + "popl %%ebx" + : "=a" (res) + : "0" (__NR_modify_ldt), + "r" (func), + "c" (ptr), + "d" (sizeof(struct modify_ldt_ldt_s)*count) ); +#else + __asm__ __volatile__("int $0x80" + : "=a" (res) + : "0" (__NR_modify_ldt), + "b" (func), + "c" (ptr), + "d" (sizeof(struct modify_ldt_ldt_s)*count) ); +#endif /* __PIC__ */ + if (res >= 0) return res; + errno = -res; + return -1; +} +static int fs_installed=0; +static char* fs_seg=0; +static int install_fs() +{ + struct modify_ldt_ldt_s array; + int fd; + int ret; + void* prev_struct; + + if(fs_installed) + return 0; + + fd=open("/dev/zero", O_RDWR); + fs_seg=mmap((void*)0xbf000000, 0x30000, PROT_READ | PROT_WRITE, MAP_PRIVATE, + fd, 0); + if(fs_seg==0) + { + printf("ERROR: Couldn't allocate memory for fs segment\n"); + return -1; + } + array.base_addr=((int)fs_seg+0xffff) & 0xffff0000; + array.entry_number=0x1; + array.limit=array.base_addr+getpagesize()-1; + array.seg_32bit=1; + array.read_exec_only=0; + array.seg_not_present=0; + array.contents=MODIFY_LDT_CONTENTS_DATA; + array.limit_in_pages=0; +#ifdef linux + ret=modify_ldt(0x1, &array, 1); + if(ret<0) + { + perror("install_fs"); + MESSAGE("Couldn't install fs segment, expect segfault\n"); + } +#endif /*linux*/ + +#if 
defined(__NetBSD__) || defined(__FreeBSD__) || defined(__OpenBSD__) + { + long d[2]; + + LDT_EntryToBytes( d, &array ); + ret = i386_set_ldt(0x1, (union descriptor *)d, 1); + if (ret < 0) + { + perror("install_fs"); + MESSAGE("Did you reconfigure the kernel with \"options USER_LDT\"?\n"); + } + } +#endif /* __NetBSD__ || __FreeBSD__ || __OpenBSD__ */ + __asm__ + ( + "movl $0xf,%eax\n\t" + "pushw %ax\n\t" + "movw %ax, %fs\n\t" + ); + prev_struct=malloc(8); + *(void**)array.base_addr=prev_struct; + printf("prev_struct: 0x%X\n", prev_struct); + close(fd); + + fs_installed=1; + return 0; +}; +static int uninstall_fs() +{ + if(fs_seg==0) + return -1; + munmap(fs_seg, 0x30000); + return 0; +} + + //WINE_MODREF *local_wm=NULL; modref_list* local_wm=NULL; @@ -40,6 +196,7 @@ WINE_MODREF *MODULE_FindModule(LPCSTR m) return NULL; while(strcmp(m, list->wm->filename)) { + printf("%s: %x\n", list->wm->filename, list->wm->module); list=list->prev; if(list==NULL) return NULL; @@ -59,6 +216,7 @@ void MODULE_RemoveFromList(WINE_MODREF *mod) { free(list); local_wm=NULL; + uninstall_fs(); return; } for(;list;list=list->prev) @@ -241,330 +399,6 @@ void MODULE_DllProcessDetach( WINE_MODREF* wm, WIN_BOOL bForceDetach, LPVOID lpR MODULE_InitDll( wm, DLL_PROCESS_DETACH, lpReserved ); } -/************************************************************************* - * MODULE_DllThreadAttach - * - * Send DLL thread attach notifications. These are sent in the - * reverse sequence of process detach notification. - * - */ - /* -void MODULE_DllThreadAttach( LPVOID lpReserved ) -{ - WINE_MODREF *wm; - - MODULE_InitDll( wm, DLL_THREAD_ATTACH, lpReserved ); -}*/ - -/************************************************************************* - * MODULE_DllThreadDetach - * - * Send DLL thread detach notifications. These are sent in the - * same sequence as process detach notification. 
- * - */ - /* -void MODULE_DllThreadDetach( LPVOID lpReserved ) -{ - WINE_MODREF *wm; - MODULE_InitDll( wm, DLL_THREAD_DETACH, lpReserved ); -} -*/ - -/*********************************************************************** - * MODULE_CreateDummyModule - * - * Create a dummy NE module for Win32 or Winelib. - */ -HMODULE MODULE_CreateDummyModule( LPCSTR filename, HMODULE module32 ) -{ - printf("MODULE_CreateDummyModule:: Not implemented\n"); - return 0; -} -/* -HMODULE MODULE_CreateDummyModule( LPCSTR filename, HMODULE module32 ) -{ - HMODULE hModule; - NE_MODULE *pModule; - SEGTABLEENTRY *pSegment; - char *pStr,*s; - unsigned int len; - const char* basename; - OFSTRUCT *ofs; - int of_size, size; - - // Extract base filename - basename = strrchr(filename, '\\'); - if (!basename) basename = filename; - else basename++; - len = strlen(basename); - if ((s = strchr(basename, '.'))) len = s - basename; - - // Allocate module - of_size = sizeof(OFSTRUCT) - sizeof(ofs->szPathName) - + strlen(filename) + 1; - size = sizeof(NE_MODULE) + - // loaded file info - of_size + - // segment table: DS,CS - 2 * sizeof(SEGTABLEENTRY) + - // name table - len + 2 + - // several empty tables - 8; - - hModule = GlobalAlloc16( GMEM_MOVEABLE | GMEM_ZEROINIT, size ); - if (!hModule) return (HMODULE)11; // invalid exe - - FarSetOwner16( hModule, hModule ); - pModule = (NE_MODULE *)GlobalLock16( hModule ); - - // Set all used entries - pModule->magic = IMAGE_OS2_SIGNATURE; - pModule->count = 1; - pModule->next = 0; - pModule->flags = 0; - pModule->dgroup = 0; - pModule->ss = 1; - pModule->cs = 2; - pModule->heap_size = 0; - pModule->stack_size = 0; - pModule->seg_count = 2; - pModule->modref_count = 0; - pModule->nrname_size = 0; - pModule->fileinfo = sizeof(NE_MODULE); - pModule->os_flags = NE_OSFLAGS_WINDOWS; - pModule->self = hModule; - pModule->module32 = module32; - - // Set version and flags - if (module32) - { - pModule->expected_version = - 
((PE_HEADER(module32)->OptionalHeader.MajorSubsystemVersion & 0xff) << 8 ) | - (PE_HEADER(module32)->OptionalHeader.MinorSubsystemVersion & 0xff); - pModule->flags |= NE_FFLAGS_WIN32; - if (PE_HEADER(module32)->FileHeader.Characteristics & IMAGE_FILE_DLL) - pModule->flags |= NE_FFLAGS_LIBMODULE | NE_FFLAGS_SINGLEDATA; - } - - // Set loaded file information - ofs = (OFSTRUCT *)(pModule + 1); - memset( ofs, 0, of_size ); - ofs->cBytes = of_size < 256 ? of_size : 255; // FIXME - strcpy( ofs->szPathName, filename ); - - pSegment = (SEGTABLEENTRY*)((char*)(pModule + 1) + of_size); - pModule->seg_table = (int)pSegment - (int)pModule; - // Data segment - pSegment->size = 0; - pSegment->flags = NE_SEGFLAGS_DATA; - pSegment->minsize = 0x1000; - pSegment++; - // Code segment - pSegment->flags = 0; - pSegment++; - - // Module name - pStr = (char *)pSegment; - pModule->name_table = (int)pStr - (int)pModule; - assert(len<256); - *pStr = len; - lstrcpynA( pStr+1, basename, len+1 ); - pStr += len+2; - - // All tables zero terminated - pModule->res_table = pModule->import_table = pModule->entry_table = - (int)pStr - (int)pModule; - - NE_RegisterModule( pModule ); - return hModule; -} - -*/ - -/*********************************************************************** - * MODULE_GetBinaryType - * - * The GetBinaryType function determines whether a file is executable - * or not and if it is it returns what type of executable it is. - * The type of executable is a property that determines in which - * subsystem an executable file runs under. 
- * - * Binary types returned: - * SCS_32BIT_BINARY: A Win32 based application - * SCS_DOS_BINARY: An MS-Dos based application - * SCS_WOW_BINARY: A Win16 based application - * SCS_PIF_BINARY: A PIF file that executes an MS-Dos based app - * SCS_POSIX_BINARY: A POSIX based application ( Not implemented ) - * SCS_OS216_BINARY: A 16bit OS/2 based application - * - * Returns TRUE if the file is an executable in which case - * the value pointed by lpBinaryType is set. - * Returns FALSE if the file is not an executable or if the function fails. - * - * To do so it opens the file and reads in the header information - * if the extended header information is not present it will - * assume that the file is a DOS executable. - * If the extended header information is present it will - * determine if the file is a 16 or 32 bit Windows executable - * by check the flags in the header. - * - * Note that .COM and .PIF files are only recognized by their - * file name extension; but Windows does it the same way ... - */ - /* -static WIN_BOOL MODULE_GetBinaryType( HANDLE hfile, LPCSTR filename, - LPDWORD lpBinaryType ) -{ - IMAGE_DOS_HEADER mz_header; - char magic[4], *ptr; - DWORD len; - - // Seek to the start of the file and read the DOS header information. - if ( SetFilePointer( hfile, 0, NULL, SEEK_SET ) != -1 - && ReadFile( hfile, &mz_header, sizeof(mz_header), &len, NULL ) - && len == sizeof(mz_header) ) - { - // Now that we have the header check the e_magic field - // to see if this is a dos image. - // - if ( mz_header.e_magic == IMAGE_DOS_SIGNATURE ) - { - WIN_BOOL lfanewValid = FALSE; - // We do have a DOS image so we will now try to seek into - // the file by the amount indicated by the field - // "Offset to extended header" and read in the - // "magic" field information at that location. - // This will tell us if there is more header information - // to read or not. 
- // - // But before we do we will make sure that header - // structure encompasses the "Offset to extended header" - // field. - // - if ( (mz_header.e_cparhdr<<4) >= sizeof(IMAGE_DOS_HEADER) ) - if ( ( mz_header.e_crlc == 0 ) || - ( mz_header.e_lfarlc >= sizeof(IMAGE_DOS_HEADER) ) ) - if ( mz_header.e_lfanew >= sizeof(IMAGE_DOS_HEADER) - && SetFilePointer( hfile, mz_header.e_lfanew, NULL, SEEK_SET ) != -1 - && ReadFile( hfile, magic, sizeof(magic), &len, NULL ) - && len == sizeof(magic) ) - lfanewValid = TRUE; - - if ( !lfanewValid ) - { - // If we cannot read this "extended header" we will - // assume that we have a simple DOS executable. - // - *lpBinaryType = SCS_DOS_BINARY; - return TRUE; - } - else - { - // Reading the magic field succeeded so - // we will try to determine what type it is. - // - if ( *(DWORD*)magic == IMAGE_NT_SIGNATURE ) - { - // This is an NT signature. - // - *lpBinaryType = SCS_32BIT_BINARY; - return TRUE; - } - else if ( *(WORD*)magic == IMAGE_OS2_SIGNATURE ) - { - // The IMAGE_OS2_SIGNATURE indicates that the - // "extended header is a Windows executable (NE) - // header." This can mean either a 16-bit OS/2 - // or a 16-bit Windows or even a DOS program - // (running under a DOS extender). To decide - // which, we'll have to read the NE header. - /// - - IMAGE_OS2_HEADER ne; - if ( SetFilePointer( hfile, mz_header.e_lfanew, NULL, SEEK_SET ) != -1 - && ReadFile( hfile, &ne, sizeof(ne), &len, NULL ) - && len == sizeof(ne) ) - { - switch ( ne.ne_exetyp ) - { - case 2: *lpBinaryType = SCS_WOW_BINARY; return TRUE; - case 5: *lpBinaryType = SCS_DOS_BINARY; return TRUE; - default: *lpBinaryType = SCS_OS216_BINARY; return TRUE; - } - } - // Couldn't read header, so abort. - return FALSE; - } - else - { - // Unknown extended header, but this file is nonetheless - // DOS-executable. - // - *lpBinaryType = SCS_DOS_BINARY; - return TRUE; - } - } - } - } - - // If we get here, we don't even have a correct MZ header. 
- // Try to check the file extension for known types ... - // - ptr = strrchr( filename, '.' ); - if ( ptr && !strchr( ptr, '\\' ) && !strchr( ptr, '/' ) ) - { - if ( !lstrcmpiA( ptr, ".COM" ) ) - { - *lpBinaryType = SCS_DOS_BINARY; - return TRUE; - } - - if ( !lstrcmpiA( ptr, ".PIF" ) ) - { - *lpBinaryType = SCS_PIF_BINARY; - return TRUE; - } - } - - return FALSE; -} -*/ -/*********************************************************************** - * GetBinaryTypeA [KERNEL32.280] - */ -/* -WIN_BOOL WINAPI GetBinaryTypeA( LPCSTR lpApplicationName, LPDWORD lpBinaryType ) -{ - WIN_BOOL ret = FALSE; - HANDLE hfile; - - TRACE_(win32)("%s\n", lpApplicationName ); - - // Sanity check. - - if ( lpApplicationName == NULL || lpBinaryType == NULL ) - return FALSE; - - // Open the file indicated by lpApplicationName for reading. - - hfile = CreateFileA( lpApplicationName, GENERIC_READ, 0, - NULL, OPEN_EXISTING, 0, -1 ); - if ( hfile == INVALID_HANDLE_VALUE ) - return FALSE; - - // Check binary type - - ret = MODULE_GetBinaryType( hfile, lpApplicationName, lpBinaryType ); - - // Close the file. 
- - CloseHandle( hfile ); - - return ret; -} -*/ /*********************************************************************** * LoadLibraryExA (KERNEL32) @@ -578,7 +412,9 @@ HMODULE WINAPI LoadLibraryExA(LPCSTR libname, HANDLE hfile, DWORD flags) SetLastError(ERROR_INVALID_PARAMETER); return 0; } - + if(fs_installed==0) + install_fs(); + wm = MODULE_LoadLibraryExA( libname, hfile, flags ); if ( wm ) @@ -619,11 +455,13 @@ WINE_MODREF *MODULE_LoadLibraryExA( LPCSTR libname, HFILE hfile, DWORD flags ) SetLastError( ERROR_FILE_NOT_FOUND ); TRACE("Trying native dll '%s'\n", libname); pwm = PE_LoadLibraryExA(libname, flags); +#ifdef HAVE_LIBDL if(!pwm) { TRACE("Trying ELF dll '%s'\n", libname); - pwm=ELFDLL_LoadLibraryExA(libname, flags); + pwm=(WINE_MODREF*)ELFDLL_LoadLibraryExA(libname, flags); } +#endif // printf("0x%08x\n", pwm); // break; if(pwm) @@ -761,10 +599,12 @@ FARPROC MODULE_GetProcAddress( retproc = PE_FindExportedFunction( wm, function, snoop ); if (!retproc) SetLastError(ERROR_PROC_NOT_FOUND); return retproc; +#ifdef HAVE_LIBDL case MODULE32_ELF: - retproc = dlsym( wm->module, function); + retproc = (FARPROC) dlsym( wm->module, function); if (!retproc) SetLastError(ERROR_PROC_NOT_FOUND); return retproc; +#endif default: ERR("wine_modref type %d not handled.\n",wm->type); SetLastError(ERROR_INVALID_HANDLE); diff --git a/libs/winloader/pe_image.c b/libs/winloader/pe_image.c index f3edd2b6e9..a98529a2cc 100644 --- a/libs/winloader/pe_image.c +++ b/libs/winloader/pe_image.c @@ -289,7 +289,7 @@ DWORD fixup_imports( WINE_MODREF *wm ) break; //#warning FIXME: here we should fill imports - printf("Loading imports for %s\n", name); + TRACE("Loading imports for %s.dll\n", name); if (pe_imp->u.OriginalFirstThunk != 0) { TRACE("Microsoft style imports used\n"); @@ -388,7 +388,7 @@ static void do_relocations( unsigned int load_addr, IMAGE_BASE_RELOCATION *r ) { int offset = r->TypeOffset[i] & 0xFFF; int type = r->TypeOffset[i] >> 12; - TRACE_(fixup)("patching %x type 
%x\n", offset, type); +// TRACE_(fixup)("patching %x type %x\n", offset, type); switch(type) { case IMAGE_REL_BASED_ABSOLUTE: break; @@ -797,20 +797,12 @@ WINE_MODREF *PE_CreateModule( HMODULE hModule, wm->binfmt.pe.pe_resource = pe_resource; wm->binfmt.pe.tlsindex = -1; - wm->filename = strdup( filename ); + wm->filename = malloc(strlen(filename)+1); + strcpy(wm->filename, filename ); wm->modname = strrchr( wm->filename, '\\' ); if (!wm->modname) wm->modname = wm->filename; else wm->modname++; -// result = GetShortPathNameA( wm->filename, NULL, 0 ); -// wm->short_filename = (char *)HeapAlloc( GetProcessHeap(), 0, result+1 ); -// GetShortPathNameA( wm->filename, wm->short_filename, result+1 ); -// wm->short_modname = strrchr( wm->short_filename, '\\' ); -// if (!wm->short_modname) wm->short_modname = wm->short_filename; -// else wm->short_modname++; -// return NULL; -// } - if ( pe_export ) dump_exports( hModule ); @@ -836,22 +828,15 @@ WINE_MODREF *PE_CreateModule( HMODULE hModule, */ WINE_MODREF *PE_LoadLibraryExA (LPCSTR name, DWORD flags) { -// struct load_dll_request *req = get_req_buffer(); HMODULE hModule32; WINE_MODREF *wm; char filename[256]; -// HANDLE hFile; int hFile; WORD version = 0; -// if ( SearchPathA( NULL, name, ".DLL", -// sizeof(filename), filename, NULL ) == 0 ) return NULL; strncpy(filename, name, sizeof(filename)); -// hFile = CreateFileA( filename, GENERIC_READ, FILE_SHARE_READ, -// NULL, OPEN_EXISTING, 0, -1 ); hFile=open(filename, O_RDONLY); -// if ( hFile == INVALID_HANDLE_VALUE ) return NULL; if(hFile==-1) return NULL; @@ -863,34 +848,12 @@ WINE_MODREF *PE_LoadLibraryExA (LPCSTR name, DWORD flags) return NULL; } - // Create 16-bit dummy module -/* - if ((hModule16 = MODULE_CreateDummyModule( filename, hModule32 )) < 32) - { - CloseHandle( hFile ); - SetLastError( (DWORD)hModule16 ); // This should give the correct error - return NULL; - } -*/ - if ( !(wm = PE_CreateModule( hModule32, filename, flags, FALSE )) ) { ERR( "can't load %s\n", 
filename ); - // FreeLibrary16( hModule16 ); SetLastError( ERROR_OUTOFMEMORY ); return NULL; } - /* - if (wm->binfmt.pe.pe_export) - SNOOP_RegisterDLL(wm->module,wm->modname,wm->binfmt.pe.pe_export->NumberOfFunctions); - - req->handle = hFile; - req->base = (void *)hModule32; - req->dbg_offset = 0; - req->dbg_size = 0; - req->name = &wm->modname; - server_call_noerr( REQ_LOAD_DLL ); - */ close(hFile); return wm; } @@ -944,13 +907,6 @@ WIN_BOOL PE_InitDLL( WINE_MODREF *wm, DWORD type, LPVOID lpReserved ) return retv; } -/************************************************************************ - * PE_InitTls (internal) - * - * If included, initialises the thread local storages of modules. - * Pointers in those structs are not RVAs but real pointers which have been - * relocated by do_relocations() already. - */ static LPVOID _fixup_address(PIMAGE_OPTIONAL_HEADER opt,int delta,LPVOID addr) { if ( ((DWORD)addr>opt->ImageBase) && diff --git a/libs/winloader/registry.c b/libs/winloader/registry.c index e118b12870..680ffc88c9 100644 --- a/libs/winloader/registry.c +++ b/libs/winloader/registry.c @@ -1,408 +1,410 @@ -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include - -struct reg_value -{ - int type; - char* name; - int len; - char* value; -}; - -static int reg_size=0; - -static struct reg_value* regs=0; - -struct reg_handle_s; -typedef struct reg_handle_s -{ - int handle; - char* name; - struct reg_handle_s* next; - struct reg_handle_s* prev; -} reg_handle_t; - -static reg_handle_t* head=0; - -#define DIR -25 - -static void create_registry(); -static void open_registry(); -static void save_registry(); - - - -static void create_registry() -{ - if(regs) - { - printf("Logic error: create_registry() called with existing registry\n"); - save_registry(); - return; - } - regs=(struct reg_value*)malloc(3*sizeof(struct reg_value)); - regs[0].type=regs[1].type=DIR; - regs[0].name=strdup("HKLM"); - regs[1].name=strdup("HKCU"); - 
regs[0].value=regs[1].value=NULL; - regs[0].len=regs[1].len=0; - reg_size=2; - save_registry(); -} -static void open_registry() -{ - int fd; - int i; - int len; - char user_conf[PATH_MAX+1]; - if(regs) - { - printf("Multiple open_registry(>\n"); - return; - } - snprintf(user_conf, PATH_MAX, "%s/.gstreamer/win32/registry", getenv("HOME")); - fd=open(user_conf, O_RDONLY); - if(fd==-1) - { - printf("Creating new registry\n"); - create_registry(); - return; - } - read(fd, ®_size, 4); - regs=(struct reg_value*)malloc(reg_size*sizeof(struct reg_value)); - for(i=0; iprev) - { - if(!strcmp(t->name, name)) - { - return t; - } - } - return 0; -} -static struct reg_value* find_value_by_name(const char* name) -{ - int i; - for(i=0; iprev) - { - if(t->handle==handle) - { - return t; - } - } - return 0; -} -static int generate_handle() -{ - static int zz=249; - zz++; - while((zz==HKEY_LOCAL_MACHINE) || (zz==HKEY_CURRENT_USER)) - zz++; - return zz; -} - -static reg_handle_t* insert_handle(long handle, const char* name) -{ - reg_handle_t* t; - t=(reg_handle_t*)malloc(sizeof(reg_handle_t)); - if(head==0) - { - t->prev=0; - } - else - { - head->next=t; - t->prev=head; - } - t->next=0; - t->name=strdup(name); - t->handle=handle; - head=t; - return t; -} -static char* build_keyname(long key, const char* subkey) -{ - char* full_name; - reg_handle_t* t; - if((t=find_handle(key))==0) - { - printf("Invalid key\n"); - return NULL; - } - if(subkey==NULL) - subkey=""; - full_name=(char*)malloc(strlen(t->name)+strlen(subkey)+10); - strcpy(full_name, t->name); - strcat(full_name, "\\"); - strcat(full_name, subkey); - return full_name; -} -struct reg_value* insert_reg_value(int handle, const char* name, int type, void* value, int len) -{ - reg_handle_t* t; - struct reg_value* v; - char* fullname; - if((fullname=build_keyname(handle, name))==NULL) - { - printf("Invalid handle\n"); - return NULL; - } - - if((v=find_value_by_name(fullname))==0) - //creating new value in registry - { - if(regs==0) 
- create_registry(); - regs=(struct reg_value*)realloc(regs, sizeof(struct reg_value)*(reg_size+1)); - v=regs+reg_size; - reg_size++; - } - else - //replacing old one - { - free(v->value); - free(v->name); - } - v->type=type; - v->len=len; - v->value=(char*)malloc(len); - memcpy(v->value, value, len); - v->name=strdup(fullname); - save_registry(); - return v; -} - -static void init_registry() -{ - printf("Initializing registry\n"); - open_registry(); - insert_handle(HKEY_LOCAL_MACHINE, "HKLM"); - insert_handle(HKEY_CURRENT_USER, "HKCU"); -} -static reg_handle_t* find_handle_2(long key, char* subkey) -{ - char* full_name; - reg_handle_t* t; - if((t=find_handle(key))==0) - { - printf("Invalid key\n"); - return (reg_handle_t*)-1; - } - if(subkey==NULL) - return t; - full_name=(char*)malloc(strlen(t->name)+strlen(subkey)+10); - strcpy(full_name, t->name); - strcat(full_name, "\\"); - strcat(full_name, subkey); - t=find_handle_by_name(full_name); - free(full_name); - return t; -} - -long RegOpenKeyExA(long key, char* subkey, long reserved, long access, int* newkey) -{ - char* full_name; - reg_handle_t* t; - struct reg_value* v; - printf("Opening key %s\n", subkey); - - if(!regs) - init_registry(); - -/* t=find_handle_2(key, subkey); - - if(t==0) - return -1; - - if(t==(reg_handle_t*)-1) - return -1; -*/ - full_name=build_keyname(key, subkey); - if(!full_name) - return -1; - v=find_value_by_name(full_name); - - t=insert_handle(generate_handle(), full_name); - *newkey=t->handle; - free(full_name); - - return 0; -} -long RegCloseKey(long key) -{ - reg_handle_t *handle; - if(key==HKEY_LOCAL_MACHINE) - return 0; - if(key==HKEY_CURRENT_USER) - return 0; - handle=find_handle(key); - if(handle==0) - return 0; - if(handle->prev) - handle->prev->next=handle->next; - if(handle->next) - handle->next->prev=handle->prev; - if(handle->name) - free(handle->name); - if(handle==head) - head=head->prev; - free(handle); - return 1; -} -long RegQueryValueExA(long key, char* value, int* 
reserved, int* type, int* data, int* count) -{ - struct reg_value* t; - char* c; - printf("Querying value %s\n", value); - if(!regs) - init_registry(); - - c=build_keyname(key, value); - if(c==NULL) - return 1; - if((t=find_value_by_name(c))==0) - { - free(c); - return 2; - } - free(c); - if(type) - *type=t->type; - if(data) - memcpy(data, t->value, (t->len<*count)?t->len:*count); - if(count) - { - if(*countlen) - { - *count=t->len; - return ERROR_MORE_DATA; - }else return 0; - } - return 0; -} -long RegCreateKeyExA(long key, char* name, long reserved, - void* classs, long options, long security, - void* sec_attr, int* newkey, int* status) -{ - reg_handle_t* t; - char* fullname; - struct reg_value* v; - printf("Creating/Opening key %s\n", name); - if(!regs) - init_registry(); - - fullname=build_keyname(key, name); - if(fullname==NULL) - return 1; - v=find_value_by_name(fullname); - if(v==0) - { - int qw=45708; - v=insert_reg_value(key, name, DIR, &qw, 4); - *status=REG_CREATED_NEW_KEY; -// return 0; - } - else - *status=REG_OPENED_EXISTING_KEY; - - t=insert_handle(generate_handle(), fullname); - *newkey=t->handle; - free(fullname); - return 0; -} -long RegSetValueExA(long key, char* name, long v1, long v2, void* data, long size) -{ - struct reg_value* t; - char* c; - printf("Request to set value %s\n", name); - if(!regs) - init_registry(); - - c=build_keyname(key, name); - if(c==NULL) - return 1; - insert_reg_value(key, name, v2, data, size); - free(c); - return 0; -} +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + +struct reg_value +{ + int type; + char* name; + int len; + char* value; +}; + +static int reg_size=0; + +static struct reg_value* regs=0; + +struct reg_handle_s; +typedef struct reg_handle_s +{ + int handle; + char* name; + struct reg_handle_s* next; + struct reg_handle_s* prev; +} reg_handle_t; + +static reg_handle_t* head=0; + +#define DIR -25 + +static void create_registry(); +static void 
open_registry(); +static void save_registry(); + + + + +static void create_registry(){ + if(regs) + { + printf("Logic error: create_registry() called with existing registry\n"); + save_registry(); + return; + } + regs=(struct reg_value*)malloc(3*sizeof(struct reg_value)); + regs[0].type=regs[1].type=DIR; + regs[0].name=(char*)malloc(5); + strcpy(regs[0].name, "HKLM"); + regs[1].name=(char*)malloc(5); + strcpy(regs[1].name, "HKCU"); + regs[0].value=regs[1].value=NULL; + regs[0].len=regs[1].len=0; + reg_size=2; + save_registry(); +} +static void open_registry() +{ + int fd; + int i; + int len; + struct passwd* pwent; + char* pathname; + if(regs) + { + printf("Multiple open_registry(>\n"); + return; + } + pwent=getpwuid(getuid()); + pathname=(char*)malloc(strlen(pwent->pw_dir)+20); + strcpy(pathname, pwent->pw_dir); + strcat(pathname, "/.registry"); + fd=open(pathname, O_RDONLY); + free(pathname); + if(fd==-1) + { + printf("Creating new registry\n"); + create_registry(); + return; + } + read(fd, ®_size, 4); + regs=(struct reg_value*)malloc(reg_size*sizeof(struct reg_value)); + for(i=0; ipw_dir)+20); + strcpy(pathname, pwent->pw_dir); + strcat(pathname, "/.registry"); + fd=open(pathname, O_WRONLY | O_CREAT, 00777); + free(pathname); + if(fd==-1) + { + printf("Failed to open registry file for writing.\n"); + return; + } + write(fd, ®_size, 4); + for(i=0; iprev) + { + if(!strcmp(t->name, name)) + { + return t; + } + } + return 0; +} +static struct reg_value* find_value_by_name(const char* name) +{ + int i; + for(i=0; iprev) + { + if(t->handle==handle) + { + return t; + } + } + return 0; +} +static int generate_handle() +{ + static int zz=249; + zz++; + while((zz==HKEY_LOCAL_MACHINE) || (zz==HKEY_CURRENT_USER)) + zz++; + return zz; +} + +static reg_handle_t* insert_handle(long handle, const char* name) +{ + reg_handle_t* t; + t=(reg_handle_t*)malloc(sizeof(reg_handle_t)); + if(head==0) + { + t->prev=0; + } + else + { + head->next=t; + t->prev=head; + } + t->next=0; + 
t->name=(char*)malloc(strlen(name)+1); + strcpy(t->name, name); + t->handle=handle; + head=t; + return t; +} +static char* build_keyname(long key, const char* subkey) +{ + char* full_name; + reg_handle_t* t; + if((t=find_handle(key))==0) + { + TRACE("Invalid key\n"); + return NULL; + } + if(subkey==NULL) + subkey=""; + full_name=(char*)malloc(strlen(t->name)+strlen(subkey)+10); + strcpy(full_name, t->name); + strcat(full_name, "\\"); + strcat(full_name, subkey); + return full_name; +} +struct reg_value* insert_reg_value(int handle, const char* name, int type, void* value, int len) +{ + reg_handle_t* t; + struct reg_value* v; + char* fullname; + if((fullname=build_keyname(handle, name))==NULL) + { + TRACE("Invalid handle\n"); + return NULL; + } + + if((v=find_value_by_name(fullname))==0) + //creating new value in registry + { + if(regs==0) + create_registry(); + regs=(struct reg_value*)realloc(regs, sizeof(struct reg_value)*(reg_size+1)); + v=regs+reg_size; + reg_size++; + } + else + //replacing old one + { + free(v->value); + free(v->name); + } + v->type=type; + v->len=len; + v->value=(char*)malloc(len); + memcpy(v->value, value, len); + v->name=(char*)malloc(strlen(fullname)+1); + strcpy(v->name, fullname); + save_registry(); + return v; +} + +static void init_registry() +{ + printf("Initializing registry\n"); + open_registry(); + insert_handle(HKEY_LOCAL_MACHINE, "HKLM"); + insert_handle(HKEY_CURRENT_USER, "HKCU"); +} +static reg_handle_t* find_handle_2(long key, const char* subkey) +{ + char* full_name; + reg_handle_t* t; + if((t=find_handle(key))==0) + { + TRACE("Invalid key\n"); + return (reg_handle_t*)-1; + } + if(subkey==NULL) + return t; + full_name=(char*)malloc(strlen(t->name)+strlen(subkey)+10); + strcpy(full_name, t->name); + strcat(full_name, "\\"); + strcat(full_name, subkey); + t=find_handle_by_name(full_name); + free(full_name); + return t; +} + +long RegOpenKeyExA(long key, const char* subkey, long reserved, long access, int* newkey) +{ + char* 
full_name; + reg_handle_t* t; + struct reg_value* v; + TRACE("Opening key %s\n", subkey); + + if(!regs) + init_registry() +; +/* t=find_handle_2(key, subkey); + + if(t==0) + return -1; + + if(t==(reg_handle_t*)-1) + return -1; + +*/ full_name=build_keyname(key, subkey); + if(!full_name) + return -1; + v=find_value_by_name(full_name); + + t=insert_handle(generate_handle(), full_name); + *newkey=t->handle; + free(full_name); + + return 0; +} +long RegCloseKey(long key) +{ + reg_handle_t *handle; + if(key==HKEY_LOCAL_MACHINE) + return 0; + if(key==HKEY_CURRENT_USER) + return 0; + handle=find_handle(key); + if(handle==0) + return 0; + if(handle->prev) + handle->prev->next=handle->next; + if(handle->next) + handle->next->prev=handle->prev; + if(handle->name) + free(handle->name); + if(handle==head) + head=head->prev; + free(handle); + return 1; +} +long RegQueryValueExA(long key, const char* value, int* reserved, int* type, int* data, int* count) +{ + struct reg_value* t; + char* c; + TRACE("Querying value %s\n", value); + if(!regs) + init_registry() +; + c=build_keyname(key, value); + if(c==NULL) + return 1; + if((t=find_value_by_name(c))==0) + { + free(c); + return 2; + } + free(c); + if(type) + *type=t->type; + if(data) + memcpy(data, t->value, (t->len<*count)?t->len:*count); + if(count) + { + if(*countlen) + { + *count=t->len; + return ERROR_MORE_DATA; + }else return 0; + } + return 0; +} +long RegCreateKeyExA(long key, const char* name, long reserved, + void* classs, long options, long security, + void* sec_attr, int* newkey, int* status) +{ + reg_handle_t* t; + char* fullname; + struct reg_value* v; + TRACE("Creating/Opening key %s\n", name); + if(!regs) + init_registry() +; + fullname=build_keyname(key, name); + if(fullname==NULL) + return 1; + v=find_value_by_name(fullname); + if(v==0) + { + int qw=45708; + v=insert_reg_value(key, name, DIR, &qw, 4); + *status=REG_CREATED_NEW_KEY; +// return 0; + } + else + *status=REG_OPENED_EXISTING_KEY; + + 
t=insert_handle(generate_handle(), fullname); + *newkey=t->handle; + free(fullname); + return 0; +} +long RegSetValueExA(long key, const char* name, long v1, long v2, void* data, long size) +{ + struct reg_value* t; + char* c; + TRACE("Request to set value %s\n", name); + if(!regs) + init_registry() +; + c=build_keyname(key, name); + if(c==NULL) + return 1; + insert_reg_value(key, name, v2, data, size); + free(c); + return 0; +} diff --git a/libs/winloader/vfl.c b/libs/winloader/vfl.c index 0236647e18..b6e26a4d8b 100644 --- a/libs/winloader/vfl.c +++ b/libs/winloader/vfl.c @@ -31,6 +31,21 @@ HIC VFWAPI ICLocate(long fccType, long fccHandler, LPBITMAPINFOHEADER lpbiIn, LP #define OpenDriverA DrvOpen extern HDRVR VFWAPI DrvOpen(long); +#define STORE_ALL \ + __asm__ ( \ + "push %%ebx\n\t" \ + "push %%ecx\n\t" \ + "push %%edx\n\t" \ + "push %%esi\n\t" \ + "push %%edi\n\t"::) + +#define REST_ALL \ + __asm__ ( \ + "pop %%edi\n\t" \ + "pop %%esi\n\t" \ + "pop %%edx\n\t" \ + "pop %%ecx\n\t" \ + "pop %%ebx\n\t"::) typedef struct { @@ -249,7 +264,7 @@ ICCompress( long VFWAPIV ICDecompress(HIC hic,long dwFlags,LPBITMAPINFOHEADER lpbiFormat,void* lpData,LPBITMAPINFOHEADER lpbi,void* lpBits) { ICDECOMPRESS icd; - + int result; icd.dwFlags = dwFlags; icd.lpbiInput = lpbiFormat; icd.lpInput = lpData; @@ -257,7 +272,10 @@ ICDecompress(HIC hic,long dwFlags,LPBITMAPINFOHEADER lpbiFormat,void* lpData,LPB icd.lpbiOutput = lpbi; icd.lpOutput = lpBits; icd.ckid = 0; - return ICSendMessage(hic,ICM_DECOMPRESS,(long)&icd,sizeof(icd)); + STORE_ALL; + result=ICSendMessage(hic,ICM_DECOMPRESS,(long)&icd,sizeof(icd)); + REST_ALL; + return result; } /*********************************************************************** @@ -267,7 +285,7 @@ LRESULT VFWAPI ICSendMessage(HIC hic,unsigned int msg,long lParam1,long lParam2) { LRESULT ret; WINE_HIC *whic = (WINE_HIC*)hic; - + char qw[200]; #define XX(x) case x: TRACE("(0x%08lx,"#x",0x%08lx,0x%08lx)\n",(long)hic,lParam1,lParam2);break; /* switch (msg) 
{ @@ -317,25 +335,12 @@ ICSendMessage(HIC hic,unsigned int msg,long lParam1,long lParam2) { */ // if (whic->driverproc) { // FIXME("(0x%08lx,0x%08lx,0x%08lx,0x%08lx), calling %p\n",(long)hic,(long)msg,lParam1,lParam2,whic->driverproc); -#define STORE_ALL \ - __asm__ ( \ - "push %%ebx\n\t" \ - "push %%ecx\n\t" \ - "push %%edx\n\t" \ - "push %%esi\n\t" \ - "push %%edi\n\t"::) - -#define REST_ALL \ - __asm__ ( \ - "pop %%edi\n\t" \ - "pop %%esi\n\t" \ - "pop %%edx\n\t" \ - "pop %%ecx\n\t" \ - "pop %%ebx\n\t"::) // printf("private=%x\n", whic->private); + __asm__ __volatile__ ("fsave (%0)\n\t": :"r"(&qw)); STORE_ALL; - ret = whic->driverproc(whic->private,1,msg,lParam1,lParam2); + ret = whic->driverproc(whic->private,1,msg,lParam1,lParam2); REST_ALL; + __asm__ __volatile__ ("frstor (%0)\n\t": :"r"(&qw)); // } else // ret = SendDriverMessage(whic->hdrv,msg,lParam1,lParam2); diff --git a/libs/winloader/wineacm.h b/libs/winloader/wineacm.h new file mode 100644 index 0000000000..f215d754d7 --- /dev/null +++ b/libs/winloader/wineacm.h @@ -0,0 +1,55 @@ +/* -*- tab-width: 8; c-basic-offset: 4 -*- */ + +/*********************************************************************** + * Wine specific - Win32 + */ +typedef struct _WINE_ACMDRIVERID *PWINE_ACMDRIVERID; +typedef struct _WINE_ACMDRIVER *PWINE_ACMDRIVER; + +typedef struct _WINE_ACMOBJ +{ + PWINE_ACMDRIVERID pACMDriverID; +} WINE_ACMOBJ, *PWINE_ACMOBJ; + +typedef struct _WINE_ACMDRIVER +{ + WINE_ACMOBJ obj; + HDRVR hDrvr; + DRIVERPROC pfnDriverProc; + PWINE_ACMDRIVER pNextACMDriver; +} WINE_ACMDRIVER; + +typedef struct _WINE_ACMSTREAM +{ + WINE_ACMOBJ obj; + PWINE_ACMDRIVER pDrv; + ACMDRVSTREAMINSTANCE drvInst; + HACMDRIVER hAcmDriver; +} WINE_ACMSTREAM, *PWINE_ACMSTREAM; + +typedef struct _WINE_ACMDRIVERID +{ + LPSTR pszDriverAlias; + LPSTR pszFileName; + HINSTANCE hInstModule; /* NULL if global */ + DWORD dwProcessID; /* ID of process which installed a local driver */ + WIN_BOOL bEnabled; + PWINE_ACMDRIVER pACMDriverList; 
+ PWINE_ACMDRIVERID pNextACMDriverID; + PWINE_ACMDRIVERID pPrevACMDriverID; +} WINE_ACMDRIVERID; + +/* From internal.c */ +extern HANDLE MSACM_hHeap; +extern PWINE_ACMDRIVERID MSACM_pFirstACMDriverID; +extern PWINE_ACMDRIVERID MSACM_pLastACMDriverID; +PWINE_ACMDRIVERID MSACM_RegisterDriver( + LPSTR pszDriverAlias, LPSTR pszFileName, + HINSTANCE hinstModule); +void MSACM_RegisterAllDrivers(void); +PWINE_ACMDRIVERID MSACM_UnregisterDriver(PWINE_ACMDRIVERID p); +void MSACM_UnregisterAllDrivers(void); +PWINE_ACMDRIVERID MSACM_GetDriverID(HACMDRIVERID hDriverID); +PWINE_ACMDRIVER MSACM_GetDriver(HACMDRIVER hDriver); +PWINE_ACMOBJ MSACM_GetObj(HACMOBJ hObj); +