Added general IDCT library. Added IEEE testbed.

Original commit message from CVS:
Added general IDCT library. Added IEEE testbed.
Update to the windows codec loader.
fixes in the riff parser/encoder.
cleanup in the getbits/putbits API.
This commit is contained in:
Wim Taymans 2000-08-14 10:11:04 +00:00
parent 82acefa7b0
commit 001bbb7002
29 changed files with 4880 additions and 981 deletions

View file

@ -1,3 +1,3 @@
SUBDIRS = riff colorspace getbits putbits videoscale winloader
SUBDIRS = riff colorspace getbits putbits videoscale winloader idct
DIST_SUBDIRS = riff colorspace getbits putbits videoscale winloader
DIST_SUBDIRS = riff colorspace getbits putbits videoscale winloader idct

View file

@ -200,9 +200,9 @@ void getbits_back_int(gst_getbits_t *gb,unsigned long bits) {
}
}
gb->dword = bswap_32(*((unsigned long *)(gb->ptr)));
fprintf(stderr,"orignal new loaded word is %08x\n",gb->dword);
fprintf(stderr,"orignal new loaded word is %08lx\n",gb->dword);
gb->dword <<= (32 - gb->bits);
fprintf(stderr,"shifted (by %lu) word is %08x\n",gb->bits,gb->dword);
fprintf(stderr,"shifted (by %lu) word is %08lx\n",gb->bits,gb->dword);
}
void getbits_byteback_int(gst_getbits_t *gb,unsigned long bytes) {

View file

@ -277,7 +277,7 @@ extern unsigned long gst_getbits_nBitMask[];
}
#define gst_flushbits(gb, num) \
#define gst_flushbitsn(gb, num) \
{ \
(gb)->bits += num; \
\

19
libs/idct/Makefile.am Normal file
View file

@ -0,0 +1,19 @@
# Builds the IDCT plugin library (libgstidct.la) and the ieeetest program
# that checks an IDCT implementation against IEEE Std 1180-1990.
filterdir = $(libdir)/gst
filter_LTLIBRARIES = libgstidct.la
libgstidct_la_SOURCES = fastintidct.c floatidct.c gstidct.c intidct.c mmxidct.S mmx32idct.c
bin_PROGRAMS = ieeetest
ieeetest_SOURCES = ieeetest.c
# libgst.la is produced by the build, so it must be looked up in the build
# tree (top_builddir); top_srcdir only works when building inside the
# source tree.
ieeetest_LDADD = libgstidct.la $(GLIB_LIBS) $(GTK_LIBS) $(top_builddir)/gst/libgst.la
ieeetest_CFLAGS = $(shell gnome-config --cflags gnomeui) -g -Wall
ieeetest_LDFLAGS = $(shell gnome-config --libs gnomeui)
noinst_HEADERS = gstidct.h
CFLAGS += -Wall -O2 -funroll-all-loops -finline-functions -ffast-math
INCLUDES = $(GLIB_CFLAGS) $(GTK_CFLAGS) -I$(top_srcdir) -I$(top_srcdir)/include
LDADD = $(GLIB_LIBS) $(GTK_LIBS) $(top_builddir)/gst/libgst.la

48
libs/idct/README Normal file
View file

@ -0,0 +1,48 @@
This archive contains a quick & dirty implementation of the IEEE Standard
1180-1990 accuracy test for inverse DCT. It is not guaranteed to be
correct ... but if you find any bugs, please let me know (by email to
tgl@cs.cmu.edu).
The test harness consists of the C program ieeetest.c and shell script
doieee. For comparison purposes I have also supplied a copy of jrevdct.c,
the inverse DCT routine from release 4 of the Independent JPEG Group's
free JPEG software. (jrevdct.c is slightly modified from the IJG release
so that it will compile without the IJG include files.) jrevdct.c passes
the 1180 test --- or at least, this program thinks so. jrevdct.out is
the output from a test run.
Note that numerical results may vary somewhat across machines. This appears
to be mostly due to differing results from the cosine function.
INSTALLATION:
Check the Makefile, change CC and CFLAGS if needed. Then say "make".
If your C compiler is non-ANSI, you may need to change includes and/or
function headers.
To test a different IDCT routine, link with that routine instead of
jrevdct.o. You will need to modify dct.h and/or ieeetest.c if your
routine's calling convention is not in-place modification of an array
of 64 "short"s.
USAGE:
The standard test procedure is
doieee >outputfile
Expect it to take a while (almost 80 minutes on my old 68030 box).
Each pass will emit a row of 100 dots as it runs (the doieee script in this
directory runs six passes for each of the five IDCT methods).
You can grep the output for the word FAILS if you just want to know
yea or nay.
LEGAL MUMBO-JUMBO:
I hereby release the test harness to the public domain.
Thomas G. Lane, 22 Nov 1993
IMPORTANT: jrevdct.c is NOT public domain, but is copyrighted free software
(not the same thing at all). It is subject to IJG's distribution terms, which
primarily state that if you incorporate it into a program you must acknowledge
IJG's contribution in your program documentation. For more details and the
complete IJG software, see the IJG FTP archive at ftp.uu.net, in directory
/graphics/jpeg.

29
libs/idct/dct.h Normal file
View file

@ -0,0 +1,29 @@
/* define DCT types */
#ifndef __GST_IDCT_DCT_H__
#define __GST_IDCT_DCT_H__
/*
* DCTSIZE underlying (1d) transform size
* DCTSIZE2 DCTSIZE squared
*/
#define DCTSIZE (8)
#define DCTSIZE2 (DCTSIZE*DCTSIZE)
#define EIGHT_BIT_SAMPLES /* needed in jrevdct.c */
typedef short DCTELEM; /* must be at least 16 bits */
typedef DCTELEM DCTBLOCK[DCTSIZE2];
typedef long INT32; /* must be at least 32 bits */
/*
* IDCT entry points. Every routine transforms one block of DCTSIZE2
* coefficients in place. The two init functions fill the lookup tables
* used by the matching transform and must be called before it.
*/
/* Fully prototyped so that calls and function-pointer assignments are
* type-checked (an empty parameter list declares no prototype before C23). */
extern void gst_idct_int_idct (short *block);
extern void gst_idct_init_fast_int_idct (void);
extern void gst_idct_fast_int_idct (short *block);
extern void gst_idct_mmx_idct (short *block);
extern void gst_idct_mmx32_idct (short *block);
extern void gst_idct_init_float_idct(void);
extern void gst_idct_float_idct (short *block);
#endif /* __GST_IDCT_DCT_H__ */

15
libs/idct/doieee Executable file
View file

@ -0,0 +1,15 @@
# perform IEEE 1180 test series
# Typical usage:
# doieee >outfile
# The script takes no arguments: it always runs ./ieeetest from the current
# directory and passes the loop variable (1-5) as its first argument.
for i in 1 2 3 4 5;
do
# six runs per value of $i: three (minpix, maxpix) ranges, each with
# sign +1 and then sign -1, 10000 iterations per run
time ./ieeetest $i -256 255 1 10000
time ./ieeetest $i -5 5 1 10000
time ./ieeetest $i -300 300 1 10000
time ./ieeetest $i -256 255 -1 10000
time ./ieeetest $i -5 5 -1 10000
time ./ieeetest $i -300 300 -1 10000
done

207
libs/idct/fastintidct.c Normal file
View file

@ -0,0 +1,207 @@
/* idct.c, inverse fast discrete cosine transform */
/* Copyright (C) 1996, MPEG Software Simulation Group. All Rights Reserved. */
/*
* Disclaimer of Warranty
*
* These software programs are available to the user without any license fee or
* royalty on an "as is" basis. The MPEG Software Simulation Group disclaims
* any and all warranties, whether express, implied, or statuary, including any
* implied warranties or merchantability or of fitness for a particular
* purpose. In no event shall the copyright-holder be liable for any
* incidental, punitive, or consequential damages of any kind whatsoever
* arising from the use of these programs.
*
* This disclaimer of warranty extends to the user of these programs and user's
* customers, employees, agents, transferees, successors, and assigns.
*
* The MPEG Software Simulation Group does not represent or warrant that the
* programs furnished hereunder are free of infringement of any third-party
* patents.
*
* Commercial implementations of MPEG-1 and MPEG-2 video, including shareware,
* are subject to royalty fees to patent holders. Many of these patents are
* general enough such that they are unavoidable regardless of implementation
* design.
*
*/
/**********************************************************/
/* inverse two dimensional DCT, Chen-Wang algorithm */
/* (cf. IEEE ASSP-32, pp. 803-816, Aug. 1984) */
/* 32-bit integer arithmetic (8 bit coefficients) */
/* 11 mults, 29 adds per DCT */
/* sE, 18.8.91 */
/**********************************************************/
/* coefficients extended to 12 bit for IEEE1180-1990 */
/* compliance sE, 2.1.94 */
/**********************************************************/
/* this code assumes >> to be a two's-complement arithmetic */
/* right shift: (-2)>>1 == -1 , (-3)>>1 == -2 */
#define W1 2841 /* 2048*sqrt(2)*cos(1*pi/16) */
#define W2 2676 /* 2048*sqrt(2)*cos(2*pi/16) */
#define W3 2408 /* 2048*sqrt(2)*cos(3*pi/16) */
#define W5 1609 /* 2048*sqrt(2)*cos(5*pi/16) */
#define W6 1108 /* 2048*sqrt(2)*cos(6*pi/16) */
#define W7 565 /* 2048*sqrt(2)*cos(7*pi/16) */
#include "dct.h"
/* private data */
static short iclip[1024]; /* clipping table */
static short *iclp;
/* private prototypes */
static void idctrow (short *blk);
static void idctcol (short *blk);
/* row (horizontal) IDCT
*
* 7 pi 1
* dst[k] = sum c[l] * src[l] * cos( -- * ( k + - ) * l )
* l=0 8 2
*
* where: c[0] = 128
* c[1..7] = 128*sqrt(2)
*/
/* In-place 1-D IDCT of one row: reads and overwrites blk[0..7].
 *
 * Scaling of the signed inputs is written as multiplication rather than
 * as a left shift, because left-shifting a negative value is undefined
 * behaviour in C; the products are the values the shifts were meant to
 * produce. The right shifts still rely on arithmetic shifting, as noted
 * at the top of this file.
 */
static void idctrow(short *blk)
{
  int x0, x1, x2, x3, x4, x5, x6, x7, x8;

  /* shortcut: when blk[1..7] are all zero every output equals blk[0]*8 */
  if (!((x1 = blk[4]*2048) | (x2 = blk[6]) | (x3 = blk[2]) |
        (x4 = blk[1]) | (x5 = blk[7]) | (x6 = blk[5]) | (x7 = blk[3])))
  {
    blk[0]=blk[1]=blk[2]=blk[3]=blk[4]=blk[5]=blk[6]=blk[7]=blk[0]*8;
    return;
  }

  x0 = (blk[0]*2048) + 128; /* for proper rounding in the fourth stage */

  /* first stage */
  x8 = W7*(x4+x5);
  x4 = x8 + (W1-W7)*x4;
  x5 = x8 - (W1+W7)*x5;
  x8 = W3*(x6+x7);
  x6 = x8 - (W3-W5)*x6;
  x7 = x8 - (W3+W5)*x7;

  /* second stage */
  x8 = x0 + x1;
  x0 -= x1;
  x1 = W6*(x3+x2);
  x2 = x1 - (W2+W6)*x2;
  x3 = x1 + (W2-W6)*x3;
  x1 = x4 + x6;
  x4 -= x6;
  x6 = x5 + x7;
  x5 -= x7;

  /* third stage */
  x7 = x8 + x3;
  x8 -= x3;
  x3 = x0 + x2;
  x0 -= x2;
  x2 = (181*(x4+x5)+128)>>8;
  x4 = (181*(x4-x5)+128)>>8;

  /* fourth stage */
  blk[0] = (x7+x1)>>8;
  blk[1] = (x3+x2)>>8;
  blk[2] = (x0+x4)>>8;
  blk[3] = (x8+x6)>>8;
  blk[4] = (x8-x6)>>8;
  blk[5] = (x0-x4)>>8;
  blk[6] = (x3-x2)>>8;
  blk[7] = (x7-x1)>>8;
}
/* column (vertical) IDCT
*
* 7 pi 1
* dst[8*k] = sum c[l] * src[8*l] * cos( -- * ( k + - ) * l )
* l=0 8 2
*
* where: c[0] = 1/1024
* c[1..7] = (1/1024)*sqrt(2)
*/
/* In-place 1-D IDCT of one column: reads and overwrites blk[8*0..8*7].
 * Every output is passed through the iclp clipping table, which
 * gst_idct_init_fast_int_idct() must have filled beforehand.
 *
 * Scaling of the signed inputs is written as multiplication rather than
 * as a left shift, because left-shifting a negative value is undefined
 * behaviour in C.
 */
static void idctcol(short *blk)
{
  int x0, x1, x2, x3, x4, x5, x6, x7, x8;

  /* shortcut: when rows 1..7 of this column are all zero every output is
   * the rounded, clipped value derived from blk[8*0] alone */
  if (!((x1 = (blk[8*4]*256)) | (x2 = blk[8*6]) | (x3 = blk[8*2]) |
        (x4 = blk[8*1]) | (x5 = blk[8*7]) | (x6 = blk[8*5]) | (x7 = blk[8*3])))
  {
    blk[8*0]=blk[8*1]=blk[8*2]=blk[8*3]=blk[8*4]=blk[8*5]=blk[8*6]=blk[8*7]=
      iclp[(blk[8*0]+32)>>6];
    return;
  }

  x0 = (blk[8*0]*256) + 8192;

  /* first stage */
  x8 = W7*(x4+x5) + 4;
  x4 = (x8+(W1-W7)*x4)>>3;
  x5 = (x8-(W1+W7)*x5)>>3;
  x8 = W3*(x6+x7) + 4;
  x6 = (x8-(W3-W5)*x6)>>3;
  x7 = (x8-(W3+W5)*x7)>>3;

  /* second stage */
  x8 = x0 + x1;
  x0 -= x1;
  x1 = W6*(x3+x2) + 4;
  x2 = (x1-(W2+W6)*x2)>>3;
  x3 = (x1+(W2-W6)*x3)>>3;
  x1 = x4 + x6;
  x4 -= x6;
  x6 = x5 + x7;
  x5 -= x7;

  /* third stage */
  x7 = x8 + x3;
  x8 -= x3;
  x3 = x0 + x2;
  x0 -= x2;
  x2 = (181*(x4+x5)+128)>>8;
  x4 = (181*(x4-x5)+128)>>8;

  /* fourth stage */
  blk[8*0] = iclp[(x7+x1)>>14];
  blk[8*1] = iclp[(x3+x2)>>14];
  blk[8*2] = iclp[(x0+x4)>>14];
  blk[8*3] = iclp[(x8+x6)>>14];
  blk[8*4] = iclp[(x8-x6)>>14];
  blk[8*5] = iclp[(x0-x4)>>14];
  blk[8*6] = iclp[(x3-x2)>>14];
  blk[8*7] = iclp[(x7-x1)>>14];
}
/* two dimensional inverse discrete cosine transform */
/* Transform an 8x8 block in place: a row pass over each of the eight
 * rows followed by a column pass over each of the eight columns. */
void gst_idct_fast_int_idct(short *block)
{
  int row;
  int col;

  for (row = 0; row < 8; row++) {
    idctrow(&block[8 * row]);
  }

  for (col = 0; col < 8; col++) {
    idctcol(&block[col]);
  }
}
/* Build the clipping table: iclp points at the middle of iclip so that it
 * can be indexed with -512..511, and each entry holds its index clamped
 * to the range -256..255. */
void gst_idct_init_fast_int_idct(void)
{
  int value;

  iclp = iclip + 512;
  for (value = -512; value < 512; value++) {
    if (value < -256) {
      iclp[value] = -256;
    } else if (value > 255) {
      iclp[value] = 255;
    } else {
      iclp[value] = value;
    }
  }
}

102
libs/idct/floatidct.c Normal file
View file

@ -0,0 +1,102 @@
/* Reference_IDCT.c, Inverse Discrete Cosine Transform, double precision */
/* Copyright (C) 1996, MPEG Software Simulation Group. All Rights Reserved. */
/*
* Disclaimer of Warranty
*
* These software programs are available to the user without any license fee or
* royalty on an "as is" basis. The MPEG Software Simulation Group disclaims
* any and all warranties, whether express, implied, or statuary, including any
* implied warranties or merchantability or of fitness for a particular
* purpose. In no event shall the copyright-holder be liable for any
* incidental, punitive, or consequential damages of any kind whatsoever
* arising from the use of these programs.
*
* This disclaimer of warranty extends to the user of these programs and user's
* customers, employees, agents, transferees, successors, and assigns.
*
* The MPEG Software Simulation Group does not represent or warrant that the
* programs furnished hereunder are free of infringement of any third-party
* patents.
*
* Commercial implementations of MPEG-1 and MPEG-2 video, including shareware,
* are subject to royalty fees to patent holders. Many of these patents are
* general enough such that they are unavoidable regardless of implementation
* design.
*
*/
/* Perform IEEE 1180 reference (64-bit floating point, separable 8x1
* direct matrix multiply) Inverse Discrete Cosine Transform
*/
/* Here we use math.h to generate constants. Compiler results may
vary a little */
#include <math.h>
#ifndef PI
# ifdef M_PI
# define PI M_PI
# else
# define PI 3.14159265358979323846
# endif
#endif
/* private data */
/* cosine transform matrix for 8x1 IDCT */
static double gst_idct_float_c[8][8];
/* initialize DCT coefficient matrix */
/* Fill gst_idct_float_c with the 8x1 IDCT basis: row k holds
 * scale(k) * cos((PI/8) * k * (n + 0.5)) for n = 0..7, where scale is
 * sqrt(1/8) for k == 0 and 1/2 otherwise. */
void gst_idct_init_float_idct(void)
{
  int k;
  int n;

  for (k = 0; k < 8; k++) {
    double scale;

    if (k == 0) {
      scale = sqrt(0.125);
    } else {
      scale = 0.5;
    }

    for (n = 0; n < 8; n++) {
      gst_idct_float_c[k][n] = scale*cos((PI/8.0)*k*(n + 0.5));
    }
  }
}
/* perform IDCT matrix multiply for 8x8 coefficient block */
/* Reference IDCT of one 8x8 block, in place: a double-precision matrix
 * multiply along the rows, then along the columns, then rounding to the
 * nearest integer and clamping to -256..255. */
void gst_idct_float_idct(short *block)
{
  double rows[64];
  double acc;
  int r, c, k;
  int rounded;

  /* first pass: 1-D transform of every row into the scratch buffer */
  for (r = 0; r < 8; r++) {
    for (c = 0; c < 8; c++) {
      acc = 0.0;
      for (k = 0; k < 8; k++) {
        acc += gst_idct_float_c[k][c]*block[8*r+k];
      }
      rows[8*r+c] = acc;
    }
  }

  /* second pass: 1-D transform of every column; walking column-major
     takes the place of an explicit transpose */
  for (c = 0; c < 8; c++) {
    for (r = 0; r < 8; r++) {
      acc = 0.0;
      for (k = 0; k < 8; k++) {
        acc += gst_idct_float_c[k][r]*rows[8*k+c];
      }

      rounded = (int) floor(acc+0.5);
      if (rounded < -256) {
        block[8*r+c] = -256;
      } else if (rounded > 255) {
        block[8*r+c] = 255;
      } else {
        block[8*r+c] = rounded;
      }
    }
  }
}

111
libs/idct/gstidct.c Normal file
View file

@ -0,0 +1,111 @@
/* Gnome-Streamer
* Copyright (C) <1999> Erik Walthinsen <omega@cse.ogi.edu>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 02111-1307, USA.
*/
#include <config.h>
#include "gstidct.h"
#include "dct.h"
static void gst_idct_int_sparse_idct(short *data);
/* Allocate a GstIDCT bound to the requested implementation.
 *
 * GST_IDCT_DEFAULT resolves to GST_IDCT_MMX32 when HAVE_LIBMMX is defined
 * and to GST_IDCT_FAST_INT otherwise. The fast-int and float methods have
 * their lookup tables initialised here. The two MMX methods are flagged
 * as needing transposed input.
 *
 * Returns the new object, or NULL (after printing a message) when the
 * method is not one of the known values.
 */
GstIDCT *gst_idct_new(GstIDCTMethod method)
{
  GstIDCT *idct = g_malloc(sizeof(GstIDCT));
  GstIDCTFunction transform = NULL;
  gboolean transposed = FALSE;

  if (method == GST_IDCT_DEFAULT) {
#ifdef HAVE_LIBMMX
    method = GST_IDCT_MMX32;
#else
    method = GST_IDCT_FAST_INT;
#endif
  }

  switch (method) {
    case GST_IDCT_FAST_INT:
      g_print("GstIDCT: using fast_int_idct\n");
      gst_idct_init_fast_int_idct();
      transform = gst_idct_fast_int_idct;
      break;
    case GST_IDCT_INT:
      g_print("GstIDCT: using int_idct\n");
      transform = gst_idct_int_idct;
      break;
    case GST_IDCT_FLOAT:
      g_print("GstIDCT: using float_idct\n");
      gst_idct_init_float_idct();
      transform = gst_idct_float_idct;
      break;
    case GST_IDCT_MMX:
      g_print("GstIDCT: using MMX_idct\n");
      transform = gst_idct_mmx_idct;
      transposed = TRUE;
      break;
    case GST_IDCT_MMX32:
      g_print("GstIDCT: using MMX32_idct\n");
      transform = gst_idct_mmx32_idct;
      transposed = TRUE;
      break;
    default:
      g_print("GstIDCT: method not supported\n");
      g_free(idct);
      return NULL;
  }

  idct->convert = transform;
  idct->convert_sparse = gst_idct_int_sparse_idct;
  idct->need_transpose = transposed;
  return idct;
}
/* IDCT of a block whose only non-zero coefficient is the DC term data[0].
 *
 * The result is a constant block: all 64 samples are set to data[0] / 8,
 * rounded to nearest with halves rounded away from zero.
 *
 * The samples are stored one short at a time. Writing them in pairs
 * through a 32-bit pointer would need suitable alignment of `data`, would
 * access short objects through an int lvalue, and would require shifting
 * a possibly negative value left. The rounding is done in int so that
 * negating the most negative short cannot overflow.
 */
static void gst_idct_int_sparse_idct(short *data)
{
  enum { SPARSE_BLOCK_ELEMS = 64 }; /* 8x8 coefficients per block */
  int dc = data[0];
  short val;
  int i;

  if (dc < 0) {
    val = (short) -((-dc + (8 >> 1)) / 8);
  }
  else {
    val = (short) ((dc + (8 >> 1)) / 8);
  }

  for (i = 0; i < SPARSE_BLOCK_ELEMS; i++) {
    data[i] = val;
  }
}
/* Release a GstIDCT obtained from gst_idct_new(). A NULL argument is
 * rejected by the g_return_if_fail() check and nothing is freed. */
void gst_idct_destroy(GstIDCT *idct)
{
g_return_if_fail(idct != NULL);
g_free(idct);
}

53
libs/idct/gstidct.h Normal file
View file

@ -0,0 +1,53 @@
/* Gnome-Streamer
* Copyright (C) <1999> Erik Walthinsen <omega@cse.ogi.edu>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 02111-1307, USA.
*/
#ifndef __GST_IDCT_H__
#define __GST_IDCT_H__
#include <glib.h>
/* Selects the IDCT implementation that gst_idct_new() installs.
 * GST_IDCT_DEFAULT lets gst_idct_new() choose: MMX32 when HAVE_LIBMMX is
 * defined at build time, FAST_INT otherwise. */
typedef enum {
GST_IDCT_DEFAULT, // default
GST_IDCT_INT, // integer IDCT
GST_IDCT_FAST_INT, // fastest integer
GST_IDCT_FLOAT, // accurate float version
GST_IDCT_MMX, // fast MMX (not accurate)
GST_IDCT_MMX32, // accurate MMX
} GstIDCTMethod;
typedef struct _GstIDCT GstIDCT;
/* Signature shared by every IDCT routine: transforms `block` in place. */
typedef void (*GstIDCTFunction) (gshort *block);
/* TRUE when the selected routine expects its input block transposed;
 * gst_idct_new() sets this for the two MMX methods. */
#define GST_IDCT_TRANSPOSE(idct) ((idct)->need_transpose)
struct _GstIDCT {
/* private */
/* full transform selected by the method passed to gst_idct_new() */
GstIDCTFunction convert;
/* transform used through gst_idct_convert_sparse() */
GstIDCTFunction convert_sparse;
/* see GST_IDCT_TRANSPOSE() */
gboolean need_transpose;
};
/* Returns a new GstIDCT, or NULL when the method is not supported. */
GstIDCT *gst_idct_new(GstIDCTMethod method);
/* Both macros call through the function pointers stored in `idct`. */
#define gst_idct_convert(idct, blocks) (idct)->convert((blocks))
#define gst_idct_convert_sparse(idct, blocks) (idct)->convert_sparse((blocks))
/* Frees a GstIDCT returned by gst_idct_new(). */
void gst_idct_destroy(GstIDCT *idct);
#endif /* __GST_IDCT_H__ */

339
libs/idct/ieeetest.c Normal file
View file

@ -0,0 +1,339 @@
/*
* ieeetest.c --- test IDCT code against the IEEE Std 1180-1990 spec
*
* Note that this does only one pass of the test.
* Six invocations of ieeetest are needed to complete the entire spec.
* The shell script "doieee" performs the complete test.
*
* Written by Tom Lane (tgl@cs.cmu.edu).
* Released to public domain 11/22/93.
*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <math.h>
#include <gst/gst.h>
#include "gstidct.h"
#include "dct.h"
/* prototypes */
void usage (char *msg);
long ieeerand (long L, long H);
void dct_init(void);
void ref_fdct(DCTELEM block[8][8]);
void ref_idct(DCTELEM block[8][8]);
/* error stat accumulators -- assume initialized to 0 */
long sumerrs[DCTSIZE2];
long sumsqerrs[DCTSIZE2];
int maxerr[DCTSIZE2];
/* Verdict string for the report: "meets" when the magnitude of val does
 * not exceed limit, "FAILS" otherwise. */
char * meets (double val, double limit)
{
  double magnitude = fabs(val);

  if (magnitude <= limit) {
    return "meets";
  }
  return "FAILS";
}
/* Store the x86 time-stamp counter in *dst.
 *
 * rdtsc leaves the low 32 bits in EAX and the high 32 bits in EDX. Both
 * halves are received in local variables and combined with a shift,
 * instead of writing through guint32 pointers into *dst, so the result
 * does not depend on byte order or on type punning. EAX and EDX are
 * outputs and therefore must not also be listed as clobbers.
 *
 * static: a plain inline definition provides no external definition
 * under C99 rules, which can fail to link when the call is not inlined.
 */
static __inline__ void read_tsc(guint64 *dst) {
  guint32 lo, hi;

  __asm__ __volatile__
    ("rdtsc"
     : "=a" (lo), "=d" (hi));
  *dst = (((guint64) hi) << 32) | (guint64) lo;
}
int
main(int argc, char **argv)
{
long minpix, maxpix, sign;
long curiter, niters;
int i, j;
double max, total;
int method;
DCTELEM block[DCTSIZE2]; /* random source data */
DCTELEM refcoefs[DCTSIZE2]; /* coefs from reference FDCT */
DCTELEM refout[DCTSIZE2]; /* output from reference IDCT */
DCTELEM testout[DCTSIZE2]; /* output from test IDCT */
GstIDCT *idct;
guint64 tscstart, tscmin = ~0, tscmax = 0;
guint64 tscstop;
/* Argument parsing --- not very bulletproof at all */
if (argc != 6) usage(NULL);
method = atoi(argv[1]);
minpix = atoi(argv[2]);
maxpix = atoi(argv[3]);
sign = atoi(argv[4]);
niters = atol(argv[5]);
gst_library_load("gstidct");
idct = gst_idct_new(method);
dct_init();
/* Loop once per generated random-data block */
for (curiter = 0; curiter < niters; curiter++) {
/* generate a pseudo-random block of data */
for (i = 0; i < DCTSIZE2; i++)
block[i] = (DCTELEM) (ieeerand(-minpix,maxpix) * sign);
/* perform reference FDCT */
memcpy(refcoefs, block, sizeof(DCTELEM)*DCTSIZE2);
ref_fdct(refcoefs);
/* clip */
for (i = 0; i < DCTSIZE2; i++) {
if (refcoefs[i] < -2048) refcoefs[i] = -2048;
else if (refcoefs[i] > 2047) refcoefs[i] = 2047;
}
/* perform reference IDCT */
memcpy(refout, refcoefs, sizeof(DCTELEM)*DCTSIZE2);
ref_idct(refout);
/* clip */
for (i = 0; i < DCTSIZE2; i++) {
if (refout[i] < -256) refout[i] = -256;
else if (refout[i] > 255) refout[i] = 255;
}
/* perform test IDCT */
if (GST_IDCT_TRANSPOSE(idct)) {
for (j = 0; j < DCTSIZE; j++) {
for (i = 0; i < DCTSIZE; i++) {
testout[i*DCTSIZE+j] = refcoefs[j*DCTSIZE+i];
}
}
}
else {
memcpy(testout, refcoefs, sizeof(DCTELEM)*DCTSIZE2);
}
read_tsc(&tscstart);
gst_idct_convert(idct, testout);
read_tsc(&tscstop);
//printf("time %llu, %llu %lld\n", tscstart, tscstop, tscstop-tscstart);
if (tscstop - tscstart < tscmin) tscmin = tscstop-tscstart;
if (tscstop - tscstart > tscmax) tscmax = tscstop-tscstart;
/* clip */
for (i = 0; i < DCTSIZE2; i++) {
if (testout[i] < -256) testout[i] = -256;
else if (testout[i] > 255) testout[i] = 255;
}
/* accumulate error stats */
for (i = 0; i < DCTSIZE2; i++) {
register int err = testout[i] - refout[i];
sumerrs[i] += err;
sumsqerrs[i] += err * err;
if (err < 0) err = -err;
if (maxerr[i] < err) maxerr[i] = err;
}
if (curiter % 100 == 99) {
fprintf(stderr, ".");
fflush(stderr);
}
}
fprintf(stderr, "\n");
/* print results */
printf("IEEE test conditions: -L = %ld, +H = %ld, sign = %ld, #iters = %ld\n",
minpix, maxpix, sign, niters);
printf("Speed, min time %lld, max %lld\n", tscmin, tscmax);
printf("Peak absolute values of errors:\n");
for (i = 0, j = 0; i < DCTSIZE2; i++) {
if (j < maxerr[i]) j = maxerr[i];
printf("%4d", maxerr[i]);
if ((i%DCTSIZE) == DCTSIZE-1) printf("\n");
}
printf("Worst peak error = %d (%s spec limit 1)\n\n", j,
meets((double) j, 1.0));
printf("Mean square errors:\n");
max = total = 0.0;
for (i = 0; i < DCTSIZE2; i++) {
double err = (double) sumsqerrs[i] / ((double) niters);
total += (double) sumsqerrs[i];
if (max < err) max = err;
printf(" %8.4f", err);
if ((i%DCTSIZE) == DCTSIZE-1) printf("\n");
}
printf("Worst pmse = %.6f (%s spec limit 0.06)\n", max, meets(max, 0.06));
total /= (double) (64*niters);
printf("Overall mse = %.6f (%s spec limit 0.02)\n\n", total,
meets(total, 0.02));
printf("Mean errors:\n");
max = total = 0.0;
for (i = 0; i < DCTSIZE2; i++) {
double err = (double) sumerrs[i] / ((double) niters);
total += (double) sumerrs[i];
printf(" %8.4f", err);
if (err < 0.0) err = -err;
if (max < err) max = err;
if ((i%DCTSIZE) == DCTSIZE-1) printf("\n");
}
printf("Worst mean error = %.6f (%s spec limit 0.015)\n", max,
meets(max, 0.015));
total /= (double) (64*niters);
printf("Overall mean error = %.6f (%s spec limit 0.0015)\n\n", total,
meets(total, 0.0015));
/* test for 0 input giving 0 output */
memset(testout, 0, sizeof(DCTELEM)*DCTSIZE2);
gst_idct_convert(idct, testout);
for (i = 0, j=0; i < DCTSIZE2; i++) {
if (testout[i]) {
printf("Position %d of IDCT(0) = %d (FAILS)\n", i, testout[i]);
j++;
}
}
printf("%d elements of IDCT(0) were not zero\n\n\n", j);
exit(0);
return 0;
}
/* Print an optional error message followed by the command-line synopsis
 * on stderr, then exit with status 1. This function does not return.
 *
 * msg: text to print after "error: ", or NULL for the synopsis alone.
 *
 * The synopsis lists all five arguments; main() requires the leading
 * `test` argument as well.
 */
void usage (char *msg)
{
  if (msg != NULL)
    fprintf(stderr, "\nerror: %s\n", msg);
  fprintf(stderr, "\n");
  fprintf(stderr, "usage: ieeetest test minpix maxpix sign niters\n");
  fprintf(stderr, "\n");
  fprintf(stderr, " test = 1 - 5\n");
  fprintf(stderr, " minpix = -L value per IEEE spec\n");
  fprintf(stderr, " maxpix = H value per IEEE spec\n");
  fprintf(stderr, " sign = +1 for normal, -1 to run negated test\n");
  fprintf(stderr, " niters = # iterations (10000 for full test)\n");
  fprintf(stderr, "\n");
  exit(1);
}
/* Pseudo-random generator specified by IEEE 1180 */
/* Return the next pseudo-random integer in the range -L..H.
 *
 * The generator state is kept unsigned and reduced modulo 2^32 after
 * every step. Updating a signed long would overflow (undefined
 * behaviour), and where long is wider than 32 bits the state would grow
 * beyond the 32 bits the recurrence is written for. Only the low 31 bits
 * of the state are used, so the sequence is the one a wrapping 32-bit
 * long produces.
 */
long ieeerand (long L, long H)
{
  static unsigned long randx = 1;
  static const double z = (double) 0x7fffffff;
  long i,j;
  double x;

  randx = ((randx * 1103515245UL) + 12345UL) & 0xffffffffUL;
  i = (long) (randx & 0x7ffffffeUL); /* keep 31 bits, drop the lowest */
  x = ((double) i) / z;              /* scale into [0, 1) */
  x *= (L+H+1);
  j = (long) x;                      /* truncate toward zero */
  return j-L;
}
/* Reference double-precision FDCT and IDCT */
/* The cosine lookup table */
/* coslu[a][b] = C(b)/2 * cos[(2a+1)b*pi/16] */
double coslu[8][8];
/* Routine to initialise the cosine lookup table */
/* Fill the cosine lookup table used by ref_fdct() and ref_idct():
 * coslu[a][b] = C(b)/2 * cos((2a+1) * b * pi/16), with C(0) = 1/sqrt(2)
 * and C(b) = 1 for b > 0. */
void dct_init(void)
{
  int a;
  int b;

  for (a = 0; a < 8; a++) {
    for (b = 0; b < 8; b++) {
      double c = cos((double)((2*a+1)*b) * (3.14159265358979323846 / 16.0));

      if (b == 0) {
        c /= sqrt(2.0);
      }
      coslu[a][b] = c * 0.5;
    }
  }
}
/* Reference forward DCT, in place, in double precision. Uses the table
 * built by dct_init(). Results are rounded to the nearest integer, with
 * halves rounded away from zero. */
void ref_fdct (DCTELEM block[8][8])
{
  double coef[8][8];
  int row, col, u, v;

  /* separable transform: inner sum along a row, outer sum down a column */
  for (v = 0; v < 8; v++) {
    for (u = 0; u < 8; u++) {
      double outer = 0.0;

      for (row = 0; row < 8; row++) {
        double inner = 0.0;

        for (col = 0; col < 8; col++) {
          inner += (double) block[row][col] * coslu[col][u];
        }
        outer += coslu[row][v] * inner;
      }
      coef[v][u] = outer;
    }
  }

  /* round and store back into the caller's block */
  for (v = 0; v < 8; v++) {
    for (u = 0; u < 8; u++) {
      double value = coef[v][u];
      int rounded = (value < 0.0) ? - ((int) (0.5 - value))
                                  : (int) (value + 0.5);

      block[v][u] = (DCTELEM) rounded;
    }
  }
}
/* Reference inverse DCT, in place, in double precision. Uses the table
 * built by dct_init(). Results are rounded to the nearest integer, with
 * halves rounded away from zero. */
void ref_idct (DCTELEM block[8][8])
{
  double pixels[8][8];
  int row, col, u, v;

  /* separable transform: inner sum over u, outer sum over v */
  for (row = 0; row < 8; row++) {
    for (col = 0; col < 8; col++) {
      double outer = 0.0;

      for (v = 0; v < 8; v++) {
        double inner = 0.0;

        for (u = 0; u < 8; u++) {
          inner += (double) block[v][u] * coslu[col][u];
        }
        outer += coslu[row][v] * inner;
      }
      pixels[row][col] = outer;
    }
  }

  /* round and store back into the caller's block */
  for (row = 0; row < 8; row++) {
    for (col = 0; col < 8; col++) {
      double value = pixels[row][col];
      int rounded = (value < 0.0) ? - ((int) (0.5 - value))
                                  : (int) (value + 0.5);

      block[row][col] = (DCTELEM) rounded;
    }
  }
}

391
libs/idct/intidct.c Normal file
View file

@ -0,0 +1,391 @@
/*
* jrevdct.c
*
* Copyright (C) 1991, 1992, Thomas G. Lane.
* This file is part of the Independent JPEG Group's software.
* For conditions of distribution and use, see the accompanying README file.
*
* This file contains the basic inverse-DCT transformation subroutine.
*
* This implementation is based on an algorithm described in
* C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
* Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
* Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
* The primary algorithm described there uses 11 multiplies and 29 adds.
* We use their alternate method with 12 multiplies and 32 adds.
* The advantage of this method is that no data path contains more than one
* multiplication; this allows a very simple and accurate implementation in
* scaled fixed-point arithmetic, with a minimal number of shifts.
*/
#include "dct.h"
/* We assume that right shift corresponds to signed division by 2 with
* rounding towards minus infinity. This is correct for typical "arithmetic
* shift" instructions that shift in copies of the sign bit. But some
* C compilers implement >> with an unsigned shift. For these machines you
* must define RIGHT_SHIFT_IS_UNSIGNED.
* RIGHT_SHIFT provides a proper signed right shift of an INT32 quantity.
* It is only applied with constant shift counts. SHIFT_TEMPS must be
* included in the variables of any routine using RIGHT_SHIFT.
*/
#ifdef RIGHT_SHIFT_IS_UNSIGNED
#define SHIFT_TEMPS INT32 shift_temp;
#define RIGHT_SHIFT(x,shft) \
((shift_temp = (x)) < 0 ? \
(shift_temp >> (shft)) | ((~((INT32) 0)) << (32-(shft))) : \
(shift_temp >> (shft)))
#else
#define SHIFT_TEMPS
#define RIGHT_SHIFT(x,shft) ((x) >> (shft))
#endif
/*
* This routine is specialized to the case DCTSIZE = 8.
*/
#if DCTSIZE != 8
Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
#endif
/*
* A 2-D IDCT can be done by 1-D IDCT on each row followed by 1-D IDCT
* on each column. Direct algorithms are also available, but they are
* much more complex and seem not to be any faster when reduced to code.
*
* The poop on this scaling stuff is as follows:
*
* Each 1-D IDCT step produces outputs which are a factor of sqrt(N)
* larger than the true IDCT outputs. The final outputs are therefore
* a factor of N larger than desired; since N=8 this can be cured by
* a simple right shift at the end of the algorithm. The advantage of
* this arrangement is that we save two multiplications per 1-D IDCT,
* because the y0 and y4 inputs need not be divided by sqrt(N).
*
* We have to do addition and subtraction of the integer inputs, which
* is no problem, and multiplication by fractional constants, which is
* a problem to do in integer arithmetic. We multiply all the constants
* by CONST_SCALE and convert them to integer constants (thus retaining
* CONST_BITS bits of precision in the constants). After doing a
* multiplication we have to divide the product by CONST_SCALE, with proper
* rounding, to produce the correct output. This division can be done
* cheaply as a right shift of CONST_BITS bits. We postpone shifting
* as long as possible so that partial sums can be added together with
* full fractional precision.
*
* The outputs of the first pass are scaled up by PASS1_BITS bits so that
* they are represented to better-than-integral precision. These outputs
* require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word
* with the recommended scaling. (To scale up 12-bit sample data further, an
* intermediate INT32 array would be needed.)
*
* To avoid overflow of the 32-bit intermediate results in pass 2, we must
* have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26. Error analysis
* shows that the values given below are the most effective.
*/
#ifdef EIGHT_BIT_SAMPLES
#define CONST_BITS 13
#define PASS1_BITS 2
#else
#define CONST_BITS 13
#define PASS1_BITS 1 /* lose a little precision to avoid overflow */
#endif
#define ONE ((INT32) 1)
#define CONST_SCALE (ONE << CONST_BITS)
/* Convert a positive real constant to an integer scaled by CONST_SCALE. */
#define FIX(x) ((INT32) ((x) * CONST_SCALE + 0.5))
/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
* causing a lot of useless floating-point operations at run time.
* To get around this we use the following pre-calculated constants.
* If you change CONST_BITS you may want to add appropriate values.
* (With a reasonable C compiler, you can just rely on the FIX() macro...)
*/
#if CONST_BITS == 13
#define FIX_0_298631336 ((INT32) 2446) /* FIX(0.298631336) */
#define FIX_0_390180644 ((INT32) 3196) /* FIX(0.390180644) */
#define FIX_0_541196100 ((INT32) 4433) /* FIX(0.541196100) */
#define FIX_0_765366865 ((INT32) 6270) /* FIX(0.765366865) */
#define FIX_0_899976223 ((INT32) 7373) /* FIX(0.899976223) */
#define FIX_1_175875602 ((INT32) 9633) /* FIX(1.175875602) */
#define FIX_1_501321110 ((INT32) 12299) /* FIX(1.501321110) */
#define FIX_1_847759065 ((INT32) 15137) /* FIX(1.847759065) */
#define FIX_1_961570560 ((INT32) 16069) /* FIX(1.961570560) */
#define FIX_2_053119869 ((INT32) 16819) /* FIX(2.053119869) */
#define FIX_2_562915447 ((INT32) 20995) /* FIX(2.562915447) */
#define FIX_3_072711026 ((INT32) 25172) /* FIX(3.072711026) */
#else
#define FIX_0_298631336 FIX(0.298631336)
#define FIX_0_390180644 FIX(0.390180644)
#define FIX_0_541196100 FIX(0.541196100)
#define FIX_0_765366865 FIX(0.765366865)
#define FIX_0_899976223 FIX(0.899976223)
#define FIX_1_175875602 FIX(1.175875602)
#define FIX_1_501321110 FIX(1.501321110)
#define FIX_1_847759065 FIX(1.847759065)
#define FIX_1_961570560 FIX(1.961570560)
#define FIX_2_053119869 FIX(2.053119869)
#define FIX_2_562915447 FIX(2.562915447)
#define FIX_3_072711026 FIX(3.072711026)
#endif
/* Descale and correctly round an INT32 value that's scaled by N bits.
* We assume RIGHT_SHIFT rounds towards minus infinity, so adding
* the fudge factor is correct for either sign of X.
*/
#define DESCALE(x,n) RIGHT_SHIFT((x) + (ONE << ((n)-1)), n)
/* Multiply an INT32 variable by an INT32 constant to yield an INT32 result.
* For 8-bit samples with the recommended scaling, all the variable
* and constant values involved are no more than 16 bits wide, so a
* 16x16->32 bit multiply can be used instead of a full 32x32 multiply;
* this provides a useful speedup on many machines.
* There is no way to specify a 16x16->32 multiply in portable C, but
* some C compilers will do the right thing if you provide the correct
* combination of casts.
* NB: for 12-bit samples, a full 32-bit multiplication will be needed.
*/
/* Select the MULTIPLY implementation. The two narrow variants are only
 * considered for 8-bit samples and only when the builder defines
 * SHORTxSHORT_32 or SHORTxLCONST_32; otherwise the plain product below
 * is used.
 * NOTE(review): both narrow variants cast to INT16, which is not among
 * the typedefs dct.h provides (DCTELEM, DCTBLOCK, INT32) -- confirm it is
 * defined before enabling either option.
 */
#ifdef EIGHT_BIT_SAMPLES
#ifdef SHORTxSHORT_32 /* may work if 'int' is 32 bits */
#define MULTIPLY(var,const) (((INT16) (var)) * ((INT16) (const)))
#endif
#ifdef SHORTxLCONST_32 /* known to work with Microsoft C 6.0 */
#define MULTIPLY(var,const) (((INT16) (var)) * ((INT32) (const)))
#endif
#endif
#ifndef MULTIPLY /* default definition */
#define MULTIPLY(var,const) ((var) * (const))
#endif
/*
 * Perform the inverse DCT on one block of coefficients.
 *
 * data: the coefficient block; it is transformed in place.  The block is
 *       addressed as rows of DCTSIZE elements and the code touches
 *       elements 0..7 of every row and rows 0..7 of every column, so it
 *       assumes DCTSIZE == 8.
 *
 * The transform is done as two 1-D passes: pass 1 works on each row and
 * leaves its results scaled up by 2**PASS1_BITS, pass 2 works on each
 * column and removes that scaling together with the factor of 8 == 2**3.
 *
 * Up-scaling is written as a multiplication by a power of two instead of
 * a left shift: coefficients are frequently negative, and left-shifting
 * a negative signed value is undefined behaviour in C.  The numeric
 * result is the same.
 */
void
gst_idct_int_idct (DCTBLOCK data)
{
  INT32 tmp0, tmp1, tmp2, tmp3;
  INT32 tmp10, tmp11, tmp12, tmp13;
  INT32 z1, z2, z3, z4, z5;
  register DCTELEM *dataptr;
  int rowctr;
  SHIFT_TEMPS

  /* Pass 1: process rows. */
  /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
  /* furthermore, we scale the results by 2**PASS1_BITS. */

  dataptr = data;
  for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) {
    /* Due to quantization, we will usually find that many of the input
     * coefficients are zero, especially the AC terms.  We can exploit this
     * by short-circuiting the IDCT calculation for any row in which all
     * the AC terms are zero.  In that case each output is equal to the
     * DC coefficient (with scale factor as needed).
     * With typical images and quantization tables, half or more of the
     * row DCT calculations can be simplified this way.
     */

    if ((dataptr[1] | dataptr[2] | dataptr[3] | dataptr[4] |
         dataptr[5] | dataptr[6] | dataptr[7]) == 0) {
      /* AC terms all zero */
      /* multiply, don't shift: dataptr[0] may be negative */
      DCTELEM dcval = (DCTELEM) (dataptr[0] * (1 << PASS1_BITS));

      dataptr[0] = dcval;
      dataptr[1] = dcval;
      dataptr[2] = dcval;
      dataptr[3] = dcval;
      dataptr[4] = dcval;
      dataptr[5] = dcval;
      dataptr[6] = dcval;
      dataptr[7] = dcval;

      dataptr += DCTSIZE;       /* advance pointer to next row */
      continue;
    }

    /* Even part: reverse the even part of the forward DCT. */
    /* The rotator is sqrt(2)*c(-6). */

    z2 = (INT32) dataptr[2];
    z3 = (INT32) dataptr[6];

    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
    tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065);
    tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865);

    /* scale the DC/4 butterfly up to the fixed-point format of the
     * MULTIPLY results; multiply, don't shift: the sum may be negative */
    tmp0 = ((INT32) dataptr[0] + (INT32) dataptr[4]) * (ONE << CONST_BITS);
    tmp1 = ((INT32) dataptr[0] - (INT32) dataptr[4]) * (ONE << CONST_BITS);

    tmp10 = tmp0 + tmp3;
    tmp13 = tmp0 - tmp3;
    tmp11 = tmp1 + tmp2;
    tmp12 = tmp1 - tmp2;

    /* Odd part per figure 8; the matrix is unitary and hence its
     * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
     */

    tmp0 = (INT32) dataptr[7];
    tmp1 = (INT32) dataptr[5];
    tmp2 = (INT32) dataptr[3];
    tmp3 = (INT32) dataptr[1];

    z1 = tmp0 + tmp3;
    z2 = tmp1 + tmp2;
    z3 = tmp0 + tmp2;
    z4 = tmp1 + tmp3;
    z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */

    tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
    tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
    tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
    tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
    z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
    z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
    z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
    z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */

    z3 += z5;
    z4 += z5;

    tmp0 += z1 + z3;
    tmp1 += z2 + z4;
    tmp2 += z2 + z3;
    tmp3 += z1 + z4;

    /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */

    dataptr[0] = (DCTELEM) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
    dataptr[7] = (DCTELEM) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
    dataptr[1] = (DCTELEM) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
    dataptr[6] = (DCTELEM) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
    dataptr[2] = (DCTELEM) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
    dataptr[5] = (DCTELEM) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
    dataptr[3] = (DCTELEM) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
    dataptr[4] = (DCTELEM) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS);

    dataptr += DCTSIZE;         /* advance pointer to next row */
  }

  /* Pass 2: process columns. */
  /* Note that we must descale the results by a factor of 8 == 2**3, */
  /* and also undo the PASS1_BITS scaling. */

  dataptr = data;
  for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) {
    /* Columns of zeroes can be exploited in the same way as we did with rows.
     * However, the row calculation has created many nonzero AC terms, so the
     * simplification applies less often (typically 5% to 10% of the time).
     * On machines with very fast multiplication, it's possible that the
     * test takes more time than it's worth.  In that case this section
     * may be commented out.
     */

#ifndef NO_ZERO_COLUMN_TEST
    if ((dataptr[DCTSIZE*1] | dataptr[DCTSIZE*2] | dataptr[DCTSIZE*3] |
         dataptr[DCTSIZE*4] | dataptr[DCTSIZE*5] | dataptr[DCTSIZE*6] |
         dataptr[DCTSIZE*7]) == 0) {
      /* AC terms all zero */
      DCTELEM dcval = (DCTELEM) DESCALE((INT32) dataptr[0], PASS1_BITS+3);

      dataptr[DCTSIZE*0] = dcval;
      dataptr[DCTSIZE*1] = dcval;
      dataptr[DCTSIZE*2] = dcval;
      dataptr[DCTSIZE*3] = dcval;
      dataptr[DCTSIZE*4] = dcval;
      dataptr[DCTSIZE*5] = dcval;
      dataptr[DCTSIZE*6] = dcval;
      dataptr[DCTSIZE*7] = dcval;

      dataptr++;                /* advance pointer to next column */
      continue;
    }
#endif

    /* Even part: reverse the even part of the forward DCT. */
    /* The rotator is sqrt(2)*c(-6). */

    z2 = (INT32) dataptr[DCTSIZE*2];
    z3 = (INT32) dataptr[DCTSIZE*6];

    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
    tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065);
    tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865);

    /* multiply, don't shift: the sum/difference may be negative */
    tmp0 = ((INT32) dataptr[DCTSIZE*0] + (INT32) dataptr[DCTSIZE*4]) * (ONE << CONST_BITS);
    tmp1 = ((INT32) dataptr[DCTSIZE*0] - (INT32) dataptr[DCTSIZE*4]) * (ONE << CONST_BITS);

    tmp10 = tmp0 + tmp3;
    tmp13 = tmp0 - tmp3;
    tmp11 = tmp1 + tmp2;
    tmp12 = tmp1 - tmp2;

    /* Odd part per figure 8; the matrix is unitary and hence its
     * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
     */

    tmp0 = (INT32) dataptr[DCTSIZE*7];
    tmp1 = (INT32) dataptr[DCTSIZE*5];
    tmp2 = (INT32) dataptr[DCTSIZE*3];
    tmp3 = (INT32) dataptr[DCTSIZE*1];

    z1 = tmp0 + tmp3;
    z2 = tmp1 + tmp2;
    z3 = tmp0 + tmp2;
    z4 = tmp1 + tmp3;
    z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */

    tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
    tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
    tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
    tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
    z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
    z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
    z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
    z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */

    z3 += z5;
    z4 += z5;

    tmp0 += z1 + z3;
    tmp1 += z2 + z4;
    tmp2 += z2 + z3;
    tmp3 += z1 + z4;

    /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */

    dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp3,
                                           CONST_BITS+PASS1_BITS+3);
    dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp10 - tmp3,
                                           CONST_BITS+PASS1_BITS+3);
    dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp11 + tmp2,
                                           CONST_BITS+PASS1_BITS+3);
    dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(tmp11 - tmp2,
                                           CONST_BITS+PASS1_BITS+3);
    dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(tmp12 + tmp1,
                                           CONST_BITS+PASS1_BITS+3);
    dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12 - tmp1,
                                           CONST_BITS+PASS1_BITS+3);
    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp13 + tmp0,
                                           CONST_BITS+PASS1_BITS+3);
    dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp13 - tmp0,
                                           CONST_BITS+PASS1_BITS+3);

    dataptr++;                  /* advance pointer to next column */
  }
}

783
libs/idct/mmx32idct.c Normal file
View file

@ -0,0 +1,783 @@
/*
* idctmmx32.cpp
*
* Copyright (C) Alberto Vigata - January 2000 - ultraflask@yahoo.com
*
* This file is part of FlasKMPEG, a free MPEG to MPEG/AVI converter
*
* FlasKMPEG is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* FlasKMPEG is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Make; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
*
*/
// MMX32 iDCT algorithm (IEEE-1180 compliant) :: idct_mmx32()
//
// MPEG2AVI
// --------
// v0.16B33 initial release
//
// This was one of the harder pieces of work to code.
// Intel's app-note focuses on the numerical issues of the algorithm, but
// assumes the programmer is familiar with IDCT mathematics, leaving the
// form of the complete function up to the programmer's imagination.
//
// ALGORITHM OVERVIEW
// ------------------
// I played around with the code for quite a few hours. I came up
// with *A* working IDCT algorithm, however I'm not sure whether my routine
// is "the correct one." But rest assured, my code passes all six IEEE
// accuracy tests with plenty of margin.
//
// My IDCT algorithm consists of 4 steps:
//
// 1) IDCT-row transformation (using the IDCT-row function) on all 8 rows
// This yields an intermediate 8x8 matrix.
//
// 2) intermediate matrix transpose (mandatory)
//
// 3) IDCT-row transformation (2nd time) on all 8 rows of the intermediate
// matrix. The output is the final-result, in transposed form.
//
// 4) post-transformation matrix transpose
// (not necessary if the input-data is already transposed, this could
// be done during the MPEG "zig-zag" scan, but since my algorithm
// requires at least one transpose operation, why not re-use the
// transpose-code.)
//
// Although the (1st) and (3rd) steps use the SAME row-transform operation,
// the (3rd) step uses different shift&round constants (explained later.)
//
// Also note that the intermediate transpose (2) would not be necessary,
// if the subsequent operation were a iDCT-column transformation. Since
// we only have the iDCT-row transform, we transpose the intermediate
// matrix and use the iDCT-row transform a 2nd time.
//
// I had to change some constants/variables for my method to work :
//
// As given by Intel, the #defines for SHIFT_INV_COL and RND_INV_COL are
// wrong. Not surprising since I'm not using a true column-transform
// operation, but the row-transform operation (as mentioned earlier.)
// round_inv_col[], which is given as "4 short" values, should have the
// same dimensions as round_inv_row[]. The corrected variables are
// shown.
//
// Intel's code defines a different table for each row operation.
// The tables given are 0/4, 1/7, 2/6, and 5/3. My code only uses row#0.
// Using the other rows messes up the overall transform.
//
// IMPLEMENTATION DETAILs
// ----------------------
//
// I divided the algorithm's work into two subroutines,
// 1) idct_mmx32_rows() - transforms 8 rows, then transpose
// 2) idct_mmx32_cols() - transforms 8 rows, then transpose
// yields final result ("drop-in" direct replacement for INT32 IDCT)
//
// The 2nd function is a clone of the 1st, with changes made only to the
// shift&rounding instructions.
//
// In the 1st function (rows), the shift & round instructions use
// SHIFT_INV_ROW & round_inv_row[] (renamed to r_inv_row[])
//
// In the 2nd function (cols), the shift & round instructions use
// SHIFT_INV_COL & round_inv_col[] (renamed to r_inv_col[])
//
// Each function contains an integrated transpose-operator, which comes
// AFTER the primary transformation operation. In the future, I'll optimize
// the code to do more of the transpose-work "in-place". Right now, I've
// left the code as two subroutines and a main calling function, so other
// people can read the code more easily.
//
// liaor@umcc.ais.org http://members.tripod.com/~liaor
//
//;=============================================================================
//;
//; AP-922 http://developer.intel.com/vtune/cbts/strmsimd
//; These examples contain code fragments for first stage iDCT 8x8
//; (for rows) and first stage DCT 8x8 (for columns)
//;
//;=============================================================================
/*
mword typedef qword
qword ptr equ mword ptr */
#include <mmx.h>
// Fixed-point precision of the inverse transform.
#define BITS_INV_ACC 4 //; 4 or 5 for IEEE
// 5 yields higher accuracy, but lessens dynamic range on the input matrix
// Right-shift applied to the 32-bit sums of the first (row) pass.
#define SHIFT_INV_ROW (16 - BITS_INV_ACC)
// Right-shift applied to the 32-bit sums of the second ("column") pass.
#define SHIFT_INV_COL (1 + BITS_INV_ACC +14 ) // changed from Intel's value
//#define SHIFT_INV_COL (1 + BITS_INV_ACC )
// Rounders: half of the value that the matching shift divides by.
#define RND_INV_ROW (1 << (SHIFT_INV_ROW-1))
#define RND_INV_COL (1 << (SHIFT_INV_COL-1))
#define RND_INV_CORR (RND_INV_COL - 1) //; correction -1.0 and round
//#define RND_INV_ROW (1024 * (6 - BITS_INV_ACC)) //; 1 << (SHIFT_INV_ROW-1)
//#define RND_INV_COL (16 * (BITS_INV_ACC - 3)) //; 1 << (SHIFT_INV_COL-1)
//.data
//Align 16
// Each table is used as ONE 64-bit MMX memory operand holding two packed
// 32-bit rounders (e.g. paddd_m2r(*(r_inv_row), mm3)).  The element type
// therefore has to be exactly 32 bits wide: with 'long' the table grows to
// 16 bytes on LP64 systems and the upper 32-bit lane of the operand is 0
// instead of the rounder.  'int' is 32 bits on the x86 ABIs that can run
// this MMX code.
static const int r_inv_row[2] = { RND_INV_ROW, RND_INV_ROW};
static const int r_inv_col[2] = {RND_INV_COL, RND_INV_COL};
static const int r_inv_corr[2] = {RND_INV_CORR, RND_INV_CORR };
//const static short r_inv_col[4] =
// {RND_INV_COL, RND_INV_COL, RND_INV_COL, RND_INV_COL};
//const static short r_inv_corr[4] =
// {RND_INV_CORR, RND_INV_CORR, RND_INV_CORR, RND_INV_CORR};
/* constants for the forward DCT
//#define BITS_FRW_ACC 3 //; 2 or 3 for accuracy
//#define SHIFT_FRW_COL BITS_FRW_ACC
//#define SHIFT_FRW_ROW (BITS_FRW_ACC + 17)
//#define RND_FRW_ROW (262144 * (BITS_FRW_ACC - 1)) //; 1 << (SHIFT_FRW_ROW-1)
const static __int64 one_corr = 0x0001000100010001;
const static long r_frw_row[2] = {RND_FRW_ROW, RND_FRW_ROW };
//const static short tg_1_16[4] = {13036, 13036, 13036, 13036 }; //tg * (2<<16) + 0.5
//const static short tg_2_16[4] = {27146, 27146, 27146, 27146 }; //tg * (2<<16) + 0.5
//const static short tg_3_16[4] = {-21746, -21746, -21746, -21746 }; //tg * (2<<16) + 0.5
//const static short cos_4_16[4] = {-19195, -19195, -19195, -19195 }; //cos * (2<<16) + 0.5
//const static short ocos_4_16[4] = {23170, 23170, 23170, 23170 }; //cos * (2<<15) + 0.5
//concatenated table, for forward DCT transformation
const static short tg_all_16[] = {
13036, 13036, 13036, 13036, // tg * (2<<16) + 0.5
27146, 27146, 27146, 27146, //tg * (2<<16) + 0.5
-21746, -21746, -21746, -21746, // tg * (2<<16) + 0.5
-19195, -19195, -19195, -19195, //cos * (2<<16) + 0.5
23170, 23170, 23170, 23170 }; //cos * (2<<15) + 0.5
#define tg_1_16 (tg_all_16 + 0)
#define tg_2_16 (tg_all_16 + 8)
#define tg_3_16 (tg_all_16 + 16)
#define cos_4_16 (tg_all_16 + 24)
#define ocos_4_16 (tg_all_16 + 32)
*/
/*
;=============================================================================
;
; The first stage iDCT 8x8 - inverse DCTs of rows
;
;-----------------------------------------------------------------------------
; The 8-point inverse DCT direct algorithm
;-----------------------------------------------------------------------------
;
; static const short w[32] = {
; FIX(cos_4_16), FIX(cos_2_16), FIX(cos_4_16), FIX(cos_6_16),
; FIX(cos_4_16), FIX(cos_6_16), -FIX(cos_4_16), -FIX(cos_2_16),
; FIX(cos_4_16), -FIX(cos_6_16), -FIX(cos_4_16), FIX(cos_2_16),
; FIX(cos_4_16), -FIX(cos_2_16), FIX(cos_4_16), -FIX(cos_6_16),
; FIX(cos_1_16), FIX(cos_3_16), FIX(cos_5_16), FIX(cos_7_16),
; FIX(cos_3_16), -FIX(cos_7_16), -FIX(cos_1_16), -FIX(cos_5_16),
; FIX(cos_5_16), -FIX(cos_1_16), FIX(cos_7_16), FIX(cos_3_16),
; FIX(cos_7_16), -FIX(cos_5_16), FIX(cos_3_16), -FIX(cos_1_16) };
;
; #define DCT_8_INV_ROW(x, y)
;{
; int a0, a1, a2, a3, b0, b1, b2, b3;
;
; a0 =x[0]*w[0]+x[2]*w[1]+x[4]*w[2]+x[6]*w[3];
; a1 =x[0]*w[4]+x[2]*w[5]+x[4]*w[6]+x[6]*w[7];
; a2 = x[0] * w[ 8] + x[2] * w[ 9] + x[4] * w[10] + x[6] * w[11];
; a3 = x[0] * w[12] + x[2] * w[13] + x[4] * w[14] + x[6] * w[15];
; b0 = x[1] * w[16] + x[3] * w[17] + x[5] * w[18] + x[7] * w[19];
; b1 = x[1] * w[20] + x[3] * w[21] + x[5] * w[22] + x[7] * w[23];
; b2 = x[1] * w[24] + x[3] * w[25] + x[5] * w[26] + x[7] * w[27];
; b3 = x[1] * w[28] + x[3] * w[29] + x[5] * w[30] + x[7] * w[31];
;
; y[0] = SHIFT_ROUND ( a0 + b0 );
; y[1] = SHIFT_ROUND ( a1 + b1 );
; y[2] = SHIFT_ROUND ( a2 + b2 );
; y[3] = SHIFT_ROUND ( a3 + b3 );
; y[4] = SHIFT_ROUND ( a3 - b3 );
; y[5] = SHIFT_ROUND ( a2 - b2 );
; y[6] = SHIFT_ROUND ( a1 - b1 );
; y[7] = SHIFT_ROUND ( a0 - b0 );
;}
;
;-----------------------------------------------------------------------------
;
; In this implementation the outputs of the iDCT-1D are multiplied
; for rows 0,4 - by cos_4_16,
; for rows 1,7 - by cos_1_16,
; for rows 2,6 - by cos_2_16,
; for rows 3,5 - by cos_3_16
; and are shifted to the left for better accuracy
;
; For the constants used,
; FIX(float_const) = (short) (float_const * (1<<15) + 0.5)
;
;=============================================================================
;=============================================================================
IF _MMX ; MMX code
;=============================================================================
//; Table for rows 0,4 - constants are multiplied by cos_4_16
const short tab_i_04[] = {
16384, 16384, 16384, -16384, // ; movq-> w06 w04 w02 w00
21407, 8867, 8867, -21407, // w07 w05 w03 w01
16384, -16384, 16384, 16384, //; w14 w12 w10 w08
-8867, 21407, -21407, -8867, //; w15 w13 w11 w09
22725, 12873, 19266, -22725, //; w22 w20 w18 w16
19266, 4520, -4520, -12873, //; w23 w21 w19 w17
12873, 4520, 4520, 19266, //; w30 w28 w26 w24
-22725, 19266, -12873, -22725 };//w31 w29 w27 w25
//; Table for rows 1,7 - constants are multiplied by cos_1_16
const short tab_i_17[] = {
22725, 22725, 22725, -22725, // ; movq-> w06 w04 w02 w00
29692, 12299, 12299, -29692, // ; w07 w05 w03 w01
22725, -22725, 22725, 22725, //; w14 w12 w10 w08
-12299, 29692, -29692, -12299, //; w15 w13 w11 w09
31521, 17855, 26722, -31521, //; w22 w20 w18 w16
26722, 6270, -6270, -17855, //; w23 w21 w19 w17
17855, 6270, 6270, 26722, //; w30 w28 w26 w24
-31521, 26722, -17855, -31521}; // w31 w29 w27 w25
//; Table for rows 2,6 - constants are multiplied by cos_2_16
const short tab_i_26[] = {
21407, 21407, 21407, -21407, // ; movq-> w06 w04 w02 w00
27969, 11585, 11585, -27969, // ; w07 w05 w03 w01
21407, -21407, 21407, 21407, // ; w14 w12 w10 w08
-11585, 27969, -27969, -11585, // ;w15 w13 w11 w09
29692, 16819, 25172, -29692, // ;w22 w20 w18 w16
25172, 5906, -5906, -16819, // ;w23 w21 w19 w17
16819, 5906, 5906, 25172, // ;w30 w28 w26 w24
-29692, 25172, -16819, -29692}; // ;w31 w29 w27 w25
//; Table for rows 3,5 - constants are multiplied by cos_3_16
const short tab_i_35[] = {
19266, 19266, 19266, -19266, //; movq-> w06 w04 w02 w00
25172, 10426, 10426, -25172, //; w07 w05 w03 w01
19266, -19266, 19266, 19266, //; w14 w12 w10 w08
-10426, 25172, -25172, -10426, //; w15 w13 w11 w09
26722, 15137, 22654, -26722, //; w22 w20 w18 w16
22654, 5315, -5315, -15137, //; w23 w21 w19 w17
15137, 5315, 5315, 22654, //; w30 w28 w26 w24
-26722, 22654, -15137, -26722}; //; w31 w29 w27 w25
*/
// CONCATENATED TABLE, rows 0,1,2,3,4,5,6,7 (in order )
//
// Each "row" section below holds 32 coefficients, written as eight groups
// of four shorts; one group is the size of a single 64-bit movq/pmaddwd
// memory operand.  The wNN labels name the coefficient held in each slot.
// idct_mmx32_rows() reads the groups at element offsets 0, 4, 8, 12, 16,
// 20, 24 and 28 only, i.e. it never leaves the row0 section.
// Sections row4..row7 repeat the data of row0, row3, row2 and row1.
//
// In our implementation, however, we only use row0 !
//
static const short tab_i_01234567[] = {
//row0, this row is required
16384, 16384, 16384, -16384, // ; movq-> w06 w04 w02 w00
21407, 8867, 8867, -21407, // w07 w05 w03 w01
16384, -16384, 16384, 16384, //; w14 w12 w10 w08
-8867, 21407, -21407, -8867, //; w15 w13 w11 w09
22725, 12873, 19266, -22725, //; w22 w20 w18 w16
19266, 4520, -4520, -12873, //; w23 w21 w19 w17
12873, 4520, 4520, 19266, //; w30 w28 w26 w24
-22725, 19266, -12873, -22725, //w31 w29 w27 w25
// the rest of these rows (1-7), aren't used !
//row1
22725, 22725, 22725, -22725, // ; movq-> w06 w04 w02 w00
29692, 12299, 12299, -29692, // ; w07 w05 w03 w01
22725, -22725, 22725, 22725, //; w14 w12 w10 w08
-12299, 29692, -29692, -12299, //; w15 w13 w11 w09
31521, 17855, 26722, -31521, //; w22 w20 w18 w16
26722, 6270, -6270, -17855, //; w23 w21 w19 w17
17855, 6270, 6270, 26722, //; w30 w28 w26 w24
-31521, 26722, -17855, -31521, // w31 w29 w27 w25
//row2
21407, 21407, 21407, -21407, // ; movq-> w06 w04 w02 w00
27969, 11585, 11585, -27969, // ; w07 w05 w03 w01
21407, -21407, 21407, 21407, // ; w14 w12 w10 w08
-11585, 27969, -27969, -11585, // ;w15 w13 w11 w09
29692, 16819, 25172, -29692, // ;w22 w20 w18 w16
25172, 5906, -5906, -16819, // ;w23 w21 w19 w17
16819, 5906, 5906, 25172, // ;w30 w28 w26 w24
-29692, 25172, -16819, -29692, // ;w31 w29 w27 w25
//row3
19266, 19266, 19266, -19266, //; movq-> w06 w04 w02 w00
25172, 10426, 10426, -25172, //; w07 w05 w03 w01
19266, -19266, 19266, 19266, //; w14 w12 w10 w08
-10426, 25172, -25172, -10426, //; w15 w13 w11 w09
26722, 15137, 22654, -26722, //; w22 w20 w18 w16
22654, 5315, -5315, -15137, //; w23 w21 w19 w17
15137, 5315, 5315, 22654, //; w30 w28 w26 w24
-26722, 22654, -15137, -26722, //; w31 w29 w27 w25
//row4
16384, 16384, 16384, -16384, // ; movq-> w06 w04 w02 w00
21407, 8867, 8867, -21407, // w07 w05 w03 w01
16384, -16384, 16384, 16384, //; w14 w12 w10 w08
-8867, 21407, -21407, -8867, //; w15 w13 w11 w09
22725, 12873, 19266, -22725, //; w22 w20 w18 w16
19266, 4520, -4520, -12873, //; w23 w21 w19 w17
12873, 4520, 4520, 19266, //; w30 w28 w26 w24
-22725, 19266, -12873, -22725, //w31 w29 w27 w25
//row5
19266, 19266, 19266, -19266, //; movq-> w06 w04 w02 w00
25172, 10426, 10426, -25172, //; w07 w05 w03 w01
19266, -19266, 19266, 19266, //; w14 w12 w10 w08
-10426, 25172, -25172, -10426, //; w15 w13 w11 w09
26722, 15137, 22654, -26722, //; w22 w20 w18 w16
22654, 5315, -5315, -15137, //; w23 w21 w19 w17
15137, 5315, 5315, 22654, //; w30 w28 w26 w24
-26722, 22654, -15137, -26722, //; w31 w29 w27 w25
//row6
21407, 21407, 21407, -21407, // ; movq-> w06 w04 w02 w00
27969, 11585, 11585, -27969, // ; w07 w05 w03 w01
21407, -21407, 21407, 21407, // ; w14 w12 w10 w08
-11585, 27969, -27969, -11585, // ;w15 w13 w11 w09
29692, 16819, 25172, -29692, // ;w22 w20 w18 w16
25172, 5906, -5906, -16819, // ;w23 w21 w19 w17
16819, 5906, 5906, 25172, // ;w30 w28 w26 w24
-29692, 25172, -16819, -29692, // ;w31 w29 w27 w25
//row7
22725, 22725, 22725, -22725, // ; movq-> w06 w04 w02 w00
29692, 12299, 12299, -29692, // ; w07 w05 w03 w01
22725, -22725, 22725, 22725, //; w14 w12 w10 w08
-12299, 29692, -29692, -12299, //; w15 w13 w11 w09
31521, 17855, 26722, -31521, //; w22 w20 w18 w16
26722, 6270, -6270, -17855, //; w23 w21 w19 w17
17855, 6270, 6270, 26722, //; w30 w28 w26 w24
-31521, 26722, -17855, -31521}; // w31 w29 w27 w25
// Symbolic names for x86 registers, as used by the assembly-style
// comments further down (e.g. "[OUT + ROW_STRIDE*4 + 8]").
// NOTE(review): idct_mmx32_rows() itself uses the C variables inptr/out
// and the r_inv_row[] table directly and never expands INP, OUT, TABLE,
// round_inv_row or round_inv_col; they look like leftovers from an
// inline-assembly version -- confirm against the rest of the file before
// removing them.
#define INP eax // pointer to (short *blk)
#define OUT ecx // pointer to output (temporary store space qwTemp[])
#define TABLE ebx // pointer to tab_i_01234567[]
#define round_inv_row edx
#define round_inv_col edx
// Distance between two consecutive matrix rows, counted in shorts
// (it is added to short pointers in the transposer).
#define ROW_STRIDE 8 // for 8x8 matrix transposer
// private functions
// (the 8x8 temporary of shorts, qwTemp[], is a local variable of
// idct_mmx32_rows())
// Both functions take a pointer to the block of shorts to transform.
__inline static void idct_mmx32_rows( short *blk ); // transform rows
__inline static void idct_mmx32_cols( short *blk ); // transform "columns"
// the "column" transform actually transforms rows, it is
// identical to the row-transform except for the ROUNDING
// and SHIFTING coefficients.
// idct_mmx32_rows: first pass of the two-pass MMX iDCT.
//
// blk: pointer to the 8x8 block, stored as 64 consecutive shorts
//      (8 rows of 8).  It is both input and output.
//
// Step 1 runs the 1-D row transform on each of the 8 rows, using the row0
// coefficients of tab_i_01234567[], adding the r_inv_row[] rounder and
// shifting right by SHIFT_INV_ROW; results are packed to shorts with
// signed saturation (packssdw) and stored in the local qwTemp[].
// Step 2 writes the transpose of qwTemp[] back into blk, one 4x4 quadrant
// at a time.
//
// All eight MMX registers mm0..mm7 are overwritten.
// NOTE(review): no emms is issued in this function; presumably the caller
// takes care of it before using the FPU -- confirm.
static void
idct_mmx32_rows( short *blk ) // transform all 8 rows of 8x8 iDCT block
{
int x;
short qwTemp[64];
short *out = &qwTemp[0];
short *inptr = blk;
// this subroutine performs two operations
// 1) iDCT row transform
// for( i = 0; i < 8; ++ i)
// DCT_8_INV_ROW_1( blk[i*8], qwTemp[i] );
//
// 2) transpose the matrix (which was stored in qwTemp[])
// qwTemp[] -> [8x8 matrix transpose] -> blk[]
for (x=0; x<8; x++) { // transform one row per iteration
movq_m2r(*(inptr), mm0); // 0 ; x3 x2 x1 x0
movq_m2r(*(inptr+4), mm1); // 1 ; x7 x6 x5 x4
movq_r2r(mm0, mm2); // 2 ; x3 x2 x1 x0
movq_m2r(*(tab_i_01234567), mm3); // 3 ; w06 w04 w02 w00
punpcklwd_r2r(mm1, mm0); // x5 x1 x4 x0
// ----------
movq_r2r(mm0, mm5); // 5 ; x5 x1 x4 x0
punpckldq_r2r(mm0, mm0); // x4 x0 x4 x0
movq_m2r(*(tab_i_01234567+4), mm4); // 4 ; w07 w05 w03 w01
punpckhwd_r2r(mm1, mm2); // 1 ; x7 x3 x6 x2
pmaddwd_r2r(mm0, mm3); // x4*w06+x0*w04 x4*w02+x0*w00
movq_r2r(mm2, mm6); // 6 ; x7 x3 x6 x2
movq_m2r(*(tab_i_01234567+16), mm1);// 1 ; w22 w20 w18 w16
punpckldq_r2r(mm2, mm2); // x6 x2 x6 x2
pmaddwd_r2r(mm2, mm4); // x6*w07+x2*w05 x6*w03+x2*w01
punpckhdq_r2r(mm5, mm5); // x5 x1 x5 x1
pmaddwd_m2r(*(tab_i_01234567+8), mm0);// x4*w14+x0*w12 x4*w10+x0*w08
punpckhdq_r2r(mm6, mm6); // x7 x3 x7 x3
movq_m2r(*(tab_i_01234567+20), mm7);// 7 ; w23 w21 w19 w17
pmaddwd_r2r(mm5, mm1); // x5*w22+x1*w20 x5*w18+x1*w16
paddd_m2r(*(r_inv_row), mm3);// +rounder
pmaddwd_r2r(mm6, mm7); // x7*w23+x3*w21 x7*w19+x3*w17
pmaddwd_m2r(*(tab_i_01234567+12), mm2);// x6*w15+x2*w13 x6*w11+x2*w09
paddd_r2r(mm4, mm3); // 4 ; a1=sum(even1) a0=sum(even0)
pmaddwd_m2r(*(tab_i_01234567+24), mm5);// x5*w30+x1*w28 x5*w26+x1*w24
movq_r2r(mm3, mm4); // 4 ; a1 a0
pmaddwd_m2r(*(tab_i_01234567+28), mm6);// x7*w31+x3*w29 x7*w27+x3*w25
paddd_r2r(mm7, mm1); // 7 ; b1=sum(odd1) b0=sum(odd0)
paddd_m2r(*(r_inv_row), mm0);// +rounder
psubd_r2r(mm1, mm3); // a1-b1 a0-b0
psrad_i2r(SHIFT_INV_ROW, mm3); // y6=a1-b1 y7=a0-b0
paddd_r2r(mm4, mm1); // 4 ; a1+b1 a0+b0
paddd_r2r(mm2, mm0); // 2 ; a3=sum(even3) a2=sum(even2)
psrad_i2r(SHIFT_INV_ROW, mm1); // y1=a1+b1 y0=a0+b0
paddd_r2r(mm6, mm5); // 6 ; b3=sum(odd3) b2=sum(odd2)
movq_r2r(mm0, mm4); // 4 ; a3 a2
paddd_r2r(mm5, mm0); // a3+b3 a2+b2
psubd_r2r(mm5, mm4); // 5 ; a3-b3 a2-b2
psrad_i2r(SHIFT_INV_ROW, mm4); // y4=a3-b3 y5=a2-b2
psrad_i2r(SHIFT_INV_ROW, mm0); // y3=a3+b3 y2=a2+b2
packssdw_r2r(mm3, mm4); // 3 ; y6 y7 y4 y5
packssdw_r2r(mm0, mm1); // 0 ; y3 y2 y1 y0
movq_r2r(mm4, mm7); // 7 ; y6 y7 y4 y5
psrld_i2r(16, mm4); // 0 y6 0 y4
movq_r2m(mm1, *(out)); // 1 ; save y3 y2 y1 y0
pslld_i2r(16, mm7); // y7 0 y5 0
por_r2r(mm4, mm7); // 4 ; y7 y6 y5 y4
// store the upper half of this row, then step to the next row
movq_r2m(mm7, *(out+4)); // 7 ; save y7 y6 y5 y4
inptr += 8;
out += 8;
}
// done with the iDCT row-transformation
// now we have to transpose the output 8x8 matrix
// 8x8 (OUT) -> 8x8't' (IN)
// the transposition is implemented as 4 sub-operations.
// 1) transpose upper-left quad
// 2) transpose lower-right quad
// 3) transpose lower-left quad
// 4) transpose upper-right quad
// (quadrant names refer to the SOURCE matrix qwTemp[]; the off-diagonal
// quadrants are stored to the mirrored position in blk[])
// The loads for each following quadrant are interleaved with the stores
// of the current one.
// mm0 = 1st row [ A B C D ] row1
// mm1 = 2nd row [ E F G H ] 2
// mm2 = 3rd row [ I J K L ] 3
// mm3 = 4th row [ M N O P ] 4
// 1) transpose upper-left quad
out = &qwTemp[0];
movq_m2r(*(out + ROW_STRIDE * 0), mm0);
movq_m2r(*(out + ROW_STRIDE * 1), mm1);
movq_r2r(mm0, mm4); // mm4 = copy of row1[A B C D]
movq_m2r(*(out + ROW_STRIDE * 2), mm2);
punpcklwd_r2r(mm1, mm0); // mm0 = [ 0 4 1 5]
movq_m2r(*(out + ROW_STRIDE * 3), mm3);
punpckhwd_r2r(mm1, mm4); // mm4 = [ 2 6 3 7]
movq_r2r(mm2, mm6);
punpcklwd_r2r(mm3, mm2); // mm2 = [ 8 12 9 13]
punpckhwd_r2r(mm3, mm6); // mm6 = 10 14 11 15]
movq_r2r(mm0, mm1); // mm1 = [ 0 4 1 5]
inptr = blk;
punpckldq_r2r(mm2, mm0); // final result mm0 = row1 [0 4 8 12]
movq_r2r(mm4, mm3); // mm3 = [ 2 6 3 7]
punpckhdq_r2r(mm2, mm1); // mm1 = final result mm1 = row2 [1 5 9 13]
movq_r2m(mm0, *(inptr + ROW_STRIDE * 0)); // store row 1
punpckldq_r2r(mm6, mm4); // final result mm4 = row3 [2 6 10 14]
// begin reading next quadrant (lower-right)
movq_m2r(*(out + ROW_STRIDE*4 + 4), mm0);
punpckhdq_r2r(mm6, mm3); // final result mm3 = row4 [3 7 11 15]
movq_r2m(mm4, *(inptr + ROW_STRIDE * 2)); // store row 3
movq_r2r(mm0, mm4); // mm4 = copy of row1[A B C D]
movq_r2m(mm1, *(inptr + ROW_STRIDE * 1)); // store row 2
movq_m2r(*(out + ROW_STRIDE*5 + 4), mm1);
movq_r2m(mm3, *(inptr + ROW_STRIDE * 3)); // store row 4
punpcklwd_r2r(mm1, mm0); // mm0 = [ 0 4 1 5]
// 2) transpose lower-right quadrant
// (the commented-out instructions were already issued above)
// movq mm0, qword ptr [OUT + ROW_STRIDE*4 + 8]
// movq mm1, qword ptr [OUT + ROW_STRIDE*5 + 8]
// movq mm4, mm0; // mm4 = copy of row1[A B C D]
movq_m2r(*(out + ROW_STRIDE*6 + 4), mm2);
// punpcklwd mm0, mm1; // mm0 = [ 0 4 1 5]
punpckhwd_r2r(mm1, mm4); // mm4 = [ 2 6 3 7]
movq_m2r(*(out + ROW_STRIDE*7 + 4), mm3);
movq_r2r(mm2, mm6);
punpcklwd_r2r(mm3, mm2); // mm2 = [ 8 12 9 13]
movq_r2r(mm0, mm1); // mm1 = [ 0 4 1 5]
punpckhwd_r2r(mm3, mm6); // mm6 = 10 14 11 15]
movq_r2r(mm4, mm3); // mm3 = [ 2 6 3 7]
punpckldq_r2r(mm2, mm0); // final result mm0 = row1 [0 4 8 12]
punpckhdq_r2r(mm2, mm1); // mm1 = final result mm1 = row2 [1 5 9 13]
; // slot
movq_r2m(mm0, *(inptr + ROW_STRIDE*4 + 4)); // store row 1
punpckldq_r2r(mm6, mm4); // final result mm4 = row3 [2 6 10 14]
// begin reading next quadrant (lower-left)
movq_m2r(*(out + ROW_STRIDE * 4 ), mm0);
punpckhdq_r2r(mm6, mm3); // final result mm3 = row4 [3 7 11 15]
movq_r2m(mm4, *(inptr + ROW_STRIDE*6 + 4)); // store row 3
movq_r2r(mm0, mm4); // mm4 = copy of row1[A B C D]
movq_r2m(mm1, *(inptr + ROW_STRIDE*5 + 4)); // store row 2
; // slot
movq_m2r(*(out + ROW_STRIDE * 5 ), mm1);
; // slot
movq_r2m(mm3, *(inptr + ROW_STRIDE*7 + 4)); // store row 4
punpcklwd_r2r(mm1, mm0); // mm0 = [ 0 4 1 5]
// 3) transpose lower-left (stored to the upper-right of blk[])
// (the commented-out instructions were already issued above)
// movq mm0, qword ptr [OUT + ROW_STRIDE * 4 ]
// movq mm1, qword ptr [OUT + ROW_STRIDE * 5 ]
// movq mm4, mm0; // mm4 = copy of row1[A B C D]
movq_m2r(*(out + ROW_STRIDE * 6 ), mm2);
// punpcklwd mm0, mm1; // mm0 = [ 0 4 1 5]
punpckhwd_r2r(mm1, mm4); // mm4 = [ 2 6 3 7]
movq_m2r(*(out + ROW_STRIDE * 7 ), mm3);
movq_r2r(mm2, mm6);
punpcklwd_r2r(mm3, mm2); // mm2 = [ 8 12 9 13]
movq_r2r(mm0, mm1); // mm1 = [ 0 4 1 5]
punpckhwd_r2r(mm3, mm6); // mm6 = 10 14 11 15]
movq_r2r(mm4, mm3); // mm3 = [ 2 6 3 7]
punpckldq_r2r(mm2, mm0); // final result mm0 = row1 [0 4 8 12]
punpckhdq_r2r(mm2, mm1); // mm1 = final result mm1 = row2 [1 5 9 13]
;//slot
movq_r2m(mm0, *(inptr + ROW_STRIDE * 0 + 4 )); // store row 1
punpckldq_r2r(mm6, mm4); // final result mm4 = row3 [2 6 10 14]
// begin reading next quadrant (upper-right)
movq_m2r(*(out + ROW_STRIDE*0 + 4), mm0);
punpckhdq_r2r(mm6, mm3); // final result mm3 = row4 [3 7 11 15]
movq_r2m(mm4, *(inptr + ROW_STRIDE * 2 + 4)); // store row 3
movq_r2r(mm0, mm4); // mm4 = copy of row1[A B C D]
movq_r2m(mm1, *(inptr + ROW_STRIDE * 1 + 4)); // store row 2
movq_m2r(*(out + ROW_STRIDE*1 + 4), mm1);
movq_r2m(mm3, *(inptr + ROW_STRIDE * 3 + 4)); // store row 4
punpcklwd_r2r(mm1, mm0); // mm0 = [ 0 4 1 5]
// 4) transpose upper-right quadrant (stored to the lower-left of blk[])
// The first two row loads, the copy into mm4 and the first punpcklwd of
// this quadrant were already issued above, interleaved with the stores
// of quadrant 3.
movq_m2r(*(out + ROW_STRIDE*2 + 4), mm2);
// punpcklwd mm0, mm1; // mm0 = [ 0 4 1 5]
punpckhwd_r2r(mm1, mm4); // mm4 = [ 2 6 3 7]
movq_m2r(*(out + ROW_STRIDE*3 + 4), mm3);
movq_r2r(mm2, mm6);
punpcklwd_r2r(mm3, mm2); // mm2 = [ 8 12 9 13]
movq_r2r(mm0, mm1); // mm1 = [ 0 4 1 5]
punpckhwd_r2r(mm3, mm6); // mm6 = 10 14 11 15]
movq_r2r(mm4, mm3); // mm3 = [ 2 6 3 7]
punpckldq_r2r(mm2, mm0); // final result mm0 = row1 [0 4 8 12]
punpckhdq_r2r(mm2, mm1); // mm1 = final result mm1 = row2 [1 5 9 13]
; // slot
movq_r2m(mm0, *(inptr + ROW_STRIDE*4)); // store row 1
punpckldq_r2r(mm6, mm4); // final result mm4 = row3 [2 6 10 14]
movq_r2m(mm1, *(inptr + ROW_STRIDE*5)); // store row 2
punpckhdq_r2r(mm6, mm3); // final result mm3 = row4 [3 7 11 15]
movq_r2m(mm4, *(inptr + ROW_STRIDE*6)); // store row 3
; // slot
movq_r2m(mm3, *(inptr + ROW_STRIDE*7)); // store row 4
; // slot
}
// Second pass of the MMX32 IDCT.
//
// Walks the 8x8 block one row (8 shorts) at a time and, for each row,
// computes an 8-point inverse transform with the tab_i_01234567
// coefficients, adds the r_inv_col rounder, arithmetic-shifts the 32-bit
// sums right by SHIFT_INV_COL, packs them back to 16 bits with signed
// saturation and stores them over the same row.
//
// blk: the 64 shorts of the block; read and overwritten in place.
//
// MMX registers mm0-mm7 are used and no emms() is issued here; the public
// entry point gst_idct_mmx32_idct() issues it after this function returns.
static void
idct_mmx32_cols( short *blk ) // transform all 8 cols of 8x8 iDCT block
{
int x;
short *inptr = blk;
// Despite the function's name, the matrix is transformed
// row by row. This function is identical to idct_mmx32_rows(),
// except for the SHIFT amount and ROUND_INV amount.
// this subroutine performs two operations
// 1) iDCT row transform
// for( i = 0; i < 8; ++ i)
// DCT_8_INV_ROW_1( blk[i*8], qwTemp[i] );
//
// 2) transpose the matrix (which was stored in qwTemp[])
// qwTemp[] -> [8x8 matrix transpose] -> blk[]
//
// NOTE(review): the loop below writes every result row straight back to
// inptr and contains no transpose step or qwTemp buffer; the description
// above looks inherited from idct_mmx32_rows() -- confirm.
for (x=0; x<8; x++) { // transform one row per iteration
movq_m2r(*(inptr), mm0); // 0 ; x3 x2 x1 x0
movq_m2r(*(inptr+4), mm1); // 1 ; x7 x6 x5 x4
movq_r2r(mm0, mm2); // 2 ; x3 x2 x1 x0
movq_m2r(*(tab_i_01234567), mm3); // 3 ; w06 w04 w02 w00
punpcklwd_r2r(mm1, mm0); // x5 x1 x4 x0
// ----------
movq_r2r(mm0, mm5); // 5 ; x5 x1 x4 x0
punpckldq_r2r(mm0, mm0); // x4 x0 x4 x0
movq_m2r(*(tab_i_01234567+4), mm4); // 4 ; w07 w05 w03 w01
punpckhwd_r2r(mm1, mm2); // 1 ; x7 x3 x6 x2
pmaddwd_r2r(mm0, mm3); // x4*w06+x0*w04 x4*w02+x0*w00
movq_r2r(mm2, mm6); // 6 ; x7 x3 x6 x2
movq_m2r(*(tab_i_01234567+16), mm1);// 1 ; w22 w20 w18 w16
punpckldq_r2r(mm2, mm2); // x6 x2 x6 x2
pmaddwd_r2r(mm2, mm4); // x6*w07+x2*w05 x6*w03+x2*w01
punpckhdq_r2r(mm5, mm5); // x5 x1 x5 x1
pmaddwd_m2r(*(tab_i_01234567+8), mm0);// x4*w14+x0*w12 x4*w10+x0*w08
punpckhdq_r2r(mm6, mm6); // x7 x3 x7 x3
movq_m2r(*(tab_i_01234567+20), mm7);// 7 ; w23 w21 w19 w17
pmaddwd_r2r(mm5, mm1); // x5*w22+x1*w20 x5*w18+x1*w16
paddd_m2r(*(r_inv_col), mm3);// +rounder
pmaddwd_r2r(mm6, mm7); // x7*w23+x3*w21 x7*w19+x3*w17
pmaddwd_m2r(*(tab_i_01234567+12), mm2);// x6*w15+x2*w13 x6*w11+x2*w09
paddd_r2r(mm4, mm3); // 4 ; a1=sum(even1) a0=sum(even0)
pmaddwd_m2r(*(tab_i_01234567+24), mm5);// x5*w30+x1*w28 x5*w26+x1*w24
movq_r2r(mm3, mm4); // 4 ; a1 a0
pmaddwd_m2r(*(tab_i_01234567+28), mm6);// x7*w31+x3*w29 x7*w27+x3*w25
paddd_r2r(mm7, mm1); // 7 ; b1=sum(odd1) b0=sum(odd0)
paddd_m2r(*(r_inv_col), mm0);// +rounder
psubd_r2r(mm1, mm3); // a1-b1 a0-b0
psrad_i2r(SHIFT_INV_COL, mm3); // y6=a1-b1 y7=a0-b0
paddd_r2r(mm4, mm1); // 4 ; a1+b1 a0+b0
paddd_r2r(mm2, mm0); // 2 ; a3=sum(even3) a2=sum(even2)
psrad_i2r(SHIFT_INV_COL, mm1); // y1=a1+b1 y0=a0+b0
paddd_r2r(mm6, mm5); // 6 ; b3=sum(odd3) b2=sum(odd2)
movq_r2r(mm0, mm4); // 4 ; a3 a2
paddd_r2r(mm5, mm0); // a3+b3 a2+b2
psubd_r2r(mm5, mm4); // 5 ; a3-b3 a2-b2
psrad_i2r(SHIFT_INV_COL, mm4); // y4=a3-b3 y5=a2-b2
psrad_i2r(SHIFT_INV_COL, mm0); // y3=a3+b3 y2=a2+b2
packssdw_r2r(mm3, mm4); // 3 ; y6 y7 y4 y5
packssdw_r2r(mm0, mm1); // 0 ; y3 y2 y1 y0
movq_r2r(mm4, mm7); // 7 ; y6 y7 y4 y5
// the two shifts plus the OR below swap the 16-bit halves of each dword,
// turning "y6 y7 y4 y5" into "y7 y6 y5 y4"
psrld_i2r(16, mm4); // 0 y6 0 y4
movq_r2m(mm1, *(inptr)); // 1 ; save y3 y2 y1 y0
pslld_i2r(16, mm7); // y7 0 y5 0
por_r2r(mm4, mm7); // 4 ; y7 y6 y5 y4
// begin processing row 1
movq_r2m(mm7, *(inptr+4)); // 7 ; save y7 y6 y5 y4
inptr += 8; // advance to the next row of 8 shorts
}
// done with the iDCT column-transformation
}
//
// public interface to MMX32 IDCT 8x8 operation
//
// blk is transformed in place: idct_mmx32_rows() runs first, then
// idct_mmx32_cols() on its output, and finally emms() is issued because
// both passes leave the MMX registers in use.
//
void
gst_idct_mmx32_idct( short *blk )
{
// 1) iDCT row transformation
idct_mmx32_rows( blk ); // 1) transform iDCT row, and transpose
// 2) iDCT column transformation
idct_mmx32_cols( blk ); // 2) second pass (column transformation)
emms(); // restore processor state
// all done
}

725
libs/idct/mmxidct.S Normal file
View file

@ -0,0 +1,725 @@
/*
* the input data is transposed and each 16 bit element in the 8x8 matrix
* is left aligned:
* for example in 11...1110000 format
* If the iDCT is of an I macroblock then 0.5 needs to be added to the DC component
* (element[0][0] of the matrix)
*/
/* extrn re_matrix */
/*
 * Constant tables and scratch storage used by gst_idct_mmx_idct.
 * Object names of the form xHHHHHHHHHHHHHHHH spell out the 64-bit value
 * they hold.
 */
.data
.align 16
/*
 * preSC: 8 rows of 8 words (128 bytes) of pre-scale factors.  The first
 * pass of the IDCT loads a quadword of preSC and multiplies it (pmulhw)
 * with the quadword at the same offset in the source matrix.
 */
.type preSC,@object
preSC: .short 16384,22725,21407,19266,16384,12873,8867,4520
.short 22725,31521,29692,26722,22725,17855,12299,6270
.short 21407,29692,27969,25172,21407,16819,11585,5906
.short 19266,26722,25172,22654,19266,15137,10426,5315
.short 16384,22725,21407,19266,16384,12873,8867,4520
.short 12873,17855,16819,15137,25746,20228,13933,7103
.short 17734,24598,23170,20853,17734,13933,9597,4892
.short 18081,25080,23624,21261,18081,14206,9785,4988
.size preSC,128
/* correction ("magic" rebias) words added to V15 in the first pass */
.align 8
.type x0005000200010001,@object
.size x0005000200010001,8
x0005000200010001:
.long 0x00010001,0x00050002
/* not referenced by gst_idct_mmx_idct */
.align 8
.type x0040000000000000,@object
.size x0040000000000000,8
x0040000000000000:
.long 0, 0x00400000
/* four copies of 0x5a82 = 23170, pmulhw multiplier */
.align 8
.type x5a825a825a825a82,@object
.size x5a825a825a825a82,8
x5a825a825a825a82:
.long 0x5a825a82, 0x5a825a82
/* four copies of 0x539f = 21407, pmulhw multiplier */
.align 8
.type x539f539f539f539f,@object
.size x539f539f539f539f,8
x539f539f539f539f:
.long 0x539f539f,0x539f539f
/* four copies of 0x4546 = 17734, pmulhw multiplier */
.align 8
.type x4546454645464546,@object
.size x4546454645464546,8
x4546454645464546:
.long 0x45464546,0x45464546
/* four copies of 0x61f8 = 25080, pmulhw multiplier */
.align 8
.type x61f861f861f861f8,@object
.size x61f861f861f861f8,8
x61f861f861f861f8:
.long 0x61f861f8,0x61f861f8
/*
 * not referenced by gst_idct_mmx_idct
 * NOTE(review): unlike the objects above, this one and the next have no
 * .align 8 directive of their own.
 */
.type x0004000000000000,@object
.size x0004000000000000,8
x0004000000000000:
.long 0x00000000,0x00040000
/* 4 in the lowest word only; added (paddw) to the first input quadword */
.type x0000000000000004,@object
.size x0000000000000004,8
x0000000000000004:
.long 0x00000004,0x00000000
/*
 * scratch1/3/5/7: temporary homes for the out1/out3/out5/out7 quadwords
 * until the end of the routine, where they are shifted and written back
 * to the matrix.  They are global, writable storage.
 */
.align 8
.type scratch1,@object
.size scratch1,8
scratch1:
.long 0,0
.align 8
.type scratch3,@object
.size scratch3,8
scratch3:
.long 0,0
.align 8
.type scratch5,@object
.size scratch5,8
scratch5:
.long 0,0
.align 8
.type scratch7,@object
.size scratch7,8
scratch7:
.long 0,0
/* not referenced by gst_idct_mmx_idct */
.type x0,@object
.size x0,8
x0:
.long 0,0
.align 8
/*
 * gst_idct_mmx_idct -- in-place 8x8 inverse DCT using MMX.
 *
 * The single stack argument, read from 8(%ebp), is a pointer to the source
 * matrix.  The 128 bytes starting at that address are read and overwritten
 * with the result.
 *
 * Outline of the code below:
 *  1. pre-scale: x0000000000000004 is added to the first quadword, then
 *     every word of the matrix is shifted left by 4;
 *  2. first pass ("column 0" and "column 1" parts), where the inputs are
 *     multiplied by the preSC table;
 *  3. transpose, done per quadrant (M1..M4) and interleaved with the
 *     neighbouring arithmetic;
 *  4. second pass ("after transpose"), whose outputs are shifted right by 4
 *     before being stored.
 *
 * out1/out3/out5/out7 are parked in the global scratch1/3/5/7 objects
 * during the second pass, so concurrent calls would share that storage.
 *
 * %ebx, %ecx, %edx, %esi and %edi are pushed on entry and popped on exit;
 * emms is executed before returning.
 */
.text
.align 4
.globl gst_idct_mmx_idct
.type gst_idct_mmx_idct,@function
gst_idct_mmx_idct:
pushl %ebp
movl %esp,%ebp
pushl %ebx
pushl %ecx
pushl %edx
pushl %esi
pushl %edi
movl 8(%ebp),%esi /* source matrix */
/* pre-scale: bias the first quadword, then shift all 64 words left by 4 */
movq (%esi), %mm0
paddw x0000000000000004, %mm0
movq 8(%esi), %mm1
psllw $4, %mm0
movq 16(%esi), %mm2
psllw $4, %mm1
movq 24(%esi), %mm3
psllw $4, %mm2
movq 32(%esi), %mm4
psllw $4, %mm3
movq 40(%esi), %mm5
psllw $4, %mm4
movq 48(%esi), %mm6
psllw $4, %mm5
movq 56(%esi), %mm7
psllw $4, %mm6
psllw $4, %mm7
movq %mm0, (%esi)
movq %mm1, 8(%esi)
movq %mm2,16(%esi)
movq %mm3,24(%esi)
movq %mm4,32(%esi)
movq %mm5,40(%esi)
movq %mm6,48(%esi)
movq %mm7,56(%esi)
movq 64(%esi), %mm0
movq 72(%esi), %mm1
psllw $4, %mm0
movq 80(%esi), %mm2
psllw $4, %mm1
movq 88(%esi), %mm3
psllw $4, %mm2
movq 96(%esi), %mm4
psllw $4, %mm3
movq 104(%esi), %mm5
psllw $4, %mm4
movq 112(%esi), %mm6
psllw $4, %mm5
movq 120(%esi), %mm7
psllw $4, %mm6
psllw $4, %mm7
movq %mm0,64(%esi)
movq %mm1,72(%esi)
movq %mm2,80(%esi)
movq %mm3,88(%esi)
movq %mm4,96(%esi)
movq %mm5,104(%esi)
movq %mm6,112(%esi)
movq %mm7,120(%esi)
/* %ecx = base of the pre-scale table for the first pass */
leal preSC, %ecx
/* column 0: even part
* use V4, V12, V0, V8 to produce V22..V25
*/
movq 8*12(%ecx), %mm0 /* maybe the first mul can be done together */
/* with the dequantization in iHuff module */
pmulhw 8*12(%esi), %mm0 /* V12 */
movq 8*4(%ecx), %mm1
pmulhw 8*4(%esi), %mm1 /* V4 */
movq (%ecx), %mm3
psraw $1, %mm0 /* t64=t66 */
pmulhw (%esi), %mm3 /* V0 */
movq 8*8(%ecx), %mm5 /* duplicate V4 */
movq %mm1, %mm2 /* added 11/1/96 */
pmulhw 8*8(%esi),%mm5 /* V8 */
psubsw %mm0, %mm1 /* V16 */
pmulhw x5a825a825a825a82, %mm1 /* 23170 ->V18 */
paddsw %mm0, %mm2 /* V17 */
movq %mm2, %mm0 /* duplicate V17 */
psraw $1, %mm2 /* t75=t82 */
psraw $2, %mm0 /* t72 */
movq %mm3, %mm4 /* duplicate V0 */
paddsw %mm5, %mm3 /* V19 */
psubsw %mm5, %mm4 /* V20 ;mm5 free */
/* moved from the block below */
movq 8*10(%ecx), %mm7
psraw $1, %mm3 /* t74=t81 */
movq %mm3, %mm6 /* duplicate t74=t81 */
psraw $2, %mm4 /* t77=t79 */
psubsw %mm0, %mm1 /* V21 ; mm0 free */
paddsw %mm2, %mm3 /* V22 */
movq %mm1, %mm5 /* duplicate V21 */
paddsw %mm4, %mm1 /* V23 */
movq %mm3, 8*4(%esi) /* V22 */
psubsw %mm5, %mm4 /* V24; mm5 free */
movq %mm1, 8*12(%esi) /* V23 */
psubsw %mm2, %mm6 /* V25; mm2 free */
movq %mm4, (%esi) /* V24 */
/* keep mm6 alive all along the next block */
/* movq %mm6, 8*8(%esi) V25 */
/* column 0: odd part
* use V2, V6, V10, V14 to produce V31, V39, V40, V41
*/
/* moved above: movq 8*10(%ecx), %mm7 */
pmulhw 8*10(%esi), %mm7 /* V10 */
movq 8*6(%ecx), %mm0
pmulhw 8*6(%esi), %mm0 /* V6 */
movq 8*2(%ecx), %mm5
movq %mm7, %mm3 /* duplicate V10 */
pmulhw 8*2(%esi), %mm5 /* V2 */
movq 8*14(%ecx), %mm4
psubsw %mm0, %mm7 /* V26 */
pmulhw 8*14(%esi), %mm4 /* V14 */
paddsw %mm0, %mm3 /* V29 ; free mm0 */
movq %mm7, %mm1 /* duplicate V26 */
psraw $1, %mm3 /* t91=t94 */
pmulhw x539f539f539f539f,%mm7 /* V33 */
psraw $1, %mm1 /* t96 */
movq %mm5, %mm0 /* duplicate V2 */
psraw $2, %mm4 /* t85=t87 */
paddsw %mm4,%mm5 /* V27 */
psubsw %mm4, %mm0 /* V28 ; free mm4 */
movq %mm0, %mm2 /* duplicate V28 */
psraw $1, %mm5 /* t90=t93 */
pmulhw x4546454645464546,%mm0 /* V35 */
psraw $1, %mm2 /* t97 */
movq %mm5, %mm4 /* duplicate t90=t93 */
psubsw %mm2, %mm1 /* V32 ; free mm2 */
pmulhw x61f861f861f861f8,%mm1 /* V36 */
psllw $1, %mm7 /* t107 */
paddsw %mm3, %mm5 /* V31 */
psubsw %mm3, %mm4 /* V30 ; free mm3 */
pmulhw x5a825a825a825a82,%mm4 /* V34 */
nop
psubsw %mm1, %mm0 /* V38 */
psubsw %mm7, %mm1 /* V37 ; free mm7 */
psllw $1, %mm1 /* t114 */
/* move from the next block */
movq %mm6, %mm3 /* duplicate V25 */
/* move from the next block */
movq 8*4(%esi), %mm7 /* V22 */
psllw $1, %mm0 /* t110 */
psubsw %mm5, %mm0 /* V39 (mm5 needed for next block) */
psllw $2, %mm4 /* t112 */
/* moved from the next block */
movq 8*12(%esi), %mm2 /* V23 */
psubsw %mm0, %mm4 /* V40 */
paddsw %mm4, %mm1 /* V41; free mm0 */
/* moved from the next block */
psllw $1, %mm2 /* t117=t125 */
/* column 0: output butterfly */
/* moved above:
* movq %mm6, %mm3 duplicate V25
* movq 8*4(%esi), %mm7 V22
* movq 8*12(%esi), %mm2 V23
* psllw $1, %mm2 t117=t125
*/
psubsw %mm1, %mm6 /* tm6 */
paddsw %mm1, %mm3 /* tm8; free mm1 */
movq %mm7, %mm1 /* duplicate V22 */
paddsw %mm5, %mm7 /* tm0 */
movq %mm3, 8*8(%esi) /* tm8; free mm3 */
psubsw %mm5, %mm1 /* tm14; free mm5 */
movq %mm6, 8*6(%esi) /* tm6; free mm6 */
movq %mm2, %mm3 /* duplicate t117=t125 */
movq (%esi), %mm6 /* V24 */
paddsw %mm0, %mm2 /* tm2 */
movq %mm7, (%esi) /* tm0; free mm7 */
psubsw %mm0, %mm3 /* tm12; free mm0 */
movq %mm1, 8*14(%esi) /* tm14; free mm1 */
psllw $1, %mm6 /* t119=t123 */
movq %mm2, 8*2(%esi) /* tm2; free mm2 */
movq %mm6, %mm0 /* duplicate t119=t123 */
movq %mm3, 8*12(%esi) /* tm12; free mm3 */
paddsw %mm4, %mm6 /* tm4 */
/* moved from next block */
movq 8*5(%ecx), %mm1
psubsw %mm4, %mm0 /* tm10; free mm4 */
/* moved from next block */
pmulhw 8*5(%esi), %mm1 /* V5 */
movq %mm6, 8*4(%esi) /* tm4; free mm6 */
movq %mm0, 8*10(%esi) /* tm10; free mm0 */
/* column 1: even part
* use V5, V13, V1, V9 to produce V56..V59
*/
/* moved to prev block:
* movq 8*5(%ecx), %mm1
* pmulhw 8*5(%esi), %mm1 V5
*/
movq 8*13(%ecx), %mm7
psllw $1, %mm1 /* t128=t130 */
pmulhw 8*13(%esi), %mm7 /* V13 */
movq %mm1, %mm2 /* duplicate t128=t130 */
movq 8(%ecx), %mm3
pmulhw 8(%esi), %mm3 /* V1 */
movq 8*9(%ecx), %mm5
psubsw %mm7, %mm1 /* V50 */
pmulhw 8*9(%esi), %mm5 /* V9 */
paddsw %mm7, %mm2 /* V51 */
pmulhw x5a825a825a825a82, %mm1 /* 23170 ->V52 */
movq %mm2, %mm6 /* duplicate V51 */
psraw $1, %mm2 /* t138=t144 */
movq %mm3, %mm4 /* duplicate V1 */
psraw $2, %mm6 /* t136 */
paddsw %mm5, %mm3 /* V53 */
psubsw %mm5, %mm4 /* V54 ;mm5 free */
movq %mm3, %mm7 /* duplicate V53 */
/* moved from next block */
movq 8*11(%ecx), %mm0
psraw $1, %mm4 /* t140=t142 */
psubsw %mm6, %mm1 /* V55 ; mm6 free */
paddsw %mm2, %mm3 /* V56 */
movq %mm4, %mm5 /* duplicate t140=t142 */
paddsw %mm1, %mm4 /* V57 */
movq %mm3, 8*5(%esi) /* V56 */
psubsw %mm1, %mm5 /* V58; mm1 free */
movq %mm4, 8*13(%esi) /* V57 */
psubsw %mm2, %mm7 /* V59; mm2 free */
movq %mm5, 8*9(%esi) /* V58 */
/* keep mm7 alive all along the next block
* movq %mm7, 8(%esi) V59
* moved above
* movq 8*11(%ecx), %mm0
*/
pmulhw 8*11(%esi), %mm0 /* V11 */
movq 8*7(%ecx), %mm6
pmulhw 8*7(%esi), %mm6 /* V7 */
movq 8*15(%ecx), %mm4
movq %mm0, %mm3 /* duplicate V11 */
pmulhw 8*15(%esi), %mm4 /* V15 */
movq 8*3(%ecx), %mm5
psllw $1, %mm6 /* t146=t152 */
pmulhw 8*3(%esi), %mm5 /* V3 */
paddsw %mm6, %mm0 /* V63 */
/* note that V15 computation has a correction step:
* this is a 'magic' constant that rebiases the results to be closer to the
* expected result. this magic constant can be refined to reduce the error
* even more by doing the correction step in a later stage when the number
* is actually multiplied by 16
*/
paddw x0005000200010001, %mm4
psubsw %mm6, %mm3 /* V60 ; free mm6 */
psraw $1, %mm0 /* t154=t156 */
movq %mm3, %mm1 /* duplicate V60 */
pmulhw x539f539f539f539f, %mm1 /* V67 */
movq %mm5, %mm6 /* duplicate V3 */
psraw $2, %mm4 /* t148=t150 */
paddsw %mm4, %mm5 /* V61 */
psubsw %mm4, %mm6 /* V62 ; free mm4 */
movq %mm5, %mm4 /* duplicate V61 */
psllw $1, %mm1 /* t169 */
paddsw %mm0, %mm5 /* V65 -> result */
psubsw %mm0, %mm4 /* V64 ; free mm0 */
pmulhw x5a825a825a825a82, %mm4 /* V68 */
psraw $1, %mm3 /* t158 */
psubsw %mm6, %mm3 /* V66 */
movq %mm5, %mm2 /* duplicate V65 */
pmulhw x61f861f861f861f8, %mm3 /* V70 */
psllw $1, %mm6 /* t165 */
pmulhw x4546454645464546, %mm6 /* V69 */
psraw $1, %mm2 /* t172 */
/* moved from next block */
movq 8*5(%esi), %mm0 /* V56 */
psllw $1, %mm4 /* t174 */
/* moved from next block */
psraw $1, %mm0 /* t177=t188 */
nop
psubsw %mm3, %mm6 /* V72 */
psubsw %mm1, %mm3 /* V71 ; free mm1 */
psubsw %mm2, %mm6 /* V73 ; free mm2 */
/* moved from next block */
psraw $1, %mm5 /* t178=t189 */
psubsw %mm6, %mm4 /* V74 */
/* moved from next block */
movq %mm0, %mm1 /* duplicate t177=t188 */
paddsw %mm4, %mm3 /* V75 */
/* moved from next block */
paddsw %mm5, %mm0 /* tm1 */
/* location
* 5 - V56
* 13 - V57
* 9 - V58
* X - V59, mm7
* X - V65, mm5
* X - V73, mm6
* X - V74, mm4
* X - V75, mm3
* free mm0, mm1 & mm2
* moved above
* movq 8*5(%esi), %mm0 V56
* psllw $1, %mm0 t177=t188 ! new !!
* psllw $1, %mm5 t178=t189 ! new !!
* movq %mm0, %mm1 duplicate t177=t188
* paddsw %mm5, %mm0 tm1
*
* NOTE(review): the two shifts listed here as psllw are emitted above
* as psraw $1 (on %mm0 and %mm5); this listing does not match the code.
*/
movq 8*13(%esi), %mm2 /* V57 */
psubsw %mm5, %mm1 /* tm15; free mm5 */
movq %mm0, 8(%esi) /* tm1; free mm0 */
psraw $1, %mm7 /* t182=t184 ! new !! */
/* save the store as used directly in the transpose
* movq %mm1, 120(%esi) tm15; free mm1
*/
movq %mm7, %mm5 /* duplicate t182=t184 */
psubsw %mm3, %mm7 /* tm7 */
paddsw %mm3, %mm5 /* tm9; free mm3 */
movq 8*9(%esi), %mm0 /* V58 */
movq %mm2, %mm3 /* duplicate V57 */
movq %mm7, 8*7(%esi) /* tm7; free mm7 */
psubsw %mm6, %mm3 /* tm13 */
paddsw %mm6, %mm2 /* tm3 ; free mm6 */
/* moved up from the transpose */
movq %mm3, %mm7
/* moved up from the transpose */
punpcklwd %mm1, %mm3
movq %mm0, %mm6 /* duplicate V58 */
movq %mm2, 8*3(%esi) /* tm3; free mm2 */
paddsw %mm4, %mm0 /* tm5 */
psubsw %mm4, %mm6 /* tm11; free mm4 */
/* moved up from the transpose */
punpckhwd %mm1, %mm7
movq %mm0, 8*5(%esi) /* tm5; free mm0 */
/* moved up from the transpose */
movq %mm5, %mm2
/* transpose - M4 part
* --------- ---------
* | M1 | M2 | | M1'| M3'|
* --------- --> ---------
* | M3 | M4 | | M2'| M4'|
* --------- ---------
* Two alternatives: use full mmword approach so the following code can be
* scheduled before the transpose is done without stores, or use the faster
* half mmword stores (when possible)
*/
movd %mm3, 8*9+4(%esi) /* MS part of tmt9 */
punpcklwd %mm6, %mm5
movd %mm7, 8*13+4(%esi) /* MS part of tmt13 */
punpckhwd %mm6, %mm2
movd %mm5, 8*9(%esi) /* LS part of tmt9 */
punpckhdq %mm3, %mm5 /* free mm3 */
movd %mm2, 8*13(%esi) /* LS part of tmt13 */
punpckhdq %mm7, %mm2 /* free mm7 */
/* moved up from the M3 transpose */
movq 8*8(%esi), %mm0
/* moved up from the M3 transpose */
movq 8*10(%esi), %mm1
/* moved up from the M3 transpose */
movq %mm0, %mm3
/* shuffle the rest of the data, and write it with 2 mmword writes */
movq %mm5, 8*11(%esi) /* tmt11 */
/* moved up from the M3 transpose */
punpcklwd %mm1, %mm0
movq %mm2, 8*15(%esi) /* tmt15 */
/* moved up from the M3 transpose */
punpckhwd %mm1, %mm3
/* transpose - M3 part
* moved up to previous code section
* movq 8*8(%esi), %mm0
* movq 8*10(%esi), %mm1
* movq %mm0, %mm3
* punpcklwd %mm1, %mm0
* punpckhwd %mm1, %mm3
*/
movq 8*12(%esi), %mm6
movq 8*14(%esi), %mm4
movq %mm6, %mm2
/* shuffle the data and write the lower parts of the transposed in 4 dwords */
punpcklwd %mm4, %mm6
movq %mm0, %mm1
punpckhdq %mm6, %mm1
movq %mm3, %mm7
punpckhwd %mm4, %mm2 /* free mm4 */
punpckldq %mm6, %mm0 /* free mm6 */
/* moved from next block */
movq 8*13(%esi), %mm4 /* tmt13 */
punpckldq %mm2, %mm3
punpckhdq %mm2, %mm7 /* free mm2 */
/* moved from next block */
movq %mm3, %mm5 /* duplicate tmt5 */
/* column 1: even part (after transpose)
* moved above
* movq %mm3, %mm5 duplicate tmt5
* movq 8*13(%esi), %mm4 tmt13
*/
psubsw %mm4, %mm3 /* V134 */
pmulhw x5a825a825a825a82, %mm3 /* 23170 ->V136 */
movq 8*9(%esi), %mm6 /* tmt9 */
paddsw %mm4, %mm5 /* V135 ; mm4 free */
movq %mm0, %mm4 /* duplicate tmt1 */
paddsw %mm6, %mm0 /* V137 */
psubsw %mm6, %mm4 /* V138 ; mm6 free */
psllw $2, %mm3 /* t290 */
psubsw %mm5, %mm3 /* V139 */
movq %mm0, %mm6 /* duplicate V137 */
paddsw %mm5, %mm0 /* V140 */
movq %mm4, %mm2 /* duplicate V138 */
paddsw %mm3, %mm2 /* V141 */
psubsw %mm3, %mm4 /* V142 ; mm3 free */
movq %mm0, 8*9(%esi) /* V140 */
psubsw %mm5, %mm6 /* V143 ; mm5 free */
/* moved from next block */
movq 8*11(%esi), %mm0 /* tmt11 */
movq %mm2, 8*13(%esi) /* V141 */
/* moved from next block */
movq %mm0, %mm2 /* duplicate tmt11 */
/* column 1: odd part (after transpose) */
/* moved up to the prev block
* movq 8*11(%esi), %mm0 tmt11
* movq %mm0, %mm2 duplicate tmt11
*/
movq 8*15(%esi), %mm5 /* tmt15 */
psubsw %mm7, %mm0 /* V144 */
movq %mm0, %mm3 /* duplicate V144 */
paddsw %mm7, %mm2 /* V147 ; free mm7 */
pmulhw x539f539f539f539f, %mm0 /* 21407-> V151 */
movq %mm1, %mm7 /* duplicate tmt3 */
paddsw %mm5, %mm7 /* V145 */
psubsw %mm5, %mm1 /* V146 ; free mm5 */
psubsw %mm1, %mm3 /* V150 */
movq %mm7, %mm5 /* duplicate V145 */
pmulhw x4546454645464546, %mm1 /* 17734-> V153 */
psubsw %mm2, %mm5 /* V148 */
pmulhw x61f861f861f861f8, %mm3 /* 25080-> V154 */
psllw $2, %mm0 /* t311 */
pmulhw x5a825a825a825a82, %mm5 /* 23170-> V152 */
paddsw %mm2, %mm7 /* V149 ; free mm2 */
psllw $1, %mm1 /* t313 */
nop /* without the nop - freeze here for one clock */
movq %mm3, %mm2 /* duplicate V154 */
psubsw %mm0, %mm3 /* V155 ; free mm0 */
psubsw %mm2, %mm1 /* V156 ; free mm2 */
/* moved from the next block */
movq %mm6, %mm2 /* duplicate V143 */
/* moved from the next block */
movq 8*13(%esi), %mm0 /* V141 */
psllw $1, %mm1 /* t315 */
psubsw %mm7, %mm1 /* V157 (keep V149) */
psllw $2, %mm5 /* t317 */
psubsw %mm1, %mm5 /* V158 */
psllw $1, %mm3 /* t319 */
paddsw %mm5, %mm3 /* V159 */
/* column 1: output butterfly (after transform)
* moved to the prev block
* movq %mm6, %mm2 duplicate V143
* movq 8*13(%esi), %mm0 V141
*/
psubsw %mm3, %mm2 /* V163 */
paddsw %mm3, %mm6 /* V164 ; free mm3 */
movq %mm4, %mm3 /* duplicate V142 */
psubsw %mm5, %mm4 /* V165 ; free mm5 */
movq %mm2, scratch7 /* out7 */
psraw $4, %mm6
psraw $4, %mm4
paddsw %mm5, %mm3 /* V162 */
movq 8*9(%esi), %mm2 /* V140 */
movq %mm0, %mm5 /* duplicate V141 */
/* in order not to percolate this line up,
* we read 72(%esi) very near to this location
*/
movq %mm6, 8*9(%esi) /* out9 */
paddsw %mm1, %mm0 /* V161 */
movq %mm3, scratch5 /* out5 */
psubsw %mm1, %mm5 /* V166 ; free mm1 */
movq %mm4, 8*11(%esi) /* out11 */
psraw $4, %mm5
movq %mm0, scratch3 /* out3 */
movq %mm2, %mm4 /* duplicate V140 */
movq %mm5, 8*13(%esi) /* out13 */
paddsw %mm7, %mm2 /* V160 */
/* moved from the next block */
movq 8(%esi), %mm0
psubsw %mm7, %mm4 /* V167 ; free mm7 */
/* moved from the next block */
movq 8*3(%esi), %mm7
psraw $4, %mm4
movq %mm2, scratch1 /* out1 */
/* moved from the next block */
movq %mm0, %mm1
movq %mm4, 8*15(%esi) /* out15 */
/* moved from the next block */
punpcklwd %mm7, %mm0
/* transpose - M2 parts
* moved up to the prev block
* movq 8(%esi), %mm0
* movq 8*3(%esi), %mm7
* movq %mm0, %mm1
* punpcklwd %mm7, %mm0
*/
movq 8*5(%esi), %mm5
punpckhwd %mm7, %mm1
movq 8*7(%esi), %mm4
movq %mm5, %mm3
/* shuffle the data and write the lower parts of the transposed in 4 dwords */
movd %mm0, 8*8(%esi) /* LS part of tmt8 */
punpcklwd %mm4, %mm5
movd %mm1, 8*12(%esi) /* LS part of tmt12 */
punpckhwd %mm4, %mm3
movd %mm5, 8*8+4(%esi) /* MS part of tmt8 */
punpckhdq %mm5, %mm0 /* tmt10 */
movd %mm3, 8*12+4(%esi) /* MS part of tmt12 */
punpckhdq %mm3, %mm1 /* tmt14 */
/* transpose - M1 parts */
movq (%esi), %mm7
movq 8*2(%esi), %mm2
movq %mm7, %mm6
movq 8*4(%esi), %mm5
punpcklwd %mm2, %mm7
movq 8*6(%esi), %mm4
punpckhwd %mm2, %mm6 /* free mm2 */
movq %mm5, %mm3
punpcklwd %mm4, %mm5
punpckhwd %mm4, %mm3 /* free mm4 */
movq %mm7, %mm2
movq %mm6, %mm4
punpckldq %mm5, %mm7 /* tmt0 */
punpckhdq %mm5, %mm2 /* tmt2 ; free mm5 */
/* shuffle the rest of the data, and write it with 2 mmword writes */
punpckldq %mm3, %mm6 /* tmt4 */
/* moved from next block */
movq %mm2, %mm5 /* duplicate tmt2 */
punpckhdq %mm3, %mm4 /* tmt6 ; free mm3 */
/* moved from next block */
movq %mm0, %mm3 /* duplicate tmt10 */
/* column 0: odd part (after transpose)
*moved up to prev block
* movq %mm0, %mm3 duplicate tmt10
* movq %mm2, %mm5 duplicate tmt2
*/
psubsw %mm4, %mm0 /* V110 */
paddsw %mm4, %mm3 /* V113 ; free mm4 */
movq %mm0, %mm4 /* duplicate V110 */
paddsw %mm1, %mm2 /* V111 */
pmulhw x539f539f539f539f, %mm0 /* 21407-> V117 */
psubsw %mm1, %mm5 /* V112 ; free mm1 */
psubsw %mm5, %mm4 /* V116 */
movq %mm2, %mm1 /* duplicate V111 */
pmulhw x4546454645464546, %mm5 /* 17734-> V119 */
psubsw %mm3, %mm2 /* V114 */
pmulhw x61f861f861f861f8, %mm4 /* 25080-> V120 */
paddsw %mm3, %mm1 /* V115 ; free mm3 */
pmulhw x5a825a825a825a82, %mm2 /* 23170-> V118 */
psllw $2, %mm0 /* t266 */
movq %mm1, (%esi) /* save V115 */
psllw $1, %mm5 /* t268 */
psubsw %mm4, %mm5 /* V122 */
psubsw %mm0, %mm4 /* V121 ; free mm0 */
psllw $1, %mm5 /* t270 */
psubsw %mm1, %mm5 /* V123 ; free mm1 */
psllw $2, %mm2 /* t272 */
psubsw %mm5, %mm2 /* V124 (keep V123) */
psllw $1, %mm4 /* t274 */
movq %mm5, 8*2(%esi) /* save V123 ; free mm5 */
paddsw %mm2, %mm4 /* V125 (keep V124) */
/* column 0: even part (after transpose) */
movq 8*12(%esi), %mm0 /* tmt12 */
movq %mm6, %mm3 /* duplicate tmt4 */
psubsw %mm0, %mm6 /* V100 */
paddsw %mm0, %mm3 /* V101 ; free mm0 */
pmulhw x5a825a825a825a82, %mm6 /* 23170 ->V102 */
movq %mm7, %mm5 /* duplicate tmt0 */
movq 8*8(%esi), %mm1 /* tmt8 */
paddsw %mm1, %mm7 /* V103 */
psubsw %mm1, %mm5 /* V104 ; free mm1 */
movq %mm7, %mm0 /* duplicate V103 */
psllw $2, %mm6 /* t245 */
paddsw %mm3, %mm7 /* V106 */
movq %mm5, %mm1 /* duplicate V104 */
psubsw %mm3, %mm6 /* V105 */
psubsw %mm3, %mm0 /* V109; free mm3 */
paddsw %mm6, %mm5 /* V107 */
psubsw %mm6, %mm1 /* V108 ; free mm6 */
/* column 0: output butterfly (after transform) */
movq %mm1, %mm3 /* duplicate V108 */
paddsw %mm2, %mm1 /* out4 */
psraw $4, %mm1
psubsw %mm2, %mm3 /* out10 ; free mm2 */
psraw $4, %mm3
movq %mm0, %mm6 /* duplicate V109 */
movq %mm1, 8*4(%esi) /* out4 ; free mm1 */
psubsw %mm4, %mm0 /* out6 */
movq %mm3, 8*10(%esi) /* out10 ; free mm3 */
psraw $4, %mm0
paddsw %mm4, %mm6 /* out8 ; free mm4 */
movq %mm7, %mm1 /* duplicate V106 */
movq %mm0, 8*6(%esi) /* out6 ; free mm0 */
psraw $4, %mm6
movq (%esi), %mm4 /* V115 */
movq %mm6, 8*8(%esi) /* out8 ; free mm6 */
movq %mm5, %mm2 /* duplicate V107 */
movq 8*2(%esi), %mm3 /* V123 */
paddsw %mm4, %mm7 /* out0 */
/* moved up from next block */
movq scratch3, %mm0
psraw $4, %mm7
/* moved up from next block */
movq scratch5, %mm6
psubsw %mm4, %mm1 /* out14 ; free mm4 */
paddsw %mm3, %mm5 /* out2 */
psraw $4, %mm1
movq %mm7, (%esi) /* out0 ; free mm7 */
psraw $4, %mm5
movq %mm1, 8*14(%esi) /* out14 ; free mm1 */
psubsw %mm3, %mm2 /* out12 ; free mm3 */
movq %mm5, 8*2(%esi) /* out2 ; free mm5 */
psraw $4, %mm2
/* moved up to the prev block */
movq scratch7, %mm4
/* moved up to the prev block */
psraw $4, %mm0
movq %mm2, 8*12(%esi) /* out12 ; free mm2 */
/* moved up to the prev block */
psraw $4, %mm6
/* move back the data to its correct place
* moved up to the prev block
* movq scratch3, %mm0
* movq scratch5, %mm6
* movq scratch7, %mm4
* psraw $4, %mm0
* psraw $4, %mm6
*/
movq scratch1, %mm1
psraw $4, %mm4
movq %mm0, 8*3(%esi) /* out3 */
psraw $4, %mm1
movq %mm6, 8*5(%esi) /* out5 */
movq %mm4, 8*7(%esi) /* out7 */
movq %mm1, 8(%esi) /* out1 */
/* leave MMX mode, then restore the registers saved in the prologue */
emms
popl %edi
popl %esi
popl %edx
popl %ecx
popl %ebx
movl %ebp,%esp
popl %ebp
ret
.Lfe1:
.size gst_idct_mmx_idct,.Lfe1-gst_idct_mmx_idct

View file

@ -63,7 +63,7 @@ void gst_putbits(gst_putbits_t *pb, int val, int n)
int i;
unsigned int mask;
//printf("putbits: %d %d %ld\n", val, n, pb->outcnt);
//printf("putbits: %d %d %ld %ld\n", val, n, pb->outcnt, pb->newlen);
mask = 1 << (n-1); /* selects first (leftmost) bit */
for (i=0; i<n; i++)

View file

@ -71,6 +71,7 @@
#define GST_RIFF_FCC_auds MAKE_FOUR_CC('a','u','d','s')
#define GST_RIFF_FCC_pads MAKE_FOUR_CC('p','a','d','s')
#define GST_RIFF_FCC_txts MAKE_FOUR_CC('t','x','t','s')
#define GST_RIFF_FCC_vidc MAKE_FOUR_CC('v','i','d','c')
/* fcc handlers */
#define GST_RIFF_FCCH_RLE MAKE_FOUR_CC('R','L','E',' ')
#define GST_RIFF_FCCH_msvc MAKE_FOUR_CC('m','s','v','c')
@ -184,6 +185,8 @@
#define GST_RIFF_vyuy MAKE_FOUR_CC( 'v', 'y', 'u', 'y')
#define GST_RIFF_VYUY MAKE_FOUR_CC( 'V', 'Y', 'U', 'Y')
#define GST_RIFF_DIV3 MAKE_FOUR_CC( 'D', 'I', 'V', '3')
#define GST_RIFF_rpza MAKE_FOUR_CC( 'r', 'p', 'z', 'a')
/* And this here's the mistakes that need to be supported */
#define GST_RIFF_azpr MAKE_FOUR_CC( 'a', 'z', 'p', 'r') /* recognize Apple's rpza mangled? */
@ -273,10 +276,14 @@ struct _gst_riff_strf_auds { /* == WaveHeader (?) */
#define GST_RIFF_WAVE_FORMAT_YAMAHA_ADPCM (0x0020)
#define GST_RIFF_WAVE_FORMAT_DSP_TRUESPEECH (0x0022)
#define GST_RIFF_WAVE_FORMAT_GSM610 (0x0031)
#define GST_RIFF_WAVE_FORMAT_MPEG (0x0055)
#define GST_RIFF_WAVE_FORMAT_MSN (0x0032)
#define GST_RIFF_WAVE_FORMAT_MPEGL12 (0x0050)
#define GST_RIFF_WAVE_FORMAT_MPEGL3 (0x0055)
#define GST_RIFF_IBM_FORMAT_MULAW (0x0101)
#define GST_RIFF_IBM_FORMAT_ALAW (0x0102)
#define GST_RIFF_IBM_FORMAT_ADPCM (0x0103)
#define GST_RIFF_WAVE_FORMAT_DIVX (0x0160)
#define GST_RIFF_WAVE_FORMAT_divx (0x0161)
guint16 channels;
guint32 rate;
guint32 av_bps;
@ -360,6 +367,7 @@ gint gst_riff_encoder_chunk(GstRiff *riff, guint32 chunk_type, void *chunk, gulo
GstBuffer *gst_riff_encoder_get_buffer(GstRiff *riff);
GstBuffer *gst_riff_encoder_get_and_reset_buffer(GstRiff *riff);
/* from gstriffutil.c */
gulong gst_riff_fourcc_to_id(gchar *fourcc);
gchar *gst_riff_id_to_fourcc(gulong id);

View file

@ -18,20 +18,19 @@
*/
//#define DEBUG_ENABLED
#include <gst/gst.h>
#include <gstriff.h>
#define GST_RIFF_ENCODER_BUF_SIZE 1024
//#define debug(format,args...) g_print(format,##args)
#define debug(format,args...)
#define ADD_CHUNK(riffenc, chunkid, chunksize) \
{ \
gst_riff_chunk *chunk;\
chunk = (gst_riff_chunk *)(riffenc->dataleft + riffenc->nextlikely);\
chunk->id = chunkid; \
chunk->size = chunksize; \
riffenc->nextlikely += sizeof(gst_riff_chunk); \
riffenc->nextlikely += sizeof(gst_riff_chunk) + (chunksize&1); \
}
#define ADD_LIST(riffenc, listsize, listtype) \
@ -49,7 +48,7 @@ GstRiff *gst_riff_encoder_new(guint32 type) {
GstRiff *riff;
gst_riff_list *list;
debug("gst_riff_encoder: making %4.4s encoder\n", (char *)&type);
DEBUG("gst_riff_encoder: making %4.4s encoder\n", (char *)&type);
riff = (GstRiff *)g_malloc(sizeof(GstRiff));
g_return_val_if_fail(riff != NULL, NULL);
@ -77,7 +76,7 @@ gint gst_riff_encoder_avih(GstRiff *riff, gst_riff_avih *head, gulong size) {
g_return_val_if_fail(riff->state == GST_RIFF_STATE_INITIAL, GST_RIFF_EINVAL);
debug("gst_riff_encoder: add avih\n");
DEBUG("gst_riff_encoder: add avih\n");
ADD_LIST(riff, 0xB8, GST_RIFF_LIST_hdrl);
@ -97,7 +96,7 @@ gint gst_riff_encoder_strh(GstRiff *riff, guint32 fcc_type, gst_riff_strh *head,
g_return_val_if_fail(riff->state == GST_RIFF_STATE_HASAVIH ||
riff->state == GST_RIFF_STATE_HASSTRF, GST_RIFF_EINVAL);
debug("gst_riff_encoder: add strh type %08x (%4.4s)\n", fcc_type, (char *)&fcc_type);
DEBUG("gst_riff_encoder: add strh type %08x (%4.4s)\n", fcc_type, (char *)&fcc_type);
ADD_LIST(riff, 108, GST_RIFF_LIST_strl);
@ -118,7 +117,7 @@ gint gst_riff_encoder_strf(GstRiff *riff, void *format, gulong size) {
g_return_val_if_fail(riff->state == GST_RIFF_STATE_HASSTRH, GST_RIFF_EINVAL);
debug("gst_riff_encoder: add strf\n");
DEBUG("gst_riff_encoder: add strf\n");
ADD_CHUNK(riff, GST_RIFF_TAG_strf, size);
@ -141,14 +140,14 @@ gint gst_riff_encoder_chunk(GstRiff *riff, guint32 chunk_type, void *chunkdata,
riff->state = GST_RIFF_STATE_MOVI;
}
debug("gst_riff_encoder: add chunk type %08x (%4.4s)\n", chunk_type, (char *)&chunk_type);
DEBUG("gst_riff_encoder: add chunk type %08x (%4.4s)\n", chunk_type, (char *)&chunk_type);
ADD_CHUNK(riff, chunk_type, size);
if (chunkdata != NULL) {
chunk = (gst_riff_chunk *)(riff->dataleft + riff->nextlikely);
memcpy(chunk, chunkdata, size);
riff->nextlikely += size;
riff->nextlikely += size + (size&1);
}
return GST_RIFF_OK;

View file

@ -2,7 +2,7 @@ filterdir = $(libdir)/gst
filter_LTLIBRARIES = libwinloader.la
libwinloader_la_SOURCES = driver.c elfdll.c ext.c externals.c module.c pe_image.c pe_resource.c registry.c resource.c stubs.s vfl.c
libwinloader_la_SOURCES = driver.c elfdll.c ext.c externals.c module.c pe_image.c pe_resource.c registry.c resource.c stubs.s vfl.c afl.c
libwinloaderincludedir = $(includedir)/gst/libs/winloader.h
libwinloaderinclude_HEADERS =

758
libs/winloader/afl.c Normal file
View file

@ -0,0 +1,758 @@
/**************************************************************************
This file will contain an interface to ACM drivers.
Its content will be based mainly on wine/dlls/msacm32
actually, for audio decompression only the following functions
are needed:
acmStreamOpen ( takes formats of src and dest, returns stream handle )
acmStreamPrepareHeader ( takes stream handler and info on data )
acmStreamConvert ( the same as PrepareHeader )
acmStreamUnprepareHeader
acmStreamClose
acmStreamSize
maybe acmStreamReset
In future I'll also add functions for format enumeration,
but not right now.
***************************************************************************/
#include <stdio.h>
#include <string.h>
#include <wine/winbase.h>
#include <wine/windef.h>
#include <wine/winuser.h>
#include <wine/vfw.h>
#include <wine/winestring.h>
#include <wine/driver.h>
#include <wine/winerror.h>
#include <wine/msacm.h>
#include <wine/msacmdrv.h>
#include "wineacm.h"
#pragma pack(1)
#define OpenDriverA DrvOpen
extern HDRVR VFWAPI DrvOpen(long);
#define CloseDriver DrvClose
extern HDRVR VFWAPI DrvClose(long);
/* Translate an opaque stream handle into the internal stream record.
 * The handle value is the record's address, so this is only a cast. */
static PWINE_ACMSTREAM ACM_GetStream(HACMSTREAM has)
{
    PWINE_ACMSTREAM stream = (PWINE_ACMSTREAM)has;

    return stream;
}
/***********************************************************************
* acmDriverAddA (MSACM32.2)
*/
/*
 * Register a new ACM driver identified by hinstModule and hand back its
 * driver-ID handle through *phadid.
 *
 * Returns MMSYSERR_INVALPARAM when phadid is NULL, MMSYSERR_INVALFLAG when
 * fdwAdd carries unknown or mutually exclusive bits, MMSYSERR_NOERROR
 * otherwise.  lParam, dwPriority and (beyond validation) fdwAdd are ignored.
 */
MMRESULT WINAPI acmDriverAddA(PHACMDRIVERID phadid, HINSTANCE hinstModule,
                              LPARAM lParam, DWORD dwPriority, DWORD fdwAdd)
{
    if (phadid == NULL)
        return MMSYSERR_INVALPARAM;

    /* Any bit outside the three known flags is an error. */
    if ((fdwAdd & ~(ACM_DRIVERADDF_FUNCTION|ACM_DRIVERADDF_NOTIFYHWND|
                    ACM_DRIVERADDF_GLOBAL)) != 0)
        return MMSYSERR_INVALFLAG;

    /* FUNCTION and NOTIFYHWND cannot be requested together. */
    if ((fdwAdd & ACM_DRIVERADDF_FUNCTION) != 0 &&
        (fdwAdd & ACM_DRIVERADDF_NOTIFYHWND) != 0)
        return MMSYSERR_INVALFLAG;

    /* FIXME: in fact, should GetModuleFileName(hinstModule) and do a
     * LoadDriver on it, to be sure we can call SendDriverMessage on the
     * hDrvr handle.
     */
    *phadid = (HACMDRIVERID)MSACM_RegisterDriver(NULL, NULL, hinstModule);

    return MMSYSERR_NOERROR;
}
/***********************************************************************
* acmDriverClose (MSACM32.4)
*/
/*
 * Close an open driver instance: unlink it from its driver ID's list of
 * open instances, close the underlying driver once no instance is left on
 * that list, and release the instance record.
 *
 * Returns MMSYSERR_INVALFLAG when fdwClose is non-zero,
 * MMSYSERR_INVALHANDLE when the handle is NULL, MMSYSERR_NOERROR otherwise.
 */
MMRESULT WINAPI acmDriverClose(HACMDRIVER had, DWORD fdwClose)
{
    PWINE_ACMDRIVER  p;
    PWINE_ACMDRIVER *tp;

    if (fdwClose)
        return MMSYSERR_INVALFLAG;

    p = MSACM_GetDriver(had);
    if (!p)
        return MMSYSERR_INVALHANDLE;

    /* Walk the list through a pointer-to-link so the matching node can be
     * spliced out in place.  The cursor itself must advance (tp = &next);
     * assigning through *tp here would overwrite the link and silently drop
     * every instance that precedes the one being closed. */
    for (tp = &(p->obj.pACMDriverID->pACMDriverList); *tp; tp = &((*tp)->pNextACMDriver)) {
        if (*tp == p) {
            *tp = p->pNextACMDriver;
            break;
        }
    }

    /* Only close the driver when this was the last open instance. */
    if (p->hDrvr && !p->obj.pACMDriverID->pACMDriverList)
        CloseDriver(p->hDrvr);

    HeapFree(MSACM_hHeap, 0, p);

    return MMSYSERR_NOERROR;
}
/***********************************************************************
* acmDriverEnum (MSACM32.7)
*/
/*
 * Call fnCallback once for every registered driver ID.
 *
 * Disabled drivers are skipped unless ACM_DRIVERENUMF_DISABLED is set, in
 * which case they are reported with ACMDRIVERDETAILS_SUPPORTF_DISABLED
 * added to the support flags.
 *
 * Returns MMSYSERR_INVALPARAM when fnCallback is NULL, MMSYSERR_INVALFLAG
 * when fdwEnum contains bits other than NOLOCAL/DISABLED, and
 * MMSYSERR_NOERROR otherwise.
 */
MMRESULT WINAPI acmDriverEnum(ACMDRIVERENUMCB fnCallback, DWORD dwInstance, DWORD fdwEnum)
{
    PWINE_ACMDRIVERID p;
    DWORD fdwSupport;

    if (!fnCallback) {
        return MMSYSERR_INVALPARAM;
    }

    /* Bitwise test: reject only bits outside the supported set.  A logical
     * AND here would reject every non-zero fdwEnum, valid flags included. */
    if (fdwEnum & ~(ACM_DRIVERENUMF_NOLOCAL|ACM_DRIVERENUMF_DISABLED)) {
        return MMSYSERR_INVALFLAG;
    }

    for (p = MSACM_pFirstACMDriverID; p; p = p->pNextACMDriverID) {
        fdwSupport = ACMDRIVERDETAILS_SUPPORTF_CODEC;
        if (!p->bEnabled) {
            if (fdwEnum & ACM_DRIVERENUMF_DISABLED)
                fdwSupport |= ACMDRIVERDETAILS_SUPPORTF_DISABLED;
            else
                continue;
        }
        /* NOTE(review): the callback's return value is ignored, so the
         * callback cannot stop the enumeration early. */
        (*fnCallback)((HACMDRIVERID) p, dwInstance, fdwSupport);
    }

    return MMSYSERR_NOERROR;
}
/***********************************************************************
* acmDriverID (MSACM32.8)
*/
/*
 * Report the driver-ID handle that owns the given ACM object.
 *
 * Validation order: NULL object -> MMSYSERR_INVALHANDLE, NULL output
 * pointer -> MMSYSERR_INVALPARAM, non-zero flags -> MMSYSERR_INVALFLAG.
 */
MMRESULT WINAPI acmDriverID(HACMOBJ hao, PHACMDRIVERID phadid, DWORD fdwDriverID)
{
    PWINE_ACMOBJ object = MSACM_GetObj(hao);

    if (object == NULL)
        return MMSYSERR_INVALHANDLE;
    if (phadid == NULL)
        return MMSYSERR_INVALPARAM;
    if (fdwDriverID != 0)
        return MMSYSERR_INVALFLAG;

    *phadid = (HACMDRIVERID)object->pACMDriverID;
    return MMSYSERR_NOERROR;
}
/***********************************************************************
* acmDriverMessage (MSACM32.9)
* FIXME
* Not implemented
*/
/*
 * Forward a raw message to the driver behind an open driver handle.
 *
 * Returns MMSYSERR_INVALPARAM for a NULL handle, MMSYSERR_NOTSUPPORTED when
 * SendDriverMessage yields zero, and MMSYSERR_NOERROR otherwise.
 */
LRESULT WINAPI acmDriverMessage(HACMDRIVER had, UINT uMsg, LPARAM lParam1, LPARAM lParam2)
{
    PWINE_ACMDRIVER driver = MSACM_GetDriver(had);

    if (driver == NULL)
        return MMSYSERR_INVALPARAM;

    /* FIXME: Check if uMsg legal */

    if (SendDriverMessage(driver->hDrvr, uMsg, lParam1, lParam2))
        return MMSYSERR_NOERROR;

    return MMSYSERR_NOTSUPPORTED;
}
/***********************************************************************
* acmDriverOpen (MSACM32.10)
*/
/*
 * Open an instance of the driver identified by hadid and return its handle
 * through *phad.
 *
 * When the driver ID carries no module handle, the driver is opened via
 * OpenDriverA with an ICOPEN whose fccHandler holds the ID stored in
 * pszFileName; otherwise the module handle itself is used as the driver
 * handle.  The new instance is pushed onto the front of the driver ID's
 * instance list.
 *
 * Returns MMSYSERR_INVALPARAM (NULL phad), MMSYSERR_INVALHANDLE (NULL
 * hadid), MMSYSERR_INVALFLAG (non-zero fdwOpen), MMSYSERR_NOMEM,
 * MMSYSERR_ERROR (driver could not be opened) or MMSYSERR_NOERROR.
 */
MMRESULT WINAPI acmDriverOpen(PHACMDRIVER phad, HACMDRIVERID hadid, DWORD fdwOpen)
{
    PWINE_ACMDRIVERID driver_id;
    PWINE_ACMDRIVER   instance;

    TRACE("(%p, %x, %08lu)\n", phad, hadid, fdwOpen);

    if (phad == NULL)
        return MMSYSERR_INVALPARAM;

    driver_id = MSACM_GetDriverID(hadid);
    if (driver_id == NULL)
        return MMSYSERR_INVALHANDLE;

    if (fdwOpen != 0)
        return MMSYSERR_INVALFLAG;

    instance = HeapAlloc(MSACM_hHeap, 0, sizeof(WINE_ACMDRIVER));
    if (instance == NULL)
        return MMSYSERR_NOMEM;

    instance->obj.pACMDriverID = driver_id;

    if (driver_id->hInstModule) {
        instance->hDrvr = driver_id->hInstModule;
    } else {
        ICOPEN icopen;

        icopen.fccType    = mmioFOURCC('a', 'u', 'd', 'c');
        icopen.fccHandler = (long)driver_id->pszFileName;
        icopen.dwSize     = sizeof(ICOPEN);
        icopen.dwFlags    = 0;
        instance->hDrvr = OpenDriverA((long)&icopen);
    }

    if (!instance->hDrvr) {
        HeapFree(MSACM_hHeap, 0, instance);
        return MMSYSERR_ERROR;
    }

    instance->pfnDriverProc = GetProcAddress(instance->hDrvr, "DriverProc");

    /* Link the new instance in at the head of the list. */
    instance->pNextACMDriver = driver_id->pACMDriverList;
    driver_id->pACMDriverList = instance;

    /* FIXME: Create a WINE_ACMDRIVER32 */
    *phad = (HACMDRIVER)instance;

    return MMSYSERR_NOERROR;
}
/***********************************************************************
* acmDriverRemove (MSACM32.12)
*/
/*
 * Unregister the driver identified by hadid.
 *
 * Returns MMSYSERR_INVALHANDLE for a NULL handle, MMSYSERR_INVALFLAG when
 * fdwRemove is non-zero, MMSYSERR_NOERROR otherwise.
 */
MMRESULT WINAPI acmDriverRemove(HACMDRIVERID hadid, DWORD fdwRemove)
{
    PWINE_ACMDRIVERID driver_id = MSACM_GetDriverID(hadid);

    if (driver_id == NULL)
        return MMSYSERR_INVALHANDLE;
    if (fdwRemove != 0)
        return MMSYSERR_INVALFLAG;

    MSACM_UnregisterDriver(driver_id);
    return MMSYSERR_NOERROR;
}
/**********************************************************************/
HANDLE MSACM_hHeap = (HANDLE) NULL;
PWINE_ACMDRIVERID MSACM_pFirstACMDriverID = NULL;
PWINE_ACMDRIVERID MSACM_pLastACMDriverID = NULL;
/***********************************************************************
* MSACM_RegisterDriver32()
*/
/*
 * Create a driver-ID record and append it to the global list of registered
 * ACM drivers.
 *
 * pszDriverAlias  name of the driver; copied. May be NULL (acmDriverAddA
 *                 registers drivers without an alias).
 * pszFileName     NOT a string here: file names are stored in driver.c, and
 *                 this field is reused to carry the driver ID. If it is
 *                 < 0x10000 it is the primary codec for the corresponding
 *                 format. Stored as-is, never dereferenced or freed.
 * hinstModule     module handle of an already loaded driver, or 0.
 *
 * Returns the new record, or NULL when memory could not be allocated.
 */
PWINE_ACMDRIVERID MSACM_RegisterDriver(LPSTR pszDriverAlias, LPSTR pszFileName,
                                       HINSTANCE hinstModule)
{
    PWINE_ACMDRIVERID padid;

    TRACE("('%s', '%x', 0x%08x)\n",
          pszDriverAlias ? pszDriverAlias : "(null)", pszFileName, hinstModule);

    padid = (PWINE_ACMDRIVERID) HeapAlloc(MSACM_hHeap, 0, sizeof(WINE_ACMDRIVERID));
    if (!padid)
        return NULL;

    if (pszDriverAlias) {
        /* Allocate from the same heap MSACM_UnregisterDriver releases the
         * alias to (it uses HeapFree on pszDriverAlias). */
        padid->pszDriverAlias = (char*)HeapAlloc(MSACM_hHeap, 0, strlen(pszDriverAlias)+1);
        if (!padid->pszDriverAlias) {
            HeapFree(MSACM_hHeap, 0, padid);
            return NULL;
        }
        strcpy(padid->pszDriverAlias, pszDriverAlias);
    } else {
        /* No alias supplied; calling strlen(NULL) would crash. */
        padid->pszDriverAlias = NULL;
    }

    padid->pszFileName = pszFileName;
    padid->hInstModule = hinstModule;
    padid->bEnabled = TRUE;
    padid->pACMDriverList = NULL;

    /* Append to the tail of the doubly linked driver-ID list. */
    padid->pNextACMDriverID = NULL;
    padid->pPrevACMDriverID = MSACM_pLastACMDriverID;
    if (MSACM_pLastACMDriverID)
        MSACM_pLastACMDriverID->pNextACMDriverID = padid;
    MSACM_pLastACMDriverID = padid;
    if (!MSACM_pFirstACMDriverID)
        MSACM_pFirstACMDriverID = padid;

    return padid;
}
/***********************************************************************
* MSACM_RegisterAllDrivers32()
*/
/*
 * Register the built-in set of ACM drivers, unless something is already
 * registered.  The second argument of each call is the driver ID passed in
 * the pszFileName slot (see MSACM_RegisterDriver), not a real string.
 */
void MSACM_RegisterAllDrivers(void)
{
    if (MSACM_pFirstACMDriverID)
        return;

    MSACM_RegisterDriver("divxa32", (LPSTR)0x161, 0);
    MSACM_RegisterDriver("msadp32", (LPSTR)0x2, 0);
}
/***********************************************************************
* MSACM_UnregisterDriver32()
*/
/*
 * Remove a driver-ID record from the global list: close every driver
 * instance still open on it, release its alias, unlink it, and free it.
 *
 * Returns the record that followed p in the list (NULL if p was last), so
 * callers can keep iterating.
 */
PWINE_ACMDRIVERID MSACM_UnregisterDriver(PWINE_ACMDRIVERID p)
{
    PWINE_ACMDRIVERID prev;
    PWINE_ACMDRIVERID next;

    /* Each close removes the head of the instance list. */
    while (p->pACMDriverList != NULL)
        acmDriverClose((HACMDRIVER)p->pACMDriverList, 0);

    if (p->pszDriverAlias != NULL)
        HeapFree(MSACM_hHeap, 0, p->pszDriverAlias);

    /* pszFileName is deliberately not freed: it carries a driver ID rather
     * than allocated memory. */

    prev = p->pPrevACMDriverID;
    next = p->pNextACMDriverID;

    if (MSACM_pFirstACMDriverID == p)
        MSACM_pFirstACMDriverID = next;
    if (MSACM_pLastACMDriverID == p)
        MSACM_pLastACMDriverID = prev;

    if (prev != NULL)
        prev->pNextACMDriverID = next;
    if (next != NULL)
        next->pPrevACMDriverID = prev;

    HeapFree(MSACM_hHeap, 0, p);

    return next;
}
/***********************************************************************
* MSACM_UnregisterAllDrivers32()
* FIXME
* Where should this function be called?
*/
void MSACM_UnregisterAllDrivers(void)
{
PWINE_ACMDRIVERID p;
for (p = MSACM_pFirstACMDriverID; p; p = MSACM_UnregisterDriver(p));
}
/***********************************************************************
* MSACM_GetDriverID32()
*/
/* Map a driver-ID handle to its record; the handle is the record's address. */
PWINE_ACMDRIVERID MSACM_GetDriverID(HACMDRIVERID hDriverID)
{
    PWINE_ACMDRIVERID record = (PWINE_ACMDRIVERID)hDriverID;

    return record;
}
/***********************************************************************
* MSACM_GetDriver32()
*/
/* Map a driver handle to its instance record; the handle is the record's address. */
PWINE_ACMDRIVER MSACM_GetDriver(HACMDRIVER hDriver)
{
    PWINE_ACMDRIVER record = (PWINE_ACMDRIVER)hDriver;

    return record;
}
/***********************************************************************
* MSACM_GetObj32()
*/
/* Map a generic ACM object handle to its record; the handle is the record's address. */
PWINE_ACMOBJ MSACM_GetObj(HACMOBJ hObj)
{
    PWINE_ACMOBJ record = (PWINE_ACMOBJ)hObj;

    return record;
}
/***********************************************************************
* acmStreamOpen (MSACM32.40)
*/
/*
 * Open a conversion stream from format *pwfxSrc to format *pwfxDst.
 *
 * phas        receives the stream handle on success (set to 0 on failure
 *             and for query-only opens); may be NULL.
 * had         driver to use, or 0 to try every registered driver in turn.
 * pwfltr      optional filter, copied into the stream record when non-NULL.
 * dwCallback, dwInstance, fdwOpen
 *             stored in the driver instance data handed to the driver.
 *
 * The stream record, both format copies and the optional filter copy live
 * in one allocation, laid out back to back.
 *
 * Returns MMSYSERR_NOMEM, MMSYSERR_INVALPARAM, the driver's error code
 * (explicit-driver path), ACMERR_NOTPOSSIBLE (no registered driver accepted
 * the formats) or MMSYSERR_NOERROR.
 *
 * NOTE(review): pwfxSrc and pwfxDst are dereferenced without a NULL check.
 * NOTE(review): on a non-query success with phas == NULL the stream record
 * is neither returned nor freed.
 */
MMRESULT WINAPI acmStreamOpen(PHACMSTREAM phas, HACMDRIVER had, PWAVEFORMATEX pwfxSrc,
PWAVEFORMATEX pwfxDst, PWAVEFILTER pwfltr, DWORD dwCallback,
DWORD dwInstance, DWORD fdwOpen)
{
PWINE_ACMSTREAM was;
PWINE_ACMDRIVER wad;
MMRESULT ret;
int wfxSrcSize;
int wfxDstSize;
TRACE("(%p, 0x%08x, %p, %p, %p, %ld, %ld, %ld)\n",
phas, had, pwfxSrc, pwfxDst, pwfltr, dwCallback, dwInstance, fdwOpen);
TRACE("src [wFormatTag=%u, nChannels=%u, nSamplesPerSec=%lu, nAvgBytesPerSec=%lu, nBlockAlign=%u, wBitsPerSample=%u, cbSize=%u]\n",
pwfxSrc->wFormatTag, pwfxSrc->nChannels, pwfxSrc->nSamplesPerSec, pwfxSrc->nAvgBytesPerSec,
pwfxSrc->nBlockAlign, pwfxSrc->wBitsPerSample, pwfxSrc->cbSize);
TRACE("dst [wFormatTag=%u, nChannels=%u, nSamplesPerSec=%lu, nAvgBytesPerSec=%lu, nBlockAlign=%u, wBitsPerSample=%u, cbSize=%u]\n",
pwfxDst->wFormatTag, pwfxDst->nChannels, pwfxDst->nSamplesPerSec, pwfxDst->nAvgBytesPerSec,
pwfxDst->nBlockAlign, pwfxDst->wBitsPerSample, pwfxDst->cbSize);
/* Size of a format block: cbSize extra bytes are only counted for non-PCM formats. */
#define SIZEOF_WFX(wfx) (sizeof(WAVEFORMATEX) + ((wfx->wFormatTag == WAVE_FORMAT_PCM) ? 0 : wfx->cbSize))
wfxSrcSize = SIZEOF_WFX(pwfxSrc);
wfxDstSize = SIZEOF_WFX(pwfxDst);
#undef SIZEOF_WFX
/* One allocation: [stream record][src format][dst format][optional filter]. */
was = HeapAlloc(MSACM_hHeap, 0, sizeof(*was) + wfxSrcSize + wfxDstSize + ((pwfltr) ? sizeof(WAVEFILTER) : 0));
if (was == NULL)
return MMSYSERR_NOMEM;
was->drvInst.cbStruct = sizeof(was->drvInst);
was->drvInst.pwfxSrc = (PWAVEFORMATEX)((LPSTR)was + sizeof(*was));
memcpy(was->drvInst.pwfxSrc, pwfxSrc, wfxSrcSize);
was->drvInst.pwfxDst = (PWAVEFORMATEX)((LPSTR)was + sizeof(*was) + wfxSrcSize);
memcpy(was->drvInst.pwfxDst, pwfxDst, wfxDstSize);
if (pwfltr) {
was->drvInst.pwfltr = (PWAVEFILTER)((LPSTR)was + sizeof(*was) + wfxSrcSize + wfxDstSize);
memcpy(was->drvInst.pwfltr, pwfltr, sizeof(WAVEFILTER));
} else {
was->drvInst.pwfltr = NULL;
}
was->drvInst.dwCallback = dwCallback;
was->drvInst.dwInstance = dwInstance;
was->drvInst.fdwOpen = fdwOpen;
was->drvInst.fdwDriver = 0L;
was->drvInst.dwDriver = 0L;
was->drvInst.has = (HACMSTREAM)was;
if (had) {
/* Caller supplied the driver: ask only that one. */
if (!(wad = MSACM_GetDriver(had))) {
ret = MMSYSERR_INVALPARAM;
goto errCleanUp;
}
was->obj.pACMDriverID = wad->obj.pACMDriverID;
was->pDrv = wad;
was->hAcmDriver = 0; /* not to close it in acmStreamClose */
ret = SendDriverMessage(wad->hDrvr, ACMDM_STREAM_OPEN, (DWORD)&was->drvInst, 0L);
if (ret != MMSYSERR_NOERROR)
goto errCleanUp;
} else {
/* No driver supplied: try each registered driver until one accepts. */
PWINE_ACMDRIVERID wadi;
short drv_tag;
ret = ACMERR_NOTPOSSIBLE;
/* if(pwfxSrc->wFormatTag==1)//compression
drv_tag=pwfxDst->wFormatTag;
else
if(pwfxDst->wFormatTag==1)//decompression
drv_tag=pwfxSrc->wFormatTag;
else
goto errCleanUp;
ret=acmDriverOpen2(drv_tag);
if (ret == MMSYSERR_NOERROR) {
if ((wad = MSACM_GetDriver(had)) != 0) {
was->obj.pACMDriverID = wad->obj.pACMDriverID;
was->pDrv = wad;
was->hAcmDriver = had;
ret = SendDriverMessage(wad->hDrvr, ACMDM_STREAM_OPEN, (DWORD)&was->drvInst, 0L);
if (ret == MMSYSERR_NOERROR) {
if (fdwOpen & ACM_STREAMOPENF_QUERY) {
acmDriverClose(had, 0L);
}
break;
}
}
acmDriverClose(had, 0L);*/
/* Lazily populate the driver list on first use. */
if(MSACM_pFirstACMDriverID==NULL)
MSACM_RegisterAllDrivers();
for (wadi = MSACM_pFirstACMDriverID; wadi; wadi = wadi->pNextACMDriverID) {
ret = acmDriverOpen(&had, (HACMDRIVERID)wadi, 0L);
if (ret == MMSYSERR_NOERROR) {
if ((wad = MSACM_GetDriver(had)) != 0) {
was->obj.pACMDriverID = wad->obj.pACMDriverID;
was->pDrv = wad;
/* Remember the driver so acmStreamClose closes it with the stream. */
was->hAcmDriver = had;
ret = SendDriverMessage(wad->hDrvr, ACMDM_STREAM_OPEN, (DWORD)&was->drvInst, 0L);
if (ret == MMSYSERR_NOERROR) {
/* A query keeps nothing open: close the driver right away. */
if (fdwOpen & ACM_STREAMOPENF_QUERY) {
acmDriverClose(had, 0L);
}
break;
}
}
// no match, close this acm driver and try next one
acmDriverClose(had, 0L);
}
}
if (ret != MMSYSERR_NOERROR) {
ret = ACMERR_NOTPOSSIBLE;
goto errCleanUp;
}
}
ret = MMSYSERR_NOERROR;
if (!(fdwOpen & ACM_STREAMOPENF_QUERY)) {
if (phas)
*phas = (HACMSTREAM)was;
TRACE("=> (%d)\n", ret);
return ret;
}
/* A successful query falls through here on purpose: the stream record is
 * released and *phas cleared, while ret stays MMSYSERR_NOERROR. */
errCleanUp:
if (phas)
*phas = (HACMSTREAM)0;
HeapFree(MSACM_hHeap, 0, was);
TRACE("=> (%d)\n", ret);
return ret;
}
/*
 * Close a conversion stream.
 *
 * The driver is sent ACMDM_STREAM_CLOSE; only if it agrees is the stream
 * record freed, together with the driver instance when the stream opened
 * that instance itself (hAcmDriver non-zero).
 *
 * Returns MMSYSERR_INVALHANDLE for a NULL handle, otherwise the driver's
 * answer to the close message.
 */
MMRESULT WINAPI acmStreamClose(HACMSTREAM has, DWORD fdwClose)
{
    PWINE_ACMSTREAM stream;
    MMRESULT        status;

    TRACE("(0x%08x, %ld)\n", has, fdwClose);

    stream = ACM_GetStream(has);
    if (stream == NULL)
        return MMSYSERR_INVALHANDLE;

    status = SendDriverMessage(stream->pDrv->hDrvr, ACMDM_STREAM_CLOSE,
                               (DWORD)&stream->drvInst, 0);
    if (status != MMSYSERR_NOERROR) {
        TRACE("=> (%d)\n", status);
        return status;
    }

    if (stream->hAcmDriver)
        acmDriverClose(stream->hAcmDriver, 0L);
    HeapFree(MSACM_hHeap, 0, stream);

    TRACE("=> (%d)\n", status);
    return status;
}
/***********************************************************************
* acmStreamConvert (MSACM32.38)
*/
/*
 * Ask the stream's driver to convert the data described by a prepared
 * header.
 *
 * Returns MMSYSERR_INVALHANDLE (NULL stream), MMSYSERR_INVALPARAM (NULL or
 * undersized header, or buffers changed since preparation),
 * ACMERR_UNPREPARED (header not prepared), or the driver's result.  On
 * success the header is flagged ACMSTREAMHEADER_STATUSF_DONE.
 */
MMRESULT WINAPI acmStreamConvert(HACMSTREAM has, PACMSTREAMHEADER pash,
                                 DWORD fdwConvert)
{
    PWINE_ACMSTREAM     stream;
    PACMDRVSTREAMHEADER drv_hdr;
    MMRESULT            status = MMSYSERR_NOERROR;

    TRACE("(0x%08x, %p, %ld)\n", has, pash, fdwConvert);

    stream = ACM_GetStream(has);
    if (stream == NULL)
        return MMSYSERR_INVALHANDLE;
    if (pash == NULL || pash->cbStruct < sizeof(ACMSTREAMHEADER))
        return MMSYSERR_INVALPARAM;
    if ((pash->fdwStatus & ACMSTREAMHEADER_STATUSF_PREPARED) == 0)
        return ACMERR_UNPREPARED;

    /* The public header and the driver header have the same size; the
     * driver-private fields overlay the public dwReservedDriver array. */
    drv_hdr = (PACMDRVSTREAMHEADER)pash;

    /* Buffers must still be the ones recorded at prepare time ... */
    if (drv_hdr->pbPreparedSrc != drv_hdr->pbSrc ||
        drv_hdr->pbPreparedDst != drv_hdr->pbDst)
        return MMSYSERR_INVALPARAM;
    /* ... and must not have grown beyond the prepared lengths. */
    if (drv_hdr->cbPreparedSrcLength < drv_hdr->cbSrcLength ||
        drv_hdr->cbPreparedDstLength < drv_hdr->cbDstLength)
        return MMSYSERR_INVALPARAM;

    drv_hdr->fdwConvert = fdwConvert;

    status = SendDriverMessage(stream->pDrv->hDrvr, ACMDM_STREAM_CONVERT,
                               (DWORD)&stream->drvInst, (DWORD)drv_hdr);
    if (status == MMSYSERR_NOERROR)
        drv_hdr->fdwStatus |= ACMSTREAMHEADER_STATUSF_DONE;

    TRACE("=> (%d)\n", status);
    return status;
}
/***********************************************************************
* acmStreamPrepareHeader (MSACM32.41)
*/
/*
 * Prepare a stream header for conversion.
 *
 * The driver-private part of the header is cleared, the driver is sent
 * ACMDM_STREAM_PREPARE, and on success (or when the driver does not support
 * the message) the current buffers and lengths are recorded as the
 * "prepared" ones and the header is flagged PREPARED.
 *
 * Returns MMSYSERR_INVALHANDLE (NULL stream), MMSYSERR_INVALPARAM (NULL or
 * undersized header), MMSYSERR_NOERROR immediately when the header is
 * already flagged DONE, otherwise MMSYSERR_NOERROR or the driver's error.
 */
MMRESULT WINAPI acmStreamPrepareHeader(HACMSTREAM has, PACMSTREAMHEADER pash,
                                       DWORD fdwPrepare)
{
    PWINE_ACMSTREAM     stream;
    PACMDRVSTREAMHEADER drv_hdr;
    MMRESULT            status = MMSYSERR_NOERROR;

    TRACE("(0x%08x, %p, %ld)\n", has, pash, fdwPrepare);

    stream = ACM_GetStream(has);
    if (stream == NULL)
        return MMSYSERR_INVALHANDLE;
    if (pash == NULL || pash->cbStruct < sizeof(ACMSTREAMHEADER))
        return MMSYSERR_INVALPARAM;

    /* NOTE(review): this value is overwritten by the driver call below, so
     * a non-zero fdwPrepare is effectively accepted. */
    if (fdwPrepare != 0)
        status = MMSYSERR_INVALFLAG;

    if ((pash->fdwStatus & ACMSTREAMHEADER_STATUSF_DONE) != 0)
        return MMSYSERR_NOERROR;

    /* The public header and the driver header have the same size; the
     * driver-private fields overlay the public dwReservedDriver array. */
    drv_hdr = (PACMDRVSTREAMHEADER)pash;

    drv_hdr->fdwConvert = fdwPrepare;
    drv_hdr->padshNext = NULL;
    drv_hdr->dwDriver = 0L;
    drv_hdr->fdwDriver = 0L;

    /* Start from a clean "prepared" state before asking the driver. */
    drv_hdr->fdwPrepared = 0;
    drv_hdr->dwPrepared = 0;
    drv_hdr->pbPreparedSrc = 0;
    drv_hdr->cbPreparedSrcLength = 0;
    drv_hdr->pbPreparedDst = 0;
    drv_hdr->cbPreparedDstLength = 0;

    status = SendDriverMessage(stream->pDrv->hDrvr, ACMDM_STREAM_PREPARE,
                               (DWORD)&stream->drvInst, (DWORD)drv_hdr);

    if (status != MMSYSERR_NOERROR && status != MMSYSERR_NOTSUPPORTED) {
        /* Driver refused: leave the header unprepared. */
        drv_hdr->fdwPrepared = 0;
        drv_hdr->dwPrepared = 0;
        drv_hdr->pbPreparedSrc = 0;
        drv_hdr->cbPreparedSrcLength = 0;
        drv_hdr->pbPreparedDst = 0;
        drv_hdr->cbPreparedDstLength = 0;
    } else {
        /* A driver that does not implement PREPARE counts as success. */
        status = MMSYSERR_NOERROR;
        drv_hdr->fdwStatus &= ~(ACMSTREAMHEADER_STATUSF_DONE|ACMSTREAMHEADER_STATUSF_INQUEUE);
        drv_hdr->fdwStatus |= ACMSTREAMHEADER_STATUSF_PREPARED;
        drv_hdr->fdwPrepared = drv_hdr->fdwStatus;
        drv_hdr->dwPrepared = 0;
        drv_hdr->pbPreparedSrc = drv_hdr->pbSrc;
        drv_hdr->cbPreparedSrcLength = drv_hdr->cbSrcLength;
        drv_hdr->pbPreparedDst = drv_hdr->pbDst;
        drv_hdr->cbPreparedDstLength = drv_hdr->cbDstLength;
    }

    TRACE("=> (%d)\n", status);
    return status;
}
/***********************************************************************
* acmStreamReset (MSACM32.42)
*/
/*
 * Reset a conversion stream.
 *
 * Only streams opened with ACM_STREAMOPENF_ASYNC are forwarded to the
 * driver; for any other stream this is a successful no-op.
 *
 * Returns MMSYSERR_INVALFLAG when fdwReset is non-zero,
 * MMSYSERR_INVALHANDLE for a NULL stream, otherwise MMSYSERR_NOERROR or the
 * driver's result.
 */
MMRESULT WINAPI acmStreamReset(HACMSTREAM has, DWORD fdwReset)
{
    PWINE_ACMSTREAM stream;
    MMRESULT        status = MMSYSERR_NOERROR;

    TRACE("(0x%08x, %ld)\n", has, fdwReset);

    if (fdwReset != 0) {
        status = MMSYSERR_INVALFLAG;
    } else {
        stream = ACM_GetStream(has);
        if (stream == NULL)
            return MMSYSERR_INVALHANDLE;

        if ((stream->drvInst.fdwOpen & ACM_STREAMOPENF_ASYNC) != 0)
            status = SendDriverMessage(stream->pDrv->hDrvr, ACMDM_STREAM_RESET,
                                       (DWORD)&stream->drvInst, 0);
    }

    TRACE("=> (%d)\n", status);
    return status;
}
/***********************************************************************
* acmStreamSize (MSACM32.43)
*/
/*
 * Ask the driver how large the buffer on the other side of a conversion
 * must be.
 *
 * With ACM_STREAMSIZEF_SOURCE, cbInput is a source size and the needed
 * destination size is returned; with ACM_STREAMSIZEF_DESTINATION it is the
 * other way round.  *pdwOutputBytes is zeroed before the driver is asked
 * and only filled in when the driver succeeds.
 *
 * Returns MMSYSERR_INVALHANDLE (NULL stream), MMSYSERR_INVALFLAG (unknown
 * bits or unsupported query type), or the driver's result.
 */
MMRESULT WINAPI acmStreamSize(HACMSTREAM has, DWORD cbInput,
                              LPDWORD pdwOutputBytes, DWORD fdwSize)
{
    PWINE_ACMSTREAM  stream;
    ACMDRVSTREAMSIZE request;
    MMRESULT         status;
    DWORD            query;

    TRACE("(0x%08x, %ld, %p, %ld)\n", has, cbInput, pdwOutputBytes, fdwSize);

    stream = ACM_GetStream(has);
    if (stream == NULL)
        return MMSYSERR_INVALHANDLE;

    if ((fdwSize & ~ACM_STREAMSIZEF_QUERYMASK) != 0)
        return MMSYSERR_INVALFLAG;

    *pdwOutputBytes = 0L;

    query = fdwSize & ACM_STREAMSIZEF_QUERYMASK;
    if (query == ACM_STREAMSIZEF_DESTINATION) {
        request.cbDstLength = cbInput;
        request.cbSrcLength = 0;
    } else if (query == ACM_STREAMSIZEF_SOURCE) {
        request.cbSrcLength = cbInput;
        request.cbDstLength = 0;
    } else {
        return MMSYSERR_INVALFLAG;
    }

    request.cbStruct = sizeof(request);
    request.fdwSize = fdwSize;

    status = SendDriverMessage(stream->pDrv->hDrvr, ACMDM_STREAM_SIZE,
                               (DWORD)&stream->drvInst, (DWORD)&request);
    if (status == MMSYSERR_NOERROR) {
        /* The answer is the length on the side the caller did not supply. */
        if (query == ACM_STREAMSIZEF_DESTINATION)
            *pdwOutputBytes = request.cbSrcLength;
        else if (query == ACM_STREAMSIZEF_SOURCE)
            *pdwOutputBytes = request.cbDstLength;
    }

    TRACE("=> (%d) [%lu]\n", status, *pdwOutputBytes);
    return status;
}
/***********************************************************************
* acmStreamUnprepareHeader (MSACM32.44)
*/
/*
 * Undo acmStreamPrepareHeader for a header.
 *
 * Returns MMSYSERR_INVALHANDLE (NULL stream), MMSYSERR_INVALPARAM (NULL or
 * undersized header, or buffers changed since preparation),
 * ACMERR_UNPREPARED (header not prepared), or the driver's result.  A
 * driver answering MMSYSERR_NOTSUPPORTED counts as success; on success the
 * DONE, INQUEUE and PREPARED status bits are cleared.
 */
MMRESULT WINAPI acmStreamUnprepareHeader(HACMSTREAM has, PACMSTREAMHEADER pash,
                                         DWORD fdwUnprepare)
{
    PWINE_ACMSTREAM     stream;
    PACMDRVSTREAMHEADER drv_hdr;
    MMRESULT            status = MMSYSERR_NOERROR;

    TRACE("(0x%08x, %p, %ld)\n", has, pash, fdwUnprepare);

    stream = ACM_GetStream(has);
    if (stream == NULL)
        return MMSYSERR_INVALHANDLE;
    if (pash == NULL || pash->cbStruct < sizeof(ACMSTREAMHEADER))
        return MMSYSERR_INVALPARAM;
    if ((pash->fdwStatus & ACMSTREAMHEADER_STATUSF_PREPARED) == 0)
        return ACMERR_UNPREPARED;

    /* The public header and the driver header have the same size; the
     * driver-private fields overlay the public dwReservedDriver array. */
    drv_hdr = (PACMDRVSTREAMHEADER)pash;

    /* Buffers must still be the ones recorded at prepare time ... */
    if (drv_hdr->pbPreparedSrc != drv_hdr->pbSrc ||
        drv_hdr->pbPreparedDst != drv_hdr->pbDst)
        return MMSYSERR_INVALPARAM;
    /* ... and must not have grown beyond the prepared lengths. */
    if (drv_hdr->cbPreparedSrcLength < drv_hdr->cbSrcLength ||
        drv_hdr->cbPreparedDstLength < drv_hdr->cbDstLength)
        return MMSYSERR_INVALPARAM;

    drv_hdr->fdwConvert = fdwUnprepare;

    status = SendDriverMessage(stream->pDrv->hDrvr, ACMDM_STREAM_UNPREPARE,
                               (DWORD)&stream->drvInst, (DWORD)drv_hdr);
    if (status == MMSYSERR_NOTSUPPORTED)
        status = MMSYSERR_NOERROR;
    if (status == MMSYSERR_NOERROR)
        drv_hdr->fdwStatus &= ~(ACMSTREAMHEADER_STATUSF_DONE|ACMSTREAMHEADER_STATUSF_INQUEUE|ACMSTREAMHEADER_STATUSF_PREPARED);

    TRACE("=> (%d)\n", status);
    return status;
}

View file

@ -6,8 +6,6 @@
#include <wine/vfw.h>
#include <registry.h>
#include <config.h>
#define STORE_ALL \
__asm__ ( \
"push %%ebx\n\t" \
@ -39,6 +37,26 @@ typedef DRVR *LPDRVR;
static DWORD dwDrvID = 0;
/*
 * Deliver a message to a loaded driver by calling its DriverProc.
 *
 * hDriver is treated as a pointer to the driver's DRVR record.  Returns -1
 * when the record, its module handle or its DriverProc is missing;
 * otherwise returns whatever DriverProc returns.
 */
LRESULT WINAPI SendDriverMessage( HDRVR hDriver, UINT message,
LPARAM lParam1, LPARAM lParam2 )
{
DRVR* module=(DRVR*)hDriver;
int result;
#ifdef DETAILED_OUT
printf("SendDriverMessage: driver %X, message %X, arg1 %X, arg2 %X\n", hDriver, message, lParam1, lParam2);
#endif
if(module==0)return -1;
if(module->hDriverModule==0)return -1;
if(module->DriverProc==0)return -1;
/* The call into the driver is bracketed by the STORE_ALL / REST_ALL
 * inline-asm macros (presumably saving and restoring registers around the
 * foreign code - confirm against the macro definitions). */
STORE_ALL;
/* NOTE(review): the second argument is the literal 1, not hDriver. */
result=module->DriverProc(module->dwDriverID,1,message,lParam1,lParam2);
REST_ALL;
#ifdef DETAILED_OUT
printf("\t\tResult: %X\n", result);
#endif
return result;
}
static NPDRVR DrvAlloc(HDRVR*lpDriver, LPUINT lpDrvResult)
{
NPDRVR npDriver;
@ -66,28 +84,44 @@ typedef struct
int usage;
}codec_t;
static codec_t codecs[3]={
{0, PLUGINS_SRCDIR "/win32/divxc32.dll", 0},
{0, PLUGINS_SRCDIR "/win32/ir50_32.dll", 0},
// {0, "./mpg4c32.dll", 0},
{0, PLUGINS_SRCDIR "/win32/libvideodll.so", 0},
#define Win32Path "/usr/lib/win32/"
static codec_t avi_codecs[]={
{0, Win32Path"divxc32.dll", 0}, //0
{0, Win32Path"ir50_32.dll", 0},
{0, Win32Path"ir41_32.dll", 0},
{0, Win32Path"ir32_32.dll", 0},
{0, Win32Path"mpg4c32.dll", 0},
{0, Win32Path"iccvid.dll", 0}, //5
{0, Win32Path"libvideodll.so", 0},
{0, Win32Path"divxa32.acm", 0}, //7
{0, Win32Path"msadp32.acm", 0},
};
static void DrvFree(HDRVR hDriver)
{
int i;
FreeLibrary(((DRVR*)hDriver)->hDriverModule);
if(hDriver)
for(i=0; i<sizeof(codecs)/sizeof(codecs[0]); i++)
if(codecs[i].handle==((DRVR*)hDriver)->hDriverModule)
if(((DRVR*)hDriver)->hDriverModule)
if(((DRVR*)hDriver)->DriverProc)
(((DRVR*)hDriver)->DriverProc)(((DRVR*)hDriver)->dwDriverID, hDriver, DRV_CLOSE, 0, 0);
if(hDriver)
for(i=0; i<sizeof(avi_codecs)/sizeof(avi_codecs[0]); i++)
if(avi_codecs[i].handle==((DRVR*)hDriver)->hDriverModule)
{
codecs[i].handle=0;
codecs[i].usage--;
if (hDriver)
free((NPDRVR)hDriver);
return;
}
avi_codecs[i].usage--;
if(avi_codecs[i].usage==0)
{
avi_codecs[i].handle=0;
if(((DRVR*)hDriver)->hDriverModule)
if(((DRVR*)hDriver)->DriverProc)
(((DRVR*)hDriver)->DriverProc)(0, hDriver, DRV_FREE, 0, 0);
FreeLibrary(((DRVR*)hDriver)->hDriverModule);
if (hDriver)
free((NPDRVR)hDriver);
return;
}
}
}
void DrvClose(HDRVR hdrvr)
@ -110,46 +144,85 @@ DrvOpen(LPARAM lParam2)
int regs[10];
int fccHandler=*((int*)lParam2+2);
switch(fccHandler)
{
case mmioFOURCC('D', 'I', 'V', '3'):
case mmioFOURCC('D', 'I', 'V', '4'):
case mmioFOURCC('d', 'i', 'v', '3'):
case mmioFOURCC('d', 'i', 'v', '4'):
drv_id=0;
break;
case mmioFOURCC('I', 'V', '5', '0'):
case mmioFOURCC('i', 'v', '5', '0'):
drv_id=1;
break;
case mmioFOURCC('m', 'p', '4', '3'):
case mmioFOURCC('M', 'P', 'G', '4'):
drv_id=2;
break;
default:
printf("Unknown codec %X='%c%c%c%c'\n", fccHandler,
fccHandler&0xFF, (fccHandler&0xFF00)>>8,
(fccHandler&0xFF0000)>>16, (fccHandler&0xFF000000)>>24);
return (HDRVR)0;
}
int fccType=*((int*)lParam2+1);
if(fccType==0x63646976)//vidc
switch(fccHandler)
{
case mmioFOURCC('D', 'I', 'V', '3'):
case mmioFOURCC('D', 'I', 'V', '4'):
case mmioFOURCC('d', 'i', 'v', '3'):
case mmioFOURCC('d', 'i', 'v', '4'):
printf("Video in DivX ;-) format\n");
drv_id=0;
break;
case mmioFOURCC('I', 'V', '5', '0'):
case mmioFOURCC('i', 'v', '5', '0'):
printf("Video in Indeo Video 5 format\n");
drv_id=1;
break;
case mmioFOURCC('I', 'V', '4', '1'):
case mmioFOURCC('i', 'v', '4', '1'):
printf("Video in Indeo Video 4.1 format\n");
drv_id=2;
break;
case mmioFOURCC('I', 'V', '3', '2'):
case mmioFOURCC('i', 'v', '3', '2'):
printf("Video in Indeo Video 3.2 format\n");
drv_id=3;
break;
case mmioFOURCC('m', 'p', '4', '1'):
case mmioFOURCC('m', 'p', '4', '2'):
case mmioFOURCC('m', 'p', '4', '3'):
case mmioFOURCC('M', 'P', 'G', '4'):
case mmioFOURCC('M', 'P', '4', '1'):
case mmioFOURCC('M', 'P', '4', '2'):
case mmioFOURCC('M', 'P', '4', '3'):
printf("Video in Microsoft MPEG-4 format\n");
drv_id=4;
break;
case mmioFOURCC('c', 'v', 'i', 'd'):
printf("Video in Cinepak format\n");
drv_id=5;
break;
default:
printf("Unknown codec %X='%c%c%c%c'\n", fccHandler,
fccHandler&0xFF, (fccHandler&0xFF00)>>8,
(fccHandler&0xFF0000)>>16, (fccHandler&0xFF000000)>>24);
return (HDRVR)0;
}
else
switch(fccHandler)
{
case 0x160://DivX audio
case 0x161://DivX audio
drv_id=7;
break;
case 0x2://MS ADPCM
drv_id=8;
break;
default:
printf("Unknown ACM codec 0x%X\n", fccHandler);
return (HDRVR)0;
}
if (!(npDriver = DrvAlloc(&hDriver, &uDrvResult)))
return ((HDRVR) 0);
if(codecs[drv_id].handle==0)
if(avi_codecs[drv_id].handle==0)
{
if (!(codecs[drv_id].handle=npDriver->hDriverModule = LoadLibraryA(codecs[drv_id].name)))
if (!(avi_codecs[drv_id].handle=npDriver->hDriverModule = LoadLibraryA(avi_codecs[drv_id].name)))
{
printf("Can't open library %s\n", avi_codecs[drv_id].name);
DrvFree(hDriver);
return ((HDRVR) 0);
}
else codecs[drv_id].usage=1;
else avi_codecs[drv_id].usage=1;
}
else
{
npDriver->hDriverModule=codecs[drv_id].handle;
codecs[drv_id].usage++;
npDriver->hDriverModule=avi_codecs[drv_id].handle;
avi_codecs[drv_id].usage++;
}
// 14c0
@ -173,28 +246,29 @@ DrvOpen(LPARAM lParam2)
no_reg:
;
}
if (!(npDriver->DriverProc = (DRIVERPROC)
GetProcAddress(npDriver->hDriverModule, "DriverProc")))
{
printf("Library %s is not a valid codec\n", avi_codecs[drv_id].name);
FreeLibrary(npDriver->hDriverModule);
DrvFree(hDriver);
return ((HDRVR) 0);
}
//printf("DriverProc == %X\n", npDriver->DriverProc);
TRACE("DriverProc == %X\n", npDriver->DriverProc);
npDriver->dwDriverID = ++dwDrvID;
if (codecs[drv_id].usage==1)
if (avi_codecs[drv_id].usage==1)
{
STORE_ALL;
(npDriver->DriverProc)(0, hDriver, DRV_LOAD, 0, 0);
REST_ALL;
//printf("DRV_LOAD Ok!\n");
TRACE("DRV_LOAD Ok!\n");
STORE_ALL;
(npDriver->DriverProc)(0, hDriver, DRV_ENABLE, 0, 0);
REST_ALL;
//printf("DRV_ENABLE Ok!\n");
TRACE("DRV_ENABLE Ok!\n");
}
// open driver
@ -203,7 +277,7 @@ DrvOpen(LPARAM lParam2)
(LPARAM) (LPSTR) unknown, lParam2);
REST_ALL;
//printf("DRV_OPEN Ok!(%X)\n", npDriver->dwDriverID);
TRACE("DRV_OPEN Ok!(%X)\n", npDriver->dwDriverID);
if (uDrvResult)
{

View file

@ -3,6 +3,9 @@
*
* Copyright 1999 Bertho A. Stultiens
*/
#include <config.h>
#ifdef HAVE_LIBDL
#include <string.h>
#include <ctype.h>
@ -42,7 +45,7 @@ extern DWORD fixup_imports(WINE_MODREF *wm);
extern void dump_exports(HMODULE hModule);
/*---------------- END HACKS ---------------*/
char *extra_ld_library_path = NULL; /* The extra search-path set in wine.conf */
char *extra_ld_library_path = "/usr/lib/win32";
struct elfdll_image
{
@ -201,7 +204,8 @@ static WINE_MODREF *ELFDLL_CreateModref(HMODULE hModule, LPCSTR path)
// wm->binfmt.pe.pe_resource = (PIMAGE_RESOURCE_DIRECTORY)RVA(hModule, dir->VirtualAddress);
wm->filename = strdup( path );
wm->filename = malloc(strlen(path)+1);
strcpy(wm->filename, path);
wm->modname = strrchr( wm->filename, '\\' );
if (!wm->modname) wm->modname = wm->filename;
else wm->modname++;
@ -275,7 +279,7 @@ WINE_MODREF *ELFDLL_LoadLibraryExA(LPCSTR path, DWORD flags)
}
*/
wm = ELFDLL_CreateModref(dlhandle, path);
wm = ELFDLL_CreateModref((int)dlhandle, path);
if(!wm)
{
ERR("Could not create WINE_MODREF for %s\n", path);
@ -297,4 +301,4 @@ void ELFDLL_UnloadLibrary(WINE_MODREF *wm)
{
}
#endif /*HAVE_LIBDL*/

View file

@ -11,8 +11,9 @@
#include <errno.h>
#include <fcntl.h>
#include <string.h>
#include <stdarg.h>
#include <wine/windef.h>
//#include <wine/winbase.h>
int dbg_header_err( const char *dbg_channel, const char *func )
{
return 0;
@ -35,6 +36,12 @@ int dbg_vprintf( const char *format, ... )
}
/*
 * printf-style debug output hook.  Prints to stdout only when the file is
 * built with DETAILED_OUT defined; otherwise the arguments are ignored.
 * Always returns 0.
 */
int __vprintf( const char *format, ... )
{
#ifdef DETAILED_OUT
va_list va;
va_start(va, format);
vprintf(format, va);
va_end(va);
#endif
return 0;
}
@ -56,17 +63,7 @@ int HeapFree(int heap, int flags, void* mem)
free(mem);
return 1;
}
/*
void EnterCriticalSection(void* q)
{
return;
}
void LeaveCriticalSection(void* q)
{
return;
}
*/
static int last_error;
int GetLastError()
@ -155,7 +152,10 @@ int IsBadReadPtr(void* data, int size)
}
char* HEAP_strdupA(const char* string)
{
return strdup(string);
// return strdup(string);
char* answ=malloc(strlen(string)+1);
strcpy(answ, string);
return answ;
}
short* HEAP_strdupAtoW(void* heap, void* hz, const char* string)
{
@ -337,8 +337,7 @@ DWORD flProtect, DWORD dwMaxHigh, DWORD dwMaxLow, const char* name)
lseek(hFile, 0, SEEK_SET);
}
else len=dwMaxLow;
// len=min(len, dwMaxLow);
#warning fixme - should analyze flProtect
if(flProtect & PAGE_READONLY)
mmap_access |=PROT_READ;
else
@ -363,7 +362,10 @@ DWORD flProtect, DWORD dwMaxHigh, DWORD dwMaxLow, const char* name)
fm->next=NULL;
fm->handle=answer;
if(name)
fm->name=strdup(name);
{
fm->name=malloc(strlen(name)+1);
strcpy(fm->name, name);
}
else
fm->name=NULL;
fm->mapping_size=len;
@ -397,16 +399,61 @@ int UnmapViewOfFile(HANDLE handle)
}
return 0;
}
static int va_size=0;
//static int va_size=0;
struct virt_alloc_s;
typedef struct virt_alloc_s
{
int mapping_size;
char* address;
struct virt_alloc_s* next;
struct virt_alloc_s* prev;
int state;
}virt_alloc;
static virt_alloc* vm=0;
#define MEM_COMMIT 0x00001000
#define MEM_RESERVE 0x00002000
void* VirtualAlloc(void* address, DWORD size, DWORD type, DWORD protection)
{
void* answer;
int fd=open("/dev/zero", O_RDWR);
size=(size+0xffff)&(~0xffff);
// printf("VirtualAlloc(0x%08X, %d)\n", address
if(address!=0)
{
//check whether we can allow to allocate this
virt_alloc* str=vm;
while(str)
{
if((unsigned)address>=(unsigned)str->address+str->mapping_size)
{
str=str->prev;
continue;
}
if((unsigned)address+size<(unsigned)str->address)
{
str=str->prev;
continue;
}
if(str->state==0)
{
#warning FIXME
if(((unsigned)address+size<(unsigned)str->address+str->mapping_size) && (type & MEM_COMMIT))
{
close(fd);
return address; //returning previously reserved memory
}
return NULL;
}
close(fd);
return NULL;
}
answer=mmap(address, size, PROT_READ | PROT_WRITE | PROT_EXEC,
MAP_FIXED | MAP_PRIVATE, fd, 0);
}
else
answer=mmap(address, size, PROT_READ | PROT_WRITE | PROT_EXEC,
// ((address!=NULL) ? MAP_FIXED : MAP_SHARED), fd, 0);
MAP_PRIVATE, fd, 0);
MAP_PRIVATE, fd, 0);
// answer=FILE_dommap(-1, address, 0, size, 0, 0,
// PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE);
close(fd);
@ -418,31 +465,60 @@ void* VirtualAlloc(void* address, DWORD size, DWORD type, DWORD protection)
}
else
{
if(va_size!=0)
printf("Multiple VirtualAlloc!\n");
virt_alloc *new_vm=malloc(sizeof(virt_alloc));
new_vm->mapping_size=size;
new_vm->address=answer;
new_vm->prev=vm;
if(type & MEM_RESERVE)
new_vm->state=0;
else
new_vm->state=1;
if(vm)
vm->next=new_vm;
vm=new_vm;
vm->next=0;
// if(va_size!=0)
// printf("Multiple VirtualAlloc!\n");
// printf("answer=0x%08x\n", answer);
va_size=size;
return answer;
}
}
int VirtualFree(void* address, int t1, int t2)//not sure
{
int answer=munmap(address, va_size);
va_size=0;
return answer;
virt_alloc* str=vm;
int answer;
while(str)
{
if(address!=str->address)
{
str=str->prev;
continue;
}
answer=munmap(str->address, str->mapping_size);
if(str->next)str->next->prev=str->prev;
if(str->prev)str->prev->next=str->next;
if(vm==str)vm=0;
free(str);
return 0;
}
return -1;
}
int WideCharToMultiByte(unsigned int codepage, long flags, const short* src,
int srclen,char* dest, int destlen, const char* defch, int* used_defch)
{
#warning FIXME
int i;
// printf("WCh2MB: Src string ");
// for(i=0; i<=srclen; i++)printf(" %04X", src[i]);
if(src==0)
return 0;
if(dest==0)
return 0;
{
for(i=0; i<srclen; i++)
{
src++;
if(*src==0)
return i+1;
}
}
if(used_defch)
*used_defch=0;
for(i=0; i<min(srclen, destlen); i++)

View file

@ -2,7 +2,9 @@
#include <stdio.h>
#include <pthread.h>
#include <malloc.h>
#include <sys/types.h>
#include <time.h>
#include <sys/types.h>
#include <sys/time.h>
#include <sys/timeb.h>
#include <wine/winbase.h>
@ -10,11 +12,23 @@
#include <wine/winnt.h>
#include <wine/winerror.h>
#include <wine/debugtools.h>
#include <wine/module.h>
#include <registry.h>
/*
 * Return the value the x86 `rdtsc` instruction leaves in EAX (the low
 * 32 bits of the time-stamp counter).
 */
static unsigned int localcount()
{
int a;
/* "=a" binds the output to EAX; EDX is listed as clobbered because rdtsc
 * also writes it (the high half, which is discarded here). */
__asm__ __volatile__("rdtsc\n\t"
:"=a"(a)
:
:"edx");
return a;
}
void dbgprintf(char* fmt, ...)
{
#ifdef DETAILED_OUT
va_list va;
FILE* f;
va_start(va, fmt);
@ -22,6 +36,7 @@ void dbgprintf(char* fmt, ...)
vfprintf(f, fmt, va);
fsync(f);
fclose(f);
#endif
}
char export_names[500][30]={
"name1",
@ -64,6 +79,11 @@ void* my_mreq(int size, int to_zero)
heap=malloc(20000000);
memset(heap, 0xCC,20000000);
}
if(heap==0)
{
printf("No enough memory\n");
return 0;
}
if(heap_counter+size>20000000)
{
printf("No enough memory\n");
@ -82,6 +102,11 @@ void* my_mreq(int size, int to_zero)
int my_release(char* memory)
{
test_heap();
if(memory==NULL)
{
printf("ERROR: free(0)\n");
return 0;
}
if(*(int*)(memory-8)!=0x433476)
{
printf("MEMORY CORRUPTION !!!!!!!!!!!!!!!!!!!\n");
@ -118,6 +143,7 @@ int WINAPI ext_unknown()
}
int WINAPI expIsBadWritePtr(void* ptr, unsigned int count)
{
dbgprintf("IsBadWritePtr(%x, %x)\n", ptr, count);
if(count==0)
return 0;
if(ptr==0)
@ -126,6 +152,7 @@ int WINAPI expIsBadWritePtr(void* ptr, unsigned int count)
}
int WINAPI expIsBadReadPtr(void* ptr, unsigned int count)
{
dbgprintf("IsBadReadPtr(%x, %x)\n", ptr, count);
if(count==0)
return 0;
if(ptr==0)
@ -136,11 +163,19 @@ void* CDECL expmalloc(int size)
{
//printf("malloc");
// return malloc(size);
return my_mreq(size,0);
void* result=my_mreq(size,0);
dbgprintf("malloc(%x)\n", size);
if(result==0)
{
dbgprintf("returns 0\n");
printf("WARNING: malloc() failed\n");
}
return result;
}
/*
 * free() as exported to loaded DLLs: logs the pointer and hands it to
 * my_release(), the counterpart of the my_mreq() allocator used by
 * expmalloc().
 */
void CDECL expfree(void* mem)
{
    dbgprintf("free(%x)\n", mem);
    my_release(mem);
}
void* CDECL expnew(int size)
@ -149,15 +184,25 @@ void* CDECL expnew(int size)
// printf("%08x %08x %08x %08x\n",
// size, *(1+(int*)&size),
// *(2+(int*)&size),*(3+(int*)&size));
return malloc(size);
void* result=expmalloc(size);
dbgprintf("new(%x)\n", size);
if(result==0)
{
dbgprintf("returns 0\n");
printf("WARNING: malloc() failed\n");
}
return result;
}
int CDECL expdelete(void* memory)
{
free(memory);
dbgprintf("delete(%x)\n", memory);
expfree(memory);
return 0;
}
/*
 * DisableThreadLibraryCalls stub: logs the module handle and does nothing.
 * Always returns 0.
 */
int WINAPI expDisableThreadLibraryCalls(int module)
{
    const int result = 0;

    dbgprintf("DisableThreadLibraryCalls(%x)\n", module);
    return result;
}
int CDECL exp_initterm(int v1, int v2)
@ -174,8 +219,18 @@ typedef struct {
/*
 * GetDriverModuleHandle replacement: logs the driver pointer and returns the
 * hDriverModule field of the driver record. pdrv is dereferenced without a
 * NULL check.
 */
void* WINAPI expGetDriverModuleHandle(DRVR* pdrv)
{
    dbgprintf("GetDriverModuleHandle(%x)\n", pdrv);

    return pdrv->hDriverModule;
}
/*
 * GetModuleHandleA replacement: look the module up by name in the loader's
 * module list (MODULE_FindModule) and return its handle, or 0 when it is not
 * loaded.
 *
 * A NULL name (which Win32 callers may legally pass) is reported as "not
 * found": it must reach neither the "%s" log format nor MODULE_FindModule(),
 * which strcmp()s the name against each loaded module.
 */
void* WINAPI expGetModuleHandleA(const char* name)
{
    WINE_MODREF* wm;

    if (name == 0) {
        dbgprintf("GetModuleHandleA(NULL)\n");
        return 0;
    }
    dbgprintf("GetModuleHandleA(%s)\n", name);
    wm = MODULE_FindModule(name);
    if (wm == 0)
        return 0;
    return (void*)(wm->module);
}
struct th_list_t;
typedef struct th_list_t{
int id;
@ -233,7 +288,6 @@ void* WINAPI expCreateEventA(void* pSecAttr, char bManualReset,
{
#warning ManualReset
pthread_mutex_t *pm;
// printf("CreateEvent:");
dbgprintf("CreateEvent\n");
if(mlist!=NULL)
{
@ -243,7 +297,7 @@ void* WINAPI expCreateEventA(void* pSecAttr, char bManualReset,
{
if(strcmp(pp->name, name)==0)
return pp->pm;
}while(pp=pp->next);
}while(pp=pp->prev);
}
pm=my_mreq(sizeof(pthread_mutex_t), 0);
pthread_mutex_init(pm, NULL);
@ -298,6 +352,7 @@ void WINAPI expGetSystemInfo(SYSTEM_INFO* si)
static int cache = 0;
static SYSTEM_INFO cachedsi;
HKEY xhkey=0,hkey;
dbgprintf("GetSystemInfo()\n");
if (cache) {
memcpy(si,&cachedsi,sizeof(*si));
@ -463,7 +518,15 @@ long WINAPI expHeapDestroy(void* heap)
dbgprintf("HeapDestroy(%X)\n", heap);
my_release(heap);
return 1;
}
}
/*
 * HeapFree replacement: the heap handle (arg1) and flags (arg2) are only
 * logged; the block itself is released through my_release().
 * Always returns 1.
 */
long WINAPI expHeapFree(int arg1, int arg2, void* ptr)
{
    const long success = 1;

    dbgprintf("HeapFree(%X, %X, %X)\n", arg1, arg2, ptr);
    my_release(ptr);
    return success;
}
void* WINAPI expVirtualAlloc(void* v1, long v2, long v3, long v4)
{
void* z;
@ -478,28 +541,48 @@ int WINAPI expVirtualFree(void* v1, int v2, int v3)
dbgprintf("VirtualFree(%X %X %X) \n",v1,v2,v3);
return VirtualFree(v1,v2,v3);
}
/*
 * State behind an emulated Win32 CRITICAL_SECTION.
 * expInitializeCriticalSection() allocates one of these with malloc() and
 * stores its address in the first pointer-sized slot of the CRITICAL_SECTION.
 */
struct CRITSECT
{
pthread_t id; /* thread recorded as holder by expEnterCriticalSection() */
pthread_mutex_t mutex; /* the underlying lock */
int locked; /* 1 after expEnterCriticalSection(), 0 after expLeaveCriticalSection() */
};
/*
 * InitializeCriticalSection replacement.
 *
 * pthread_mutex_t is not assumed to fit inside a Win32 CRITICAL_SECTION, so
 * the real state (struct CRITSECT) lives on the heap and only its address is
 * stored in the first pointer-sized slot of *c.
 *
 * The mutex is initialised in its final location: POSIX only defines the
 * behaviour of the mutex object that was passed to pthread_mutex_init(), not
 * of a byte copy of it.
 *
 * On allocation failure the slot is set to 0 and a message is printed.
 */
void WINAPI expInitializeCriticalSection(CRITICAL_SECTION* c)
{
    struct CRITSECT* cs;

    dbgprintf("InitCriticalSection(%X) \n", c);

    cs = malloc(sizeof *cs);
    if (cs == 0) {
        printf("ERROR: InitializeCriticalSection: out of memory\n");
        *(void**)c = 0;
        return;
    }
    /* start from a fully defined state; 'id' is only meaningful while locked */
    memset(cs, 0, sizeof *cs);
    pthread_mutex_init(&cs->mutex, NULL);
    cs->locked = 0;
    *(void**)c = cs;
}
/*
 * EnterCriticalSection replacement.
 *
 * expInitializeCriticalSection() stores a POINTER to the heap-allocated
 * struct CRITSECT in the first slot of *c, so the state must be fetched by
 * dereferencing c; treating c itself as a CRITSECT (or as a raw
 * pthread_mutex_t) operates on the wrong memory.
 *
 * If the calling thread is already recorded as the holder the call returns
 * immediately (no nesting count is kept); otherwise it blocks on the mutex
 * and records the caller as holder.
 */
void WINAPI expEnterCriticalSection(CRITICAL_SECTION* c)
{
    struct CRITSECT* cs = *(struct CRITSECT**)c;

    dbgprintf("EnterCriticalSection(%X) \n", c);

    if (cs->locked && cs->id == pthread_self())
        return;

    pthread_mutex_lock(&cs->mutex);
    cs->locked = 1;
    cs->id = pthread_self();
}
/*
 * LeaveCriticalSection replacement.
 *
 * The CRITICAL_SECTION holds a pointer to the heap-allocated struct CRITSECT
 * (see expInitializeCriticalSection), so it is dereferenced to reach the
 * state. The section is marked free before the mutex is released.
 */
void WINAPI expLeaveCriticalSection(CRITICAL_SECTION* c)
{
    struct CRITSECT* cs = *(struct CRITSECT**)c;

    dbgprintf("LeaveCriticalSection(%X) \n", c);

    cs->locked = 0;
    pthread_mutex_unlock(&cs->mutex);
}
void WINAPI expDeleteCriticalSection(CRITICAL_SECTION *c)
@ -613,9 +696,9 @@ int WINAPI expLoadStringA(long instance, long id, void* buf, long size)
long WINAPI expMultiByteToWideChar(long v1, long v2, char* s1, long siz1, char* s2, int siz2)
{
#warning fixme
#warning FIXME
dbgprintf("MB2WCh\n");
// printf("WARNING: Unsupported call: MBToWCh %s\n", s1);
printf("WARNING: Unsupported call: MBToWCh %s\n", s1);
if(s2==0)
return 1;
s2[0]=s2[1]=0;
@ -636,12 +719,32 @@ long WINAPI expGetVersionExA(OSVERSIONINFOA* c)
strcpy(c->szCSDVersion, "Win98");
return 1;
}
#include <sys/types.h>
#include <sys/ipc.h>
#include <sys/sem.h>
HANDLE WINAPI expCreateSemaphoreA(char* v1, long init_count, long max_count, char* name)
{
#warning fixme
#warning FIXME
/* struct sembuf buf[1];
int sem=semget(IPC_PRIVATE,1,IPC_CREAT);
if(sem==-1)
{
printf("semget() failed\n");
return (HANDLE)-1;
}
buf[0].sem_num=0;
printf("%s\n", name);
printf("Init count %d, max count %d\n", init_count, max_count);
buf[0].sem_op=-max_count+init_count;
buf[0].sem_flg=0;
if(semop(sem, &buf, 1)<0)
{
printf("semop() failed\n");
}
return sem;
*/
void* z;
dbgprintf("CreateSemaphoreA\n");
// printf("CreateSemaphore:");
z=my_mreq(24, 0);
pthread_mutex_init(z, NULL);
return (HANDLE)z;
@ -649,47 +752,66 @@ HANDLE WINAPI expCreateSemaphoreA(char* v1, long init_count, long max_count, cha
long WINAPI expReleaseSemaphore(long hsem, long increment, long* prev_count)
{
// The state of a semaphore object is signaled when its count
// is greater than zero and nonsignaled when its count is equal to zero
// Each time a waiting thread is released because of the semaphore's signaled
// state, the count of the semaphore is decreased by one.
// The state of a semaphore object is signaled when its count
// is greater than zero and nonsignaled when its count is equal to zero
// Each time a waiting thread is released because of the semaphore's signaled
// state, the count of the semaphore is decreased by one.
struct sembuf buf[1];
dbgprintf("ReleaseSemaphore\n");
printf("WARNING: Unsupported call: ReleaseSemaphoreA\n");
/* if(hsem==-1)return 0;
buf[0].sem_num=0;
buf[0].sem_op=-1;
buf[0].sem_flg=0;
if(semop(hsem, &buf, 1)<0)
{
printf("ReleaseSemaphore: semop() failed\n");
}*/
return 1;//zero on error
}
long WINAPI expRegOpenKeyExA(long key, char* subkey, long reserved, long access, long* newkey)
{
long WINAPI expRegOpenKeyExA(long key, const char* subkey, long reserved, long access, int* newkey)
{
dbgprintf("RegOpenKeyExA(%d,%s)\n", key, subkey);
return RegOpenKeyExA(key, subkey, reserved, access, newkey);
}
long WINAPI expRegCloseKey(long key)
{
{
dbgprintf("RegCloseKey()\n");
return RegCloseKey(key);
}
long WINAPI expRegQueryValueExA(long key, char* value, int* reserved, int* type, int* data, int* count)
long WINAPI expRegQueryValueExA(long key, const char* value, int* reserved, int* type, int* data, int* count)
{
dbgprintf("RegQueryValueExA()\n");
return RegQueryValueExA(key, value, reserved, type, data, count);
}
long WINAPI expRegCreateKeyExA(long key, char* name, long reserved,
void* classs, long options, long security,
long WINAPI expRegCreateKeyExA(long key, const char* name, long reserved,
void* classs, long options, long security,
void* sec_attr, int* newkey, int* status)
{
{
dbgprintf("RegCreateKeyExA()\n");
return RegCreateKeyExA(key, name, reserved, classs, options, security, sec_attr, newkey, status);
}
long WINAPI expRegSetValueExA(long key, char* name, long v1, long v2, void* data, long size)
long WINAPI expRegSetValueExA(long key, const char* name, long v1, long v2, void* data, long size)
{
dbgprintf("RegSetValueExA()\n");
return RegSetValueExA(key, name, v1, v2, data, size);
}
long WINAPI expRegOpenKeyA ( long hKey, LPCSTR lpSubKey, long* phkResult )
{
long WINAPI expRegOpenKeyA (
long hKey,
LPCSTR lpSubKey,
int* phkResult
){
return RegOpenKeyExA(hKey, lpSubKey, 0, 0, phkResult);
}
}
long WINAPI expQueryPerformanceCounter(long long* z)
{
__asm__(
dbgprintf("QueryPerformanceCounter()\n");
__asm__ __volatile__(
"rdtsc\n\t"
"movl %%eax, 0(%0)\n\t"
"movl %%edx, 4(%0)\n\t"
@ -697,17 +819,69 @@ long WINAPI expQueryPerformanceCounter(long long* z)
return 1;
}
/*
 * Fallback CPU clock estimate used when /proc/cpuinfo is unusable.
 *
 * Waits for time() to tick over, samples the time-stamp counter, waits for
 * the next tick and samples again; the difference is the number of counter
 * ticks in one second. Returns that count divided by 1000 (i.e. kHz).
 * Blocks, busy-waiting, for between one and two seconds.
 *
 * The samples are kept unsigned: localcount() returns only the low 32 bits,
 * and unsigned subtraction stays correct when that value wraps between the
 * two samples, whereas signed int arithmetic would overflow. Clocks above
 * roughly 4.29 GHz still exceed 32 bits per second and are under-reported.
 */
static double old_freq()
{
    time_t i = time(NULL);
    unsigned int x, y;

    while (i == time(NULL))
        ;
    x = localcount();
    i++;
    while (i == time(NULL))
        ;
    y = localcount();
    return (double)(y - x) / 1000.;
}
/*
 * Return the CPU clock in kHz.
 *
 * Scans /proc/cpuinfo for the first "cpu MHz" line and returns its value
 * multiplied by 1000. Falls back to the (slow) measurement in old_freq()
 * when the file cannot be opened, the line is missing, or the value cannot
 * be parsed or is not positive.
 */
static double CPU_Freq()
{
    FILE *f = fopen("/proc/cpuinfo", "r");
    char line[200];
    char *s, *value;
    double freq = -1;

    if (!f) {
        printf("Can't open /proc/cpuinfo for reading\n");
        return old_freq();
    }
    while (fgets(line, sizeof line, f) != NULL) {
        /* NOTE: the ':' is the only character we can rely on */
        if (!(value = strchr(line, ':')))
            continue;
        /* terminate the valuename */
        *value++ = '\0';
        /* skip any leading spaces */
        while (*value == ' ')
            value++;
        if ((s = strchr(value, '\n')))
            *s = '\0';

        if (!strncasecmp(line, "cpu MHz", strlen("cpu MHz"))) {
            if (sscanf(value, "%lf", &freq) == 1)
                freq *= 1000;
            else
                freq = -1; /* unparsable: force the fallback below */
            break;
        }
    }
    fclose(f);
    /* a zero reading is as useless to callers as a missing one */
    if (freq <= 0)
        return old_freq();
    return freq;
}
long WINAPI expQueryPerformanceFrequency(long long* z)
{
#warning fixme
*z=(long long)550000000;
dbgprintf("QueryPerformanceFrequency()\n");
*z=(long long)CPU_Freq();
return 1;
}
long WINAPI exptimeGetTime()
{
struct timeb t;
ftime(&t);
return 1000*t.time+t.millitm;
{
struct timeval t;
dbgprintf("timeGetTime()\n");
gettimeofday(&t, 0);
return 1000*t.tv_sec+t.tv_usec/1000;
}
void* WINAPI expLocalHandle(void* v)
{
@ -723,7 +897,7 @@ int WINAPI expGlobalUnlock(void* v)
{
dbgprintf("GlobalUnlock\n");
return 1;
}
}
//
void* WINAPI expGlobalFree(void* v)
{
@ -736,7 +910,7 @@ int WINAPI expLocalUnlock(void* v)
{
dbgprintf("LocalUnlock\n");
return 1;
}
}
//
void* WINAPI expLocalFree(void* v)
{
@ -744,14 +918,12 @@ void* WINAPI expLocalFree(void* v)
my_release(v);
return 0;
}
// HRSRC fun(HMODULE module, char* name, char* type)
HRSRC WINAPI expFindResourceA(HMODULE module, char* name, char* type)
{
dbgprintf("FindResourceA\n");
return FindResourceA(module, name, type);
}
//HGLOBAL fun(HMODULE module, HRSRC res)
}
HGLOBAL WINAPI expLoadResource(HMODULE module, HRSRC res)
{
dbgprintf("LoadResource\n");
@ -762,12 +934,12 @@ void* WINAPI expLockResource(long res)
dbgprintf("LockResource\n");
return LockResource(res);
}
int /*bool*/ WINAPI expFreeResource(long res)
int WINAPI expFreeResource(long res)
{
dbgprintf("FreeResource\n");
return FreeResource(res);
}
//bool fun(HANDLE)
//bool fun(HANDLE)
//!0 on success
int WINAPI expCloseHandle(long v1)
{
@ -778,11 +950,11 @@ int WINAPI expCloseHandle(long v1)
const char* WINAPI expGetCommandLineA()
{
dbgprintf("GetCommandLine\n");
return "aviplay";
return "c:\\aviplay.exe";
}
LPWSTR WINAPI expGetEnvironmentStringsW()
{
static short envs[]={0};
static short envs[]={'p', 'a', 't', 'h', ' ', 'c', ':', '\\', 0};
dbgprintf("GetEnvStringsW\n");
return envs;
}
@ -827,15 +999,27 @@ int WINAPI expGetACP()
printf("WARNING: Unsupported call: GetACP\n");
return 0;
}
extern WINE_MODREF *MODULE32_LookupHMODULE(HMODULE m);
/*
 * GetModuleFileNameA replacement.
 *
 * Writes a fake Windows path "c:\windows\system\<name>" into s, where <name>
 * is the part of the module's filename after the last '/', or "aviplay.dll"
 * when the handle is unknown to MODULE32_LookupHMODULE().
 *
 * Returns 0 without writing when s is NULL or len is below 35, 1 otherwise.
 * The path is composed with snprintf() so that a long module name is
 * truncated to the caller's buffer instead of overrunning it.
 */
int WINAPI expGetModuleFileNameA(int module, char* s, int len)
{
    const char* base = "aviplay.dll";
    WINE_MODREF *mr;

    dbgprintf("GetModuleFileNameA\n");
    printf("File name of module %X requested\n", module);
    if (s == 0)
        return 0;
    if (len < 35)
        return 0;

    mr = MODULE32_LookupHMODULE(module);
    if (mr != 0) {
        const char* slash = strrchr(mr->filename, '/');
        base = (slash == NULL) ? mr->filename : slash + 1;
    }
    snprintf(s, len, "c:\\windows\\system\\%s", base);
    return 1;
}
@ -856,7 +1040,6 @@ int WINAPI expFreeLibrary(int module)
{
dbgprintf("FreeLibrary\n");
return FreeLibrary(module);
// return 0;
}
void* WINAPI expGetProcAddress(HMODULE mod, char* name)
{
@ -873,10 +1056,8 @@ long WINAPI expCreateFileMappingA(int hFile, void* lpAttr,
long WINAPI expOpenFileMappingA(long hFile, long hz, const char* name)
{
#warning fixme
// dbgprintf("OpenFileMappingA\n");
dbgprintf("OpenFileMappingA\n");
return OpenFileMappingA(hFile, hz, name);
// return 0;
}
void* WINAPI expMapViewOfFile(HANDLE file, DWORD mode, DWORD offHigh, DWORD offLow, DWORD size)
@ -886,36 +1067,129 @@ void* WINAPI expMapViewOfFile(HANDLE file, DWORD mode, DWORD offHigh, DWORD offL
return (char*)file+offLow;
}
/*
 * UnmapViewOfFile stub: logs the call and releases nothing.
 * Always returns 0.
 */
void* WINAPI expUnmapViewOfFile(void* view)
{
    void* const result = 0;

    dbgprintf("UnmapViewOfFile()\n");
    return result;
}
/*
 * Sleep replacement. The Win32 argument is a duration in MILLISECONDS, so it
 * is converted to a timespec for nanosleep(); passing it straight to
 * usleep(), which takes microseconds, would sleep 1000 times too briefly.
 *
 * The argument is reinterpreted as unsigned, as Win32 declares it, so
 * INFINITE (0xFFFFFFFF) becomes a very long sleep rather than a negative one.
 * Always returns 0.
 */
void* WINAPI expSleep(int time)
{
    unsigned int ms = (unsigned int)time;
    struct timespec ts;

    dbgprintf("Sleep(%d)\n", time);
    ts.tv_sec = ms / 1000;
    ts.tv_nsec = (long)(ms % 1000) * 1000000L;
    nanosleep(&ts, NULL);
    return 0;
}
/*
 * CreateCompatibleDC stub. It is unclear why the IV32 codec calls this at
 * all; it is handed the constant 129 as a fake device-context handle.
 */
void* WINAPI expCreateCompatibleDC(int hdc)
{
    void* const fake_dc = (void*)129;

    dbgprintf("CreateCompatibleDC(%d)\n", hdc);
    return fake_dc;
}
/*
 * GetDeviceCaps stub: logs the request and answers 0 for every capability.
 */
int WINAPI expGetDeviceCaps(int hdc, int unk)
{
    const int capability = 0;

    dbgprintf("GetDeviceCaps(%d, %d)\n", hdc, unk);
    return capability;
}
/*
 * DeleteDC stub: there is no real device context behind the handle returned
 * by expCreateCompatibleDC(), so the call is only logged.
 * Always returns 0.
 */
WIN_BOOL WINAPI expDeleteDC(int hdc)
{
    const WIN_BOOL result = 0;

    dbgprintf("DeleteDC(%d)\n", hdc);
    return result;
}
/*
 * wsprintfA replacement: formats into 'string' with the C library's
 * vsprintf() and returns its result (characters written, excluding the
 * terminator). The destination size is not known here, so the caller's
 * buffer must be large enough.
 *
 * Every va_start() is paired with va_end(), as the C standard requires.
 */
int expwsprintfA(char* string, char* format, ...)
{
    va_list va;
    int written;

    va_start(va, format);
    dbgprintf("wsprintfA\n");
    written = vsprintf(string, format, va);
    va_end(va);
    return written;
}
/*
 * GetPrivateProfileIntA replacement backed by the emulated registry.
 *
 * The lookup name is "Software\IniFileMapping\<appname>\<keyname>\<filename>",
 * queried with RegQueryValueExA() under HKEY_LOCAL_MACHINE with
 * default_value as the 4-byte output buffer. The query's result code is not
 * checked; whatever default_value holds afterwards is returned.
 *
 * default_value is returned untouched when any of the three strings is NULL
 * (checked before they are logged with "%s") or when the temporary name
 * buffer cannot be allocated.
 */
int WINAPI expGetPrivateProfileIntA(char* appname, char* keyname, int default_value, char* filename)
{
    int size = 4;
    char* fullname;

    if (!(appname && keyname && filename)) {
        dbgprintf("GetPrivateProfileIntA(NULL argument)\n");
        return default_value;
    }
    dbgprintf("GetPrivateProfileIntA(%s, %s, %s)\n", appname, keyname, filename );

    /* 50 covers the fixed prefix, the two separators and the terminator */
    fullname = (char*)malloc(50 + strlen(appname) + strlen(keyname) + strlen(filename));
    if (fullname == 0)
        return default_value;

    strcpy(fullname, "Software\\IniFileMapping\\");
    strcat(fullname, appname);
    strcat(fullname, "\\");
    strcat(fullname, keyname);
    strcat(fullname, "\\");
    strcat(fullname, filename);
    RegQueryValueExA(HKEY_LOCAL_MACHINE, fullname, NULL, NULL, &default_value, &size);
    free(fullname);
    return default_value;
}
/*
 * DefDriverProc stub: prints the message number (unconditionally, via
 * printf) and reports 0 for every driver message.
 */
int WINAPI expDefDriverProc(int _private, int id, int msg, int arg1, int arg2)
{
    const int result = 0;

    printf("Called DefDriverProc(%X)\n", msg);
    return result;
}
/*
 * SizeofResource as exported to loaded DLLs: logs the call and forwards both
 * arguments to the loader's own SizeofResource().
 */
int WINAPI expSizeofResource(int v1, int v2)
{
    int size;

    dbgprintf("SizeofResource()\n");
    size = SizeofResource(v1, v2);
    return size;
}
/*
 * GetLastError as exported to loaded DLLs: logs the call and returns the
 * loader's GetLastError() value.
 */
int WINAPI expGetLastError()
{
    int error;

    dbgprintf("GetLastError()\n");
    error = GetLastError();
    return error;
}
/*
 * SetLastError as exported to loaded DLLs: logs the call and passes the code
 * on to the loader's SetLastError().
 */
void WINAPI expSetLastError(int error)
{
    dbgprintf("SetLastError()\n");

    SetLastError(error);
}
/*
 * strrchr() as exported to loaded DLLs: returns a pointer to the last
 * occurrence of 'value' in 'string', or NULL when it does not occur.
 */
char* expstrrchr(char* string, int value)
{
    char* last = strrchr(string, value);

    return last;
}
/*
 * strchr() as exported to loaded DLLs: returns a pointer to the first
 * occurrence of 'value' in 'string', or NULL when it does not occur.
 */
char* expstrchr(char* string, int value)
{
    char* first = strchr(string, value);

    return first;
}
/*
 * GetFileVersionInfoSizeA stub: prints the request (unconditionally, via
 * printf) and reports a size of 0. *lpHandle is not written.
 */
int WINAPI expGetFileVersionInfoSizeA(const char* name, int* lpHandle)
{
    const int info_size = 0;

    printf("GetFileVersionInfoSizeA(%s,0x%X)\n", name, lpHandle);
    return info_size;
}
/*
 * IsBadStringPtrW replacement: only a NULL pointer is reported as bad (1);
 * any other pointer is accepted (0) without being probed. nchars is ignored.
 */
int WINAPI expIsBadStringPtrW(const short* string, int nchars)
{
    return (string == 0) ? 1 : 0;
}
/*
 * Atomically add incr to *dest and return the value *dest held before the
 * addition. Implemented with the x86 "lock; xaddl" instruction, which
 * exchanges the register operand with the memory operand and stores their
 * sum in memory.
 * NOTE(review): xaddl is a 32-bit operation, so this presumably assumes a
 * 32-bit long — confirm for the target ABI.
 */
extern long WINAPI InterlockedExchangeAdd( long* dest, long incr )
{
long ret;
/* %0 carries incr in and the previous *dest out; %1 is the address */
__asm__ __volatile__( "lock; xaddl %0,(%1)"
: "=r" (ret) : "r" (dest), "0" (incr) : "memory" );
return ret;
}
/*
 * InterlockedIncrement replacement: adds 1 to *dest through
 * InterlockedExchangeAdd() and returns the incremented value.
 */
extern long WINAPI expInterlockedIncrement( long* dest )
{
    long previous = InterlockedExchangeAdd( dest, 1 );

    return previous + 1;
}
/*
 * InterlockedDecrement replacement: adds -1 to *dest through
 * InterlockedExchangeAdd() and returns the decremented value.
 */
extern long WINAPI expInterlockedDecrement( long* dest )
{
    long previous = InterlockedExchangeAdd( dest, -1 );

    return previous - 1;
}
/*
 * OutputDebugStringA replacement: writes the message to stderr, prefixed
 * with "DEBUG: " and followed by a newline.
 */
extern void WINAPI expOutputDebugStringA( const char* string )
{
    FILE* const out = stderr;

    fprintf(out, "DEBUG: %s\n", string);
}
struct exports
{
@ -936,6 +1210,7 @@ struct libs
struct exports exp_kernel32[]={
FF(IsBadWritePtr, 357)
FF(IsBadReadPtr, 354)
FF(IsBadStringPtrW, -1)
FF(DisableThreadLibraryCalls, -1)
FF(CreateThread, -1)
FF(CreateEventA, -1)
@ -947,10 +1222,11 @@ FF(GetVersion, 332)
FF(HeapCreate, 461)
FF(HeapAlloc, -1)
FF(HeapDestroy, -1)
FF(HeapFree, -1)
FF(VirtualAlloc, -1)
FF(VirtualFree, -1)
FF(InitializeCriticalSection, -1)
FF(EnterCriticalSection, -1)
FF(EnterCriticalSection, -1)
FF(LeaveCriticalSection, -1)
FF(DeleteCriticalSection, -1)
FF(TlsAlloc, -1)
@ -960,7 +1236,7 @@ FF(TlsSetValue, -1)
FF(GetCurrentThreadId, -1)
FF(LocalAlloc, -1)
FF(LocalLock, -1)
FF(GlobalAlloc, -1)
FF(GlobalAlloc, -1)
FF(GlobalLock, -1)
FF(MultiByteToWideChar, 427)
FF(WideCharToMultiByte, -1)
@ -979,10 +1255,11 @@ FF(ReleaseSemaphore, -1)
FF(FindResourceA, -1)
FF(LockResource, -1)
FF(FreeResource, -1)
FF(SizeofResource, -1)
FF(CloseHandle, -1)
FF(GetCommandLineA, -1)
FF(GetEnvironmentStringsW, -1)
FF(FreeEnvironmentStringsW, -1)
FF(GetEnvironmentStringsW, -1)
FF(FreeEnvironmentStringsW, -1)
FF(GetEnvironmentStrings, -1)
FF(GetStartupInfoA, -1)
FF(GetStdHandle, -1)
@ -997,7 +1274,15 @@ FF(FreeLibrary, -1)
FF(CreateFileMappingA, -1)
FF(OpenFileMappingA, -1)
FF(MapViewOfFile, -1)
FF(UnmapViewOfFile, -1)
FF(Sleep, -1)
FF(GetModuleHandleA, -1)
FF(GetPrivateProfileIntA, -1)
FF(GetLastError, -1)
FF(SetLastError, -1)
FF(InterlockedIncrement, -1)
FF(InterlockedDecrement, -1)
FF(OutputDebugStringA, -1)
};
struct exports exp_msvcrt[]={
@ -1006,13 +1291,17 @@ FF(_initterm, -1)
FF(free, -1)
{"??3@YAXPAX@Z", -1, expdelete},
{"??2@YAPAXI@Z", -1, expnew},
FF(strrchr, -1)
FF(strchr, -1)
};
struct exports exp_winmm[]={
FF(GetDriverModuleHandle, -1)
FF(timeGetTime, -1)
FF(timeGetTime, -1)
FF(DefDriverProc, -1)
};
struct exports exp_user32[]={
FF(LoadStringA, -1)
FF(wsprintfA, -1)
};
struct exports exp_advapi32[]={
FF(RegOpenKeyA, -1)
@ -1022,6 +1311,14 @@ FF(RegQueryValueExA, -1)
FF(RegSetValueExA, -1)
FF(RegCloseKey, -1)
};
/*
 * Functions offered to loaded DLLs that import from gdi32.dll; the table is
 * registered under that name by LL(gdi32) in the library list.
 * NOTE(review): the second FF() argument is presumably the export ordinal,
 * with -1 meaning "by name only" — confirm against the FF macro.
 */
struct exports exp_gdi32[]={
FF(CreateCompatibleDC, -1)
FF(GetDeviceCaps, -1)
FF(DeleteDC, -1)
};
/*
 * Functions offered to loaded DLLs that import from version.dll; the table
 * is registered under that name by LL(version) in the library list.
 */
struct exports exp_version[]={
FF(GetFileVersionInfoSizeA, -1)
};
#define LL(X) \
{#X".dll", sizeof(exp_##X)/sizeof(struct exports), exp_##X},
@ -1031,6 +1328,8 @@ LL(msvcrt)
LL(winmm)
LL(user32)
LL(advapi32)
LL(gdi32)
LL(version)
};
void* LookupExternal(const char* library, int ordinal)
@ -1070,6 +1369,7 @@ void* LookupExternalByName(const char* library, const char* name)
{
char* answ;
int i,j;
// return (void*)ext_unknown;
if(library==0)
{
printf("ERROR: library=0\n");

View file

@ -5,12 +5,42 @@
*/
#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <stdlib.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/types.h>
#ifdef __linux__
#include <asm/unistd.h>
#include <asm/ldt.h>
#else
#define LDT_ENTRIES 8192
#define LDT_ENTRY_SIZE 8
/*
 * Local definition of the LDT entry description, used on platforms where
 * <asm/ldt.h> is not included (the non-__linux__ branch). The fields are
 * consumed by LDT_EntryToBytes() and install_fs().
 */
struct modify_ldt_ldt_s {
unsigned int entry_number; /* index of the LDT slot */
unsigned long base_addr; /* linear base address of the segment */
unsigned int limit; /* segment limit */
unsigned int seg_32bit:1; /* non-zero: 32-bit segment */
unsigned int contents:2; /* one of MODIFY_LDT_CONTENTS_* */
unsigned int read_exec_only:1; /* non-zero: segment is not writable */
unsigned int limit_in_pages:1; /* non-zero: limit counted in pages */
unsigned int seg_not_present:1;
unsigned int useable:1;
};
#define MODIFY_LDT_CONTENTS_DATA 0
#define MODIFY_LDT_CONTENTS_STACK 1
#define MODIFY_LDT_CONTENTS_CODE 2
#define __NR_modify_ldt 123
#endif
#include <wine/windef.h>
#include <wine/winerror.h>
#include <wine/heap.h>
@ -29,6 +59,132 @@ typedef struct modref_list_t
modref_list;
/***********************************************************************
 *           LDT_EntryToBytes
 *
 * Pack an LDT entry description into the two words of a raw segment
 * descriptor: buffer[0] receives the low word, buffer[1] the high word.
 */
static void LDT_EntryToBytes( unsigned long *buffer, const struct modify_ldt_ldt_s *content )
{
    unsigned long low;
    unsigned long high;

    /* low word: base bits 0..15 in the upper half, limit bits 0..15 below */
    low  = (content->base_addr & 0x0000ffff) << 16;
    low |= content->limit & 0x0ffff;

    /* high word: remaining base and limit bits plus the attribute flags */
    high  = content->base_addr & 0xff000000;
    high |= (content->base_addr & 0x00ff0000) >> 16;
    high |= content->limit & 0xf0000;
    high |= content->contents << 10;
    high |= (content->read_exec_only == 0) << 9;
    high |= (content->seg_32bit != 0) << 22;
    high |= (content->limit_in_pages != 0) << 23;
    high |= 0xf000;

    buffer[0] = low;
    buffer[1] = high;
}
//
// funcs:
//
// 0 read LDT
// 1 write old mode
// 0x11 write
//
/*
 * Issue the modify_ldt system call directly through "int $0x80".
 *
 * func  - operation code, placed in EBX
 * ptr   - buffer of LDT entry descriptions, placed in ECX
 * count - number of entries; EDX receives count * sizeof(entry)
 *
 * Returns the non-negative kernel result, or -1 with errno set to the
 * negated kernel result on failure.
 */
static int modify_ldt( int func, struct modify_ldt_ldt_s *ptr,
unsigned long count )
{
int res;
#ifdef __PIC__
/* EBX is not named as an operand in PIC builds, so it is saved, loaded by
   hand and restored around the trap */
__asm__ __volatile__( "pushl %%ebx\n\t"
"movl %2,%%ebx\n\t"
"int $0x80\n\t"
"popl %%ebx"
: "=a" (res)
: "0" (__NR_modify_ldt),
"r" (func),
"c" (ptr),
"d" (sizeof(struct modify_ldt_ldt_s)*count) );
#else
__asm__ __volatile__("int $0x80"
: "=a" (res)
: "0" (__NR_modify_ldt),
"b" (func),
"c" (ptr),
"d" (sizeof(struct modify_ldt_ldt_s)*count) );
#endif /* __PIC__ */
if (res >= 0) return res;
/* negative result: convert to the errno / -1 convention */
errno = -res;
return -1;
}
static int fs_installed=0;
static char* fs_seg=0;
/*
 * Set up the segment that %fs refers to while loaded Win32 code runs.
 *
 * Maps 0x30000 bytes of zero-filled memory (address hint 0xbf000000),
 * describes it as LDT entry 1, loads %fs with selector 0xf (LDT entry 1,
 * privilege level 3) and stores a pointer to a fresh 8-byte block in the
 * first word of the segment.
 *
 * Returns 0 on success or when already installed, -1 when the backing
 * memory cannot be obtained (fs_seg is then left at 0 so uninstall_fs()
 * has nothing to undo). A failure to register the LDT entry is reported
 * but, as before, not treated as fatal.
 */
static int install_fs()
{
    struct modify_ldt_ldt_s array;
    int fd;
    int ret;
    void* prev_struct;

    if (fs_installed)
        return 0;

    fd = open("/dev/zero", O_RDWR);
    if (fd < 0) {
        perror("install_fs: /dev/zero");
        printf("ERROR: Couldn't allocate memory for fs segment\n");
        return -1;
    }
    fs_seg = mmap((void*)0xbf000000, 0x30000, PROT_READ | PROT_WRITE, MAP_PRIVATE,
                  fd, 0);
    /* the mapping keeps its own reference to the file */
    close(fd);
    /* mmap() reports failure with MAP_FAILED, not with a null pointer */
    if (fs_seg == MAP_FAILED) {
        fs_seg = 0;
        printf("ERROR: Couldn't allocate memory for fs segment\n");
        return -1;
    }

    /* clear every field, including 'useable', before filling the entry in */
    memset(&array, 0, sizeof array);
    array.base_addr = ((unsigned long)fs_seg + 0xffff) & 0xffff0000;
    array.entry_number = 0x1;
    array.limit = array.base_addr + getpagesize() - 1;
    array.seg_32bit = 1;
    array.read_exec_only = 0;
    array.seg_not_present = 0;
    array.contents = MODIFY_LDT_CONTENTS_DATA;
    array.limit_in_pages = 0;
#ifdef linux
    ret = modify_ldt(0x1, &array, 1);
    if (ret < 0) {
        perror("install_fs");
        MESSAGE("Couldn't install fs segment, expect segfault\n");
    }
#endif /*linux*/

#if defined(__NetBSD__) || defined(__FreeBSD__) || defined(__OpenBSD__)
    {
        long d[2];

        LDT_EntryToBytes( d, &array );
        ret = i386_set_ldt(0x1, (union descriptor *)d, 1);
        if (ret < 0) {
            perror("install_fs");
            MESSAGE("Did you reconfigure the kernel with \"options USER_LDT\"?\n");
        }
    }
#endif /* __NetBSD__ || __FreeBSD__ || __OpenBSD__ */

    /* Load %fs with the new selector. Nothing is pushed (the stack must be
       left balanced) and EAX is declared as clobbered. */
    __asm__ __volatile__(
        "movl $0xf,%%eax\n\t"
        "movw %%ax, %%fs\n\t"
        :
        :
        : "eax");

    prev_struct = malloc(8);
    *(void**)array.base_addr = prev_struct;
    printf("prev_struct: 0x%X\n", (unsigned int)(unsigned long)prev_struct);
    fs_installed = 1;
    return 0;
}
static int uninstall_fs()
{
if(fs_seg==0)
return -1;
munmap(fs_seg, 0x30000);
return 0;
}
//WINE_MODREF *local_wm=NULL;
modref_list* local_wm=NULL;
@ -40,6 +196,7 @@ WINE_MODREF *MODULE_FindModule(LPCSTR m)
return NULL;
while(strcmp(m, list->wm->filename))
{
printf("%s: %x\n", list->wm->filename, list->wm->module);
list=list->prev;
if(list==NULL)
return NULL;
@ -59,6 +216,7 @@ void MODULE_RemoveFromList(WINE_MODREF *mod)
{
free(list);
local_wm=NULL;
uninstall_fs();
return;
}
for(;list;list=list->prev)
@ -241,330 +399,6 @@ void MODULE_DllProcessDetach( WINE_MODREF* wm, WIN_BOOL bForceDetach, LPVOID lpR
MODULE_InitDll( wm, DLL_PROCESS_DETACH, lpReserved );
}
/*************************************************************************
* MODULE_DllThreadAttach
*
* Send DLL thread attach notifications. These are sent in the
* reverse sequence of process detach notification.
*
*/
/*
void MODULE_DllThreadAttach( LPVOID lpReserved )
{
WINE_MODREF *wm;
MODULE_InitDll( wm, DLL_THREAD_ATTACH, lpReserved );
}*/
/*************************************************************************
* MODULE_DllThreadDetach
*
* Send DLL thread detach notifications. These are sent in the
* same sequence as process detach notification.
*
*/
/*
void MODULE_DllThreadDetach( LPVOID lpReserved )
{
WINE_MODREF *wm;
MODULE_InitDll( wm, DLL_THREAD_DETACH, lpReserved );
}
*/
/***********************************************************************
* MODULE_CreateDummyModule
*
* Create a dummy NE module for Win32 or Winelib.
*/
HMODULE MODULE_CreateDummyModule( LPCSTR filename, HMODULE module32 )
{
printf("MODULE_CreateDummyModule:: Not implemented\n");
return 0;
}
/*
HMODULE MODULE_CreateDummyModule( LPCSTR filename, HMODULE module32 )
{
HMODULE hModule;
NE_MODULE *pModule;
SEGTABLEENTRY *pSegment;
char *pStr,*s;
unsigned int len;
const char* basename;
OFSTRUCT *ofs;
int of_size, size;
// Extract base filename
basename = strrchr(filename, '\\');
if (!basename) basename = filename;
else basename++;
len = strlen(basename);
if ((s = strchr(basename, '.'))) len = s - basename;
// Allocate module
of_size = sizeof(OFSTRUCT) - sizeof(ofs->szPathName)
+ strlen(filename) + 1;
size = sizeof(NE_MODULE) +
// loaded file info
of_size +
// segment table: DS,CS
2 * sizeof(SEGTABLEENTRY) +
// name table
len + 2 +
// several empty tables
8;
hModule = GlobalAlloc16( GMEM_MOVEABLE | GMEM_ZEROINIT, size );
if (!hModule) return (HMODULE)11; // invalid exe
FarSetOwner16( hModule, hModule );
pModule = (NE_MODULE *)GlobalLock16( hModule );
// Set all used entries
pModule->magic = IMAGE_OS2_SIGNATURE;
pModule->count = 1;
pModule->next = 0;
pModule->flags = 0;
pModule->dgroup = 0;
pModule->ss = 1;
pModule->cs = 2;
pModule->heap_size = 0;
pModule->stack_size = 0;
pModule->seg_count = 2;
pModule->modref_count = 0;
pModule->nrname_size = 0;
pModule->fileinfo = sizeof(NE_MODULE);
pModule->os_flags = NE_OSFLAGS_WINDOWS;
pModule->self = hModule;
pModule->module32 = module32;
// Set version and flags
if (module32)
{
pModule->expected_version =
((PE_HEADER(module32)->OptionalHeader.MajorSubsystemVersion & 0xff) << 8 ) |
(PE_HEADER(module32)->OptionalHeader.MinorSubsystemVersion & 0xff);
pModule->flags |= NE_FFLAGS_WIN32;
if (PE_HEADER(module32)->FileHeader.Characteristics & IMAGE_FILE_DLL)
pModule->flags |= NE_FFLAGS_LIBMODULE | NE_FFLAGS_SINGLEDATA;
}
// Set loaded file information
ofs = (OFSTRUCT *)(pModule + 1);
memset( ofs, 0, of_size );
ofs->cBytes = of_size < 256 ? of_size : 255; // FIXME
strcpy( ofs->szPathName, filename );
pSegment = (SEGTABLEENTRY*)((char*)(pModule + 1) + of_size);
pModule->seg_table = (int)pSegment - (int)pModule;
// Data segment
pSegment->size = 0;
pSegment->flags = NE_SEGFLAGS_DATA;
pSegment->minsize = 0x1000;
pSegment++;
// Code segment
pSegment->flags = 0;
pSegment++;
// Module name
pStr = (char *)pSegment;
pModule->name_table = (int)pStr - (int)pModule;
assert(len<256);
*pStr = len;
lstrcpynA( pStr+1, basename, len+1 );
pStr += len+2;
// All tables zero terminated
pModule->res_table = pModule->import_table = pModule->entry_table =
(int)pStr - (int)pModule;
NE_RegisterModule( pModule );
return hModule;
}
*/
/***********************************************************************
* MODULE_GetBinaryType
*
* The GetBinaryType function determines whether a file is executable
* or not and if it is it returns what type of executable it is.
* The type of executable is a property that determines in which
* subsystem an executable file runs under.
*
* Binary types returned:
* SCS_32BIT_BINARY: A Win32 based application
* SCS_DOS_BINARY: An MS-Dos based application
* SCS_WOW_BINARY: A Win16 based application
* SCS_PIF_BINARY: A PIF file that executes an MS-Dos based app
* SCS_POSIX_BINARY: A POSIX based application ( Not implemented )
* SCS_OS216_BINARY: A 16bit OS/2 based application
*
* Returns TRUE if the file is an executable in which case
* the value pointed by lpBinaryType is set.
* Returns FALSE if the file is not an executable or if the function fails.
*
* To do so it opens the file and reads in the header information
* if the extended header information is not present it will
* assume that the file is a DOS executable.
* If the extended header information is present it will
* determine if the file is a 16 or 32 bit Windows executable
* by check the flags in the header.
*
* Note that .COM and .PIF files are only recognized by their
* file name extension; but Windows does it the same way ...
*/
/*
static WIN_BOOL MODULE_GetBinaryType( HANDLE hfile, LPCSTR filename,
LPDWORD lpBinaryType )
{
IMAGE_DOS_HEADER mz_header;
char magic[4], *ptr;
DWORD len;
// Seek to the start of the file and read the DOS header information.
if ( SetFilePointer( hfile, 0, NULL, SEEK_SET ) != -1
&& ReadFile( hfile, &mz_header, sizeof(mz_header), &len, NULL )
&& len == sizeof(mz_header) )
{
// Now that we have the header check the e_magic field
// to see if this is a dos image.
//
if ( mz_header.e_magic == IMAGE_DOS_SIGNATURE )
{
WIN_BOOL lfanewValid = FALSE;
// We do have a DOS image so we will now try to seek into
// the file by the amount indicated by the field
// "Offset to extended header" and read in the
// "magic" field information at that location.
// This will tell us if there is more header information
// to read or not.
//
// But before we do we will make sure that header
// structure encompasses the "Offset to extended header"
// field.
//
if ( (mz_header.e_cparhdr<<4) >= sizeof(IMAGE_DOS_HEADER) )
if ( ( mz_header.e_crlc == 0 ) ||
( mz_header.e_lfarlc >= sizeof(IMAGE_DOS_HEADER) ) )
if ( mz_header.e_lfanew >= sizeof(IMAGE_DOS_HEADER)
&& SetFilePointer( hfile, mz_header.e_lfanew, NULL, SEEK_SET ) != -1
&& ReadFile( hfile, magic, sizeof(magic), &len, NULL )
&& len == sizeof(magic) )
lfanewValid = TRUE;
if ( !lfanewValid )
{
// If we cannot read this "extended header" we will
// assume that we have a simple DOS executable.
//
*lpBinaryType = SCS_DOS_BINARY;
return TRUE;
}
else
{
// Reading the magic field succeeded so
// we will try to determine what type it is.
//
if ( *(DWORD*)magic == IMAGE_NT_SIGNATURE )
{
// This is an NT signature.
//
*lpBinaryType = SCS_32BIT_BINARY;
return TRUE;
}
else if ( *(WORD*)magic == IMAGE_OS2_SIGNATURE )
{
// The IMAGE_OS2_SIGNATURE indicates that the
// "extended header is a Windows executable (NE)
// header." This can mean either a 16-bit OS/2
// or a 16-bit Windows or even a DOS program
// (running under a DOS extender). To decide
// which, we'll have to read the NE header.
///
IMAGE_OS2_HEADER ne;
if ( SetFilePointer( hfile, mz_header.e_lfanew, NULL, SEEK_SET ) != -1
&& ReadFile( hfile, &ne, sizeof(ne), &len, NULL )
&& len == sizeof(ne) )
{
switch ( ne.ne_exetyp )
{
case 2: *lpBinaryType = SCS_WOW_BINARY; return TRUE;
case 5: *lpBinaryType = SCS_DOS_BINARY; return TRUE;
default: *lpBinaryType = SCS_OS216_BINARY; return TRUE;
}
}
// Couldn't read header, so abort.
return FALSE;
}
else
{
// Unknown extended header, but this file is nonetheless
// DOS-executable.
//
*lpBinaryType = SCS_DOS_BINARY;
return TRUE;
}
}
}
}
// If we get here, we don't even have a correct MZ header.
// Try to check the file extension for known types ...
//
ptr = strrchr( filename, '.' );
if ( ptr && !strchr( ptr, '\\' ) && !strchr( ptr, '/' ) )
{
if ( !lstrcmpiA( ptr, ".COM" ) )
{
*lpBinaryType = SCS_DOS_BINARY;
return TRUE;
}
if ( !lstrcmpiA( ptr, ".PIF" ) )
{
*lpBinaryType = SCS_PIF_BINARY;
return TRUE;
}
}
return FALSE;
}
*/
/***********************************************************************
* GetBinaryTypeA [KERNEL32.280]
*/
/*
WIN_BOOL WINAPI GetBinaryTypeA( LPCSTR lpApplicationName, LPDWORD lpBinaryType )
{
WIN_BOOL ret = FALSE;
HANDLE hfile;
TRACE_(win32)("%s\n", lpApplicationName );
// Sanity check.
if ( lpApplicationName == NULL || lpBinaryType == NULL )
return FALSE;
// Open the file indicated by lpApplicationName for reading.
hfile = CreateFileA( lpApplicationName, GENERIC_READ, 0,
NULL, OPEN_EXISTING, 0, -1 );
if ( hfile == INVALID_HANDLE_VALUE )
return FALSE;
// Check binary type
ret = MODULE_GetBinaryType( hfile, lpApplicationName, lpBinaryType );
// Close the file.
CloseHandle( hfile );
return ret;
}
*/
/***********************************************************************
* LoadLibraryExA (KERNEL32)
@ -578,7 +412,9 @@ HMODULE WINAPI LoadLibraryExA(LPCSTR libname, HANDLE hfile, DWORD flags)
SetLastError(ERROR_INVALID_PARAMETER);
return 0;
}
if(fs_installed==0)
install_fs();
wm = MODULE_LoadLibraryExA( libname, hfile, flags );
if ( wm )
@ -619,11 +455,13 @@ WINE_MODREF *MODULE_LoadLibraryExA( LPCSTR libname, HFILE hfile, DWORD flags )
SetLastError( ERROR_FILE_NOT_FOUND );
TRACE("Trying native dll '%s'\n", libname);
pwm = PE_LoadLibraryExA(libname, flags);
#ifdef HAVE_LIBDL
if(!pwm)
{
TRACE("Trying ELF dll '%s'\n", libname);
pwm=ELFDLL_LoadLibraryExA(libname, flags);
pwm=(WINE_MODREF*)ELFDLL_LoadLibraryExA(libname, flags);
}
#endif
// printf("0x%08x\n", pwm);
// break;
if(pwm)
@ -761,10 +599,12 @@ FARPROC MODULE_GetProcAddress(
retproc = PE_FindExportedFunction( wm, function, snoop );
if (!retproc) SetLastError(ERROR_PROC_NOT_FOUND);
return retproc;
#ifdef HAVE_LIBDL
case MODULE32_ELF:
retproc = dlsym( wm->module, function);
retproc = (FARPROC) dlsym( wm->module, function);
if (!retproc) SetLastError(ERROR_PROC_NOT_FOUND);
return retproc;
#endif
default:
ERR("wine_modref type %d not handled.\n",wm->type);
SetLastError(ERROR_INVALID_HANDLE);

View file

@ -289,7 +289,7 @@ DWORD fixup_imports( WINE_MODREF *wm )
break;
//#warning FIXME: here we should fill imports
printf("Loading imports for %s\n", name);
TRACE("Loading imports for %s.dll\n", name);
if (pe_imp->u.OriginalFirstThunk != 0) {
TRACE("Microsoft style imports used\n");
@ -388,7 +388,7 @@ static void do_relocations( unsigned int load_addr, IMAGE_BASE_RELOCATION *r )
{
int offset = r->TypeOffset[i] & 0xFFF;
int type = r->TypeOffset[i] >> 12;
TRACE_(fixup)("patching %x type %x\n", offset, type);
// TRACE_(fixup)("patching %x type %x\n", offset, type);
switch(type)
{
case IMAGE_REL_BASED_ABSOLUTE: break;
@ -797,20 +797,12 @@ WINE_MODREF *PE_CreateModule( HMODULE hModule,
wm->binfmt.pe.pe_resource = pe_resource;
wm->binfmt.pe.tlsindex = -1;
wm->filename = strdup( filename );
wm->filename = malloc(strlen(filename)+1);
strcpy(wm->filename, filename );
wm->modname = strrchr( wm->filename, '\\' );
if (!wm->modname) wm->modname = wm->filename;
else wm->modname++;
// result = GetShortPathNameA( wm->filename, NULL, 0 );
// wm->short_filename = (char *)HeapAlloc( GetProcessHeap(), 0, result+1 );
// GetShortPathNameA( wm->filename, wm->short_filename, result+1 );
// wm->short_modname = strrchr( wm->short_filename, '\\' );
// if (!wm->short_modname) wm->short_modname = wm->short_filename;
// else wm->short_modname++;
// return NULL;
// }
if ( pe_export )
dump_exports( hModule );
@ -836,22 +828,15 @@ WINE_MODREF *PE_CreateModule( HMODULE hModule,
*/
WINE_MODREF *PE_LoadLibraryExA (LPCSTR name, DWORD flags)
{
// struct load_dll_request *req = get_req_buffer();
HMODULE hModule32;
WINE_MODREF *wm;
char filename[256];
// HANDLE hFile;
int hFile;
WORD version = 0;
// if ( SearchPathA( NULL, name, ".DLL",
// sizeof(filename), filename, NULL ) == 0 ) return NULL;
strncpy(filename, name, sizeof(filename));
// hFile = CreateFileA( filename, GENERIC_READ, FILE_SHARE_READ,
// NULL, OPEN_EXISTING, 0, -1 );
hFile=open(filename, O_RDONLY);
// if ( hFile == INVALID_HANDLE_VALUE ) return NULL;
if(hFile==-1)
return NULL;
@ -863,34 +848,12 @@ WINE_MODREF *PE_LoadLibraryExA (LPCSTR name, DWORD flags)
return NULL;
}
// Create 16-bit dummy module
/*
if ((hModule16 = MODULE_CreateDummyModule( filename, hModule32 )) < 32)
{
CloseHandle( hFile );
SetLastError( (DWORD)hModule16 ); // This should give the correct error
return NULL;
}
*/
if ( !(wm = PE_CreateModule( hModule32, filename, flags, FALSE )) )
{
ERR( "can't load %s\n", filename );
// FreeLibrary16( hModule16 );
SetLastError( ERROR_OUTOFMEMORY );
return NULL;
}
/*
if (wm->binfmt.pe.pe_export)
SNOOP_RegisterDLL(wm->module,wm->modname,wm->binfmt.pe.pe_export->NumberOfFunctions);
req->handle = hFile;
req->base = (void *)hModule32;
req->dbg_offset = 0;
req->dbg_size = 0;
req->name = &wm->modname;
server_call_noerr( REQ_LOAD_DLL );
*/
close(hFile);
return wm;
}
@ -944,13 +907,6 @@ WIN_BOOL PE_InitDLL( WINE_MODREF *wm, DWORD type, LPVOID lpReserved )
return retv;
}
/************************************************************************
* PE_InitTls (internal)
*
* If included, initialises the thread local storages of modules.
* Pointers in those structs are not RVAs but real pointers which have been
* relocated by do_relocations() already.
*/
static LPVOID
_fixup_address(PIMAGE_OPTIONAL_HEADER opt,int delta,LPVOID addr) {
if ( ((DWORD)addr>opt->ImageBase) &&

View file

@ -1,408 +1,410 @@
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <errno.h>
#include <sys/param.h>
#include <wine/winbase.h>
#include <wine/winreg.h>
#include <wine/winnt.h>
#include <wine/winerror.h>
#include <registry.h>
struct reg_value
{
int type;
char* name;
int len;
char* value;
};
static int reg_size=0;
static struct reg_value* regs=0;
struct reg_handle_s;
/*
 * An open key handle.  Handles form a doubly linked list; `head` points
 * at the most recently inserted node and lookups walk the prev links.
 */
typedef struct reg_handle_s
{
/* Numeric handle value handed back to callers. */
int handle;
/* Heap-allocated full path of the key this handle refers to. */
char* name;
/* Node inserted after this one; 0 for the newest node. */
struct reg_handle_s* next;
/* Node inserted before this one; 0 for the oldest node. */
struct reg_handle_s* prev;
} reg_handle_t;
static reg_handle_t* head=0;
#define DIR -25
static void create_registry();
static void open_registry();
static void save_registry();
static void create_registry()
{
if(regs)
{
printf("Logic error: create_registry() called with existing registry\n");
save_registry();
return;
}
regs=(struct reg_value*)malloc(3*sizeof(struct reg_value));
regs[0].type=regs[1].type=DIR;
regs[0].name=strdup("HKLM");
regs[1].name=strdup("HKCU");
regs[0].value=regs[1].value=NULL;
regs[0].len=regs[1].len=0;
reg_size=2;
save_registry();
}
static void open_registry()
{
int fd;
int i;
int len;
char user_conf[PATH_MAX+1];
if(regs)
{
printf("Multiple open_registry(>\n");
return;
}
snprintf(user_conf, PATH_MAX, "%s/.gstreamer/win32/registry", getenv("HOME"));
fd=open(user_conf, O_RDONLY);
if(fd==-1)
{
printf("Creating new registry\n");
create_registry();
return;
}
read(fd, &reg_size, 4);
regs=(struct reg_value*)malloc(reg_size*sizeof(struct reg_value));
for(i=0; i<reg_size; i++)
{
read(fd,&regs[i].type,4);
read(fd,&len,4);
regs[i].name=(char*)malloc(len+1);
if(regs[i].name==0)
{
reg_size=i+1;
goto error;
}
read(fd, regs[i].name, len);
regs[i].name[len]=0;
read(fd,&regs[i].len,4);
regs[i].value=(char*)malloc(len);
if(regs[i].value==0)
{
free(regs[i].name);
reg_size=i+1;
goto error;
}
read(fd, regs[i].value, regs[i].len);
}
error:
close(fd);
return;
}
/*
 * Write the in-memory registry to $HOME/.gstreamer/win32/registry,
 * creating the two directories on the way if needed.
 *
 * The layout matches what open_registry() reads: entry count, then per
 * entry type, name length, name bytes, value length, value bytes, with
 * every integer written as 4 raw bytes.  Failures are reported on stdout;
 * nothing is returned to the caller.
 */
static void save_registry()
{
    int fd, i, len, res, ok;
    const char* home;
    char user_conf[PATH_MAX+1];

    /* Passing a NULL $HOME to "%s" would be undefined, so give up early. */
    home=getenv("HOME");
    if(home==NULL)
    {
        printf("Failed to open registry file for writing.\n");
        return;
    }

    /* The parent may already exist; a real failure shows up in the next mkdir. */
    snprintf(user_conf, PATH_MAX, "%s/.gstreamer/", home);
    mkdir(user_conf, 00777);
    snprintf(user_conf, PATH_MAX, "%s/.gstreamer/win32/", home);
    res=mkdir(user_conf, 00777);
    if (res == -1 && errno != EEXIST)
    {
        printf("Failed to create directory %s/.gstreamer/win32.\n", home);
        perror("mkdir");
        return;
    }

    snprintf(user_conf, PATH_MAX, "%s/.gstreamer/win32/registry", home);
    fd=open(user_conf, O_WRONLY | O_CREAT, 00777);
    if(fd==-1)
    {
        printf("Failed to open registry file for writing.\n");
        return;
    }

    /* Stop at the first short or failed write instead of carrying on silently. */
    ok=(write(fd, &reg_size, 4)==4);
    for(i=0; ok && i<reg_size; i++)
    {
        len=strlen(regs[i].name);
        ok=write(fd, &regs[i].type, 4)==4
           && write(fd, &len, 4)==4
           && write(fd, regs[i].name, len)==len
           && write(fd, &regs[i].len, 4)==4
           && write(fd, regs[i].value, regs[i].len)==regs[i].len;
    }
    if(!ok)
        printf("Failed to write registry file.\n");
    close(fd);
}
/*
 * Look up an open handle by its full key path.
 * Walks from the newest handle towards the oldest and returns the first
 * node whose name equals `name`, or 0 when there is none.
 */
static reg_handle_t* find_handle_by_name(const char* name)
{
    reg_handle_t* node=head;

    while(node!=0)
    {
        if(strcmp(node->name, name)==0)
            return node;
        node=node->prev;
    }
    return 0;
}
static struct reg_value* find_value_by_name(const char* name)
{
int i;
for(i=0; i<reg_size; i++)
if(!strcmp(regs[i].name, name))
return regs+i;
return 0;
}
/*
 * Look up an open handle by its numeric value.
 * Walks from the newest handle towards the oldest; returns 0 when the
 * value is not in the list.
 */
static reg_handle_t* find_handle(int handle)
{
    reg_handle_t* node=head;

    while(node!=0 && node->handle!=handle)
        node=node->prev;
    return node;
}
/*
 * Produce the next handle value.  A static counter (first result: 250)
 * is advanced once per call and stepped past the two predefined root
 * handle values so they are never handed out.
 */
static int generate_handle()
{
    static int zz=249;

    do
    {
        zz++;
    } while((zz==HKEY_LOCAL_MACHINE) || (zz==HKEY_CURRENT_USER));
    return zz;
}
/*
 * Allocate a handle node for (handle, name), link it in as the newest
 * node and make it the new head.  The name is duplicated, so the caller
 * keeps ownership of its own string.  Returns the new node.
 */
static reg_handle_t* insert_handle(long handle, const char* name)
{
    reg_handle_t* node=(reg_handle_t*)malloc(sizeof(reg_handle_t));

    /* With an empty list head is 0, which is exactly the prev we want. */
    node->prev=head;
    if(head!=0)
        head->next=node;
    node->next=0;
    node->name=strdup(name);
    node->handle=handle;

    head=node;
    return node;
}
/*
 * Build the full registry name "<path of key>\<subkey>".
 *
 * key    - value of an open handle
 * subkey - name to append; NULL is replaced by "<default>"
 *
 * Returns a heap-allocated string the caller must free, or NULL when the
 * handle is unknown or memory cannot be allocated.
 */
static char* build_keyname(long key, const char* subkey)
{
    char* full_name;
    reg_handle_t* t;

    if((t=find_handle(key))==0)
    {
        printf("Invalid key\n");
        return NULL;
    }
    if(subkey==NULL)
        subkey="<default>";

    /* parent + backslash + subkey + terminating NUL */
    full_name=(char*)malloc(strlen(t->name)+strlen(subkey)+2);
    if(full_name==NULL)
        return NULL;
    strcpy(full_name, t->name);
    strcat(full_name, "\\");
    strcat(full_name, subkey);
    return full_name;
}
/*
 * Create or replace the registry entry "<path of handle>\<name>" and
 * save the registry to disk.
 *
 * handle - open key handle the entry lives under
 * name   - entry name; NULL means "<default>" (see build_keyname)
 * type   - type tag stored with the entry
 * value  - data to copy; may be NULL only when len is 0
 * len    - number of bytes in value
 *
 * Returns the stored entry, or NULL if the handle is invalid, the
 * arguments are inconsistent or memory runs out.  On failure the
 * registry is left unchanged.
 */
struct reg_value* insert_reg_value(int handle, const char* name, int type, void* value, int len)
{
    struct reg_value* v;
    struct reg_value* grown;
    char* fullname;
    char* copy=NULL;

    if((fullname=build_keyname(handle, name))==NULL)
    {
        printf("Invalid handle\n");
        return NULL;
    }
    if(len<0 || (len>0 && value==NULL))
    {
        free(fullname);
        return NULL;
    }

    /* Copy the data first so a failed allocation cannot damage an existing entry. */
    if(len>0)
    {
        copy=(char*)malloc(len);
        if(copy==NULL)
        {
            free(fullname);
            return NULL;
        }
        memcpy(copy, value, len);
    }

    if((v=find_value_by_name(fullname))==0)
    //creating new value in registry
    {
        if(regs==0)
            create_registry();
        /* Keep regs valid if realloc fails. */
        grown=(struct reg_value*)realloc(regs, sizeof(struct reg_value)*(reg_size+1));
        if(grown==NULL)
        {
            free(copy);
            free(fullname);
            return NULL;
        }
        regs=grown;
        v=regs+reg_size;
        reg_size++;
    }
    else
    //replacing old one
    {
        free(v->value);
        free(v->name);
    }

    v->type=type;
    v->len=len;
    v->value=copy;
    /* The entry takes ownership of fullname, so it is neither copied nor leaked. */
    v->name=fullname;
    save_registry();
    return v;
}
/*
 * One-time setup: load (or create) the registry, then register handles
 * for the two predefined roots so key paths can be built from them.
 */
static void init_registry()
{
    printf("Initializing registry\n");

    /* Fill regs from disk, or create a fresh registry. */
    open_registry();

    /* Predefined root handles and the names their entries are stored under. */
    insert_handle(HKEY_LOCAL_MACHINE, "HKLM");
    insert_handle(HKEY_CURRENT_USER, "HKCU");
}
/*
 * Find the open handle whose path is "<path of key>\<subkey>".
 *
 * Returns (reg_handle_t*)-1 when key itself is not an open handle, the
 * handle for key when subkey is NULL, the matching handle otherwise, and
 * 0 when no such handle is open or memory cannot be allocated.
 */
static reg_handle_t* find_handle_2(long key, char* subkey)
{
    char* full_name;
    reg_handle_t* t;

    if((t=find_handle(key))==0)
    {
        printf("Invalid key\n");
        return (reg_handle_t*)-1;
    }
    if(subkey==NULL)
        return t;

    /* parent + backslash + subkey + terminating NUL */
    full_name=(char*)malloc(strlen(t->name)+strlen(subkey)+2);
    if(full_name==NULL)
        return 0;
    strcpy(full_name, t->name);
    strcat(full_name, "\\");
    strcat(full_name, subkey);
    t=find_handle_by_name(full_name);
    free(full_name);
    return t;
}
/*
 * Open a handle for "<path of key>\<subkey>".
 *
 * key      - an open handle (or one of the predefined roots)
 * subkey   - name below key; NULL is mapped to "<default>" by build_keyname
 * reserved - unused
 * access   - unused
 * newkey   - receives the new handle value; must not be NULL
 *
 * Returns 0 on success and -1 if key is invalid, newkey is NULL or the
 * name cannot be built.  No check is made that the key exists in the
 * registry; a handle is created for the name regardless.
 */
long RegOpenKeyExA(long key, char* subkey, long reserved, long access, int* newkey)
{
    char* full_name;
    reg_handle_t* t;

    /* "%s" must never be handed a NULL pointer. */
    printf("Opening key %s\n", subkey ? subkey : "(null)");
    if(!regs)
        init_registry();

    /* Check before allocating a handle that could never be returned. */
    if(newkey==NULL)
        return -1;

    full_name=build_keyname(key, subkey);
    if(!full_name)
        return -1;
    t=insert_handle(generate_handle(), full_name);
    *newkey=t->handle;
    free(full_name);
    return 0;
}
/*
 * Close a handle obtained from RegOpenKeyExA/RegCreateKeyExA.
 *
 * The two predefined roots are never closed.  Returns 1 when a handle
 * was removed and freed, 0 for the roots and for unknown handles.
 */
long RegCloseKey(long key)
{
    reg_handle_t* node;

    if(key==HKEY_LOCAL_MACHINE)
        return 0;
    if(key==HKEY_CURRENT_USER)
        return 0;

    node=find_handle(key);
    if(node==0)
        return 0;

    /* Unlink the node from both neighbours. */
    if(node->prev)
        node->prev->next=node->next;
    if(node->next)
        node->next->prev=node->prev;
    /* If it was the newest node, the one before it becomes head. */
    if(node==head)
        head=node->prev;

    free(node->name);
    free(node);
    return 1;
}
/*
 * Read the entry "<path of key>\<value>".
 *
 * key      - open handle
 * value    - entry name; NULL selects "<default>" (see build_keyname)
 * reserved - unused
 * type     - optional; receives the entry's type
 * data     - optional buffer for the entry's bytes
 * count    - size of data in bytes on entry, size of the entry on return;
 *            required whenever data is given
 *
 * Returns 0 on success, 1 for an invalid key or for data without count,
 * 2 when the entry does not exist, and ERROR_MORE_DATA when the buffer
 * is too small.  In the last case the bytes that fit are still copied.
 */
long RegQueryValueExA(long key, char* value, int* reserved, int* type, int* data, int* count)
{
    struct reg_value* t;
    char* c;
    int avail;
    int ncopy;
    int too_small;

    /* "%s" must never be handed a NULL pointer. */
    printf("Querying value %s\n", value ? value : "(null)");
    if(!regs)
        init_registry();

    /* A buffer without a size cannot be filled safely. */
    if(data!=NULL && count==NULL)
        return 1;

    c=build_keyname(key, value);
    if(c==NULL)
        return 1;
    t=find_value_by_name(c);
    free(c);
    if(t==0)
        return 2;

    if(type)
        *type=t->type;
    if(count==NULL)
        return 0;

    /* A negative size is treated as an empty buffer. */
    avail=(*count>0)?*count:0;
    ncopy=(t->len<avail)?t->len:avail;
    if(data!=NULL && ncopy>0)
        memcpy(data, t->value, ncopy);

    /* Report the real size in every case so callers can size a retry. */
    too_small=(avail<t->len);
    *count=t->len;
    return too_small?ERROR_MORE_DATA:0;
}
/*
 * Open "<path of key>\<name>", creating a DIR entry for it first when
 * the registry has no entry of that name yet.
 *
 * key      - open handle the new key lives under
 * name     - key name; NULL is mapped to "<default>" by build_keyname
 * newkey   - receives the new handle value; must not be NULL
 * status   - optional; receives REG_CREATED_NEW_KEY or
 *            REG_OPENED_EXISTING_KEY
 * reserved, classs, options, security, sec_attr - unused
 *
 * Returns 0 on success and 1 on any failure.
 */
long RegCreateKeyExA(long key, char* name, long reserved,
                     void* classs, long options, long security,
                     void* sec_attr, int* newkey, int* status)
{
    reg_handle_t* t;
    char* fullname;
    int disposition;

    /* "%s" must never be handed a NULL pointer. */
    printf("Creating/Opening key %s\n", name ? name : "(null)");
    if(!regs)
        init_registry();

    if(newkey==NULL)
        return 1;
    fullname=build_keyname(key, name);
    if(fullname==NULL)
        return 1;

    if(find_value_by_name(fullname)==0)
    {
        /* Payload stored with a key entry; NOTE(review): meaning of 45708 is not visible here. */
        int qw=45708;

        if(insert_reg_value(key, name, DIR, &qw, 4)==NULL)
        {
            free(fullname);
            return 1;
        }
        disposition=REG_CREATED_NEW_KEY;
    }
    else
        disposition=REG_OPENED_EXISTING_KEY;

    /* Callers may pass NULL when they do not care about the disposition. */
    if(status)
        *status=disposition;

    t=insert_handle(generate_handle(), fullname);
    *newkey=t->handle;
    free(fullname);
    return 0;
}
/*
 * Store `size` bytes from `data` as the entry "<path of key>\<name>"
 * with type tag v2.  v1 is unused.
 * Returns 1 when key is not a valid handle, 0 otherwise.
 */
long RegSetValueExA(long key, char* name, long v1, long v2, void* data, long size)
{
    char* probe;

    printf("Request to set value %s\n", name);
    if(!regs)
        init_registry();

    /* The name is built only to validate the handle; the insert rebuilds it. */
    probe=build_keyname(key, name);
    if(probe==NULL)
        return 1;
    free(probe);

    insert_reg_value(key, name, v2, data, size);
    return 0;
}
#include <stdio.h>
#include <fcntl.h>
#include <pwd.h>
#include <sys/types.h>
#include <wine/winbase.h>
#include <wine/winreg.h>
#include <wine/winnt.h>
#include <wine/winerror.h>
#include <registry.h>
struct reg_value
{
int type;
char* name;
int len;
char* value;
};
static int reg_size=0;
static struct reg_value* regs=0;
struct reg_handle_s;
/*
 * An open key handle.  Handles form a doubly linked list; `head` points
 * at the most recently inserted node and lookups walk the prev links.
 */
typedef struct reg_handle_s
{
/* Numeric handle value handed back to callers. */
int handle;
/* Heap-allocated full path of the key this handle refers to. */
char* name;
/* Node inserted after this one; 0 for the newest node. */
struct reg_handle_s* next;
/* Node inserted before this one; 0 for the oldest node. */
struct reg_handle_s* prev;
} reg_handle_t;
static reg_handle_t* head=0;
#define DIR -25
static void create_registry();
static void open_registry();
static void save_registry();
static void create_registry(){
if(regs)
{
printf("Logic error: create_registry() called with existing registry\n");
save_registry();
return;
}
regs=(struct reg_value*)malloc(3*sizeof(struct reg_value));
regs[0].type=regs[1].type=DIR;
regs[0].name=(char*)malloc(5);
strcpy(regs[0].name, "HKLM");
regs[1].name=(char*)malloc(5);
strcpy(regs[1].name, "HKCU");
regs[0].value=regs[1].value=NULL;
regs[0].len=regs[1].len=0;
reg_size=2;
save_registry();
}
static void open_registry()
{
int fd;
int i;
int len;
struct passwd* pwent;
char* pathname;
if(regs)
{
printf("Multiple open_registry(>\n");
return;
}
pwent=getpwuid(getuid());
pathname=(char*)malloc(strlen(pwent->pw_dir)+20);
strcpy(pathname, pwent->pw_dir);
strcat(pathname, "/.registry");
fd=open(pathname, O_RDONLY);
free(pathname);
if(fd==-1)
{
printf("Creating new registry\n");
create_registry();
return;
}
read(fd, &reg_size, 4);
regs=(struct reg_value*)malloc(reg_size*sizeof(struct reg_value));
for(i=0; i<reg_size; i++)
{
read(fd,&regs[i].type,4);
read(fd,&len,4);
regs[i].name=(char*)malloc(len+1);
if(regs[i].name==0)
{
reg_size=i+1;
goto error;
}
read(fd, regs[i].name, len);
regs[i].name[len]=0;
read(fd,&regs[i].len,4);
regs[i].value=(char*)malloc(len);
if(regs[i].value==0)
{
free(regs[i].name);
reg_size=i+1;
goto error;
}
read(fd, regs[i].value, regs[i].len);
}
error:
close(fd);
return;
}
/*
 * Write the in-memory registry to "<home directory>/.registry", where
 * the home directory is taken from the current user's passwd entry.
 *
 * The layout matches what open_registry() reads: entry count, then per
 * entry type, name length, name bytes, value length, value bytes, with
 * every integer written as 4 raw bytes.  Failures are reported on stdout;
 * nothing is returned to the caller.
 */
static void save_registry()
{
    int fd, i, len, ok;
    struct passwd* pwent;
    char* pathname;

    /* getpwuid() and malloc() can both fail; either way the file cannot be opened. */
    fd=-1;
    pwent=getpwuid(getuid());
    if(pwent!=NULL && pwent->pw_dir!=NULL)
    {
        pathname=(char*)malloc(strlen(pwent->pw_dir)+20);
        if(pathname!=NULL)
        {
            strcpy(pathname, pwent->pw_dir);
            strcat(pathname, "/.registry");
            fd=open(pathname, O_WRONLY | O_CREAT, 00777);
            free(pathname);
        }
    }
    if(fd==-1)
    {
        printf("Failed to open registry file for writing.\n");
        return;
    }

    /* Stop at the first short or failed write instead of carrying on silently. */
    ok=(write(fd, &reg_size, 4)==4);
    for(i=0; ok && i<reg_size; i++)
    {
        len=strlen(regs[i].name);
        ok=write(fd, &regs[i].type, 4)==4
           && write(fd, &len, 4)==4
           && write(fd, regs[i].name, len)==len
           && write(fd, &regs[i].len, 4)==4
           && write(fd, regs[i].value, regs[i].len)==regs[i].len;
    }
    if(!ok)
        printf("Failed to write registry file.\n");
    close(fd);
}
/*
 * Look up an open handle by its full key path.
 * Walks from the newest handle towards the oldest and returns the first
 * node whose name equals `name`, or 0 when there is none.
 */
static reg_handle_t* find_handle_by_name(const char* name)
{
    reg_handle_t* node=head;

    while(node!=0)
    {
        if(strcmp(node->name, name)==0)
            return node;
        node=node->prev;
    }
    return 0;
}
static struct reg_value* find_value_by_name(const char* name)
{
int i;
for(i=0; i<reg_size; i++)
if(!strcmp(regs[i].name, name))
return regs+i;
return 0;
}
/*
 * Look up an open handle by its numeric value.
 * Walks from the newest handle towards the oldest; returns 0 when the
 * value is not in the list.
 */
static reg_handle_t* find_handle(int handle)
{
    reg_handle_t* node=head;

    while(node!=0 && node->handle!=handle)
        node=node->prev;
    return node;
}
/*
 * Produce the next handle value.  A static counter (first result: 250)
 * is advanced once per call and stepped past the two predefined root
 * handle values so they are never handed out.
 */
static int generate_handle()
{
    static int zz=249;

    do
    {
        zz++;
    } while((zz==HKEY_LOCAL_MACHINE) || (zz==HKEY_CURRENT_USER));
    return zz;
}
/*
 * Allocate a handle node for (handle, name), link it in as the newest
 * node and make it the new head.  The name is copied, so the caller
 * keeps ownership of its own string.  Returns the new node.
 */
static reg_handle_t* insert_handle(long handle, const char* name)
{
    reg_handle_t* node=(reg_handle_t*)malloc(sizeof(reg_handle_t));

    /* With an empty list head is 0, which is exactly the prev we want. */
    node->prev=head;
    if(head!=0)
        head->next=node;
    node->next=0;

    node->name=(char*)malloc(strlen(name)+1);
    strcpy(node->name, name);
    node->handle=handle;

    head=node;
    return node;
}
/*
 * Build the full registry name "<path of key>\<subkey>".
 *
 * key    - value of an open handle
 * subkey - name to append; NULL is replaced by "<default>"
 *
 * Returns a heap-allocated string the caller must free, or NULL when the
 * handle is unknown or memory cannot be allocated.
 */
static char* build_keyname(long key, const char* subkey)
{
    char* full_name;
    reg_handle_t* t;

    if((t=find_handle(key))==0)
    {
        TRACE("Invalid key\n");
        return NULL;
    }
    if(subkey==NULL)
        subkey="<default>";

    /* parent + backslash + subkey + terminating NUL */
    full_name=(char*)malloc(strlen(t->name)+strlen(subkey)+2);
    if(full_name==NULL)
        return NULL;
    strcpy(full_name, t->name);
    strcat(full_name, "\\");
    strcat(full_name, subkey);
    return full_name;
}
/*
 * Create or replace the registry entry "<path of handle>\<name>" and
 * save the registry to disk.
 *
 * handle - open key handle the entry lives under
 * name   - entry name; NULL means "<default>" (see build_keyname)
 * type   - type tag stored with the entry
 * value  - data to copy; may be NULL only when len is 0
 * len    - number of bytes in value
 *
 * Returns the stored entry, or NULL if the handle is invalid, the
 * arguments are inconsistent or memory runs out.  On failure the
 * registry is left unchanged.
 */
struct reg_value* insert_reg_value(int handle, const char* name, int type, void* value, int len)
{
    struct reg_value* v;
    struct reg_value* grown;
    char* fullname;
    char* copy=NULL;

    if((fullname=build_keyname(handle, name))==NULL)
    {
        TRACE("Invalid handle\n");
        return NULL;
    }
    if(len<0 || (len>0 && value==NULL))
    {
        free(fullname);
        return NULL;
    }

    /* Copy the data first so a failed allocation cannot damage an existing entry. */
    if(len>0)
    {
        copy=(char*)malloc(len);
        if(copy==NULL)
        {
            free(fullname);
            return NULL;
        }
        memcpy(copy, value, len);
    }

    if((v=find_value_by_name(fullname))==0)
    //creating new value in registry
    {
        if(regs==0)
            create_registry();
        /* Keep regs valid if realloc fails. */
        grown=(struct reg_value*)realloc(regs, sizeof(struct reg_value)*(reg_size+1));
        if(grown==NULL)
        {
            free(copy);
            free(fullname);
            return NULL;
        }
        regs=grown;
        v=regs+reg_size;
        reg_size++;
    }
    else
    //replacing old one
    {
        free(v->value);
        free(v->name);
    }

    v->type=type;
    v->len=len;
    v->value=copy;
    /* The entry takes ownership of fullname, so it is neither copied nor leaked. */
    v->name=fullname;
    save_registry();
    return v;
}
/*
 * One-time setup: load (or create) the registry, then register handles
 * for the two predefined roots so key paths can be built from them.
 */
static void init_registry()
{
    printf("Initializing registry\n");

    /* Fill regs from disk, or create a fresh registry. */
    open_registry();

    /* Predefined root handles and the names their entries are stored under. */
    insert_handle(HKEY_LOCAL_MACHINE, "HKLM");
    insert_handle(HKEY_CURRENT_USER, "HKCU");
}
/*
 * Find the open handle whose path is "<path of key>\<subkey>".
 *
 * Returns (reg_handle_t*)-1 when key itself is not an open handle, the
 * handle for key when subkey is NULL, the matching handle otherwise, and
 * 0 when no such handle is open or memory cannot be allocated.
 */
static reg_handle_t* find_handle_2(long key, const char* subkey)
{
    char* full_name;
    reg_handle_t* t;

    if((t=find_handle(key))==0)
    {
        TRACE("Invalid key\n");
        return (reg_handle_t*)-1;
    }
    if(subkey==NULL)
        return t;

    /* parent + backslash + subkey + terminating NUL */
    full_name=(char*)malloc(strlen(t->name)+strlen(subkey)+2);
    if(full_name==NULL)
        return 0;
    strcpy(full_name, t->name);
    strcat(full_name, "\\");
    strcat(full_name, subkey);
    t=find_handle_by_name(full_name);
    free(full_name);
    return t;
}
/*
 * Open a handle for "<path of key>\<subkey>".
 *
 * key      - an open handle (or one of the predefined roots)
 * subkey   - name below key; NULL is mapped to "<default>" by build_keyname
 * reserved - unused
 * access   - unused
 * newkey   - receives the new handle value; must not be NULL
 *
 * Returns 0 on success and -1 if key is invalid, newkey is NULL or the
 * name cannot be built.  No check is made that the key exists in the
 * registry; a handle is created for the name regardless.
 */
long RegOpenKeyExA(long key, const char* subkey, long reserved, long access, int* newkey)
{
    char* full_name;
    reg_handle_t* t;

    /* "%s" must never be handed a NULL pointer. */
    TRACE("Opening key %s\n", subkey ? subkey : "(null)");
    if(!regs)
        init_registry();

    /* Check before allocating a handle that could never be returned. */
    if(newkey==NULL)
        return -1;

    full_name=build_keyname(key, subkey);
    if(!full_name)
        return -1;
    t=insert_handle(generate_handle(), full_name);
    *newkey=t->handle;
    free(full_name);
    return 0;
}
/*
 * Close a handle obtained from RegOpenKeyExA/RegCreateKeyExA.
 *
 * The two predefined roots are never closed.  Returns 1 when a handle
 * was removed and freed, 0 for the roots and for unknown handles.
 */
long RegCloseKey(long key)
{
    reg_handle_t* node;

    if(key==HKEY_LOCAL_MACHINE)
        return 0;
    if(key==HKEY_CURRENT_USER)
        return 0;

    node=find_handle(key);
    if(node==0)
        return 0;

    /* Unlink the node from both neighbours. */
    if(node->prev)
        node->prev->next=node->next;
    if(node->next)
        node->next->prev=node->prev;
    /* If it was the newest node, the one before it becomes head. */
    if(node==head)
        head=node->prev;

    free(node->name);
    free(node);
    return 1;
}
/*
 * Read the entry "<path of key>\<value>".
 *
 * key      - open handle
 * value    - entry name; NULL selects "<default>" (see build_keyname)
 * reserved - unused
 * type     - optional; receives the entry's type
 * data     - optional buffer for the entry's bytes
 * count    - size of data in bytes on entry, size of the entry on return;
 *            required whenever data is given
 *
 * Returns 0 on success, 1 for an invalid key or for data without count,
 * 2 when the entry does not exist, and ERROR_MORE_DATA when the buffer
 * is too small.  In the last case the bytes that fit are still copied.
 */
long RegQueryValueExA(long key, const char* value, int* reserved, int* type, int* data, int* count)
{
    struct reg_value* t;
    char* c;
    int avail;
    int ncopy;
    int too_small;

    /* "%s" must never be handed a NULL pointer. */
    TRACE("Querying value %s\n", value ? value : "(null)");
    if(!regs)
        init_registry();

    /* A buffer without a size cannot be filled safely. */
    if(data!=NULL && count==NULL)
        return 1;

    c=build_keyname(key, value);
    if(c==NULL)
        return 1;
    t=find_value_by_name(c);
    free(c);
    if(t==0)
        return 2;

    if(type)
        *type=t->type;
    if(count==NULL)
        return 0;

    /* A negative size is treated as an empty buffer. */
    avail=(*count>0)?*count:0;
    ncopy=(t->len<avail)?t->len:avail;
    if(data!=NULL && ncopy>0)
        memcpy(data, t->value, ncopy);

    /* Report the real size in every case so callers can size a retry. */
    too_small=(avail<t->len);
    *count=t->len;
    return too_small?ERROR_MORE_DATA:0;
}
/*
 * Open "<path of key>\<name>", creating a DIR entry for it first when
 * the registry has no entry of that name yet.
 *
 * key      - open handle the new key lives under
 * name     - key name; NULL is mapped to "<default>" by build_keyname
 * newkey   - receives the new handle value; must not be NULL
 * status   - optional; receives REG_CREATED_NEW_KEY or
 *            REG_OPENED_EXISTING_KEY
 * reserved, classs, options, security, sec_attr - unused
 *
 * Returns 0 on success and 1 on any failure.
 */
long RegCreateKeyExA(long key, const char* name, long reserved,
                     void* classs, long options, long security,
                     void* sec_attr, int* newkey, int* status)
{
    reg_handle_t* t;
    char* fullname;
    int disposition;

    /* "%s" must never be handed a NULL pointer. */
    TRACE("Creating/Opening key %s\n", name ? name : "(null)");
    if(!regs)
        init_registry();

    if(newkey==NULL)
        return 1;
    fullname=build_keyname(key, name);
    if(fullname==NULL)
        return 1;

    if(find_value_by_name(fullname)==0)
    {
        /* Payload stored with a key entry; NOTE(review): meaning of 45708 is not visible here. */
        int qw=45708;

        if(insert_reg_value(key, name, DIR, &qw, 4)==NULL)
        {
            free(fullname);
            return 1;
        }
        disposition=REG_CREATED_NEW_KEY;
    }
    else
        disposition=REG_OPENED_EXISTING_KEY;

    /* Callers may pass NULL when they do not care about the disposition. */
    if(status)
        *status=disposition;

    t=insert_handle(generate_handle(), fullname);
    *newkey=t->handle;
    free(fullname);
    return 0;
}
/*
 * Store `size` bytes from `data` as the entry "<path of key>\<name>"
 * with type tag v2.  v1 is unused.
 * Returns 1 when key is not a valid handle, 0 otherwise.
 */
long RegSetValueExA(long key, const char* name, long v1, long v2, void* data, long size)
{
    char* probe;

    TRACE("Request to set value %s\n", name);
    if(!regs)
        init_registry();

    /* The name is built only to validate the handle; the insert rebuilds it. */
    probe=build_keyname(key, name);
    if(probe==NULL)
        return 1;
    free(probe);

    insert_reg_value(key, name, v2, data, size);
    return 0;
}

View file

@ -31,6 +31,21 @@ HIC VFWAPI ICLocate(long fccType, long fccHandler, LPBITMAPINFOHEADER lpbiIn, LP
#define OpenDriverA DrvOpen
extern HDRVR VFWAPI DrvOpen(long);
#define STORE_ALL \
__asm__ ( \
"push %%ebx\n\t" \
"push %%ecx\n\t" \
"push %%edx\n\t" \
"push %%esi\n\t" \
"push %%edi\n\t"::)
#define REST_ALL \
__asm__ ( \
"pop %%edi\n\t" \
"pop %%esi\n\t" \
"pop %%edx\n\t" \
"pop %%ecx\n\t" \
"pop %%ebx\n\t"::)
typedef struct {
@ -249,7 +264,7 @@ ICCompress(
long VFWAPIV
ICDecompress(HIC hic,long dwFlags,LPBITMAPINFOHEADER lpbiFormat,void* lpData,LPBITMAPINFOHEADER lpbi,void* lpBits) {
ICDECOMPRESS icd;
int result;
icd.dwFlags = dwFlags;
icd.lpbiInput = lpbiFormat;
icd.lpInput = lpData;
@ -257,7 +272,10 @@ ICDecompress(HIC hic,long dwFlags,LPBITMAPINFOHEADER lpbiFormat,void* lpData,LPB
icd.lpbiOutput = lpbi;
icd.lpOutput = lpBits;
icd.ckid = 0;
return ICSendMessage(hic,ICM_DECOMPRESS,(long)&icd,sizeof(icd));
STORE_ALL;
result=ICSendMessage(hic,ICM_DECOMPRESS,(long)&icd,sizeof(icd));
REST_ALL;
return result;
}
/***********************************************************************
@ -267,7 +285,7 @@ LRESULT VFWAPI
ICSendMessage(HIC hic,unsigned int msg,long lParam1,long lParam2) {
LRESULT ret;
WINE_HIC *whic = (WINE_HIC*)hic;
char qw[200];
#define XX(x) case x: TRACE("(0x%08lx,"#x",0x%08lx,0x%08lx)\n",(long)hic,lParam1,lParam2);break;
/*
switch (msg) {
@ -317,25 +335,12 @@ ICSendMessage(HIC hic,unsigned int msg,long lParam1,long lParam2) {
*/
// if (whic->driverproc) {
// FIXME("(0x%08lx,0x%08lx,0x%08lx,0x%08lx), calling %p\n",(long)hic,(long)msg,lParam1,lParam2,whic->driverproc);
#define STORE_ALL \
__asm__ ( \
"push %%ebx\n\t" \
"push %%ecx\n\t" \
"push %%edx\n\t" \
"push %%esi\n\t" \
"push %%edi\n\t"::)
#define REST_ALL \
__asm__ ( \
"pop %%edi\n\t" \
"pop %%esi\n\t" \
"pop %%edx\n\t" \
"pop %%ecx\n\t" \
"pop %%ebx\n\t"::)
// printf("private=%x\n", whic->private);
__asm__ __volatile__ ("fsave (%0)\n\t": :"r"(&qw));
STORE_ALL;
ret = whic->driverproc(whic->private,1,msg,lParam1,lParam2);
ret = whic->driverproc(whic->private,1,msg,lParam1,lParam2);
REST_ALL;
__asm__ __volatile__ ("frstor (%0)\n\t": :"r"(&qw));
// } else
// ret = SendDriverMessage(whic->hdrv,msg,lParam1,lParam2);

55
libs/winloader/wineacm.h Normal file
View file

@ -0,0 +1,55 @@
/* -*- tab-width: 8; c-basic-offset: 4 -*- */
/***********************************************************************
* Wine specific - Win32
*/
typedef struct _WINE_ACMDRIVERID *PWINE_ACMDRIVERID;
typedef struct _WINE_ACMDRIVER *PWINE_ACMDRIVER;
/*
 * Common first member of WINE_ACMDRIVER and WINE_ACMSTREAM: both start
 * with a WINE_ACMOBJ, so each carries a pointer to a driver-ID record.
 */
typedef struct _WINE_ACMOBJ
{
/* Driver-ID record this object is associated with. */
PWINE_ACMDRIVERID pACMDriverID;
} WINE_ACMOBJ, *PWINE_ACMOBJ;
/* State kept for one ACM driver object. */
typedef struct _WINE_ACMDRIVER
{
/* Common object header (pointer to the driver-ID record). */
WINE_ACMOBJ obj;
/* Driver handle; presumably the one returned when the driver was opened - confirm. */
HDRVR hDrvr;
/* Driver entry point. */
DRIVERPROC pfnDriverProc;
/* Next driver; presumably continues the list started at
   WINE_ACMDRIVERID.pACMDriverList - confirm. */
PWINE_ACMDRIVER pNextACMDriver;
} WINE_ACMDRIVER;
/* State kept for one ACM stream object. */
typedef struct _WINE_ACMSTREAM
{
/* Common object header (pointer to the driver-ID record). */
WINE_ACMOBJ obj;
/* Driver object used by this stream. */
PWINE_ACMDRIVER pDrv;
/* Stream-instance structure; presumably the one passed to the driver - confirm. */
ACMDRVSTREAMINSTANCE drvInst;
/* ACM driver handle associated with the stream. */
HACMDRIVER hAcmDriver;
} WINE_ACMSTREAM, *PWINE_ACMSTREAM;
/*
 * Record for one registered ACM driver.  Records are chained in both
 * directions through pNextACMDriverID / pPrevACMDriverID; the externs
 * MSACM_pFirstACMDriverID and MSACM_pLastACMDriverID presumably mark the
 * two ends of that chain - confirm against internal.c.
 */
typedef struct _WINE_ACMDRIVERID
{
/* Alias the driver was registered under. */
LPSTR pszDriverAlias;
/* File name the driver was registered with. */
LPSTR pszFileName;
HINSTANCE hInstModule; /* NULL if global */
DWORD dwProcessID; /* ID of process which installed a local driver */
/* Enabled flag for this driver. */
WIN_BOOL bEnabled;
/* Head of the list of driver objects for this driver ID. */
PWINE_ACMDRIVER pACMDriverList;
/* Neighbouring driver-ID records. */
PWINE_ACMDRIVERID pNextACMDriverID;
PWINE_ACMDRIVERID pPrevACMDriverID;
} WINE_ACMDRIVERID;
/* From internal.c */
extern HANDLE MSACM_hHeap;
extern PWINE_ACMDRIVERID MSACM_pFirstACMDriverID;
extern PWINE_ACMDRIVERID MSACM_pLastACMDriverID;
PWINE_ACMDRIVERID MSACM_RegisterDriver(
LPSTR pszDriverAlias, LPSTR pszFileName,
HINSTANCE hinstModule);
void MSACM_RegisterAllDrivers(void);
PWINE_ACMDRIVERID MSACM_UnregisterDriver(PWINE_ACMDRIVERID p);
void MSACM_UnregisterAllDrivers(void);
PWINE_ACMDRIVERID MSACM_GetDriverID(HACMDRIVERID hDriverID);
PWINE_ACMDRIVER MSACM_GetDriver(HACMDRIVER hDriver);
PWINE_ACMOBJ MSACM_GetObj(HACMOBJ hObj);