gstreamer/subprojects/gst-plugins-good/gst/goom/xmmx.c

/*	xmmx.c

	eXtended MultiMedia eXtensions GCC interface library for IA32.

	To use this library, simply include this header file
	and compile with GCC.  You MUST have inlining enabled
	in order for xmmx_ok() to work; this can be done by
	simply using -O on the GCC command line.

	Compiling with -DXMMX_TRACE will cause detailed trace
	output to be sent to stderr for each mmx operation.
	This adds lots of code, and obviously slows execution to
	a crawl, but can be very useful for debugging.

	THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY
	EXPRESS OR IMPLIED WARRANTIES, INCLUDING, WITHOUT
	LIMITATION, THE IMPLIED WARRANTIES OF MERCHANTABILITY
	AND FITNESS FOR ANY PARTICULAR PURPOSE.

	1999 by R. Fisher
	Based on libmmx, 1997-99 by H. Dietz and R. Fisher

 Notes:
	It appears that the latest gas has the pand problem fixed, therefore
	  I'll undefine BROKEN_PAND by default.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include "goom_config.h"

#ifdef HAVE_MMX

/* Define this to get exactly the same result as the C function
 * (a tiny bit slower), although the difference is barely noticeable.
 */
// #define STRICT_COMPAT

#define BUFFPOINTNB 16
#define BUFFPOINTMASK 0xffff
#define BUFFINCR 0xff

#define sqrtperte 16
/* use: a % sqrtperte <=> a & PERTEMASK */
#define PERTEMASK 0xf
/* use: a / sqrtperte <=> a >> PERTEDEC */
#define PERTEDEC 4


/*#define MMX_TRACE*/
#include "mmx.h"
/*#include "xmmx.h"*/
#include "goom_graphic.h"

/*
 * Report whether bit 3 (mask 0x8) is set in the flags returned by
 * mm_support().  Returns 1 when the bit is set and 0 otherwise.
 *
 * NOTE(review): 0x8 is presumably the "extended MMX" capability flag
 * defined alongside mm_support() in mmx.h -- confirm there.
 */
int
xmmx_supported (void)
{
  return (mm_support () & 0x8) != 0;
}

/*
 * Zoom filter implemented with GCC inline MMX assembly.
 *
 * For every index `loop` in [0, prevX * prevY) the function:
 *   1. loads one 64-bit entry from brutS[loop] and brutD[loop] (the int
 *      buffers lbruS / lbruD viewed as mmx_t) and interpolates between
 *      them using buffratio;
 *   2. masks the resulting position against prevXY, which holds
 *      (prevX - 1) << PERTEDEC and (prevY - 1) << PERTEDEC;
 *   3. reads one 32-bit entry of precalCoef, indexed by the low 4 bits of
 *      each of the two position components;
 *   4. reads 64 bits from expix1 at the computed position and another 64
 *      bits prevX elements further, multiplies the four unpacked pixels by
 *      the four coefficient bytes, sums them, shifts right by 8 and stores
 *      the packed result into expix2[loop].
 *
 * Side effects: the four corner pixels of expix1 are set to 0 before the
 * loop, and `emms` is executed on exit.
 *
 * The assembly uses %eax / %esi / %edx by name as 32-bit index and address
 * registers, and relies on mm0-mm7 keeping their values between
 * consecutive asm statements.
 * NOTE(review): this presumably restricts the code to 32-bit x86 builds --
 * confirm against the build configuration that defines HAVE_MMX.
 */
void
zoom_filter_xmmx (int prevX, int prevY,
    Pixel * expix1, Pixel * expix2,
    int *lbruS, int *lbruD, int buffratio, int precalCoef[16][16])
{
  int bufsize = prevX * prevY;  /* buffer size */
  volatile int loop;            /* loop variable */

  mmx_t *brutS = (mmx_t *) lbruS;       /* source transformation buffer */
  mmx_t *brutD = (mmx_t *) lbruD;       /* destination transformation buffer */

  volatile mmx_t prevXY;
  volatile mmx_t ratiox;

  /*      volatile mmx_t interpix; */

  /* clear the four corner pixels of the source image */
  expix1[0].val = expix1[prevX - 1].val = expix1[prevX * prevY - 1].val =
      expix1[prevX * prevY - prevX].val = 0;

  /* clipping limits, in the same PERTEDEC-shifted units as the positions */
  prevXY.ud[0] = (prevX - 1) << PERTEDEC;
  prevXY.ud[1] = (prevY - 1) << PERTEDEC;

  ratiox.d[0] = buffratio;
  ratiox.d[1] = buffratio;

  asm volatile ("\n\t movq  %[ratio], %%mm6" "\n\t pslld $16,      %%mm6"       /* mm6 = [rat16=buffratio<<16 | rat16=buffratio<<16] */
      "\n\t pxor  %%mm7,    %%mm7"      /* mm7 = 0 */
      ::[ratio] "m" (ratiox));

  loop = 0;

  /*
   * NOTE: mm6 and mm7 are not modified inside the loop.
   */
  while (loop < bufsize) {
    /* Thread #1
     * pre :  mm6 = [rat16|rat16]
     * post : mm0 = S + ((D-S)*rat16 format [X|Y]
     * modified = mm0,mm1,mm2
     */

    asm volatile ("#1 \n\t movq 0(%[brutS]), %%mm0" "#1 \n\t movq 0(%[brutD]), %%mm1" "#1 \n\t psubd   %%mm0, %%mm1"    /* mm1 = D - S */
        "#1 \n\t movq    %%mm1, %%mm2"  /* mm2 = D - S */
        "#1 \n\t pslld     $16, %%mm1" "#1 \n\t pmullw  %%mm6, %%mm2" "#1 \n\t pmulhuw %%mm6, %%mm1" "#1 \n\t pslld   $16,   %%mm0" "#1 \n\t paddd   %%mm2, %%mm1"      /* mm1 = (D - S) * buffratio >> 16 */
        "#1 \n\t paddd   %%mm1, %%mm0"  /* mm0 = S + mm1 */
        "#1 \n\t psrld   $16,   %%mm0"::[brutS] "r" (&brutS[loop]),
        [brutD] "r" (&brutD[loop])
        );                      /* mm0 = interpolated position (see "post" above) */

    /*
     * pre : mm0 : position vector on screen
     *       prevXY : coordinate of the lower-right point on screen
     * post : clipped mm0
     * modified : mm0,mm1,mm2
     *
     * Without STRICT_COMPAT each 32-bit component is zeroed independently
     * when it is not below its limit.  With STRICT_COMPAT both per-component
     * masks are broadcast and applied, so the whole vector is zeroed as soon
     * as either component is out of range.
     */
    asm volatile
        ("#1 \n\t movq %[prevXY], %%mm1" "#1 \n\t pcmpgtd %%mm0,  %%mm1"
        /* after the compare, each dword of mm1 holds (for X, same for Y):
         *   all ones  if prevXY > px
         *   all zeros if prevXY <= px */
#ifdef STRICT_COMPAT
        "#1 \n\t movq      %%mm1, %%mm2"
        "#1 \n\t punpckhdq %%mm2, %%mm2"
        "#1 \n\t punpckldq %%mm1, %%mm1" "#1 \n\t pand      %%mm2, %%mm0"
#endif
        "#1 \n\t pand %%mm1, %%mm0"     /* zero out the part that overflows */
        ::[prevXY] "m" (prevXY));

    /* Thread #2
     * pre :  mm0 : clipped position on screen
     *
     * post : mm3 : coefs for this position
     *        mm1 : X vector [0|X]
     *
     * modif : eax,esi
     *
     * The load address is precalCoef + ((low dword of mm0) & 15) * 64
     * + ((high dword of mm0) & 15) * 4, i.e. one int of the 16x16 table.
     * NOTE(review): mm1 receives the HIGH dword of mm0 (psrlq $32).  Since
     * prevXY.ud[1] is derived from prevY, that dword looks like the Y
     * component rather than X as labelled above -- confirm against the
     * mmx_t layout.
     */
    __asm__ __volatile__ ("#2 \n\t movd %%mm0,%%esi"
        "#2 \n\t movq %%mm0,%%mm1"
        "#2 \n\t andl $15,%%esi"
        "#2 \n\t psrlq $32,%%mm1"
        "#2 \n\t shll $6,%%esi"
        "#2 \n\t movd %%mm1,%%eax"
        "#2 \n\t addl %[precalCoef],%%esi"
        "#2 \n\t andl $15,%%eax"
        "#2 \n\t movd (%%esi,%%eax,4),%%mm3"::[precalCoef]
        "g" (precalCoef):"eax", "esi");

    /*
     * coefficient extraction... (Thread #3)
     *
     * pre : coefs in mm3
     *
     * post : coefs extracted into mm3 (c1 & c2)
     *                         and mm4 (c3 & c4)
     *
     * modif : mm5
     */

    /* (Thread #4)
     * pre : mm0 : Y pos [*|Y]
     *       mm1 : X pos [*|X]
     *
     * post : mm0 : expix1[position]
     *        mm2 : expix1[position+width]
     *
     * modif : eax, esi, edx (mull writes the high half of its product
     *         to edx, hence the edx clobber)
     *
     * The index computed below is (low dword of mm1 >> 4) * prevX
     * + (low dword of mm0 >> 4); each movq then reads 8 bytes starting at
     * expix1 + 4 * index.
     * NOTE(review): the scale factor 4 assumes sizeof (Pixel) == 4, so each
     * movq would fetch two adjacent pixels -- confirm in goom_graphic.h.
     * NOTE(review): as in Thread #2, the X/Y labels above look swapped
     * relative to what the code does -- confirm.
     */
    __asm__ __volatile__ ("#2 \n\t psrld $4, %%mm0" "#2 \n\t psrld $4, %%mm1"   /* PERTEDEC = $4 */
        "#4 \n\t movd %%mm1,%%eax"
        "#3 \n\t movq %%mm3,%%mm5"
        "#4 \n\t mull %[prevX]"
        "#4 \n\t movd %%mm0,%%esi"
        "#3 \n\t punpcklbw %%mm5, %%mm3"
        "#4 \n\t addl %%esi, %%eax"
        "#3 \n\t movq %%mm3, %%mm4"
        "#3 \n\t movq %%mm3, %%mm5"
        "#4 \n\t movl %[expix1], %%esi"
        "#3 \n\t punpcklbw %%mm5, %%mm3"
        "#4 \n\t movq (%%esi,%%eax,4),%%mm0"
        "#3 \n\t punpckhbw %%mm5, %%mm4"
        "#4 \n\t addl %[prevX],%%eax"
        "#4 \n\t movq (%%esi,%%eax,4),%%mm2"::[expix1] "g" (expix1)
        ,[prevX] "g" (prevX)
        :"eax", "esi", "edx");

    /*
     * pre :       mm0 : expix1[position]
     *             mm2 : expix1[position+width]
     *       mm3 & mm4 : coefs
     */

    /* copy the first two pixels into mm0 and mm1 */
    movq_r2r (mm0, mm1);        /* b1-v1-r1-a1-b2-v2-r2-a2 */

    /* unpack the first pixel */
    punpcklbw_r2r (mm7, mm0);   /* 00-b2-00-v2-00-r2-00-a2 */

    /* coefficient extraction... */

    movq_r2r (mm3, mm5);        /* c2-c2-c2-c2-c1-c1-c1-c1 */

    /*^in parallel^ *//* unpack the second pixel */
    /*^ */ punpckhbw_r2r (mm7, mm1);
    /* 00-b1-00-v1-00-r1-00-a1 */

    punpcklbw_r2r (mm7, mm5);   /* 00-c1-00-c1-00-c1-00-c1 */
    punpckhbw_r2r (mm7, mm3);   /* 00-c2-00-c2-00-c2-00-c2 */

    /* multiply the pixels by the coefficients */
    pmullw_r2r (mm5, mm0);      /* c1*b2-c1*v2-c1*r2-c1*a2 */
    pmullw_r2r (mm3, mm1);      /* c2*b1-c2*v1-c2*r1-c2*a1 */
    paddw_r2r (mm1, mm0);

    /* ...extraction of the last 2 coefficients */
    movq_r2r (mm4, mm5);        /* c4-c4-c4-c4-c3-c3-c3-c3 */
    punpcklbw_r2r (mm7, mm4);   /* 00-c3-00-c3-00-c3-00-c3 */
    punpckhbw_r2r (mm7, mm5);   /* 00-c4-00-c4-00-c4-00-c4 */

    /* fetch the last 2 pixels */
    movq_r2r (mm2, mm1);

    /* unpack the pixels */
    punpcklbw_r2r (mm7, mm1);
    punpckhbw_r2r (mm7, mm2);

    /* multiply by the coefficients */
    pmullw_r2r (mm4, mm1);
    pmullw_r2r (mm5, mm2);

    /* add the resulting values to the final value */
    paddw_r2r (mm1, mm0);
    paddw_r2r (mm2, mm0);

    /* divide by 256 = 16+16+16+16, then repack the final pixel */
    psrlw_i2r (8, mm0);
    packuswb_r2r (mm7, mm0);

    movd_r2m (mm0, expix2[loop]);

    ++loop;
  }
  /* this was femms, which is AMD 3dnow */
  __asm__ __volatile__ ("emms\n");
}

/*
 * Additive pixel write: load _backbuf into mm0, add _col to it with
 * paddusb (packed unsigned saturating byte add), and store mm0 to _out.
 *
 * Wrapped in do { } while (0) so that "DRAWMETHOD;" is a single statement
 * and stays safe inside an unbraced if / else.
 *
 * NOTE(review): paddusb with a memory operand reads 64 bits, while the
 * caller in this file passes a 32-bit int for _col -- confirm how
 * paddusb_m2r in mmx.h constrains its operand.
 */
#define DRAWMETHOD_PLUS_XMMX(_out,_backbuf,_col) \
do { \
	movd_m2r(_backbuf, mm0); \
	paddusb_m2r(_col, mm0); \
	movd_r2m(mm0, _out); \
} while (0)

/* Draw method used by draw_line_xmmx: add `col` in place to the pixel at `p`. */
#define DRAWMETHOD DRAWMETHOD_PLUS_XMMX(*p,*p,col)

/*
 * Draw a line from (x1, y1) to (x2, y2) into the pixel buffer `data`,
 * applying DRAWMETHOD (which uses the locals `p` and `col`) to every
 * plotted pixel.
 *
 * data     pixel buffer, addressed as data[screenx * y + x]
 * x1, y1   first endpoint
 * x2, y2   second endpoint
 * col      colour value handed to DRAWMETHOD
 * screenx  row stride of `data`, also the exclusive upper bound for x
 * screeny  exclusive upper bound for y
 *
 * Nothing is drawn if either endpoint lies outside
 * [0, screenx) x [0, screeny); lines are rejected, not clipped.  `emms`
 * is executed on every path, including the rejected one.
 *
 * The minor-axis coordinate is tracked in 16.16 fixed point.  The scaling
 * of the possibly negative `dy` is done with a multiplication, because
 * left-shifting a negative int is undefined behaviour in C.
 *
 * The original code advanced `p` after each DRAWMETHOD for a second,
 * commented-out draw.  `p` is recomputed on every iteration, so those
 * adjustments had no effect and are omitted here.
 */
void
draw_line_xmmx (Pixel * data, int x1, int y1, int x2, int y2, int col,
    int screenx, int screeny)
{
  enum
  { FIXED_SHIFT = 16, FIXED_ONE = 1 << FIXED_SHIFT };
  int x, y, dx, dy;
  Pixel *p;

  if ((y1 < 0) || (y2 < 0) || (x1 < 0) || (x2 < 0) || (y1 >= screeny)
      || (y2 >= screeny) || (x1 >= screenx) || (x2 >= screenx))
    goto end_of_line;

  /* order the endpoints so that x1 <= x2 (endpoints are also swapped when
   * x1 == x2, as in the original) */
  if (x1 >= x2) {
    int tmp;

    tmp = x1;
    x1 = x2;
    x2 = tmp;
    tmp = y1;
    y1 = y2;
    y2 = tmp;
  }
  dx = x2 - x1;
  dy = y2 - y1;

  /* vertical line (also covers a single point) */
  if (dx == 0) {
    int top = (y1 < y2) ? y1 : y2;
    int bottom = (y1 < y2) ? y2 : y1;

    p = &(data[(screenx * top) + x1]);
    for (y = top; y <= bottom; y++) {
      DRAWMETHOD;
      p += screenx;
    }
    goto end_of_line;
  }

  /* horizontal line; dx > 0 is guaranteed here, so x1 < x2 */
  if (dy == 0) {
    p = &(data[(screenx * y1) + x1]);
    for (x = x1; x <= x2; x++) {
      DRAWMETHOD;
      p++;
    }
    goto end_of_line;
  }

  /* 1    */
  /*  \   */
  /*   \  */
  /*    2 */
  /* steep, y increasing: step y by one, x by a fixed-point fraction */
  if (dy > dx) {
    int xstep = (dx * FIXED_ONE) / dy;

    x = x1 * FIXED_ONE;
    for (y = y1; y <= y2; y++) {
      p = &(data[(screenx * y) + (x >> FIXED_SHIFT)]);
      DRAWMETHOD;
      x += xstep;
    }
    goto end_of_line;
  }

  /*    2 */
  /*   /  */
  /*  /   */
  /* 1    */
  /* steep, y decreasing */
  if (-dy > dx) {
    int xstep = (dx * FIXED_ONE) / -dy;

    /* The walk starts at column x1 + 1, so this branch plots one column
     * to the right of the others.
     * NOTE(review): when x2 == screenx - 1 and dx is a multiple of -dy the
     * last sample lands on column screenx, i.e. the first pixel of the
     * following row -- kept as in the original; confirm whether intended. */
    x = (x1 + 1) * FIXED_ONE;
    for (y = y1; y >= y2; y--) {
      p = &(data[(screenx * y) + (x >> FIXED_SHIFT)]);
      DRAWMETHOD;
      x += xstep;
    }
    goto end_of_line;
  }

  /* shallow, either direction: step x by one, y by a (possibly negative)
   * fixed-point fraction */
  {
    int ystep = (dy * FIXED_ONE) / dx;

    y = y1 * FIXED_ONE;
    for (x = x1; x <= x2; x++) {
      p = &(data[(screenx * (y >> FIXED_SHIFT)) + x]);
      DRAWMETHOD;
      y += ystep;
    }
  }

end_of_line:
  /* this was femms, which is AMD 3dnow */
  __asm__ __volatile__ ("emms\n");
}
#else
/*
 * Fallback compiled when HAVE_MMX is not defined: the extended-MMX code
 * paths are not built, so support is always reported as absent (0).
 */
int
xmmx_supported (void)
{
  return 0;
}
#endif
-												gst/goom/: Add license headers in all source files. Remove filter.c from

Original commit message from CVS:
* gst/goom/Makefile.am:
* gst/goom/README:
* gst/goom/config_param.c:
* gst/goom/convolve_fx.c:
* gst/goom/drawmethods.c:
* gst/goom/drawmethods.h:
* gst/goom/filters.c:
* gst/goom/filters_mmx.s:
* gst/goom/flying_stars_fx.c:
* gst/goom/goom.h:
* gst/goom/goom_config.h:
* gst/goom/goom_config_param.h:
* gst/goom/goom_core.c:
* gst/goom/goom_filters.h:
* gst/goom/goom_fx.h:
* gst/goom/goom_graphic.h:
* gst/goom/goom_plugin_info.h:
* gst/goom/goom_tools.c:
* gst/goom/goom_tools.h:
* gst/goom/goom_typedefs.h:
* gst/goom/goom_visual_fx.h:
* gst/goom/graphic.c:
* gst/goom/ifs.c:
* gst/goom/ifs.h:
* gst/goom/lines.c:
* gst/goom/lines.h:
* gst/goom/mathtools.c:
* gst/goom/mathtools.h:
* gst/goom/mmx.c:
* gst/goom/motif_goom1.h:
* gst/goom/motif_goom2.h:
* gst/goom/plugin_info.c:
* gst/goom/ppc_drawings.h:
* gst/goom/ppc_zoom_ultimate.h:
* gst/goom/sound_tester.c:
* gst/goom/sound_tester.h:
* gst/goom/surf3d.c:
* gst/goom/surf3d.h:
* gst/goom/tentacle3d.c:
* gst/goom/tentacle3d.h:
* gst/goom/v3d.c:
* gst/goom/v3d.h:
* gst/goom/xmmx.c:
Add license headers in all source files. Remove filter.c from
EXTRA_DIST, as its in SOURCES already. Mention the files in the REDME
which are not used right now. Fixes #557709.

											
										
										
											2008-10-28 06:50:57 +00:00
+								/*	xmmx.c
 									eXtended MultiMedia eXtensions GCC interface library for IA32.
 									To use this library, simply include this header file
 									and compile with GCC.  You MUST have inlining enabled
 									in order for xmmx_ok() to work; this can be done by
 									simply using -O on the GCC command line.
 									Compiling with -DXMMX_TRACE will cause detailed trace
 									output to be sent to stderr for each mmx operation.
 									This adds lots of code, and obviously slows execution to
 									a crawl, but can be very useful for debugging.
 									THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY
 									EXPRESS OR IMPLIED WARRANTIES, INCLUDING, WITHOUT
 									LIMITATION, THE IMPLIED WARRANTIES OF MERCHANTABILITY
 									AND FITNESS FOR ANY PARTICULAR PURPOSE.
 by R. Fisher
 									Based on libmmx, 1997-99 by H. Dietz and R. Fisher
 								 Notes:
 									It appears that the latest gas has the pand problem fixed, therefore
 									  I'll undefine BROKEN_PAND by default.
 								*/
-												configure.ac: Detect and indicate if GCC inline assembly syntax is available.

Original commit message from CVS:
* configure.ac:
Detect and indicate if GCC inline assembly syntax is
available.
* gst/goom/Makefile.am:
* gst/goom/convolve_fx.c:
* gst/goom/flying_stars_fx.c:
* gst/goom/goom_config.h:
* gst/goom/goom_core.c:
* gst/goom/goomsl.c:
* gst/goom/ifs.c:
* gst/goom/mmx.c:
* gst/goom/plugin_info.c:
* gst/goom/xmmx.c:
Fix various GCC-isms, and only build the inline assembly
with compilers that support GCC inline assembly.
Fix a couple of other warnings shown with Forte.

											
										
										
											2008-02-26 10:09:38 +00:00
+								#ifdef HAVE_CONFIG_H
 								#include "config.h"
 								#endif
 								#include "goom_config.h"
-												configure.ac: Add checks for Flex/Yacc/Bison and other furry animals, for the new goom 2k4 based plugin

Original commit message from CVS:
2008-02-23  Bastien Nocera  <hadess@hadess.net>

* configure.ac: Add checks for Flex/Yacc/Bison and other
furry animals, for the new goom 2k4 based plugin

* gst/goom/*: Update to use goom 2k4, uses liboil to detect
CPU optimisations (not working yet), move the old plugin to...

* gst/goom2k1/*: ... here, in case somebody is sick enough

Fixes #515073

											
										
										
											2008-02-23 01:51:37 +00:00
 								#ifdef HAVE_MMX
 								/* a definir pour avoir exactement le meme resultat que la fonction C
 								 * (un chouillat plus lent).. mais la difference est assez peu notable.
 								 */
 								// #define STRICT_COMPAT
 								#define BUFFPOINTNB 16
 								#define BUFFPOINTMASK 0xffff
 								#define BUFFINCR 0xff
 								#define sqrtperte 16
 								/* faire : a % sqrtperte <=> a & pertemask*/
 								#define PERTEMASK 0xf
 								/* faire : a / sqrtperte <=> a >> PERTEDEC*/
 								#define PERTEDEC 4
 								/*#define MMX_TRACE*/
 								#include "mmx.h"
 								/*#include "xmmx.h"*/
 								#include "goom_graphic.h"
 								int
 								xmmx_supported (void)
 								{
 								  return (mm_support () & 0x8) >> 3;
 								}
 								void
 								zoom_filter_xmmx (int prevX, int prevY,
 								    Pixel * expix1, Pixel * expix2,
 								    int *lbruS, int *lbruD, int buffratio, int precalCoef[16][16])
 								{
 								  int bufsize = prevX * prevY;  /* taille du buffer */
 								  volatile int loop;            /* variable de boucle */
 								  mmx_t *brutS = (mmx_t *) lbruS;       /* buffer de transformation source */
 								  mmx_t *brutD = (mmx_t *) lbruD;       /* buffer de transformation dest */
 								  volatile mmx_t prevXY;
 								  volatile mmx_t ratiox;
 								  /*      volatile mmx_t interpix; */
 								  expix1[0].val = expix1[prevX - 1].val = expix1[prevX * prevY - 1].val =
 								      expix1[prevX * prevY - prevX].val = 0;
 								  prevXY.ud[0] = (prevX - 1) << PERTEDEC;
 								  prevXY.ud[1] = (prevY - 1) << PERTEDEC;
 								  ratiox.d[0] = buffratio;
 								  ratiox.d[1] = buffratio;
 								  asm volatile ("\n\t movq  %[ratio], %%mm6" "\n\t pslld $16,      %%mm6"       /* mm6 = [rat16=buffratio<<16 | rat16=buffratio<<16] */
 								      "\n\t pxor  %%mm7,    %%mm7"      /* mm7 = 0 */
 								      ::[ratio] "m" (ratiox));
 								  loop = 0;
 								  /*
 								   * NOTE : mm6 et mm7 ne sont pas modifies dans la boucle.
 								   */
 								  while (loop < bufsize) {
 								    /* Thread #1
 								     * pre :  mm6 = [rat16|rat16]
 								     * post : mm0 = S + ((D-S)*rat16 format [X|Y]
 								     * modified = mm0,mm1,mm2
 								     */
-												gst/goom/xmmx.c: Fix constraints on asm code so that it compiles consistently.  Fixes #522278.

Original commit message from CVS:
* gst/goom/xmmx.c: Fix constraints on asm code so that it
compiles consistently.  Fixes #522278.

											
										
										
											2008-03-31 22:06:14 +00:00
+								    asm volatile ("#1 \n\t movq 0(%[brutS]), %%mm0" "#1 \n\t movq 0(%[brutD]), %%mm1" "#1 \n\t psubd   %%mm0, %%mm1"    /* mm1 = D - S */
-												configure.ac: Add checks for Flex/Yacc/Bison and other furry animals, for the new goom 2k4 based plugin

Original commit message from CVS:
2008-02-23  Bastien Nocera  <hadess@hadess.net>

* configure.ac: Add checks for Flex/Yacc/Bison and other
furry animals, for the new goom 2k4 based plugin

* gst/goom/*: Update to use goom 2k4, uses liboil to detect
CPU optimisations (not working yet), move the old plugin to...

* gst/goom2k1/*: ... here, in case somebody is sick enough

Fixes #515073

											
										
										
											2008-02-23 01:51:37 +00:00
+								        "#1 \n\t movq    %%mm1, %%mm2"  /* mm2 = D - S */
 								        "#1 \n\t pslld     $16, %%mm1" "#1 \n\t pmullw  %%mm6, %%mm2" "#1 \n\t pmulhuw %%mm6, %%mm1" "#1 \n\t pslld   $16,   %%mm0" "#1 \n\t paddd   %%mm2, %%mm1"      /* mm1 = (D - S) * buffratio >> 16 */
 								        "#1 \n\t paddd   %%mm1, %%mm0"  /* mm0 = S + mm1 */
-												gst/goom/xmmx.c: Fix constraints on asm code so that it compiles consistently.  Fixes #522278.

Original commit message from CVS:
* gst/goom/xmmx.c: Fix constraints on asm code so that it
compiles consistently.  Fixes #522278.

											
										
										
											2008-03-31 22:06:14 +00:00
+								        "#1 \n\t psrld   $16,   %%mm0"::[brutS] "r" (&brutS[loop]),
 								        [brutD] "r" (&brutD[loop])
-												configure.ac: Add checks for Flex/Yacc/Bison and other furry animals, for the new goom 2k4 based plugin

Original commit message from CVS:
2008-02-23  Bastien Nocera  <hadess@hadess.net>

* configure.ac: Add checks for Flex/Yacc/Bison and other
furry animals, for the new goom 2k4 based plugin

* gst/goom/*: Update to use goom 2k4, uses liboil to detect
CPU optimisations (not working yet), move the old plugin to...

* gst/goom2k1/*: ... here, in case somebody is sick enough

Fixes #515073

											
										
										
											2008-02-23 01:51:37 +00:00
+								        );                      /* mm0 = S */
 								    /*
 								     * pre : mm0 : position vector on screen
 								     *       prevXY : coordinate of the lower-right point on screen
 								     * post : clipped mm0
 								     * modified : mm0,mm1,mm2
 								     */
 								    asm volatile
 								        ("#1 \n\t movq %[prevXY], %%mm1" "#1 \n\t pcmpgtd %%mm0,  %%mm1"
 								        /* mm0 en X contient (idem pour Y) :
 								         *   1111 si prevXY > px
 								         *   0000 si prevXY <= px */
 								#ifdef STRICT_COMPAT
 								        "#1 \n\t movq      %%mm1, %%mm2"
 								        "#1 \n\t punpckhdq %%mm2, %%mm2"
 								        "#1 \n\t punpckldq %%mm1, %%mm1" "#1 \n\t pand      %%mm2, %%mm0"
 								#endif
 								        "#1 \n\t pand %%mm1, %%mm0"     /* on met a zero la partie qui deborde */
 								        ::[prevXY] "m" (prevXY));
 								    /* Thread #2
 								     * pre :  mm0 : clipped position on screen
 								     *
 								     * post : mm3 : coefs for this position
 								     *        mm1 : X vector [0|X]
 								     *
 								     * modif : eax,esi
 								     */
 								    __asm__ __volatile__ ("#2 \n\t movd %%mm0,%%esi"
 								        "#2 \n\t movq %%mm0,%%mm1"
 								        "#2 \n\t andl $15,%%esi"
 								        "#2 \n\t psrlq $32,%%mm1"
 								        "#2 \n\t shll $6,%%esi"
 								        "#2 \n\t movd %%mm1,%%eax"
 								        "#2 \n\t addl %[precalCoef],%%esi"
 								        "#2 \n\t andl $15,%%eax"
 								        "#2 \n\t movd (%%esi,%%eax,4),%%mm3"::[precalCoef]
 								        "g" (precalCoef):"eax", "esi");
 								    /*
 								     * extraction des coefficients... (Thread #3)
 								     *
 								     * pre : coef dans mm3
 								     *
 								     * post : coef extraits dans mm3 (c1 & c2)
 								     *                        et mm4 (c3 & c4)
 								     *
 								     * modif : mm5
 								     */
 								    /* (Thread #4)
 								     * pre : mm0 : Y pos [*|Y]
 								     *       mm1 : X pos [*|X]
 								     *
 								     * post : mm0 : expix1[position]
 								     *        mm2 : expix1[position+largeur]
 								     *
 								     * modif : eax, esi
 								     */
 								    __asm__ __volatile__ ("#2 \n\t psrld $4, %%mm0" "#2 \n\t psrld $4, %%mm1"   /* PERTEDEC = $4 */
 								        "#4 \n\t movd %%mm1,%%eax"
 								        "#3 \n\t movq %%mm3,%%mm5"
 								        "#4 \n\t mull %[prevX]"
 								        "#4 \n\t movd %%mm0,%%esi"
 								        "#3 \n\t punpcklbw %%mm5, %%mm3"
 								        "#4 \n\t addl %%esi, %%eax"
 								        "#3 \n\t movq %%mm3, %%mm4"
 								        "#3 \n\t movq %%mm3, %%mm5"
 								        "#4 \n\t movl %[expix1], %%esi"
 								        "#3 \n\t punpcklbw %%mm5, %%mm3"
 								        "#4 \n\t movq (%%esi,%%eax,4),%%mm0"
 								        "#3 \n\t punpckhbw %%mm5, %%mm4"
 								        "#4 \n\t addl %[prevX],%%eax"
 								        "#4 \n\t movq (%%esi,%%eax,4),%%mm2"::[expix1] "g" (expix1)
 								        ,[prevX] "g" (prevX)
-												goom: add edx to clobber list in inline assembly code

mull modifies %edx, so should be mentioned in clobber list.
Fixes crash on Solaris (#615998).

											
										
										
											2010-04-17 09:06:41 +00:00
+								        :"eax", "esi", "edx");
-												configure.ac: Add checks for Flex/Yacc/Bison and other furry animals, for the new goom 2k4 based plugin

Original commit message from CVS:
2008-02-23  Bastien Nocera  <hadess@hadess.net>

* configure.ac: Add checks for Flex/Yacc/Bison and other
furry animals, for the new goom 2k4 based plugin

* gst/goom/*: Update to use goom 2k4, uses liboil to detect
CPU optimisations (not working yet), move the old plugin to...

* gst/goom2k1/*: ... here, in case somebody is sick enough

Fixes #515073

											
										
										
											2008-02-23 01:51:37 +00:00
 								    /*
 								     * pre :       mm0 : expix1[position]
 								     *             mm2 : expix1[position+largeur]
 								     *       mm3 & mm4 : coefs
 								     */
 								    /* recopie des deux premiers pixels dans mm0 et mm1 */
 								    movq_r2r (mm0, mm1);        /* b1-v1-r1-a1-b2-v2-r2-a2 */
 								    /* depackage du premier pixel */
 								    punpcklbw_r2r (mm7, mm0);   /* 00-b2-00-v2-00-r2-00-a2 */
 								    /* extraction des coefficients... */
 								    movq_r2r (mm3, mm5);        /* c2-c2-c2-c2-c1-c1-c1-c1 */
 								    /*^en parrallele^ *//* depackage du 2ieme pixel */
-												gst/goom/xmmx.c: Use 'emms' instead of 'femms' to not crash on cpus that do not implement this 3dnow specific instruc...

Original commit message from CVS:
* gst/goom/xmmx.c:
Use 'emms' instead of 'femms' to not crash on cpus that do not
implement this 3dnow specific instruction.

											
										
										
											2008-02-25 12:03:46 +00:00
+								    /*^ */ punpckhbw_r2r (mm7, mm1);
 								    /* 00-b1-00-v1-00-r1-00-a1 */
-												configure.ac: Add checks for Flex/Yacc/Bison and other furry animals, for the new goom 2k4 based plugin

Original commit message from CVS:
2008-02-23  Bastien Nocera  <hadess@hadess.net>

* configure.ac: Add checks for Flex/Yacc/Bison and other
furry animals, for the new goom 2k4 based plugin

* gst/goom/*: Update to use goom 2k4, uses liboil to detect
CPU optimisations (not working yet), move the old plugin to...

* gst/goom2k1/*: ... here, in case somebody is sick enough

Fixes #515073

											
										
										
											2008-02-23 01:51:37 +00:00
 								    punpcklbw_r2r (mm7, mm5);   /* 00-c1-00-c1-00-c1-00-c1 */
 								    punpckhbw_r2r (mm7, mm3);   /* 00-c2-00-c2-00-c2-00-c2 */
 								    /* multiplication des pixels par les coefficients */
 								    pmullw_r2r (mm5, mm0);      /* c1*b2-c1*v2-c1*r2-c1*a2 */
 								    pmullw_r2r (mm3, mm1);      /* c2*b1-c2*v1-c2*r1-c2*a1 */
 								    paddw_r2r (mm1, mm0);
 								    /* ...extraction des 2 derniers coefficients */
 								    movq_r2r (mm4, mm5);        /* c4-c4-c4-c4-c3-c3-c3-c3 */
 								    punpcklbw_r2r (mm7, mm4);   /* 00-c3-00-c3-00-c3-00-c3 */
 								    punpckhbw_r2r (mm7, mm5);   /* 00-c4-00-c4-00-c4-00-c4 */
 								    /* recuperation des 2 derniers pixels */
 								    movq_r2r (mm2, mm1);
 								    /* depackage des pixels */
 								    punpcklbw_r2r (mm7, mm1);
 								    punpckhbw_r2r (mm7, mm2);
 								    /* multiplication pas les coeffs */
 								    pmullw_r2r (mm4, mm1);
 								    pmullw_r2r (mm5, mm2);
-												gst/goom/xmmx.c: Use 'emms' instead of 'femms' to not crash on cpus that do not implement this 3dnow specific instruc...

Original commit message from CVS:
* gst/goom/xmmx.c:
Use 'emms' instead of 'femms' to not crash on cpus that do not
implement this 3dnow specific instruction.

											
										
										
											2008-02-25 12:03:46 +00:00
+								    /* ajout des valeurs obtenues <20> la valeur finale */
-												configure.ac: Add checks for Flex/Yacc/Bison and other furry animals, for the new goom 2k4 based plugin

Original commit message from CVS:
2008-02-23  Bastien Nocera  <hadess@hadess.net>

* configure.ac: Add checks for Flex/Yacc/Bison and other
furry animals, for the new goom 2k4 based plugin

* gst/goom/*: Update to use goom 2k4, uses liboil to detect
CPU optimisations (not working yet), move the old plugin to...

* gst/goom2k1/*: ... here, in case somebody is sick enough

Fixes #515073

											
										
										
											2008-02-23 01:51:37 +00:00
+								    paddw_r2r (mm1, mm0);
 								    paddw_r2r (mm2, mm0);
 								    /* division par 256 = 16+16+16+16, puis repackage du pixel final */
 								    psrlw_i2r (8, mm0);
 								    packuswb_r2r (mm7, mm0);
 								    movd_r2m (mm0, expix2[loop]);
 								    ++loop;
 								  }
-												gst/goom/xmmx.c: Use 'emms' instead of 'femms' to not crash on cpus that do not implement this 3dnow specific instruc...

Original commit message from CVS:
* gst/goom/xmmx.c:
Use 'emms' instead of 'femms' to not crash on cpus that do not
implement this 3dnow specific instruction.

											
										
										
											2008-02-25 12:03:46 +00:00
+								  /* this was femms, which is AMD 3dnow */
 								  __asm__ __volatile__ ("emms\n");
-												configure.ac: Add checks for Flex/Yacc/Bison and other furry animals, for the new goom 2k4 based plugin

Original commit message from CVS:
2008-02-23  Bastien Nocera  <hadess@hadess.net>

* configure.ac: Add checks for Flex/Yacc/Bison and other
furry animals, for the new goom 2k4 based plugin

* gst/goom/*: Update to use goom 2k4, uses liboil to detect
CPU optimisations (not working yet), move the old plugin to...

* gst/goom2k1/*: ... here, in case somebody is sick enough

Fixes #515073

											
										
										
											2008-02-23 01:51:37 +00:00
+								}
 								#define DRAWMETHOD_PLUS_XMMX(_out,_backbuf,_col) \
 								{ \
 									movd_m2r(_backbuf, mm0); \
 									paddusb_m2r(_col, mm0); \
 									movd_r2m(mm0, _out); \
 								}
 								#define DRAWMETHOD DRAWMETHOD_PLUS_XMMX(*p,*p,col)
 								void
 								draw_line_xmmx (Pixel * data, int x1, int y1, int x2, int y2, int col,
 								    int screenx, int screeny)
 								{
 								  int x, y, dx, dy, yy, xx;
 								  Pixel *p;
 								  if ((y1 < 0) || (y2 < 0) || (x1 < 0) || (x2 < 0) || (y1 >= screeny)
 								      || (y2 >= screeny) || (x1 >= screenx) || (x2 >= screenx))
 								    goto end_of_line;
 								  dx = x2 - x1;
 								  dy = y2 - y1;
 								  if (x1 >= x2) {
 								    int tmp;
 								    tmp = x1;
 								    x1 = x2;
 								    x2 = tmp;
 								    tmp = y1;
 								    y1 = y2;
 								    y2 = tmp;
 								    dx = x2 - x1;
 								    dy = y2 - y1;
 								  }
 								  /* vertical line */
 								  if (dx == 0) {
 								    if (y1 < y2) {
 								      p = &(data[(screenx * y1) + x1]);
 								      for (y = y1; y <= y2; y++) {
 								        DRAWMETHOD;
 								        p += screenx;
 								      }
 								    } else {
 								      p = &(data[(screenx * y2) + x1]);
 								      for (y = y2; y <= y1; y++) {
 								        DRAWMETHOD;
 								        p += screenx;
 								      }
 								    }
 								    goto end_of_line;
 								  }
 								  /* horizontal line */
 								  if (dy == 0) {
 								    if (x1 < x2) {
 								      p = &(data[(screenx * y1) + x1]);
 								      for (x = x1; x <= x2; x++) {
 								        DRAWMETHOD;
 								        p++;
 								      }
 								      goto end_of_line;
 								    } else {
 								      p = &(data[(screenx * y1) + x2]);
 								      for (x = x2; x <= x1; x++) {
 								        DRAWMETHOD;
 								        p++;
 								      }
 								      goto end_of_line;
 								    }
 								  }
 								  /* 1    */
 								  /*  \   */
 								  /*   \  */
 								  /*    2 */
 								  if (y2 > y1) {
 								    /* steep */
 								    if (dy > dx) {
 								      dx = ((dx << 16) / dy);
 								      x = x1 << 16;
 								      for (y = y1; y <= y2; y++) {
 								        xx = x >> 16;
 								        p = &(data[(screenx * y) + xx]);
 								        DRAWMETHOD;
 								        if (xx < (screenx - 1)) {
 								          p++;
 								          /* DRAWMETHOD; */
 								        }
 								        x += dx;
 								      }
 								      goto end_of_line;
 								    }
 								    /* shallow */
 								    else {
 								      dy = ((dy << 16) / dx);
 								      y = y1 << 16;
 								      for (x = x1; x <= x2; x++) {
 								        yy = y >> 16;
 								        p = &(data[(screenx * yy) + x]);
 								        DRAWMETHOD;
 								        if (yy < (screeny - 1)) {
 								          p += screeny;
 								          /* DRAWMETHOD; */
 								        }
 								        y += dy;
 								      }
 								    }
 								  }
 								  /*    2 */
 								  /*   /  */
 								  /*  /   */
 								  /* 1    */
 								  else {
 								    /* steep */
 								    if (-dy > dx) {
 								      dx = ((dx << 16) / -dy);
 								      x = (x1 + 1) << 16;
 								      for (y = y1; y >= y2; y--) {
 								        xx = x >> 16;
 								        p = &(data[(screenx * y) + xx]);
 								        DRAWMETHOD;
 								        if (xx < (screenx - 1)) {
 								          p--;
 								          /* DRAWMETHOD; */
 								        }
 								        x += dx;
 								      }
 								      goto end_of_line;
 								    }
 								    /* shallow */
 								    else {
 								      dy = ((dy << 16) / dx);
 								      y = y1 << 16;
 								      for (x = x1; x <= x2; x++) {
 								        yy = y >> 16;
 								        p = &(data[(screenx * yy) + x]);
 								        DRAWMETHOD;
 								        if (yy < (screeny - 1)) {
 								          p += screeny;
 								          /* DRAWMETHOD; */
 								        }
 								        y += dy;
 								      }
 								      goto end_of_line;
 								    }
 								  }
 								end_of_line:
-												gst/goom/xmmx.c: Use 'emms' instead of 'femms' to not crash on cpus that do not implement this 3dnow specific instruc...

Original commit message from CVS:
* gst/goom/xmmx.c:
Use 'emms' instead of 'femms' to not crash on cpus that do not
implement this 3dnow specific instruction.

											
										
										
											2008-02-25 12:03:46 +00:00
+								  /* this was femms, which is AMD 3dnow */
 								  __asm__ __volatile__ ("emms\n");
-												configure.ac: Add checks for Flex/Yacc/Bison and other furry animals, for the new goom 2k4 based plugin

Original commit message from CVS:
2008-02-23  Bastien Nocera  <hadess@hadess.net>

* configure.ac: Add checks for Flex/Yacc/Bison and other
furry animals, for the new goom 2k4 based plugin

* gst/goom/*: Update to use goom 2k4, uses liboil to detect
CPU optimisations (not working yet), move the old plugin to...

* gst/goom2k1/*: ... here, in case somebody is sick enough

Fixes #515073

											
										
										
											2008-02-23 01:51:37 +00:00
+								}
-												configure.ac: Detect and indicate if GCC inline assembly syntax is available.

Original commit message from CVS:
* configure.ac:
Detect and indicate if GCC inline assembly syntax is
available.
* gst/goom/Makefile.am:
* gst/goom/convolve_fx.c:
* gst/goom/flying_stars_fx.c:
* gst/goom/goom_config.h:
* gst/goom/goom_core.c:
* gst/goom/goomsl.c:
* gst/goom/ifs.c:
* gst/goom/mmx.c:
* gst/goom/plugin_info.c:
* gst/goom/xmmx.c:
Fix various GCC-isms, and only build the inline assembly
with compilers that support GCC inline assembly.
Fix a couple of other warnings shown with Forte.

											
										
										
											2008-02-26 10:09:38 +00:00
+								#else
 								int
 								xmmx_supported (void)
 								{
 								  return (0);
 								}
-												configure.ac: Add checks for Flex/Yacc/Bison and other furry animals, for the new goom 2k4 based plugin

Original commit message from CVS:
2008-02-23  Bastien Nocera  <hadess@hadess.net>

* configure.ac: Add checks for Flex/Yacc/Bison and other
furry animals, for the new goom 2k4 based plugin

* gst/goom/*: Update to use goom 2k4, uses liboil to detect
CPU optimisations (not working yet), move the old plugin to...

* gst/goom2k1/*: ... here, in case somebody is sick enough

Fixes #515073

											
										
										
											2008-02-23 01:51:37 +00:00
+								#endif