diff --git a/ChangeLog b/ChangeLog
index 73f857bdd1..1e92dd2132 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,54 @@
+2004-03-15  Thomas Vander Stichele  <thomas at apestaart dot org>
+
+	* configure.ac:
+          adding ffmpegcolorspace element
+	* gst/ffmpegcolorspace/Makefile.am:
+	* gst/ffmpegcolorspace/avcodec.h:
+	* gst/ffmpegcolorspace/common.h:
+	* gst/ffmpegcolorspace/dsputil.c: (dsputil_static_init):
+	* gst/ffmpegcolorspace/dsputil.h:
+	* gst/ffmpegcolorspace/gstffmpeg.c: (plugin_init):
+	* gst/ffmpegcolorspace/gstffmpegcodecmap.c:
+	(gst_ffmpeg_pixfmt_to_caps), (gst_ffmpeg_pix_fmt_to_caps),
+	(gst_ffmpeg_caps_to_pix_fmt):
+	* gst/ffmpegcolorspace/gstffmpegcodecmap.h:
+	* gst/ffmpegcolorspace/gstffmpegcolorspace.c:
+	(gst_ffmpegcolorspace_caps_remove_format_info),
+	(gst_ffmpegcolorspace_getcaps), (gst_ffmpegcolorspace_pad_link),
+	(gst_ffmpegcolorspace_get_type), (gst_ffmpegcolorspace_base_init),
+	(gst_ffmpegcolorspace_class_init), (gst_ffmpegcolorspace_init),
+	(gst_ffmpegcolorspace_chain), (gst_ffmpegcolorspace_change_state),
+	(gst_ffmpegcolorspace_set_property),
+	(gst_ffmpegcolorspace_get_property),
+	(gst_ffmpegcolorspace_register):
+	* gst/ffmpegcolorspace/imgconvert.c:
+	(avcodec_get_chroma_sub_sample), (avcodec_get_pix_fmt_name),
+	(avcodec_get_pix_fmt), (avpicture_fill), (avpicture_layout),
+	(avpicture_get_size), (avcodec_get_pix_fmt_loss),
+	(avg_bits_per_pixel), (avcodec_find_best_pix_fmt1),
+	(avcodec_find_best_pix_fmt), (img_copy_plane), (img_copy),
+	(yuv422_to_yuv420p), (yuv422_to_yuv422p), (yuv422p_to_yuv422),
+	(C_JPEG_TO_CCIR), (img_convert_init), (img_apply_table),
+	(shrink41), (shrink21), (shrink12), (shrink22), (shrink44),
+	(grow21_line), (grow41_line), (grow21), (grow22), (grow41),
+	(grow44), (conv411), (gif_clut_index), (build_rgb_palette),
+	(bitcopy_n), (mono_to_gray), (monowhite_to_gray),
+	(monoblack_to_gray), (gray_to_mono), (gray_to_monowhite),
+	(gray_to_monoblack), (avpicture_alloc), (avpicture_free),
+	(is_yuv_planar), (img_convert), (get_alpha_info_pal8),
+	(img_get_alpha_info), (deinterlace_line),
+	(deinterlace_line_inplace), (deinterlace_bottom_field),
+	(deinterlace_bottom_field_inplace), (avpicture_deinterlace):
+	* gst/ffmpegcolorspace/imgconvert_template.h:
+	* gst/ffmpegcolorspace/mem.c: (av_malloc), (av_realloc), (av_free):
+	* gst/ffmpegcolorspace/mmx.h:
+	* gst/ffmpegcolorspace/utils.c: (avcodec_init):
+          adding ffmpegcolorspace element supplied by Ronald after cleaning
+          up and pulling in the right bits of upstream source.
+          I'm sure a better C/compiler wizard could do some cleaning up (for
+          example use GLib's malloc stuff), but as a first pass this
+          works very well.
+
 2004-03-15  Thomas Vander Stichele  <thomas at apestaart dot org>
 
 	* ext/alsa/gstalsa.h:
diff --git a/configure.ac b/configure.ac
index c36810ea96..c4f7b93e9b 100644
--- a/configure.ac
+++ b/configure.ac
@@ -87,6 +87,11 @@ AC_HEADER_STDC([])
 AC_C_INLINE
 AX_CREATE_STDINT_H
 
+dnl Check for malloc.h
+AC_CHECK_HEADER(malloc.h,[
+  AC_DEFINE(HAVE_MALLOC_H, 1, [whether malloc.h available])
+])
+
 dnl Check for a way to display the function name in debug output
 GST_CHECK_FUNCTION()
 
@@ -348,6 +353,7 @@ GST_PLUGINS_ALL="\
 	deinterlace \
 	effectv \
 	festival \
+	ffmpegcolorspace \
 	filter \
 	flx \
 	goom \
@@ -1597,6 +1603,7 @@ gst/debug/Makefile
 gst/deinterlace/Makefile
 gst/effectv/Makefile
 gst/festival/Makefile
+gst/ffmpegcolorspace/Makefile
 gst/filter/Makefile
 gst/flx/Makefile
 gst/goom/Makefile
diff --git a/gst/ffmpegcolorspace/Makefile.am b/gst/ffmpegcolorspace/Makefile.am
new file mode 100644
index 0000000000..5f7148f993
--- /dev/null
+++ b/gst/ffmpegcolorspace/Makefile.am
@@ -0,0 +1,22 @@
+plugin_LTLIBRARIES = libgstffmpegcolorspace.la
+
+libgstffmpegcolorspace_la_SOURCES = \
+	gstffmpeg.c \
+	gstffmpegcolorspace.c \
+	gstffmpegcodecmap.c \
+	dsputil.c \
+	mem.c \
+	utils.c \
+	imgconvert.c
+
+libgstffmpegcolorspace_la_CFLAGS = $(GST_CFLAGS)
+libgstffmpegcolorspace_la_LDFLAGS = $(GST_PLUGIN_LDFLAGS)
+
+# Not installed, but every header must be listed here so that 'make dist' ships it.
+noinst_HEADERS = \
+	gstffmpegcodecmap.h \
+	imgconvert_template.h \
+	mmx.h \
+	avcodec.h \
+	common.h \
+	dsputil.h
diff --git a/gst/ffmpegcolorspace/avcodec.h b/gst/ffmpegcolorspace/avcodec.h
new file mode 100644
index 0000000000..8db12278c1
--- /dev/null
+++ b/gst/ffmpegcolorspace/avcodec.h
@@ -0,0 +1,164 @@
+#ifndef AVCODEC_H
+#define AVCODEC_H
+
+/**
+ * @file avcodec.h
+ * external api header.
+ */
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "common.h"
+#include <sys/types.h> /* size_t */
+
+#define FFMPEG_VERSION_INT     0x000408
+#define FFMPEG_VERSION         "0.4.8"
+#define LIBAVCODEC_BUILD       4707
+
+#define LIBAVCODEC_VERSION_INT FFMPEG_VERSION_INT
+#define LIBAVCODEC_VERSION     FFMPEG_VERSION
+
+#define AV_STRINGIFY(s)	AV_TOSTRING(s)
+#define AV_TOSTRING(s) #s
+#define LIBAVCODEC_IDENT	"FFmpeg" LIBAVCODEC_VERSION "b" AV_STRINGIFY(LIBAVCODEC_BUILD)
+
+#define AV_NOPTS_VALUE int64_t_C(0x8000000000000000)
+#define AV_TIME_BASE 1000000
+
+enum CodecType {
+    CODEC_TYPE_UNKNOWN = -1,
+    CODEC_TYPE_VIDEO,
+    CODEC_TYPE_AUDIO,
+    CODEC_TYPE_DATA,
+};
+
+/**
+ * Pixel format. Notes: 
+ *
+ * PIX_FMT_RGBA32 is handled in an endian-specific manner. A RGBA
+ * color is put together as:
+ *  (A << 24) | (R << 16) | (G << 8) | B
+ * This is stored as BGRA on little endian CPU architectures and ARGB on
+ * big endian CPUs.
+ *
+ * When the pixel format is palettized RGB (PIX_FMT_PAL8), the palettized
+ * image data is stored in AVFrame.data[0]. The palette is transported in
+ * AVFrame.data[1], is 1024 bytes long (256 4-byte entries) and is
+ * formatted the same as in PIX_FMT_RGBA32 described above (i.e., it is
+ * also endian-specific). Note also that the individual RGB palette
+ * components stored in AVFrame.data[1] should be in the range 0..255.
+ * This is important as many custom PAL8 video codecs that were designed
+ * to run on the IBM VGA graphics adapter use 6-bit palette components.
+ */
+enum PixelFormat {
+    PIX_FMT_YUV420P,   ///< Planar YUV 4:2:0 (1 Cr & Cb sample per 2x2 Y samples)
+    PIX_FMT_YUV422,    ///< NOTE(review): presumably packed (non-planar) YUV 4:2:2, cf. PIX_FMT_YUV422P - confirm
+    PIX_FMT_RGB24,     ///< Packed pixel, 3 bytes per pixel, RGBRGB...
+    PIX_FMT_BGR24,     ///< Packed pixel, 3 bytes per pixel, BGRBGR...
+    PIX_FMT_YUV422P,   ///< Planar YUV 4:2:2 (1 Cr & Cb sample per 2x1 Y samples)
+    PIX_FMT_YUV444P,   ///< Planar YUV 4:4:4 (1 Cr & Cb sample per 1x1 Y samples)
+    PIX_FMT_RGBA32,    ///< Packed pixel, 4 bytes per pixel, BGRABGRA..., stored in cpu endianness
+    PIX_FMT_YUV410P,   ///< Planar YUV 4:1:0 (1 Cr & Cb sample per 4x4 Y samples)
+    PIX_FMT_YUV411P,   ///< Planar YUV 4:1:1 (1 Cr & Cb sample per 4x1 Y samples)
+    PIX_FMT_RGB565,    ///< always stored in cpu endianness 
+    PIX_FMT_RGB555,    ///< always stored in cpu endianness, most significant bit to 1 
+    PIX_FMT_GRAY8,     ///< NOTE(review): presumably 8 bit grayscale - confirm
+    PIX_FMT_MONOWHITE, ///< 0 is white 
+    PIX_FMT_MONOBLACK, ///< 0 is black 
+    PIX_FMT_PAL8,      ///< 8 bit with RGBA palette 
+    PIX_FMT_YUVJ420P,  ///< Planar YUV 4:2:0 full scale (jpeg)
+    PIX_FMT_YUVJ422P,  ///< Planar YUV 4:2:2 full scale (jpeg)
+    PIX_FMT_YUVJ444P,  ///< Planar YUV 4:4:4 full scale (jpeg)
+    PIX_FMT_XVMC_MPEG2_MC,///< XVideo Motion Acceleration via common packet passing(xvmc_render.h)
+    PIX_FMT_XVMC_MPEG2_IDCT,
+    PIX_FMT_NB,        ///< count of the enumerators above (numbering starts at 0)
+};
+
+/**
+ * four components are given, that's all.
+ * the last component is alpha
+ */
+typedef struct AVPicture {
+    uint8_t *data[4];
+    int linesize[4];       ///< number of bytes per line
+} AVPicture;
+
+/**
+ * Allocate memory for a picture.  Call avpicture_free to free it.
+ *
+ * @param picture the picture to be filled in.
+ * @param pix_fmt the format of the picture.
+ * @param width the width of the picture.
+ * @param height the height of the picture.
+ * @return 0 if successful, -1 if not.
+ */
+int avpicture_alloc(AVPicture *picture, int pix_fmt, int width, int height);
+
+/* Free a picture previously allocated by avpicture_alloc. */
+void avpicture_free(AVPicture *picture);
+
+int avpicture_fill(AVPicture *picture, uint8_t *ptr,
+                   int pix_fmt, int width, int height);
+int avpicture_layout(const AVPicture* src, int pix_fmt, int width, int height,
+                     unsigned char *dest, int dest_size);
+int avpicture_get_size(int pix_fmt, int width, int height);
+void avcodec_get_chroma_sub_sample(int pix_fmt, int *h_shift, int *v_shift);
+const char *avcodec_get_pix_fmt_name(int pix_fmt);
+enum PixelFormat avcodec_get_pix_fmt(const char* name);
+
+#define FF_LOSS_RESOLUTION  0x0001 /* loss due to resolution change */
+#define FF_LOSS_DEPTH       0x0002 /* loss due to color depth change */
+#define FF_LOSS_COLORSPACE  0x0004 /* loss due to color space conversion */
+#define FF_LOSS_ALPHA       0x0008 /* loss of alpha bits */
+#define FF_LOSS_COLORQUANT  0x0010 /* loss due to color quantization */
+#define FF_LOSS_CHROMA      0x0020 /* loss of chroma (e.g. rgb to gray conversion) */
+
+int avcodec_get_pix_fmt_loss(int dst_pix_fmt, int src_pix_fmt,
+                             int has_alpha);
+int avcodec_find_best_pix_fmt(int pix_fmt_mask, int src_pix_fmt,
+                              int has_alpha, int *loss_ptr);
+
+#define FF_ALPHA_TRANSP       0x0001 /* image has some totally transparent pixels */
+#define FF_ALPHA_SEMI_TRANSP  0x0002 /* image has some transparent pixels */
+int img_get_alpha_info(const AVPicture *src,
+		       int pix_fmt, int width, int height);
+
+/* convert among pixel formats */
+int img_convert(AVPicture *dst, int dst_pix_fmt,
+                const AVPicture *src, int pix_fmt, 
+                int width, int height);
+
+/* deinterlace a picture */
+int avpicture_deinterlace(AVPicture *dst, const AVPicture *src,
+                          int pix_fmt, int width, int height);
+
+void avcodec_init(void);
+
+/* memory */
+void *av_malloc(unsigned int size);
+void *av_mallocz(unsigned int size);
+void *av_realloc(void *ptr, unsigned int size);
+void av_free(void *ptr);
+char *av_strdup(const char *s);
+void av_freep(void *ptr);
+void *av_fast_realloc(void *ptr, unsigned int *size, unsigned int min_size);
+/* for static data only */
+/* call av_free_static to release all statically allocated tables */
+void av_free_static(void);
+void *__av_mallocz_static(void** location, unsigned int size);
+#define av_mallocz_static(p, s) __av_mallocz_static((void **)(p), s)
+
+/* add by bero : in adx.c */
+int is_adx(const unsigned char *buf,size_t bufsize);
+
+void img_copy(AVPicture *dst, const AVPicture *src,
+              int pix_fmt, int width, int height);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* AVCODEC_H */
diff --git a/gst/ffmpegcolorspace/common.h b/gst/ffmpegcolorspace/common.h
new file mode 100644
index 0000000000..de5e2caf8d
--- /dev/null
+++ b/gst/ffmpegcolorspace/common.h
@@ -0,0 +1,1196 @@
+/**
+ * @file common.h
+ * common internal api header.
+ */
+
+#ifndef COMMON_H
+#define COMMON_H
+
+#if defined(WIN32) && !defined(__MINGW32__) && !defined(__CYGWIN__)
+#    define CONFIG_WIN32
+#endif
+
+//#define ALT_BITSTREAM_WRITER
+//#define ALIGNED_BITSTREAM_WRITER
+
+#define ALT_BITSTREAM_READER
+//#define LIBMPEG2_BITSTREAM_READER
+//#define A32_BITSTREAM_READER
+#define LIBMPEG2_BITSTREAM_READER_HACK //add BERO
+
+#ifndef M_PI
+#define M_PI    3.14159265358979323846
+#endif
+
+#ifdef HAVE_AV_CONFIG_H
+/* only include the following when compiling package */
+#    include "config.h"
+
+#    include <stdlib.h>
+#    include <stdio.h>
+#    include <string.h>
+#    include <ctype.h>
+#    ifndef __BEOS__
+#        include <errno.h>
+#    else
+#        include "berrno.h"
+#    endif
+#    include <math.h>
+
+#    ifndef ENODATA
+#        define ENODATA  61
+#    endif
+
+#include <stddef.h>
+#ifndef offsetof
+# define offsetof(T,F) ((unsigned int)((char *)&((T *)0)->F))
+#endif
+
+#define AVOPTION_CODEC_BOOL(name, help, field) \
+    { name, help, offsetof(AVCodecContext, field), FF_OPT_TYPE_BOOL }
+#define AVOPTION_CODEC_DOUBLE(name, help, field, minv, maxv, defval) \
+    { name, help, offsetof(AVCodecContext, field), FF_OPT_TYPE_DOUBLE, minv, maxv, defval }
+#define AVOPTION_CODEC_FLAG(name, help, field, flag, defval) \
+    { name, help, offsetof(AVCodecContext, field), FF_OPT_TYPE_FLAG, flag, 0, defval }
+#define AVOPTION_CODEC_INT(name, help, field, minv, maxv, defval) \
+    { name, help, offsetof(AVCodecContext, field), FF_OPT_TYPE_INT, minv, maxv, defval }
+#define AVOPTION_CODEC_STRING(name, help, field, str, val) \
+    { name, help, offsetof(AVCodecContext, field), FF_OPT_TYPE_STRING, .defval = val, .defstr = str }
+#define AVOPTION_CODEC_RCOVERRIDE(name, help, field) \
+    { name, help, offsetof(AVCodecContext, field), FF_OPT_TYPE_RCOVERRIDE, .defval = 0, .defstr = NULL }
+#define AVOPTION_SUB(ptr) { .name = NULL, .help = (const char*)ptr }
+#define AVOPTION_END() AVOPTION_SUB(NULL)
+
+struct AVOption;
+#ifdef HAVE_MMX
+extern const struct AVOption avoptions_common[3 + 5];
+#else
+extern const struct AVOption avoptions_common[3];
+#endif
+extern const struct AVOption avoptions_workaround_bug[11];
+
+#endif /* HAVE_AV_CONFIG_H */
+
+/* Suppress restrict if it was not defined in config.h.  */
+#ifndef restrict
+#    define restrict
+#endif
+
+#if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ > 0)
+#    define always_inline __attribute__((always_inline)) inline
+#else
+#    define always_inline inline
+#endif
+
+#if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ > 0)
+#    define attribute_used __attribute__((used))
+#else
+#    define attribute_used
+#endif
+
+#ifndef EMULATE_INTTYPES
+#   include <inttypes.h>
+#else
+    typedef signed char  int8_t;
+    typedef signed short int16_t;
+    typedef signed int   int32_t;
+    typedef unsigned char  uint8_t;
+    typedef unsigned short uint16_t;
+    typedef unsigned int   uint32_t;
+
+#   ifdef CONFIG_WIN32
+        typedef signed __int64   int64_t;
+        typedef unsigned __int64 uint64_t;
+#   else /* other OS */
+        typedef signed long long   int64_t;
+        typedef unsigned long long uint64_t;
+#   endif /* other OS */
+#endif /* HAVE_INTTYPES_H */
+
+#ifndef INT64_MAX
+#define INT64_MAX int64_t_C(9223372036854775807)
+#endif
+
+#ifdef EMULATE_FAST_INT
+/* note that we don't emulate 64bit ints */
+typedef signed char int_fast8_t;
+typedef signed int  int_fast16_t;
+typedef signed int  int_fast32_t;
+typedef unsigned char uint_fast8_t;
+typedef unsigned int  uint_fast16_t;
+typedef unsigned int  uint_fast32_t;
+#endif
+
+#if defined(CONFIG_OS2) || defined(CONFIG_SUNOS)
+static inline float floorf(float f) { 
+    return floor(f); 
+}
+#endif
+
+#ifdef CONFIG_WIN32
+
+/* windows */
+
+#    ifndef __MINGW32__
+#        define int64_t_C(c)     (c ## i64)
+#        define uint64_t_C(c)    (c ## ui64) /* ui64 is the unsigned 64-bit suffix; i64 would make the constant signed, unlike the ULL branches */
+
+#    ifdef HAVE_AV_CONFIG_H
+#            define inline __inline
+#    endif
+
+#    else
+#        define int64_t_C(c)     (c ## LL)
+#        define uint64_t_C(c)    (c ## ULL)
+#    endif /* __MINGW32__ */
+
+#    ifdef HAVE_AV_CONFIG_H
+#        ifdef _DEBUG
+#            define DEBUG
+#        endif
+
+#        define snprintf _snprintf
+#        define vsnprintf _vsnprintf
+#    endif
+
+/* CONFIG_WIN32 end */
+#elif defined (CONFIG_OS2)
+/* OS/2 EMX */
+
+#ifndef int64_t_C
+#define int64_t_C(c)     (c ## LL)
+#define uint64_t_C(c)    (c ## ULL)
+#endif
+
+#ifdef HAVE_AV_CONFIG_H
+
+#include <float.h>
+
+#endif /* HAVE_AV_CONFIG_H */
+
+/* CONFIG_OS2 end */
+#else
+
+/* unix */
+
+#ifndef int64_t_C
+#define int64_t_C(c)     (c ## LL)
+#define uint64_t_C(c)    (c ## ULL)
+#endif
+
+#endif /* !CONFIG_WIN32 && !CONFIG_OS2 */
+
+#ifdef HAVE_AV_CONFIG_H
+
+#    include "bswap.h"
+
+#    if defined(__MINGW32__) || defined(__CYGWIN__) || \
+        defined(__OS2__) || (defined (__OpenBSD__) && !defined(__ELF__))
+#        define MANGLE(a) "_" #a
+#    else
+#        define MANGLE(a) #a
+#    endif
+
+/* debug stuff */
+
+#    ifndef DEBUG
+#        define NDEBUG
+#    endif
+#    include <assert.h>
+
+/* dprintf macros: debug trace that prints through printf() when DEBUG is defined and expands to nothing otherwise */
+#    if defined(CONFIG_WIN32) && !defined(__MINGW32__)
+
+inline void dprintf(const char* fmt,...) {}
+
+#    else
+
+#        ifdef DEBUG
+#            define dprintf(...) printf(__VA_ARGS__) /* the format is part of __VA_ARGS__, so dprintf("text\n") with no extra arguments compiles too */
+#        else
+#            define dprintf(...)
+#        endif
+
+#    endif /* !CONFIG_WIN32 */
+
+#    define av_abort()      do { av_log(NULL, AV_LOG_ERROR, "Abort at %s:%d\n", __FILE__, __LINE__); abort(); } while (0)
+
+//rounded division & shift
+#define RSHIFT(a,b) ((a) > 0 ? ((a) + (1<<((b)-1)))>>(b) : ((a) + (1<<((b)-1))-1)>>(b))
+/* assume b>0 */
+#define ROUNDED_DIV(a,b) (((a)>0 ? (a) + ((b)>>1) : (a) - ((b)>>1))/(b))
+#define ABS(a) ((a) >= 0 ? (a) : (-(a)))
+
+#define FFMAX(a,b) ((a) > (b) ? (a) : (b))
+#define FFMIN(a,b) ((a) > (b) ? (b) : (a))
+
+extern const uint32_t inverse[256];
+
+#ifdef ARCH_X86
+#    define FASTDIV(a,b) \
+    ({\
+        int ret,dmy;\
+        asm volatile(\
+            "mull %3"\
+            :"=d"(ret),"=a"(dmy)\
+            :"1"(a),"g"(inverse[b])\
+            );\
+        ret;\
+    })
+#elif defined(CONFIG_FASTDIV)
+#    define FASTDIV(a,b)   ((uint32_t)((((uint64_t)a)*inverse[b])>>32))
+#else
+#    define FASTDIV(a,b)   ((a)/(b))
+#endif
+ 
+#ifdef ARCH_X86
+// NEG_SSR32 / NEG_USR32: the asm passes -s as the shift count instead of 32-s, to avoid +32 for shift optimization (gcc should do that ...)
+static inline  int32_t NEG_SSR32( int32_t a, int8_t s){
+    asm ("sarl %1, %0\n\t"
+         : "+r" (a)
+         : "ic" ((uint8_t)(-s))
+    );
+    return a;
+}
+static inline uint32_t NEG_USR32(uint32_t a, int8_t s){
+    asm ("shrl %1, %0\n\t"
+         : "+r" (a)
+         : "ic" ((uint8_t)(-s))
+    );
+    return a;
+}
+#else
+#    define NEG_SSR32(a,s) ((( int32_t)(a))>>(32-(s))) /* signed right shift by 32-s: leaves the top s bits of a */
+#    define NEG_USR32(a,s) (((uint32_t)(a))>>(32-(s))) /* unsigned right shift by 32-s: leaves the top s bits of a, zero-extended */
+#endif
+
+/* bit output */
+
+struct PutBitContext;
+
+typedef void (*WriteDataFunc)(void *, uint8_t *, int);
+
+typedef struct PutBitContext {
+#ifdef ALT_BITSTREAM_WRITER
+    uint8_t *buf, *buf_end;
+    int index;
+#else
+    uint32_t bit_buf;
+    int bit_left;
+    uint8_t *buf, *buf_ptr, *buf_end;
+#endif
+} PutBitContext;
+
+void init_put_bits(PutBitContext *s, uint8_t *buffer, int buffer_size);
+
+int put_bits_count(PutBitContext *s);
+void align_put_bits(PutBitContext *s);
+void flush_put_bits(PutBitContext *s);
+void put_string(PutBitContext * pbc, char *s, int put_zero);
+
+/* bit input */
+
+typedef struct GetBitContext {
+    const uint8_t *buffer, *buffer_end;
+#ifdef ALT_BITSTREAM_READER
+    int index;
+#elif defined LIBMPEG2_BITSTREAM_READER
+    uint8_t *buffer_ptr;
+    uint32_t cache;
+    int bit_count;
+#elif defined A32_BITSTREAM_READER
+    uint32_t *buffer_ptr;
+    uint32_t cache0;
+    uint32_t cache1;
+    int bit_count;
+#endif
+    int size_in_bits;
+} GetBitContext;
+
+static inline int get_bits_count(GetBitContext *s);
+
+#define VLC_TYPE int16_t
+
+typedef struct VLC {
+    int bits;
+    VLC_TYPE (*table)[2]; ///< code, bits
+    int table_size, table_allocated;
+} VLC;
+
+typedef struct RL_VLC_ELEM {
+    int16_t level;
+    int8_t len;
+    uint8_t run;
+} RL_VLC_ELEM;
+
+#ifdef ARCH_SPARC64
+#define UNALIGNED_STORES_ARE_BAD
+#endif
+
+/* used to avoid misaligned exceptions on some archs (alpha, ...) */
+#ifdef ARCH_X86
+#    define unaligned32(a) (*(uint32_t*)(a))
+#else
+#    ifdef __GNUC__
+static inline uint32_t unaligned32(const void *v) {
+    struct Unaligned {
+	uint32_t i;
+    } __attribute__((packed));
+
+    return ((const struct Unaligned *) v)->i;
+}
+#    elif defined(__DECC)
+static inline uint32_t unaligned32(const void *v) {
+    return *(const __unaligned uint32_t *) v;
+}
+#    else
+static inline uint32_t unaligned32(const void *v) {
+    return *(const uint32_t *) v;
+}
+#    endif
+#endif //!ARCH_X86
+
+#ifndef ALT_BITSTREAM_WRITER
+static inline void put_bits(PutBitContext *s, int n, unsigned int value) /* appends the n bits of value to the stream (non-ALT writer) */
+{
+    unsigned int bit_buf;
+    int bit_left;
+
+#ifdef STATS
+    st_out_bit_counts[st_current_index] += n;
+#endif
+    //    printf("put_bits=%d %x\n", n, value);
+    assert(n == 32 || value < (1U << n)); /* value must fit in n bits */
+    
+    bit_buf = s->bit_buf;
+    bit_left = s->bit_left;
+
+    //    printf("n=%d value=%x cnt=%d buf=%x\n", n, value, bit_cnt, bit_buf);
+    /* XXX: optimize */
+    if (n < bit_left) { /* everything still fits into the accumulator */
+        bit_buf = (bit_buf<<n) | value;
+        bit_left-=n;
+    } else { /* accumulator fills up: complete it, store 4 bytes, keep the rest of value */
+	bit_buf<<=bit_left; /* NOTE(review): this is a shift by 32 when bit_left==32 and n==32 - confirm that case is safe */
+        bit_buf |= value >> (n - bit_left);
+#ifdef UNALIGNED_STORES_ARE_BAD
+        if (3 & (int) s->buf_ptr) { /* destination not 4-byte aligned: store byte by byte, most significant first */
+            s->buf_ptr[0] = bit_buf >> 24;
+            s->buf_ptr[1] = bit_buf >> 16;
+            s->buf_ptr[2] = bit_buf >>  8;
+            s->buf_ptr[3] = bit_buf      ;
+        } else
+#endif
+        *(uint32_t *)s->buf_ptr = be2me_32(bit_buf);
+        //printf("bitbuf = %08x\n", bit_buf);
+        s->buf_ptr+=4;
+	bit_left+=32 - n;
+        bit_buf = value;
+    }
+
+    s->bit_buf = bit_buf; /* write the local copies back to the context */
+    s->bit_left = bit_left;
+}
+#endif
+
+
+#ifdef ALT_BITSTREAM_WRITER
+static inline void put_bits(PutBitContext *s, int n, unsigned int value)
+{
+#    ifdef ALIGNED_BITSTREAM_WRITER
+#        ifdef ARCH_X86
+    asm volatile(
+	"movl %0, %%ecx			\n\t"
+	"xorl %%eax, %%eax		\n\t"
+	"shrdl %%cl, %1, %%eax		\n\t"
+	"shrl %%cl, %1			\n\t"
+	"movl %0, %%ecx			\n\t"
+	"shrl $3, %%ecx			\n\t"
+	"andl $0xFFFFFFFC, %%ecx	\n\t"
+	"bswapl %1			\n\t"
+	"orl %1, (%2, %%ecx)		\n\t"
+	"bswapl %%eax			\n\t"
+	"addl %3, %0			\n\t"
+	"movl %%eax, 4(%2, %%ecx)	\n\t"
+	: "=&r" (s->index), "=&r" (value)
+	: "r" (s->buf), "r" (n), "0" (s->index), "1" (value<<(-n))
+	: "%eax", "%ecx"
+    );
+#        else
+    int index= s->index;
+    uint32_t *ptr= ((uint32_t *)s->buf)+(index>>5);
+    
+    value<<= 32-n; 
+    
+    ptr[0] |= be2me_32(value>>(index&31));
+    ptr[1]  = be2me_32(value<<(32-(index&31)));
+//if(n>24) printf("%d %d\n", n, value);
+    index+= n;
+    s->index= index;
+#        endif
+#    else //ALIGNED_BITSTREAM_WRITER
+#        ifdef ARCH_X86
+    asm volatile(
+	"movl $7, %%ecx			\n\t"
+	"andl %0, %%ecx			\n\t"
+	"addl %3, %%ecx			\n\t"
+	"negl %%ecx			\n\t"
+	"shll %%cl, %1			\n\t"
+	"bswapl %1			\n\t"
+	"movl %0, %%ecx			\n\t"
+	"shrl $3, %%ecx			\n\t"
+	"orl %1, (%%ecx, %2)		\n\t"
+	"addl %3, %0			\n\t"
+	"movl $0, 4(%%ecx, %2)		\n\t"
+	: "=&r" (s->index), "=&r" (value)
+	: "r" (s->buf), "r" (n), "0" (s->index), "1" (value)
+	: "%ecx"
+    );
+#        else
+    int index= s->index;
+    uint32_t *ptr= (uint32_t*)(((uint8_t *)s->buf)+(index>>3));
+    
+    ptr[0] |= be2me_32(value<<(32-n-(index&7) ));
+    ptr[1] = 0;
+//if(n>24) printf("%d %d\n", n, value);
+    index+= n;
+    s->index= index;
+#        endif
+#    endif //!ALIGNED_BITSTREAM_WRITER
+}
+#endif
+
+
+static inline uint8_t* pbBufPtr(PutBitContext *s) /* returns a pointer to the byte at the writer's current position */
+{
+#ifdef ALT_BITSTREAM_WRITER
+	return s->buf + (s->index>>3); /* index counts bits, so >>3 gives the byte offset */
+#else
+	return s->buf_ptr;
+#endif
+}
+
+/**
+ * Advances the write position by n bytes without writing any data.
+ * PutBitContext must be flushed & aligned to a byte boundary before calling this.
+ */
+static inline void skip_put_bytes(PutBitContext *s, int n){
+        assert((put_bits_count(s)&7)==0);
+#ifdef ALT_BITSTREAM_WRITER /* NOTE(review): the bare FIXME line below is not a comment, so this branch does not compile as written */
+        FIXME may need some cleaning of the buffer
+	s->index += n<<3;
+#else
+        assert(s->bit_left==32);
+	s->buf_ptr += n;
+#endif    
+}
+
+/**
+ * Changes the end of the buffer: buf_end is set to size bytes past s->buf.
+ */
+static inline void set_put_bits_buffer_size(PutBitContext *s, int size){
+    s->buf_end= s->buf + size;
+}
+
+/* Bitstream reader API docs:
+name
+    arbitrary name which is used as prefix for the internal variables
+
+gb
+    getbitcontext
+
+OPEN_READER(name, gb)
+    loads gb into local variables
+
+CLOSE_READER(name, gb)
+    stores local vars in gb
+
+UPDATE_CACHE(name, gb)
+    refills the internal cache from the bitstream
+    after this call at least MIN_CACHE_BITS will be available,
+
+GET_CACHE(name, gb)
+    will output the contents of the internal cache, next bit is MSB of 32 or 64 bit (FIXME 64bit)
+
+SHOW_UBITS(name, gb, num)
+    will return the next num bits
+
+SHOW_SBITS(name, gb, num)
+    will return the next num bits and do sign extension
+
+SKIP_BITS(name, gb, num)
+    will skip over the next num bits
+    note, this is equivalent to SKIP_CACHE; SKIP_COUNTER
+
+SKIP_CACHE(name, gb, num)
+    will remove the next num bits from the cache (note SKIP_COUNTER MUST be called before UPDATE_CACHE / CLOSE_READER)
+
+SKIP_COUNTER(name, gb, num)
+    will increment the internal bit counter (see SKIP_CACHE & SKIP_BITS)
+
+LAST_SKIP_CACHE(name, gb, num)
+    will remove the next num bits from the cache if it is needed for UPDATE_CACHE otherwise it will do nothing
+
+LAST_SKIP_BITS(name, gb, num)
+    is equivalent to LAST_SKIP_CACHE; SKIP_COUNTER
+
+for examples see get_bits, show_bits, skip_bits, get_vlc
+*/
+
+static inline int unaligned32_be(const void *v) /* reads 4 bytes at v as one big-endian 32-bit value */
+{
+#ifdef CONFIG_ALIGN
+	const uint8_t *p=v;
+	return (((p[0]<<8) | p[1])<<16) | (p[2]<<8) | (p[3]); /* byte-wise assembly, p[0] ends up most significant */
+#else
+	return be2me_32( unaligned32(v)); //original
+#endif
+}
+
+#ifdef ALT_BITSTREAM_READER
+#   define MIN_CACHE_BITS 25
+
+#   define OPEN_READER(name, gb)\
+        int name##_index= (gb)->index;\
+        int name##_cache= 0;\
+
+#   define CLOSE_READER(name, gb)\
+        (gb)->index= name##_index;\
+
+#   define UPDATE_CACHE(name, gb)\
+        name##_cache= unaligned32_be( ((uint8_t *)(gb)->buffer)+(name##_index>>3) ) << (name##_index&0x07);\
+
+#   define SKIP_CACHE(name, gb, num)\
+        name##_cache <<= (num);\
+
+// FIXME name?
+#   define SKIP_COUNTER(name, gb, num)\
+        name##_index += (num);\
+
+#   define SKIP_BITS(name, gb, num)\
+        {\
+            SKIP_CACHE(name, gb, num)\
+            SKIP_COUNTER(name, gb, num)\
+        }\
+
+#   define LAST_SKIP_BITS(name, gb, num) SKIP_COUNTER(name, gb, num)
+#   define LAST_SKIP_CACHE(name, gb, num) ;
+
+#   define SHOW_UBITS(name, gb, num)\
+        NEG_USR32(name##_cache, num)
+
+#   define SHOW_SBITS(name, gb, num)\
+        NEG_SSR32(name##_cache, num)
+
+#   define GET_CACHE(name, gb)\
+        ((uint32_t)name##_cache)
+
+static inline int get_bits_count(GetBitContext *s){ /* current read position in bits (ALT reader: simply the stored index) */
+    return s->index;
+}
+#elif defined LIBMPEG2_BITSTREAM_READER
+//libmpeg2 like reader
+
+#   define MIN_CACHE_BITS 17
+
+#   define OPEN_READER(name, gb)\
+        int name##_bit_count=(gb)->bit_count;\
+        int name##_cache= (gb)->cache;\
+        uint8_t * name##_buffer_ptr=(gb)->buffer_ptr;\
+
+#   define CLOSE_READER(name, gb)\
+        (gb)->bit_count= name##_bit_count;\
+        (gb)->cache= name##_cache;\
+        (gb)->buffer_ptr= name##_buffer_ptr;\
+
+#ifdef LIBMPEG2_BITSTREAM_READER_HACK
+
+#   define UPDATE_CACHE(name, gb)\
+    if(name##_bit_count >= 0){\
+        name##_cache+= (int)be2me_16(*(uint16_t*)name##_buffer_ptr) << name##_bit_count;\
+        ((uint16_t*)name##_buffer_ptr)++;\
+        name##_bit_count-= 16;\
+    }\
+
+#else
+
+#   define UPDATE_CACHE(name, gb)\
+    if(name##_bit_count >= 0){\
+        name##_cache+= ((name##_buffer_ptr[0]<<8) + name##_buffer_ptr[1]) << name##_bit_count;\
+        name##_buffer_ptr+=2;\
+        name##_bit_count-= 16;\
+    }\
+
+#endif
+
+#   define SKIP_CACHE(name, gb, num)\
+        name##_cache <<= (num);\
+
+#   define SKIP_COUNTER(name, gb, num)\
+        name##_bit_count += (num);\
+
+#   define SKIP_BITS(name, gb, num)\
+        {\
+            SKIP_CACHE(name, gb, num)\
+            SKIP_COUNTER(name, gb, num)\
+        }\
+
+#   define LAST_SKIP_BITS(name, gb, num) SKIP_BITS(name, gb, num)
+#   define LAST_SKIP_CACHE(name, gb, num) SKIP_CACHE(name, gb, num)
+
+#   define SHOW_UBITS(name, gb, num)\
+        NEG_USR32(name##_cache, num)
+
+#   define SHOW_SBITS(name, gb, num)\
+        NEG_SSR32(name##_cache, num)
+
+#   define GET_CACHE(name, gb)\
+        ((uint32_t)name##_cache)
+
+static inline int get_bits_count(GetBitContext *s){ /* read position in bits, derived from buffer_ptr and bit_count (libmpeg2-like reader) */
+    return (s->buffer_ptr - s->buffer)*8 - 16 + s->bit_count;
+}
+
+#elif defined A32_BITSTREAM_READER
+
+#   define MIN_CACHE_BITS 32
+
+#   define OPEN_READER(name, gb)\
+        int name##_bit_count=(gb)->bit_count;\
+        uint32_t name##_cache0= (gb)->cache0;\
+        uint32_t name##_cache1= (gb)->cache1;\
+        uint32_t * name##_buffer_ptr=(gb)->buffer_ptr;\
+
+#   define CLOSE_READER(name, gb)\
+        (gb)->bit_count= name##_bit_count;\
+        (gb)->cache0= name##_cache0;\
+        (gb)->cache1= name##_cache1;\
+        (gb)->buffer_ptr= name##_buffer_ptr;\
+
+#   define UPDATE_CACHE(name, gb)\
+    if(name##_bit_count > 0){\
+        const uint32_t next= be2me_32( *name##_buffer_ptr );\
+        name##_cache0 |= NEG_USR32(next,name##_bit_count);\
+        name##_cache1 |= next<<name##_bit_count;\
+        name##_buffer_ptr++;\
+        name##_bit_count-= 32;\
+    }\
+
+#ifdef ARCH_X86
+#   define SKIP_CACHE(name, gb, num)\
+        asm(\
+            "shldl %2, %1, %0		\n\t"\
+            "shll %2, %1		\n\t"\
+            : "+r" (name##_cache0), "+r" (name##_cache1)\
+            : "Ic" ((uint8_t)num)\
+           );
+#else
+#   define SKIP_CACHE(name, gb, num)\
+        name##_cache0 <<= (num);\
+        name##_cache0 |= NEG_USR32(name##_cache1,num);\
+        name##_cache1 <<= (num);
+#endif
+
+#   define SKIP_COUNTER(name, gb, num)\
+        name##_bit_count += (num);\
+
+#   define SKIP_BITS(name, gb, num)\
+        {\
+            SKIP_CACHE(name, gb, num)\
+            SKIP_COUNTER(name, gb, num)\
+        }\
+
+#   define LAST_SKIP_BITS(name, gb, num) SKIP_BITS(name, gb, num)
+#   define LAST_SKIP_CACHE(name, gb, num) SKIP_CACHE(name, gb, num)
+
+#   define SHOW_UBITS(name, gb, num)\
+        NEG_USR32(name##_cache0, num)
+
+#   define SHOW_SBITS(name, gb, num)\
+        NEG_SSR32(name##_cache0, num)
+
+#   define GET_CACHE(name, gb)\
+        (name##_cache0)
+
+static inline int get_bits_count(GetBitContext *s){ /* read position in bits, derived from buffer_ptr and bit_count (A32 reader) */
+    return ((uint8_t*)s->buffer_ptr - s->buffer)*8 - 32 + s->bit_count;
+}
+
+#endif
+
+/**
+ * read mpeg1 dc style vlc (sign bit + mantissa with no MSB).
+ * if MSB not set it is negative
+ * @param n length in bits
+ * @author BERO
+ */
+static inline int get_xbits(GetBitContext *s, int n){
+    register int tmp;
+    register int32_t cache;
+    OPEN_READER(re, s)
+    UPDATE_CACHE(re, s)
+    cache = GET_CACHE(re,s);
+    if ((int32_t)cache<0) { //MSB=1
+        tmp = NEG_USR32(cache,n); /* positive: the top n bits taken as they are */
+    } else {
+    //   tmp = (-1<<n) | NEG_USR32(cache,n) + 1; mpeg12.c algo
+    //   tmp = - (NEG_USR32(cache,n) ^ ((1 << n) - 1)); h263.c algo
+        tmp = - NEG_USR32(~cache,n); /* negative: minus the bitwise-inverted top n bits */
+    }
+    LAST_SKIP_BITS(re, s, n)
+    CLOSE_READER(re, s)
+    return tmp;
+}
+
+static inline int get_sbits(GetBitContext *s, int n){ /* reads n bits and returns them sign-extended */
+    register int tmp;
+    OPEN_READER(re, s)
+    UPDATE_CACHE(re, s)
+    tmp= SHOW_SBITS(re, s, n);
+    LAST_SKIP_BITS(re, s, n) /* consume the n bits just read */
+    CLOSE_READER(re, s)
+    return tmp;
+}
+
+/**
+ * reads 1-17 bits (n=0 does not yield 0: the C version of NEG_USR32 would shift a 32-bit value by 32).
+ * Note, the alt bitstream reader can read up to 25 bits, but the libmpeg2 reader can't
+ */
+static inline unsigned int get_bits(GetBitContext *s, int n){
+    register int tmp;
+    OPEN_READER(re, s)
+    UPDATE_CACHE(re, s)
+    tmp= SHOW_UBITS(re, s, n);
+    LAST_SKIP_BITS(re, s, n) /* consume the n bits just read */
+    CLOSE_READER(re, s)
+    return tmp;
+}
+
+unsigned int get_bits_long(GetBitContext *s, int n);
+
+/**
+ * shows 1-17 bits without advancing the read position (n=0 does not yield 0, see NEG_USR32).
+ * Note, the alt bitstream reader can read up to 25 bits, but the libmpeg2 reader can't
+ */
+static inline unsigned int show_bits(GetBitContext *s, int n){
+    register int tmp;
+    OPEN_READER(re, s)
+    UPDATE_CACHE(re, s)
+    tmp= SHOW_UBITS(re, s, n);
+//    CLOSE_READER(re, s)   (left out: nothing is stored back, so the position stays unchanged)
+    return tmp;
+}
+
+unsigned int show_bits_long(GetBitContext *s, int n);
+
+static inline void skip_bits(GetBitContext *s, int n){ /* advances the read position by n bits */
+ //Note gcc seems to optimize this to s->index+=n for the ALT_READER :))
+    OPEN_READER(re, s)
+    UPDATE_CACHE(re, s)
+    LAST_SKIP_BITS(re, s, n)
+    CLOSE_READER(re, s)
+}
+
+static inline unsigned int get_bits1(GetBitContext *s){ /* reads a single bit */
+#ifdef ALT_BITSTREAM_READER
+    int index= s->index;
+    uint8_t result= s->buffer[ index>>3 ]; /* the byte that holds the current bit */
+    result<<= (index&0x07); /* wanted bit becomes the MSB; the 8-bit type drops the bits before it */
+    result>>= 8 - 1; /* move it down to bit 0 */
+    index++;
+    s->index= index;
+
+    return result;
+#else
+    return get_bits(s, 1);
+#endif
+}
+
+static inline unsigned int show_bits1(GetBitContext *s){
+    return show_bits(s, 1); // single bit form of show_bits()
+}
+
+static inline void skip_bits1(GetBitContext *s){
+    skip_bits(s, 1); // single bit form of skip_bits()
+}
+
+void init_get_bits(GetBitContext *s,
+                   const uint8_t *buffer, int buffer_size);
+
+int check_marker(GetBitContext *s, const char *msg);
+void align_get_bits(GetBitContext *s);
+int init_vlc(VLC *vlc, int nb_bits, int nb_codes,
+             const void *bits, int bits_wrap, int bits_size,
+             const void *codes, int codes_wrap, int codes_size);
+void free_vlc(VLC *vlc);
+
+/**
+ * reads one vlc code from table into code, following at most max_depth (up to 3) table levels.
+ * if the vlc code is invalid and max_depth=1 then no bits will be removed
+ * if the vlc code is invalid and max_depth>1 then the number of bits removed
+ * is undefined
+ */
+#define GET_VLC(code, name, gb, table, bits, max_depth)\
+{\
+    int n, index, nb_bits;\
+\
+    index= SHOW_UBITS(name, gb, bits);\
+    code = table[index][0];\
+    n    = table[index][1];\
+\
+    if(max_depth > 1 && n < 0){\
+        LAST_SKIP_BITS(name, gb, bits)\
+        UPDATE_CACHE(name, gb)\
+\
+        nb_bits = -n;\
+\
+        index= SHOW_UBITS(name, gb, nb_bits) + code;\
+        code = table[index][0];\
+        n    = table[index][1];\
+        if(max_depth > 2 && n < 0){\
+            LAST_SKIP_BITS(name, gb, nb_bits)\
+            UPDATE_CACHE(name, gb)\
+\
+            nb_bits = -n;\
+\
+            index= SHOW_UBITS(name, gb, nb_bits) + code;\
+            code = table[index][0];\
+            n    = table[index][1];\
+        }\
+    }\
+    SKIP_BITS(name, gb, n)\
+}
+
+#define GET_RL_VLC(level, run, name, gb, table, bits, max_depth)\
+{ /* two level lookup: entries carry level/len/run, a negative len points into a second level */\
+    int n, index, nb_bits;\
+\
+    index= SHOW_UBITS(name, gb, bits);\
+    level = table[index].level;\
+    n     = table[index].len;\
+\
+    if(max_depth > 1 && n < 0){\
+        LAST_SKIP_BITS(name, gb, bits)\
+        UPDATE_CACHE(name, gb)\
+\
+        nb_bits = -n;\
+\
+        index= SHOW_UBITS(name, gb, nb_bits) + level;\
+        level = table[index].level;\
+        n     = table[index].len;\
+    }\
+    run= table[index].run;\
+    SKIP_BITS(name, gb, n)\
+}
+
+// deprecated, don't use get_vlc for new code, use get_vlc2 instead or use GET_VLC directly
+static inline int get_vlc(GetBitContext *s, VLC *vlc)
+{
+    int code;
+    VLC_TYPE (*table)[2]= vlc->table;
+    
+    OPEN_READER(re, s)
+    UPDATE_CACHE(re, s)
+
+    GET_VLC(code, re, s, table, vlc->bits, 3)    
+
+    CLOSE_READER(re, s)
+    return code;
+}
+
+/**
+ * parses a vlc code, faster than get_vlc()
+ * @param bits is the number of bits which will be read at once, must be 
+ *             identical to nb_bits in init_vlc()
+ * @param max_depth is the number of times bits bits must be read to completely
+ *                  read the longest vlc code 
+ *                  = (max_vlc_length + bits - 1) / bits
+ */
+static always_inline int get_vlc2(GetBitContext *s, VLC_TYPE (*table)[2],
+                                  int bits, int max_depth)
+{
+    int code;
+    
+    OPEN_READER(re, s)
+    UPDATE_CACHE(re, s)
+
+    GET_VLC(code, re, s, table, bits, max_depth)
+
+    CLOSE_READER(re, s)
+    return code;
+}
+
+//#define TRACE
+
+#ifdef TRACE
+
+/* print the low n bits of bits, most significant first, then blanks up to column 24 */
+static inline void print_bin(int bits, int n){
+    int shift= n;
+    int col= n;
+    while(shift-- > 0)
+        printf("%d", (bits>>shift)&1);
+    for(; col<24; col++)
+        printf(" ");
+}
+
+static inline int get_bits_trace(GetBitContext *s, int n, char *file, char *func, int line){ // get_bits() plus a trace line
+    int r= get_bits(s, n);
+    
+    print_bin(r, n);
+    printf("%5d %2d %3d bit @%5d in %s %s:%d\n", r, n, r, get_bits_count(s)-n, file, func, line);
+    return r;
+}
+static inline int get_vlc_trace(GetBitContext *s, VLC_TYPE (*table)[2], int bits, int max_depth, char *file, char *func, int line){
+    int show= show_bits(s, 24); // peek at the next 24 bits before decoding
+    int pos= get_bits_count(s);
+    int r= get_vlc2(s, table, bits, max_depth);
+    int len= get_bits_count(s) - pos; // number of bits the code consumed
+    int bits2= show>>(24-len); // the consumed bits, taken from the top of the peeked 24
+    
+    print_bin(bits2, len);
+    
+    printf("%5d %2d %3d vlc @%5d in %s %s:%d\n", bits2, len, r, pos, file, func, line);
+    return r;
+}
+static inline int get_xbits_trace(GetBitContext *s, int n, char *file, char *func, int line){ // get_xbits() plus a trace line
+    int show= show_bits(s, n); // raw bits, printed next to the decoded value r
+    int r= get_xbits(s, n);
+    
+    print_bin(show, n);
+    printf("%5d %2d %3d xbt @%5d in %s %s:%d\n", show, n, r, get_bits_count(s)-n, file, func, line);
+    return r;
+}
+
+#define get_bits(s, n)  get_bits_trace(s, n, __FILE__, __PRETTY_FUNCTION__, __LINE__)
+#define get_bits1(s)    get_bits_trace(s, 1, __FILE__, __PRETTY_FUNCTION__, __LINE__)
+#define get_xbits(s, n) get_xbits_trace(s, n, __FILE__, __PRETTY_FUNCTION__, __LINE__)
+#define get_vlc(s, vlc)            get_vlc_trace(s, (vlc)->table, (vlc)->bits, 3, __FILE__, __PRETTY_FUNCTION__, __LINE__)
+#define get_vlc2(s, tab, bits, max) get_vlc_trace(s, tab, bits, max, __FILE__, __PRETTY_FUNCTION__, __LINE__)
+
+#define tprintf printf
+
+#else //TRACE
+#define tprintf(...) {}
+#endif
+
+/* define it to include statistics code (useful only for optimizing
+   codec efficiency) */
+//#define STATS
+
+#ifdef STATS
+
+enum {
+    ST_UNKNOWN,
+    ST_DC,
+    ST_INTRA_AC,
+    ST_INTER_AC,
+    ST_INTRA_MB,
+    ST_INTER_MB,
+    ST_MV,
+    ST_NB,
+};
+
+extern int st_current_index;
+extern unsigned int st_bit_counts[ST_NB];
+extern unsigned int st_out_bit_counts[ST_NB];
+
+void print_stats(void);
+#endif
+
+/* misc math functions */
+extern const uint8_t ff_log2_tab[256];
+
+/* shift count (0, 8, 16 or 24) that reduces v to one byte, plus ff_log2_tab[] of that byte */
+static inline int av_log2(unsigned int v)
+{
+    int n = 0;
+
+    /* reduce v to a single byte, remembering how far it was shifted */
+    if (v & 0xffff0000) {
+        n = 16;
+        v >>= 16;
+    }
+    if (v & 0xff00) {
+        n += 8;
+        v >>= 8;
+    }
+
+    return n + ff_log2_tab[v];
+}
+
+/* av_log2() variant with a single optional byte shift; the 256 entry
+   table lookup that follows is only in range while v < 0x10000 */
+static inline int av_log2_16bit(unsigned int v)
+{
+    int n = 0;
+
+    if (v & 0xff00) {
+        n = 8;
+        v >>= 8;
+    }
+
+    return n + ff_log2_tab[v];
+}
+
+/**
+ * Median of three integers.
+ * @param a first value
+ * @param b second value
+ * @param c third value
+ * @return the middle value of a, b and c
+ */
+static inline int mid_pred(int a, int b, int c)
+{
+    int lo, hi;
+
+    /* order a and b */
+    if (a > b) {
+        lo = b;
+        hi = a;
+    } else {
+        lo = a;
+        hi = b;
+    }
+    /* clamping c into [lo, hi] yields the median */
+    if (c < lo)
+        return lo;
+    if (c > hi)
+        return hi;
+    return c;
+}
+
+/* clamp a into [amin, amax]; the lower bound is tested first */
+static inline int clip(int a, int amin, int amax)
+{
+    int bounded = a;
+
+    if (a < amin)      bounded = amin;
+    else if (a > amax) bounded = amax;
+    return bounded;
+}
+
+/* math */
+extern const uint8_t ff_sqrt_tab[128];
+
+int64_t ff_gcd(int64_t a, int64_t b);
+/* integer square root: largest ret with ret*ret <= a; a < 128 is read from ff_sqrt_tab */
+static inline int ff_sqrt(int a)
+{
+    int ret=0;
+    int s;
+    unsigned int ret_sq=0; /* ret*ret, kept unsigned like the trial value below */
+    
+    if(a<128) return ff_sqrt_tab[a];
+    /* b = (ret + 2^s)^2; it can exceed INT_MAX once a >= 2^30, so compute it unsigned */
+    for(s=15; s>=0; s--){
+        unsigned int b= ret_sq + (1U<<(s*2)) + ((unsigned int)ret<<s)*2;
+        if(b<=(unsigned int)a){
+            ret_sq=b;
+            ret+= 1<<s;
+        }
+    }
+    return ret;
+}
+
+/**
+ * converts fourcc string to int (s[0] in the low byte); chars are read as unsigned
+ */
+static inline int ff_get_fourcc(const char *s){
+    const unsigned char *u= (const unsigned char *)s; /* avoids sign extension of bytes >= 0x80 */
+    assert( strlen(s)==4 );
+    return (int)(u[0] | (u[1]<<8) | (u[2]<<16) | ((unsigned int)u[3]<<24));
+}
+
+#define MKTAG(a,b,c,d) ((a) | ((b) << 8) | ((c) << 16) | ((d) << 24)) /* a ends up in the low byte */
+#define MKBETAG(a,b,c,d) ((d) | ((c) << 8) | ((b) << 16) | ((a) << 24)) /* a ends up in the high byte */
+
+
+#ifdef ARCH_X86
+#define MASK_ABS(mask, level)\
+            asm volatile(\
+		"cdq			\n\t"\
+		"xorl %1, %0		\n\t"\
+		"subl %1, %0		\n\t"\
+		: "+a" (level), "=&d" (mask)\
+	    );
+#else /* C form: mask= level>>31, then level= (level^mask)-mask; assumes 32 bit int and arithmetic shift */
+#define MASK_ABS(mask, level)\
+            mask= level>>31;\
+            level= (level^mask)-mask;
+#endif
+
+
+#if __CPU__ >= 686 && !defined(RUNTIME_CPUDETECT)
+#define COPY3_IF_LT(x,y,a,b,c,d)\
+asm volatile (\
+    "cmpl %0, %3	\n\t"\
+    "cmovl %3, %0	\n\t"\
+    "cmovl %4, %1	\n\t"\
+    "cmovl %5, %2	\n\t"\
+    : "+r" (x), "+r" (a), "+r" (c)\
+    : "r" (y), "r" (b), "r" (d)\
+);
+#else /* plain C form: when y < x, copy y to x, b to a and d to c */
+#define COPY3_IF_LT(x,y,a,b,c,d)\
+if((y)<(x)){\
+     (x)=(y);\
+     (a)=(b);\
+     (c)=(d);\
+}
+#endif
+
+#ifdef ARCH_X86
+static inline long long rdtsc(void) /* returns the value produced by the x86 rdtsc instruction */
+{
+	long long l;
+	asm volatile(	"rdtsc\n\t"
+		: "=A" (l)
+	);
+	return l;
+}
+
+#define START_TIMER /* declares tend and tstart in the current scope and samples the counter */ \
+uint64_t tend;\
+uint64_t tstart= rdtsc();\
+
+#define STOP_TIMER(id) \
+tend= rdtsc();\
+{ /* runs of 8x the running average or more are counted as skips instead of being summed */\
+  static uint64_t tsum=0;\
+  static int tcount=0;\
+  static int tskip_count=0;\
+  if(tcount<2 || tend - tstart < 8*tsum/tcount){\
+      tsum+= tend - tstart;\
+      tcount++;\
+  }else\
+      tskip_count++;\
+  if(256*256*256*64%(tcount+tskip_count)==0){ /* total number of calls is a power of two */\
+      av_log(NULL, AV_LOG_DEBUG, "%lld dezicycles in %s, %d runs, %d skips\n", (long long)(tsum*10/tcount), id, tcount, tskip_count);\
+  }\
+}
+#endif
+
+#define CLAMP_TO_8BIT(d) (((d) > 0xff) ? 0xff : ((d) < 0) ? 0 : (d)) /* d may be evaluated up to three times */
+
+/* avoid usage of various functions */
+#define malloc please_use_av_malloc
+#define free please_use_av_free
+#define realloc please_use_av_realloc
+#if !(defined(LIBAVFORMAT_BUILD) || defined(_FRAMEHOOK_H))
+#define printf please_use_av_log
+#define fprintf please_use_av_log
+#endif
+
+#define CHECKED_ALLOCZ(p, size)\
+{ /* p= av_mallocz(size); a NULL result for a non-zero size jumps to the caller's "fail" label */\
+    p= av_mallocz(size);\
+    if(p==NULL && (size)!=0){\
+        perror("malloc");\
+        goto fail;\
+    }\
+}
+
+#endif /* HAVE_AV_CONFIG_H */
+
+#endif /* COMMON_H */
diff --git a/gst/ffmpegcolorspace/dsputil.c b/gst/ffmpegcolorspace/dsputil.c
new file mode 100644
index 0000000000..0e0aeec184
--- /dev/null
+++ b/gst/ffmpegcolorspace/dsputil.c
@@ -0,0 +1,49 @@
+/*
+ * DSP utils
+ * Copyright (c) 2000, 2001 Fabrice Bellard.
+ * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
+ */
+
+/**
+ * @file dsputil.c
+ * DSP utils
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "avcodec.h"
+#include "dsputil.h"
+
+uint8_t cropTbl[256 + 2 * MAX_NEG_CROP];
+
+/* init static data: cropTbl[MAX_NEG_CROP + v] becomes v clamped to 0..255,
+   i.e. 0 in the lower margin, the identity in the middle, 255 in the upper margin */
+void
+dsputil_static_init (void)
+{
+  int pos;
+
+  for (pos = 0; pos < 256 + 2 * MAX_NEG_CROP; pos++) {
+    const int value = pos - MAX_NEG_CROP;
+
+    cropTbl[pos] = value < 0 ? 0 : value > 255 ? 255 : value;
+  }
+}
diff --git a/gst/ffmpegcolorspace/dsputil.h b/gst/ffmpegcolorspace/dsputil.h
new file mode 100644
index 0000000000..5492a6676a
--- /dev/null
+++ b/gst/ffmpegcolorspace/dsputil.h
@@ -0,0 +1,576 @@
+/*
+ * DSP utils
+ * Copyright (c) 2000, 2001, 2002 Fabrice Bellard.
+ * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+/**
+ * @file dsputil.h
+ * DSP utils.
+ * note, many functions in here may use MMX which trashes the FPU state, it is
+ * absolutely necessary to call emms_c() between dsp & float/double code
+ */
+
+#ifndef DSPUTIL_H
+#define DSPUTIL_H
+
+#include "common.h"
+#include "avcodec.h"
+
+
+//#define DEBUG
+/* dct code */
+typedef short DCTELEM;
+
+void fdct_ifast (DCTELEM *data);
+void fdct_ifast248 (DCTELEM *data);
+void ff_jpeg_fdct_islow (DCTELEM *data);
+void ff_fdct248_islow (DCTELEM *data);
+
+void j_rev_dct (DCTELEM *data);
+
+void ff_fdct_mmx(DCTELEM *block);
+void ff_fdct_mmx2(DCTELEM *block);
+void ff_fdct_sse2(DCTELEM *block);
+
+/* encoding scans */
+extern const uint8_t ff_alternate_horizontal_scan[64];
+extern const uint8_t ff_alternate_vertical_scan[64];
+extern const uint8_t ff_zigzag_direct[64];
+extern const uint8_t ff_zigzag248_direct[64];
+
+/* pixel operations */
+#define MAX_NEG_CROP 384
+
+/* temporary */
+extern uint32_t squareTbl[512];
+extern uint8_t cropTbl[256 + 2 * MAX_NEG_CROP];
+
+/* VP3 DSP functions */
+void vp3_dsp_init_c(void);
+void vp3_idct_put_c(int16_t *input_data, int16_t *dequant_matrix,
+    int coeff_count, uint8_t *dest, int stride);
+void vp3_idct_add_c(int16_t *input_data, int16_t *dequant_matrix,
+    int coeff_count, uint8_t *dest, int stride);
+
+void vp3_dsp_init_mmx(void);
+void vp3_idct_put_mmx(int16_t *input_data, int16_t *dequant_matrix,
+    int coeff_count, uint8_t *dest, int stride);
+void vp3_idct_add_mmx(int16_t *input_data, int16_t *dequant_matrix,
+    int coeff_count, uint8_t *dest, int stride);
+
+
+/* minimum alignment rules ;)
+if you notice errors in the alignment info, need more alignment for some asm code for some cpu,
+or need to use a function with less aligned data, then send a mail to the ffmpeg-dev list, ...
+
+!warning these alignments might not match reality (an attribute((align)) may be missing somewhere);
+i (michael) didn't check them, these are just the alignments which i think could be reached easily ...
+
+!future video codecs might need functions with less strict alignment
+*/
+
+/*
+void get_pixels_c(DCTELEM *block, const uint8_t *pixels, int line_size);
+void diff_pixels_c(DCTELEM *block, const uint8_t *s1, const uint8_t *s2, int stride);
+void put_pixels_clamped_c(const DCTELEM *block, uint8_t *pixels, int line_size);
+void add_pixels_clamped_c(const DCTELEM *block, uint8_t *pixels, int line_size);
+void clear_blocks_c(DCTELEM *blocks);
+*/
+
+/* add and put pixel (decoding) */
+// blocksizes for op_pixels_func are 8x4,8x8 16x8 16x16
+//h for op_pixels_func is limited to {width/2, width} but never larger than 16 and never smaller than 4
+typedef void (*op_pixels_func)(uint8_t *block/*align width (8 or 16)*/, const uint8_t *pixels/*align 1*/, int line_size, int h);
+typedef void (*tpel_mc_func)(uint8_t *block/*align width (8 or 16)*/, const uint8_t *pixels/*align 1*/, int line_size, int w, int h);
+typedef void (*qpel_mc_func)(uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);
+typedef void (*h264_chroma_mc_func)(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int srcStride, int h, int x, int y);
+
+#define DEF_OLD_QPEL(name)\
+void ff_put_        ## name (uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);\
+void ff_put_no_rnd_ ## name (uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);\
+void ff_avg_        ## name (uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);
+
+DEF_OLD_QPEL(qpel16_mc11_old_c)
+DEF_OLD_QPEL(qpel16_mc31_old_c)
+DEF_OLD_QPEL(qpel16_mc12_old_c)
+DEF_OLD_QPEL(qpel16_mc32_old_c)
+DEF_OLD_QPEL(qpel16_mc13_old_c)
+DEF_OLD_QPEL(qpel16_mc33_old_c)
+DEF_OLD_QPEL(qpel8_mc11_old_c)
+DEF_OLD_QPEL(qpel8_mc31_old_c)
+DEF_OLD_QPEL(qpel8_mc12_old_c)
+DEF_OLD_QPEL(qpel8_mc32_old_c)
+DEF_OLD_QPEL(qpel8_mc13_old_c)
+DEF_OLD_QPEL(qpel8_mc33_old_c)
+
+#define CALL_2X_PIXELS(a, b, n)\
+static void a(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+    b(block  , pixels  , line_size, h);\
+    b(block+n, pixels+n, line_size, h);\
+}
+
+/* motion estimation */
+// h is limited to {width/2, width, 2*width} but never larger than 16 and never smaller than 2
+// although currently h<4 is not used, as functions with width <8 are neither used nor implemented
+typedef int (*me_cmp_func)(void /*MpegEncContext*/ *s, uint8_t *blk1/*align width (8 or 16)*/, uint8_t *blk2/*align 1*/, int line_size, int h)/* __attribute__ ((const))*/;
+
+
+/**
+ * DSPContext: table of dsp function pointers plus the idct input permutation.
+ */
+typedef struct DSPContext {
+    /* pixel ops : interface with DCT */
+    void (*get_pixels)(DCTELEM *block/*align 16*/, const uint8_t *pixels/*align 8*/, int line_size);
+    void (*diff_pixels)(DCTELEM *block/*align 16*/, const uint8_t *s1/*align 8*/, const uint8_t *s2/*align 8*/, int stride);
+    void (*put_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size);
+    void (*add_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size);
+    /**
+     * translational global motion compensation.
+     */
+    void (*gmc1)(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int srcStride, int h, int x16, int y16, int rounder);
+    /**
+     * global motion compensation.
+     */
+    void (*gmc )(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int ox, int oy,
+		    int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height);
+    void (*clear_blocks)(DCTELEM *blocks/*align 16*/);
+    int (*pix_sum)(uint8_t * pix, int line_size);
+    int (*pix_norm1)(uint8_t * pix, int line_size);
+// 16x16 8x8 4x4 2x2 16x8 8x4 4x2 8x16 4x8 2x4
+    
+    me_cmp_func sad[5]; /* identical to pix_absAxA except additional void * */
+    me_cmp_func sse[5];
+    me_cmp_func hadamard8_diff[5];
+    me_cmp_func dct_sad[5];
+    me_cmp_func quant_psnr[5];
+    me_cmp_func bit[5];
+    me_cmp_func rd[5];
+    me_cmp_func vsad[5];
+    me_cmp_func vsse[5];
+
+    me_cmp_func me_pre_cmp[5];
+    me_cmp_func me_cmp[5];
+    me_cmp_func me_sub_cmp[5];
+    me_cmp_func mb_cmp[5];
+    me_cmp_func ildct_cmp[5]; //only width 16 used
+
+    /**
+     * Halfpel motion compensation with rounding (a+b+1)>>1.
+     * this is an array[4][4] of motion compensation functions for 4 
+     * horizontal blocksizes (8,16) and the 4 halfpel positions<br>
+     * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ]
+     * @param block destination where the result is stored
+     * @param pixels source
+     * @param line_size number of bytes in a horizontal line of block
+     * @param h height
+     */
+    op_pixels_func put_pixels_tab[4][4];
+
+    /**
+     * Halfpel motion compensation with rounding (a+b+1)>>1.
+     * This is an array[4][4] of motion compensation functions for 4 
+     * horizontal blocksizes (8,16) and the 4 halfpel positions<br>
+     * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ]
+     * @param block destination into which the result is averaged (a+b+1)>>1
+     * @param pixels source
+     * @param line_size number of bytes in a horizontal line of block
+     * @param h height
+     */
+    op_pixels_func avg_pixels_tab[4][4];
+
+    /**
+     * Halfpel motion compensation with no rounding (a+b)>>1.
+     * this is an array[2][4] of motion compensation functions for 2 
+     * horizontal blocksizes (8,16) and the 4 halfpel positions<br>
+     * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ]
+     * @param block destination where the result is stored
+     * @param pixels source
+     * @param line_size number of bytes in a horizontal line of block
+     * @param h height
+     */
+    op_pixels_func put_no_rnd_pixels_tab[2][4];
+
+    /**
+     * Halfpel motion compensation with no rounding (a+b)>>1.
+     * this is an array[2][4] of motion compensation functions for 2 
+     * horizontal blocksizes (8,16) and the 4 halfpel positions<br>
+     * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ]
+     * @param block destination into which the result is averaged (a+b)>>1
+     * @param pixels source
+     * @param line_size number of bytes in a horizontal line of block
+     * @param h height
+     */
+    op_pixels_func avg_no_rnd_pixels_tab[2][4];
+    
+    void (*put_no_rnd_pixels_l2[2])(uint8_t *block/*align width (8 or 16)*/, const uint8_t *a/*align 1*/, const uint8_t *b/*align 1*/, int line_size, int h);
+    
+    /**
+     * Thirdpel motion compensation with rounding (a+b+1)>>1.
+     * this is an array[11] of motion compensation functions for the 9 thirdpel positions<br>
+     * *pixels_tab[ xthirdpel + 4*ythirdpel ]
+     * @param block destination where the result is stored
+     * @param pixels source
+     * @param line_size number of bytes in a horizontal line of block
+     * @param h height
+     */
+    tpel_mc_func put_tpel_pixels_tab[11]; //FIXME individual func ptr per width?
+    tpel_mc_func avg_tpel_pixels_tab[11]; //FIXME individual func ptr per width?
+
+    qpel_mc_func put_qpel_pixels_tab[2][16];
+    qpel_mc_func avg_qpel_pixels_tab[2][16];
+    qpel_mc_func put_no_rnd_qpel_pixels_tab[2][16];
+    qpel_mc_func avg_no_rnd_qpel_pixels_tab[2][16];
+    qpel_mc_func put_mspel_pixels_tab[8];
+    
+    /**
+     * h264 Chroma MC
+     */
+    h264_chroma_mc_func put_h264_chroma_pixels_tab[3];
+    h264_chroma_mc_func avg_h264_chroma_pixels_tab[3];
+
+    qpel_mc_func put_h264_qpel_pixels_tab[3][16];
+    qpel_mc_func avg_h264_qpel_pixels_tab[3][16];
+    
+    me_cmp_func pix_abs[2][4];
+    
+    /* huffyuv specific */
+    void (*add_bytes)(uint8_t *dst/*align 16*/, uint8_t *src/*align 16*/, int w);
+    void (*diff_bytes)(uint8_t *dst/*align 16*/, uint8_t *src1/*align 16*/, uint8_t *src2/*align 1*/,int w);
+    /**
+     * subtract huffyuv's variant of median prediction
+     * note, this might read from src1[-1], src2[-1]
+     */
+    void (*sub_hfyu_median_prediction)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w, int *left, int *left_top);
+    void (*bswap_buf)(uint32_t *dst, uint32_t *src, int w);
+    
+    void (*h263_v_loop_filter)(uint8_t *src, int stride, int qscale);
+    void (*h263_h_loop_filter)(uint8_t *src, int stride, int qscale);
+
+    /* (I)DCT */
+    void (*fdct)(DCTELEM *block/* align 16*/);
+    void (*fdct248)(DCTELEM *block/* align 16*/);
+    
+    /* IDCT really*/
+    void (*idct)(DCTELEM *block/* align 16*/);
+    
+    /**
+     * block -> idct -> clip to unsigned 8 bit -> dest.
+     * (-1392, 0, 0, ...) -> idct -> (-174, -174, ...) -> put -> (0, 0, ...)
+     * @param line_size size in bytes of a horizontal line of dest
+     */
+    void (*idct_put)(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
+    
+    /**
+     * block -> idct -> add dest -> clip to unsigned 8 bit -> dest.
+     * @param line_size size in bytes of a horizontal line of dest
+     */
+    void (*idct_add)(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
+    
+    /**
+     * idct input permutation.
+     * several optimized IDCTs need a permuted input (relative to the normal order of the reference
+     * IDCT)
+     * this permutation must be performed before the idct_put/add, note, normally this can be merged
+     * with the zigzag/alternate scan<br>
+     * an example to avoid confusion:
+     * - (->decode coeffs -> zigzag reorder -> dequant -> reference idct ->...)
+     * - (x -> reference dct -> reference idct -> x)
+     * - (x -> reference dct -> simple_mmx_perm = idct_permutation -> simple_idct_mmx -> x)
+     * - (->decode coeffs -> zigzag reorder -> simple_mmx_perm -> dequant -> simple_idct_mmx ->...)
+     */
+    uint8_t idct_permutation[64];
+    int idct_permutation_type;
+#define FF_NO_IDCT_PERM 1
+#define FF_LIBMPEG2_IDCT_PERM 2
+#define FF_SIMPLE_IDCT_PERM 3
+#define FF_TRANSPOSE_IDCT_PERM 4
+
+    int (*try_8x8basis)(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale);
+    void (*add_8x8basis)(int16_t rem[64], int16_t basis[64], int scale);
+#define BASIS_SHIFT 16
+#define RECON_SHIFT 6
+
+    /**
+     * This function handles any initialization for the VP3 DSP functions.
+     */
+    void (*vp3_dsp_init)(void);
+
+    /** 
+     * This function is responsible for taking a block of zigzag'd,
+     * quantized DCT coefficients, reconstructing the original block of
+     * samples, and placing it into the output.
+     * @param input_data 64 zigzag'd, quantized DCT coefficients
+     * @param dequant_matrix 64 zigzag'd quantizer coefficients
+     * @param coeff_count index of the last coefficient
+     * @param dest the final output location where the transformed samples
+     * are to be placed
+     * @param stride the width in 8-bit samples of a line on this plane
+     */
+    void (*vp3_idct_put)(int16_t *input_data, int16_t *dequant_matrix,
+        int coeff_count, uint8_t *dest, int stride);
+
+    /** 
+     * This function is responsible for taking a block of zigzag'd,
+     * quantized DCT coefficients, reconstructing the original block of
+     * samples, and adding the transformed samples to an existing block of
+     * samples in the output.
+     * @param input_data 64 zigzag'd, quantized DCT coefficients
+     * @param dequant_matrix 64 zigzag'd quantizer coefficients
+     * @param coeff_count index of the last coefficient
+     * @param dest the final output location where the transformed samples
+     * are to be placed
+     * @param stride the width in 8-bit samples of a line on this plane
+     */
+    void (*vp3_idct_add)(int16_t *input_data, int16_t *dequant_matrix,
+        int coeff_count, uint8_t *dest, int stride);
+
+} DSPContext;
+
+void dsputil_static_init(void);
+//void dsputil_init(DSPContext* p, AVCodecContext *avctx);
+
+/**
+ * permute block according to permutation.
+ * @param last last non zero element in scantable order
+ */
+void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last);
+
+void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type);
+
+#define	BYTE_VEC32(c)	((c)*0x01010101UL)
+
+static inline uint32_t rnd_avg32(uint32_t a, uint32_t b)
+{   /* average of the four packed bytes of a and b, each rounded up; the mask keeps shifted bits inside their byte */
+    return (a | b) - (((a ^ b) & ~BYTE_VEC32(0x01)) >> 1);
+}
+
+static inline uint32_t no_rnd_avg32(uint32_t a, uint32_t b)
+{   /* average of the four packed bytes of a and b, each rounded down; the mask keeps shifted bits inside their byte */
+    return (a & b) + (((a ^ b) & ~BYTE_VEC32(0x01)) >> 1);
+}
+
+/**
+ * Empty mmx state.
+ * this must be called between any dsp function and float/double code.
+ * for example sin(); dsp->idct_put(); emms_c(); cos()
+ */
+#define emms_c()
+
+/* should be defined by architectures supporting
+   one or more MultiMedia extension */
+int mm_support(void);
+
+#if defined(HAVE_MMX)
+
+#undef emms_c
+
+#define MM_MMX    0x0001 /* standard MMX */
+#define MM_3DNOW  0x0004 /* AMD 3DNOW */
+#define MM_MMXEXT 0x0002 /* SSE integer functions or AMD MMX ext */
+#define MM_SSE    0x0008 /* SSE functions */
+#define MM_SSE2   0x0010 /* PIV SSE2 functions */
+
+extern int mm_flags;
+
+void add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size);
+void put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size);
+
+static inline void emms(void)
+{   /* issues the x86 "emms" instruction; the "memory" clobber stops the compiler moving memory accesses across it */
+    __asm __volatile ("emms;":::"memory");
+}
+
+
+#define emms_c() \
+{\
+    if (mm_flags & MM_MMX)\
+        emms();\
+}
+
+#define __align8 __attribute__ ((aligned (8)))
+
+void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx);
+void dsputil_init_pix_mmx(DSPContext* c, AVCodecContext *avctx);
+
+#elif defined(ARCH_ARMV4L)
+
+/* This is to use 4 bytes read to the IDCT pointers for some 'zero'
+   line optimizations */
+#define __align8 __attribute__ ((aligned (4)))
+
+void dsputil_init_armv4l(DSPContext* c, AVCodecContext *avctx);
+
+#elif defined(HAVE_MLIB)
+
+/* SPARC/VIS IDCT needs 8-byte aligned DCT blocks */
+#define __align8 __attribute__ ((aligned (8)))
+
+void dsputil_init_mlib(DSPContext* c, AVCodecContext *avctx);
+
+#elif defined(ARCH_ALPHA)
+
+#define __align8 __attribute__ ((aligned (8)))
+
+void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx);
+
+#elif defined(ARCH_POWERPC)
+
+#define MM_ALTIVEC    0x0001 /* standard AltiVec */
+
+extern int mm_flags;
+
+#if defined(HAVE_ALTIVEC) && !defined(CONFIG_DARWIN)
+#define pixel altivec_pixel
+#include <altivec.h>
+#undef pixel
+#endif
+
+#define __align8 __attribute__ ((aligned (16)))
+
+void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx);
+
+#elif defined(HAVE_MMI)
+
+#define __align8 __attribute__ ((aligned (16)))
+
+void dsputil_init_mmi(DSPContext* c, AVCodecContext *avctx);
+
+#elif defined(ARCH_SH4)
+
+#define __align8 __attribute__ ((aligned (8)))
+
+void dsputil_init_sh4(DSPContext* c, AVCodecContext *avctx);
+
+#else
+
+#define __align8
+
+#endif
+
+#ifdef __GNUC__ /* access through packed one-member structs; presumably so gcc emits unaligned-safe loads/stores */
+
+struct unaligned_64 { uint64_t l; } __attribute__((packed));
+struct unaligned_32 { uint32_t l; } __attribute__((packed));
+struct unaligned_16 { uint16_t l; } __attribute__((packed));
+
+#define LD16(a) (((const struct unaligned_16 *) (a))->l)
+#define LD32(a) (((const struct unaligned_32 *) (a))->l)
+#define LD64(a) (((const struct unaligned_64 *) (a))->l)
+
+#define ST32(a, b) (((struct unaligned_32 *) (a))->l) = (b)
+
+#else /* !__GNUC__: plain pointer casts, nothing special is done for unaligned addresses */
+
+#define LD16(a) (*((uint16_t*)(a)))
+#define LD32(a) (*((uint32_t*)(a)))
+#define LD64(a) (*((uint64_t*)(a)))
+
+#define ST32(a, b) *((uint32_t*)(a)) = (b)
+
+#endif /* !__GNUC__ */
+
+/* PSNR */
+//void get_psnr(uint8_t *orig_image[3], uint8_t *coded_image[3],
+//              int orig_linesize[3], int coded_linesize,
+//              AVCodecContext *avctx);
+
+/* FFT computation */
+
+/* NOTE: soon integer code will be added, so you must use the
+   FFTSample type */
+typedef float FFTSample;
+
+typedef struct FFTComplex {
+    FFTSample re, im;
+} FFTComplex;
+
+typedef struct FFTContext {
+    int nbits;
+    int inverse;
+    uint16_t *revtab;
+    FFTComplex *exptab;
+    FFTComplex *exptab1; /* only used by SSE code */
+    void (*fft_calc)(struct FFTContext *s, FFTComplex *z);
+} FFTContext;
+
+int fft_init(FFTContext *s, int nbits, int inverse);
+void fft_permute(FFTContext *s, FFTComplex *z);
+void fft_calc_c(FFTContext *s, FFTComplex *z);
+void fft_calc_sse(FFTContext *s, FFTComplex *z);
+void fft_calc_altivec(FFTContext *s, FFTComplex *z);
+
+static inline void fft_calc(FFTContext *s, FFTComplex *z)
+{   /* dispatch through the fft_calc pointer stored in the context */
+    s->fft_calc(s, z);
+}
+void fft_end(FFTContext *s);
+
+/* MDCT computation */
+
+typedef struct MDCTContext {
+    int n;  /* size of MDCT (i.e. number of input data * 2) */
+    int nbits; /* n = 2^nbits */
+    /* pre/post rotation tables */
+    FFTSample *tcos;
+    FFTSample *tsin;
+    FFTContext fft;
+} MDCTContext;
+
+int ff_mdct_init(MDCTContext *s, int nbits, int inverse);
+void ff_imdct_calc(MDCTContext *s, FFTSample *output,
+                const FFTSample *input, FFTSample *tmp);
+void ff_mdct_calc(MDCTContext *s, FFTSample *out,
+               const FFTSample *input, FFTSample *tmp);
+void ff_mdct_end(MDCTContext *s);
+
+#define WARPER8_16(name8, name16) /* defines name16 as the sum of name8 at byte offsets 0 and 8 */\
+static int name16(void /*MpegEncContext*/ *s, uint8_t *dst, uint8_t *src, int stride, int h){\
+    return name8(s, dst           , src           , stride, h)\
+          +name8(s, dst+8         , src+8         , stride, h);\
+}
+
+#define WARPER8_16_SQ(name8, name16) /* defines name16 from name8 calls with h fixed to 8: two side by side, four when h==16 */\
+static int name16(void /*MpegEncContext*/ *s, uint8_t *dst, uint8_t *src, int stride, int h){\
+    int score=0;\
+    score +=name8(s, dst           , src           , stride, 8);\
+    score +=name8(s, dst+8         , src+8         , stride, 8);\
+    if(h==16){\
+        dst += 8*stride;\
+        src += 8*stride;\
+        score +=name8(s, dst           , src           , stride, 8);\
+        score +=name8(s, dst+8         , src+8         , stride, 8);\
+    }\
+    return score;\
+}
+
+#ifndef HAVE_LRINTF
+/* XXX: add ISOC specific test to avoid specific BSD testing. */
+/* better than nothing implementation: rounds with rint() and narrows the result to int. */
+/* btw, rintf() is existing on fbsd too -- alex */
+static inline long int lrintf(float x)
+{
+#ifdef CONFIG_WIN32
+    /* XXX: incorrect (truncates instead of rounding), but make it compile */
+    return (int)(x);
+#else
+    return (int)(rint(x));
+#endif
+}
+#endif
+
+#endif
diff --git a/gst/ffmpegcolorspace/gstffmpeg.c b/gst/ffmpegcolorspace/gstffmpeg.c
new file mode 100644
index 0000000000..ba5bca0f09
--- /dev/null
+++ b/gst/ffmpegcolorspace/gstffmpeg.c
@@ -0,0 +1,46 @@
+/* GStreamer
+ * Copyright (C) <1999> Erik Walthinsen <omega@cse.ogi.edu>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+/* First, include the header file for the plugin, to bring in the
+ * object definition and other useful things.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+#include <gst/gst.h>
+#include <avcodec.h>
+
+extern gboolean gst_ffmpegcolorspace_register (GstPlugin * plugin);
+
+static gboolean
+plugin_init (GstPlugin * plugin)
+{
+  /* Plugin entry point: registers the ffmpegcolorspace element.  The result
+   * of the registration is returned so that a failed element registration
+   * makes the plugin load fail instead of being reported as a success. */
+  return gst_ffmpegcolorspace_register (plugin);
+}
+
+GST_PLUGIN_DEFINE (GST_VERSION_MAJOR,
+    GST_VERSION_MINOR,
+    "ffmpegcolorspace",
+    "colorspace conversion copied from FFMpeg",
+    plugin_init,
+    FFMPEG_VERSION, "LGPL", "FFMpeg", "http://ffmpeg.sourceforge.net/")
diff --git a/gst/ffmpegcolorspace/gstffmpegcodecmap.c b/gst/ffmpegcolorspace/gstffmpegcodecmap.c
new file mode 100644
index 0000000000..9eb886aa92
--- /dev/null
+++ b/gst/ffmpegcolorspace/gstffmpegcodecmap.c
@@ -0,0 +1,261 @@
+/* GStreamer
+ * Copyright (C) <1999> Erik Walthinsen <omega@cse.ogi.edu>
+ * This file:
+ * Copyright (c) 2002-2003 Ronald Bultje <rbultje@ronald.bitfreak.net>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+#include <gst/gst.h>
+#include <avcodec.h>
+#include <string.h>
+
+#include "gstffmpegcodecmap.h"
+
+/* this macro creates caps of the given mimetype with unfixed
+ * width/height/framerate ranges plus any extra fields given in props.
+ *
+ * See below for why we use this.
+ */
+
+#define GST_FF_VID_CAPS_NEW(mimetype, props...)			\
+    gst_caps_new_simple (mimetype,			      	\
+	"width",     GST_TYPE_INT_RANGE, 16, 4096,      	\
+	"height",    GST_TYPE_INT_RANGE, 16, 4096,	      	\
+	"framerate", GST_TYPE_DOUBLE_RANGE, 0., G_MAXDOUBLE,	\
+	##props, NULL)
+
+/* Build the (unfixed) GstCaps describing a single FFmpeg pixel format.
+ * RGB formats map to video/x-raw-rgb (bpp, depth, masks, endianness),
+ * YUV formats to video/x-raw-yuv with a fourcc "format" field; size and
+ * framerate are always left as ranges by GST_FF_VID_CAPS_NEW.
+ * Returns NULL, after a warning, for formats that have no mapping.
+ */
+
+static GstCaps *
+gst_ffmpeg_pixfmt_to_caps (enum PixelFormat pix_fmt)
+{
+  GstCaps *caps = NULL;
+
+  int bpp = 0, depth = 0, endianness = 0;
+  gulong g_mask = 0, r_mask = 0, b_mask = 0;
+  guint32 fmt = 0;
+
+  switch (pix_fmt) {
+    case PIX_FMT_YUV420P:
+      fmt = GST_MAKE_FOURCC ('I', '4', '2', '0');
+      break;
+    case PIX_FMT_YUV422:
+      fmt = GST_MAKE_FOURCC ('Y', 'U', 'Y', '2');
+      break;
+    case PIX_FMT_RGB24:
+      bpp = depth = 24;
+      endianness = G_BIG_ENDIAN;
+      r_mask = 0xff0000;
+      g_mask = 0x00ff00;
+      b_mask = 0x0000ff;
+      break;
+    case PIX_FMT_BGR24:
+      bpp = depth = 24;
+      endianness = G_BIG_ENDIAN;
+      r_mask = 0x0000ff;
+      g_mask = 0x00ff00;
+      b_mask = 0xff0000;
+      break;
+    case PIX_FMT_YUV422P:
+      fmt = GST_MAKE_FOURCC ('Y', '4', '2', 'B');
+      break;
+    case PIX_FMT_YUV444P:
+      /* no fourcc is mapped for this format yet, so no caps are produced */
+      break;
+    case PIX_FMT_RGBA32:
+      bpp = 32;
+      depth = 24;
+      endianness = G_BIG_ENDIAN;
+#if (G_BYTE_ORDER == G_BIG_ENDIAN)
+      r_mask = 0x00ff0000;
+      g_mask = 0x0000ff00;
+      b_mask = 0x000000ff;
+#else
+      r_mask = 0x0000ff00;
+      g_mask = 0x00ff0000;
+      b_mask = 0xff000000;
+#endif
+      break;
+    case PIX_FMT_YUV410P:
+      fmt = GST_MAKE_FOURCC ('Y', 'U', 'V', '9');
+      break;
+    case PIX_FMT_YUV411P:
+      fmt = GST_MAKE_FOURCC ('Y', '4', '1', 'B');
+      break;
+    case PIX_FMT_RGB565:
+      bpp = depth = 16;
+      endianness = G_BYTE_ORDER;
+      r_mask = 0xf800;
+      g_mask = 0x07e0;
+      b_mask = 0x001f;
+      break;
+    case PIX_FMT_RGB555:
+      bpp = 16;
+      depth = 15;
+      endianness = G_BYTE_ORDER;
+      r_mask = 0x7c00;
+      g_mask = 0x03e0;
+      b_mask = 0x001f;
+      break;
+    default:
+      /* give up ... */
+      break;
+  }
+  /* the masks are gulong: cast so the varargs really carry G_TYPE_INT values */
+  if (bpp != 0) {
+    caps = GST_FF_VID_CAPS_NEW ("video/x-raw-rgb",
+	"bpp", G_TYPE_INT, bpp,
+	"depth", G_TYPE_INT, depth,
+	"red_mask", G_TYPE_INT, (gint) r_mask,
+	"green_mask", G_TYPE_INT, (gint) g_mask,
+	"blue_mask", G_TYPE_INT, (gint) b_mask,
+	"endianness", G_TYPE_INT, endianness, NULL);
+  } else if (fmt) {
+    caps = GST_FF_VID_CAPS_NEW ("video/x-raw-yuv",
+	"format", GST_TYPE_FOURCC, fmt, NULL);
+  }
+
+  if (caps != NULL) {
+    char *str = gst_caps_to_string (caps);
+
+    GST_DEBUG ("caps for pix_fmt=%d: %s", pix_fmt, str);
+    g_free (str);
+  } else {
+    GST_WARNING ("No caps found for pix_fmt=%d", pix_fmt);
+  }
+
+  return caps;
+}
+
+/* Build the caps covering every pixel format we can describe: the
+ * concatenation of the caps of each format from 0 up to (not including)
+ * PIX_FMT_NB for which gst_ffmpeg_pixfmt_to_caps() has a mapping.
+ *
+ * The returned caps are newly created.
+ */
+
+GstCaps *
+gst_ffmpeg_pix_fmt_to_caps (void)
+{
+  GstCaps *all_caps = gst_caps_new_empty ();
+  enum PixelFormat fmt;
+
+  for (fmt = 0; fmt < PIX_FMT_NB; fmt++) {
+    GstCaps *fmt_caps = gst_ffmpeg_pixfmt_to_caps (fmt);
+    /* formats without a caps mapping yield NULL and are skipped */
+    if (fmt_caps == NULL)
+      continue;
+    gst_caps_append (all_caps, fmt_caps);
+  }
+
+  return all_caps;
+}
+
+/* Map raw video caps holding exactly one structure to an FFmpeg pixel
+ * format.  width, height and framerate are read from the fields of the
+ * same name (the getters' results are not checked).
+ * Returns PIX_FMT_NB when the caps match no known format.
+ */
+
+enum PixelFormat
+gst_ffmpeg_caps_to_pix_fmt (const GstCaps * caps,
+    int *width, int *height, double *framerate)
+{
+  GstStructure *structure;
+  enum PixelFormat pix_fmt = PIX_FMT_NB;
+
+  g_return_val_if_fail (gst_caps_get_size (caps) == 1, PIX_FMT_NB);
+  structure = gst_caps_get_structure (caps, 0);
+
+  gst_structure_get_int (structure, "width", width);
+  gst_structure_get_int (structure, "height", height);
+  gst_structure_get_double (structure, "framerate", framerate);
+
+  if (strcmp (gst_structure_get_name (structure), "video/x-raw-yuv") == 0) {
+    guint32 fourcc;
+
+    if (gst_structure_get_fourcc (structure, "format", &fourcc)) {
+      switch (fourcc) {
+	case GST_MAKE_FOURCC ('Y', 'U', 'Y', '2'):
+	  pix_fmt = PIX_FMT_YUV422;
+	  break;
+	case GST_MAKE_FOURCC ('I', '4', '2', '0'):
+	  pix_fmt = PIX_FMT_YUV420P;
+	  break;
+	case GST_MAKE_FOURCC ('Y', '4', '1', 'B'):
+	  pix_fmt = PIX_FMT_YUV411P;
+	  break;
+	case GST_MAKE_FOURCC ('Y', '4', '2', 'B'):
+	  pix_fmt = PIX_FMT_YUV422P;
+	  break;
+	case GST_MAKE_FOURCC ('Y', 'U', 'V', '9'):
+	  pix_fmt = PIX_FMT_YUV410P;
+	  break;
+#if 0
+	case FIXME:
+	  pix_fmt = PIX_FMT_YUV444P;
+	  break;
+#endif
+      }
+    }
+  } else if (strcmp (gst_structure_get_name (structure),
+	  "video/x-raw-rgb") == 0) {
+    gint bpp = 0, depth = 0, rmask = 0, endianness = 0;
+
+    if (gst_structure_get_int (structure, "bpp", &bpp) &&
+	gst_structure_get_int (structure, "endianness", &endianness) &&
+	gst_structure_get_int (structure, "red_mask", &rmask)) {
+      switch (bpp) {
+	case 32:
+#if (G_BYTE_ORDER == G_BIG_ENDIAN)
+	  if (rmask == 0x00ff0000)
+#else
+	  if (rmask == 0x0000ff00)
+#endif
+	    pix_fmt = PIX_FMT_RGBA32;
+	  break;
+	case 24:
+	  if (rmask == 0x0000FF)
+	    pix_fmt = PIX_FMT_BGR24;
+	  else
+	    pix_fmt = PIX_FMT_RGB24;
+	  break;
+	case 16:
+	case 15:
+	  /* RGB555 is advertised as bpp=16/depth=15, so the depth decides */
+	  gst_structure_get_int (structure, "depth", &depth);
+	  if (endianness == G_BYTE_ORDER)
+	    pix_fmt = (bpp == 15 || depth == 15) ?
+		PIX_FMT_RGB555 : PIX_FMT_RGB565;
+	  break;
+	default:
+	  /* nothing */
+	  break;
+      }
+    }
+  }
+
+  return pix_fmt;
+}
diff --git a/gst/ffmpegcolorspace/gstffmpegcodecmap.h b/gst/ffmpegcolorspace/gstffmpegcodecmap.h
new file mode 100644
index 0000000000..48312d63ea
--- /dev/null
+++ b/gst/ffmpegcolorspace/gstffmpegcodecmap.h
@@ -0,0 +1,38 @@
+/* GStreamer
+ * Copyright (C) <1999> Erik Walthinsen <omega@cse.ogi.edu>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __GST_FFMPEG_CODECMAP_H__
+#define __GST_FFMPEG_CODECMAP_H__
+
+#include <avcodec.h>
+#include <gst/gst.h>
+
+/* Template caps */
+
+GstCaps *
+gst_ffmpeg_pix_fmt_to_caps (void);
+
+/* Dissect a GstCaps */
+
+enum PixelFormat
+gst_ffmpeg_caps_to_pix_fmt (const GstCaps *caps,
+			    int *width, int *height,
+			    double *fps);
+
+#endif /* __GST_FFMPEG_CODECMAP_H__ */
diff --git a/gst/ffmpegcolorspace/gstffmpegcolorspace.c b/gst/ffmpegcolorspace/gstffmpegcolorspace.c
new file mode 100644
index 0000000000..6ddd8d8001
--- /dev/null
+++ b/gst/ffmpegcolorspace/gstffmpegcolorspace.c
@@ -0,0 +1,409 @@
+/* GStreamer
+ * Copyright (C) <1999> Erik Walthinsen <omega@cse.ogi.edu>
+ * This file:
+ * Copyright (C) 2003 Ronald Bultje <rbultje@ronald.bitfreak.net>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <gst/gst.h>
+#include <avcodec.h>
+
+#include "gstffmpegcodecmap.h"
+
+/* GObject boilerplate: type id, instance/class casts and type checks. */
+#define GST_TYPE_FFMPEGCOLORSPACE (gst_ffmpegcolorspace_get_type())
+#define GST_FFMPEGCOLORSPACE(obj) \
+  (G_TYPE_CHECK_INSTANCE_CAST((obj),GST_TYPE_FFMPEGCOLORSPACE,GstFFMpegColorspace))
+#define GST_FFMPEGCOLORSPACE_CLASS(klass) \
+  (G_TYPE_CHECK_CLASS_CAST((klass),GST_TYPE_FFMPEGCOLORSPACE,GstFFMpegColorspaceClass))
+#define GST_IS_FFMPEGCOLORSPACE(obj) \
+  (G_TYPE_CHECK_INSTANCE_TYPE((obj),GST_TYPE_FFMPEGCOLORSPACE))
+#define GST_IS_FFMPEGCOLORSPACE_CLASS(klass) \
+  (G_TYPE_CHECK_CLASS_TYPE((klass),GST_TYPE_FFMPEGCOLORSPACE))
+
+typedef struct _GstFFMpegColorspace GstFFMpegColorspace;
+typedef struct _GstFFMpegColorspaceClass GstFFMpegColorspaceClass;
+
+struct _GstFFMpegColorspace
+{
+  GstElement element;		/* parent instance */
+
+  GstPad *sinkpad, *srcpad;	/* created in gst_ffmpegcolorspace_init() */
+
+  gint width, height;		/* frame size, stored by the pad link function */
+  gfloat fps;			/* not referenced by the code in this file */
+  enum PixelFormat from_pixfmt, to_pixfmt;	/* sink / src format; PIX_FMT_NB while unset */
+  AVPicture from_frame, to_frame;	/* filled per buffer in the chain function */
+  GstCaps *sinkcaps;		/* not referenced by the code in this file */
+};
+
+struct _GstFFMpegColorspaceClass
+{
+  GstElementClass parent_class;	/* parent class structure */
+};
+
+/* elementfactory information */
+static GstElementDetails ffmpegcolorspace_details = {
+  "FFMPEG Colorspace converter",
+  "Filter/Converter/Video",
+  "Converts video from one colorspace to another",
+  "Ronald Bultje <rbultje@ronald.bitfreak.net>",
+};
+
+
+/* Signals and args */
+enum
+{
+  /* FILL ME */
+  LAST_SIGNAL
+};
+
+enum
+{
+  ARG_0,
+};
+
+static GType gst_ffmpegcolorspace_get_type (void);
+
+static void gst_ffmpegcolorspace_base_init (GstFFMpegColorspaceClass * klass);
+static void gst_ffmpegcolorspace_class_init (GstFFMpegColorspaceClass * klass);
+static void gst_ffmpegcolorspace_init (GstFFMpegColorspace * space);
+
+static void gst_ffmpegcolorspace_set_property (GObject * object,
+    guint prop_id, const GValue * value, GParamSpec * pspec);
+static void gst_ffmpegcolorspace_get_property (GObject * object,
+    guint prop_id, GValue * value, GParamSpec * pspec);
+
+static GstPadLinkReturn
+gst_ffmpegcolorspace_pad_link (GstPad * pad, const GstCaps * caps);
+
+static void gst_ffmpegcolorspace_chain (GstPad * pad, GstData * data);
+static GstElementStateReturn
+gst_ffmpegcolorspace_change_state (GstElement * element);
+
+static GstPadTemplate *srctempl, *sinktempl;
+static GstElementClass *parent_class = NULL;
+
+/*static guint gst_ffmpegcolorspace_signals[LAST_SIGNAL] = { 0 }; */
+
+
+static GstCaps *
+gst_ffmpegcolorspace_caps_remove_format_info (GstCaps * caps)
+{
+  int i;
+  GstStructure *structure;
+  GstCaps *rgbcaps;
+  /* Strip every colorspace-specific field; the result lists each remaining structure as yuv and as rgb. */
+  for (i = 0; i < gst_caps_get_size (caps); i++) {
+    structure = gst_caps_get_structure (caps, i);
+
+    gst_structure_set_name (structure, "video/x-raw-yuv");
+    gst_structure_remove_field (structure, "format");
+    gst_structure_remove_field (structure, "endianness");
+    gst_structure_remove_field (structure, "depth");
+    gst_structure_remove_field (structure, "bpp");
+    gst_structure_remove_field (structure, "red_mask");
+    gst_structure_remove_field (structure, "green_mask");
+    gst_structure_remove_field (structure, "blue_mask");
+  }
+  /* the caps passed in are consumed: they are freed here and new caps are returned */
+  rgbcaps = gst_caps_simplify (caps);
+  gst_caps_free (caps);
+  caps = gst_caps_copy (rgbcaps);
+  /* caps keeps the yuv-named copy; the simplified set itself is renamed to rgb */
+  for (i = 0; i < gst_caps_get_size (rgbcaps); i++) {
+    structure = gst_caps_get_structure (rgbcaps, i);
+
+    gst_structure_set_name (structure, "video/x-raw-rgb");
+  }
+
+  gst_caps_append (caps, rgbcaps);
+
+  return caps;
+}
+
+static GstCaps *
+gst_ffmpegcolorspace_getcaps (GstPad * pad)
+{
+  GstFFMpegColorspace *self = GST_FFMPEGCOLORSPACE (gst_pad_get_parent (pad));
+  GstPad *peer_side;
+  GstCaps *generic, *result;
+
+  /* getcaps handler shared by both pads: whatever the opposite pad allows,
+   * with the format details removed, limited to this pad's template */
+  if (pad == self->srcpad)
+    peer_side = self->sinkpad;
+  else
+    peer_side = self->srcpad;
+
+  generic = gst_ffmpegcolorspace_caps_remove_format_info
+      (gst_pad_get_allowed_caps (peer_side));
+
+  result = gst_caps_intersect (generic, gst_pad_get_pad_template_caps (pad));
+  gst_caps_free (generic);
+  return result;
+}
+
+static GstPadLinkReturn
+gst_ffmpegcolorspace_pad_link (GstPad * pad, const GstCaps * caps)
+{
+  GstFFMpegColorspace *space;
+  const GstCaps *othercaps;
+  GstPad *otherpad;
+  GstPadLinkReturn ret;
+  int height, width;
+  double framerate;
+  enum PixelFormat pix_fmt;
+
+  space = GST_FFMPEGCOLORSPACE (gst_pad_get_parent (pad));
+
+  otherpad = (pad == space->srcpad) ? space->sinkpad : space->srcpad;
+
+  /* FIXME attempt and/or check for passthru */
+
+  /* Link function of both pads: map the proposed caps to a pixel format
+   * and refuse the link when there is no such format */
+  pix_fmt = gst_ffmpeg_caps_to_pix_fmt (caps, &width, &height, &framerate);
+  if (pix_fmt == PIX_FMT_NB) {
+    /* we disable ourself here */
+    if (pad == space->srcpad) {
+      space->to_pixfmt = PIX_FMT_NB;
+    } else {
+      space->from_pixfmt = PIX_FMT_NB;
+    }
+
+    return GST_PAD_LINK_REFUSED;
+  }
+
+  /* set the size on the otherpad */
+  othercaps = gst_pad_get_negotiated_caps (otherpad);
+  if (othercaps) {
+    GstCaps *newcaps = gst_caps_copy (othercaps);
+    gst_caps_set_simple (newcaps,
+	"width", G_TYPE_INT, width,
+	"height", G_TYPE_INT, height,
+	"framerate", G_TYPE_DOUBLE, framerate, NULL);
+    ret = gst_pad_try_set_caps (otherpad, newcaps);
+    /* the copy is ours: release it whether or not the other side accepted */
+    gst_caps_free (newcaps);
+    if (GST_PAD_LINK_FAILED (ret))
+      return ret;
+  }
+
+  if (pad == space->srcpad) {
+    space->to_pixfmt = pix_fmt;
+  } else {
+    space->from_pixfmt = pix_fmt;
+  }
+
+  space->width = width;
+  space->height = height;
+
+  return GST_PAD_LINK_OK;
+}
+
+static GType
+gst_ffmpegcolorspace_get_type (void)
+{
+  static GType ffmpegcolorspace_type = 0;
+  /* the type is registered on the first call and the id is cached afterwards */
+  if (!ffmpegcolorspace_type) {
+    static const GTypeInfo ffmpegcolorspace_info = {
+      sizeof (GstFFMpegColorspaceClass),
+      (GBaseInitFunc) gst_ffmpegcolorspace_base_init,
+      NULL,			/* base_finalize */
+      (GClassInitFunc) gst_ffmpegcolorspace_class_init,
+      NULL,			/* class_finalize */
+      NULL,			/* class_data */
+      sizeof (GstFFMpegColorspace),
+      0,			/* n_preallocs */
+      (GInstanceInitFunc) gst_ffmpegcolorspace_init,
+    };
+
+    ffmpegcolorspace_type = g_type_register_static (GST_TYPE_ELEMENT,
+	"GstFFMpegColorspace", &ffmpegcolorspace_info, 0);
+  }
+
+  return ffmpegcolorspace_type;
+}
+
+static void
+gst_ffmpegcolorspace_base_init (GstFFMpegColorspaceClass * klass)
+{
+  GstElementClass *element_class = GST_ELEMENT_CLASS (klass);
+  /* srctempl and sinktempl are created in gst_ffmpegcolorspace_register() */
+  gst_element_class_add_pad_template (element_class, srctempl);
+  gst_element_class_add_pad_template (element_class, sinktempl);
+  gst_element_class_set_details (element_class, &ffmpegcolorspace_details);
+}
+
+static void
+gst_ffmpegcolorspace_class_init (GstFFMpegColorspaceClass * klass)
+{
+  GObjectClass *object_klass = (GObjectClass *) klass;
+  GstElementClass *element_klass = (GstElementClass *) klass;
+
+  /* Class initialiser: remember the parent class and hook up our
+   * implementations of the virtual methods. */
+  parent_class = g_type_class_ref (GST_TYPE_ELEMENT);
+
+  /* property access goes through the GObject class ... */
+  object_klass->set_property = gst_ffmpegcolorspace_set_property;
+  object_klass->get_property = gst_ffmpegcolorspace_get_property;
+  /* ... state changes through the GstElement class */
+  element_klass->change_state = gst_ffmpegcolorspace_change_state;
+}
+
+static void
+gst_ffmpegcolorspace_init (GstFFMpegColorspace * space)
+{
+  space->sinkpad = gst_pad_new_from_template (sinktempl, "sink");
+  gst_pad_set_link_function (space->sinkpad, gst_ffmpegcolorspace_pad_link);
+  gst_pad_set_getcaps_function (space->sinkpad, gst_ffmpegcolorspace_getcaps);
+  gst_pad_set_chain_function (space->sinkpad, gst_ffmpegcolorspace_chain);
+  gst_element_add_pad (GST_ELEMENT (space), space->sinkpad);
+  /* the src pad shares the link/getcaps handlers but gets no chain function */
+  space->srcpad = gst_pad_new_from_template (srctempl, "src");
+  gst_element_add_pad (GST_ELEMENT (space), space->srcpad);
+  gst_pad_set_link_function (space->srcpad, gst_ffmpegcolorspace_pad_link);
+  gst_pad_set_getcaps_function (space->srcpad, gst_ffmpegcolorspace_getcaps);
+  /* PIX_FMT_NB marks a side whose format is not set; the chain function checks for it */
+  space->from_pixfmt = space->to_pixfmt = PIX_FMT_NB;
+}
+
+static void
+gst_ffmpegcolorspace_chain (GstPad * pad, GstData * data)
+{
+  GstBuffer *inbuf = GST_BUFFER (data);
+  GstFFMpegColorspace *space;
+  GstBuffer *outbuf = NULL;
+  /* Chain function of the sink pad: convert one buffer and push the result on the src pad. */
+  g_return_if_fail (pad != NULL);
+  g_return_if_fail (GST_IS_PAD (pad));
+  g_return_if_fail (inbuf != NULL);
+
+  space = GST_FFMPEGCOLORSPACE (gst_pad_get_parent (pad));
+
+  g_return_if_fail (space != NULL);
+  g_return_if_fail (GST_IS_FFMPEGCOLORSPACE (space));
+
+  if (space->from_pixfmt == PIX_FMT_NB || space->to_pixfmt == PIX_FMT_NB) {
+    g_critical ("attempting to convert unknown formats");
+    gst_buffer_unref (inbuf);
+    return;
+  }
+  /* identical formats on both sides: the input buffer is pushed on unchanged */
+  if (space->from_pixfmt == space->to_pixfmt) {
+    outbuf = inbuf;
+  } else {
+    /* use bufferpool here */
+    guint size = avpicture_get_size (space->to_pixfmt,
+	space->width,
+	space->height);
+    /* NOTE(review): the result of gst_pad_alloc_buffer() is used without a check */
+    outbuf = gst_pad_alloc_buffer (space->srcpad, GST_BUFFER_OFFSET_NONE, size);
+
+    /* convert: point both pictures at the buffer data, then run img_convert() */
+    avpicture_fill ((AVPicture *) & space->from_frame, GST_BUFFER_DATA (inbuf),
+	space->from_pixfmt, space->width, space->height);
+    avpicture_fill ((AVPicture *) & space->to_frame, GST_BUFFER_DATA (outbuf),
+	space->to_pixfmt, space->width, space->height);
+    img_convert ((AVPicture *) & space->to_frame, space->to_pixfmt,
+	(AVPicture *) & space->from_frame, space->from_pixfmt,
+	space->width, space->height);
+    /* timestamp and duration are carried over from the input buffer */
+    GST_BUFFER_TIMESTAMP (outbuf) = GST_BUFFER_TIMESTAMP (inbuf);
+    GST_BUFFER_DURATION (outbuf) = GST_BUFFER_DURATION (inbuf);
+
+    gst_buffer_unref (inbuf);
+  }
+
+  gst_pad_push (space->srcpad, GST_DATA (outbuf));
+}
+
+static GstElementStateReturn
+gst_ffmpegcolorspace_change_state (GstElement * element)
+{
+  GstFFMpegColorspace *self;
+
+  self = GST_FFMPEGCOLORSPACE (element);
+
+  /* no transition needs element-specific work yet; chain up to the parent */
+  switch (GST_STATE_TRANSITION (element)) {
+    case GST_STATE_PAUSED_TO_READY:
+    default:
+      break;
+  }
+
+  return parent_class->change_state ?
+      parent_class->change_state (element) : GST_STATE_SUCCESS;
+}
+
+static void
+gst_ffmpegcolorspace_set_property (GObject * object,
+    guint prop_id, const GValue * value, GParamSpec * pspec)
+{
+  GstFFMpegColorspace *space;
+  /* No properties are installed yet, so every id is reported as invalid
+   * (same as the getter).  object is not null, but it might not be ours. */
+  g_return_if_fail (GST_IS_FFMPEGCOLORSPACE (object));
+  space = GST_FFMPEGCOLORSPACE (object);
+  switch (prop_id) {
+    default:
+      G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
+      break;
+  }
+}
+
+static void
+gst_ffmpegcolorspace_get_property (GObject * object,
+    guint prop_id, GValue * value, GParamSpec * pspec)
+{
+  GstFFMpegColorspace *self;
+
+  /* GObject property getter.  The object is never null here, but it may
+   * be of another type, so that is checked first. */
+  g_return_if_fail (GST_IS_FFMPEGCOLORSPACE (object));
+  self = GST_FFMPEGCOLORSPACE (object);
+
+  /* there are no readable properties, so any id that reaches us is
+   * unknown and gets reported */
+
+  G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
+}
+
+gboolean
+gst_ffmpegcolorspace_register (GstPlugin * plugin)
+{
+  GstCaps *template_caps = gst_ffmpeg_pix_fmt_to_caps ();
+
+  /* Both pad templates list every format we can map.  The src template
+   * gets a copy of the caps and the sink template the original. */
+  srctempl = gst_pad_template_new ("src", GST_PAD_SRC, GST_PAD_ALWAYS,
+      gst_caps_copy (template_caps));
+  sinktempl = gst_pad_template_new ("sink", GST_PAD_SINK, GST_PAD_ALWAYS,
+      template_caps);
+
+  /* one-time avcodec initialisation, done before the element is registered */
+  avcodec_init ();
+  return gst_element_register (plugin, "ffmpegcolorspace", GST_RANK_NONE,
+      GST_TYPE_FFMPEGCOLORSPACE);
+}
diff --git a/gst/ffmpegcolorspace/imgconvert.c b/gst/ffmpegcolorspace/imgconvert.c
new file mode 100644
index 0000000000..a85e0f0a27
--- /dev/null
+++ b/gst/ffmpegcolorspace/imgconvert.c
@@ -0,0 +1,2155 @@
+/*
+ * Misc image conversion routines
+ * Copyright (c) 2001, 2002, 2003 Fabrice Bellard.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+/**
+ * @file imgconvert.c
+ * Misc image conversion routines.
+ */
+
+/* TODO:
+ * - write 'ffimg' program to test all the image related stuff
+ * - move all api to slice based system
+ * - integrate deinterlacing, postprocessing and scaling in the conversion process
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "avcodec.h"
+#include <math.h>
+#include "dsputil.h"
+
+#include <string.h>
+
+#ifdef HAVE_MMX
+#include "mmx.h"
+#endif
+
+#define xglue(x, y) x ## y
+#define glue(x, y) xglue(x, y)
+
+#define FF_COLOR_RGB      0	/* RGB color space */
+#define FF_COLOR_GRAY     1	/* gray color space */
+#define FF_COLOR_YUV      2	/* YUV color space. 16 <= Y <= 235, 16 <= U, V <= 240 */
+#define FF_COLOR_YUV_JPEG 3	/* YUV color space. 0 <= Y <= 255, 0 <= U, V <= 255 */
+
+#define FF_PIXEL_PLANAR   0	/* each channel has one component in AVPicture */
+#define FF_PIXEL_PACKED   1	/* only one component containing all the channels */
+#define FF_PIXEL_PALETTE  2	/* one component containing indexes for a palette */
+
+typedef struct PixFmtInfo
+{
+  const char *name;
+  uint8_t nb_channels;		/* number of channels (including alpha) */
+  uint8_t color_type;		/* color type (see FF_COLOR_xxx constants) */
+  uint8_t pixel_type;		/* pixel storage type (see FF_PIXEL_xxx constants) */
+  uint8_t is_alpha:1;		/* true if alpha can be specified */
+  uint8_t x_chroma_shift;	/* X chroma subsampling factor is 2 ^ shift */
+  uint8_t y_chroma_shift;	/* Y chroma subsampling factor is 2 ^ shift */
+  uint8_t depth;		/* bit depth of the color components */
+}
+PixFmtInfo;
+
+/* this table gives more information about formats */
+static PixFmtInfo pix_fmt_info[PIX_FMT_NB] = {
+  /* YUV formats */
+  [PIX_FMT_YUV420P] = {
+	.name = "yuv420p",
+	.nb_channels = 3,
+	.color_type = FF_COLOR_YUV,
+	.pixel_type = FF_PIXEL_PLANAR,
+	.depth = 8,
+	.x_chroma_shift = 1,.y_chroma_shift = 1,
+      },
+  [PIX_FMT_YUV422P] = {
+	.name = "yuv422p",
+	.nb_channels = 3,
+	.color_type = FF_COLOR_YUV,
+	.pixel_type = FF_PIXEL_PLANAR,
+	.depth = 8,
+	.x_chroma_shift = 1,.y_chroma_shift = 0,
+      },
+  [PIX_FMT_YUV444P] = {
+	.name = "yuv444p",
+	.nb_channels = 3,
+	.color_type = FF_COLOR_YUV,
+	.pixel_type = FF_PIXEL_PLANAR,
+	.depth = 8,
+	.x_chroma_shift = 0,.y_chroma_shift = 0,
+      },
+  [PIX_FMT_YUV422] = {
+	.name = "yuv422",
+	.nb_channels = 1,
+	.color_type = FF_COLOR_YUV,
+	.pixel_type = FF_PIXEL_PACKED,
+	.depth = 8,
+	.x_chroma_shift = 1,.y_chroma_shift = 0,
+      },
+  [PIX_FMT_YUV410P] = {
+	.name = "yuv410p",
+	.nb_channels = 3,
+	.color_type = FF_COLOR_YUV,
+	.pixel_type = FF_PIXEL_PLANAR,
+	.depth = 8,
+	.x_chroma_shift = 2,.y_chroma_shift = 2,
+      },
+  [PIX_FMT_YUV411P] = {
+	.name = "yuv411p",
+	.nb_channels = 3,
+	.color_type = FF_COLOR_YUV,
+	.pixel_type = FF_PIXEL_PLANAR,
+	.depth = 8,
+	.x_chroma_shift = 2,.y_chroma_shift = 0,
+      },
+
+  /* JPEG YUV */
+  [PIX_FMT_YUVJ420P] = {
+	.name = "yuvj420p",
+	.nb_channels = 3,
+	.color_type = FF_COLOR_YUV_JPEG,
+	.pixel_type = FF_PIXEL_PLANAR,
+	.depth = 8,
+	.x_chroma_shift = 1,.y_chroma_shift = 1,
+      },
+  [PIX_FMT_YUVJ422P] = {
+	.name = "yuvj422p",
+	.nb_channels = 3,
+	.color_type = FF_COLOR_YUV_JPEG,
+	.pixel_type = FF_PIXEL_PLANAR,
+	.depth = 8,
+	.x_chroma_shift = 1,.y_chroma_shift = 0,
+      },
+  [PIX_FMT_YUVJ444P] = {
+	.name = "yuvj444p",
+	.nb_channels = 3,
+	.color_type = FF_COLOR_YUV_JPEG,
+	.pixel_type = FF_PIXEL_PLANAR,
+	.depth = 8,
+	.x_chroma_shift = 0,.y_chroma_shift = 0,
+      },
+
+  /* RGB formats */
+  [PIX_FMT_RGB24] = {
+	.name = "rgb24",
+	.nb_channels = 3,
+	.color_type = FF_COLOR_RGB,
+	.pixel_type = FF_PIXEL_PACKED,
+	.depth = 8,
+	.x_chroma_shift = 0,.y_chroma_shift = 0,
+      },
+  [PIX_FMT_BGR24] = {
+	.name = "bgr24",
+	.nb_channels = 3,
+	.color_type = FF_COLOR_RGB,
+	.pixel_type = FF_PIXEL_PACKED,
+	.depth = 8,
+	.x_chroma_shift = 0,.y_chroma_shift = 0,
+      },
+  [PIX_FMT_RGBA32] = {
+	.name = "rgba32",
+	.nb_channels = 4,.is_alpha = 1,
+	.color_type = FF_COLOR_RGB,
+	.pixel_type = FF_PIXEL_PACKED,
+	.depth = 8,
+	.x_chroma_shift = 0,.y_chroma_shift = 0,
+      },
+  [PIX_FMT_RGB565] = {
+	.name = "rgb565",
+	.nb_channels = 3,
+	.color_type = FF_COLOR_RGB,
+	.pixel_type = FF_PIXEL_PACKED,
+	.depth = 5,
+	.x_chroma_shift = 0,.y_chroma_shift = 0,
+      },
+  [PIX_FMT_RGB555] = {
+	.name = "rgb555",
+	.nb_channels = 4,.is_alpha = 1,
+	.color_type = FF_COLOR_RGB,
+	.pixel_type = FF_PIXEL_PACKED,
+	.depth = 5,
+	.x_chroma_shift = 0,.y_chroma_shift = 0,
+      },
+
+  /* gray / mono formats */
+  [PIX_FMT_GRAY8] = {
+	.name = "gray",
+	.nb_channels = 1,
+	.color_type = FF_COLOR_GRAY,
+	.pixel_type = FF_PIXEL_PLANAR,
+	.depth = 8,
+      },
+  [PIX_FMT_MONOWHITE] = {
+	.name = "monow",
+	.nb_channels = 1,
+	.color_type = FF_COLOR_GRAY,
+	.pixel_type = FF_PIXEL_PLANAR,
+	.depth = 1,
+      },
+  [PIX_FMT_MONOBLACK] = {
+	.name = "monob",
+	.nb_channels = 1,
+	.color_type = FF_COLOR_GRAY,
+	.pixel_type = FF_PIXEL_PLANAR,
+	.depth = 1,
+      },
+
+  /* paletted formats */
+  [PIX_FMT_PAL8] = {
+	.name = "pal8",
+	.nb_channels = 4,.is_alpha = 1,
+	.color_type = FF_COLOR_RGB,
+	.pixel_type = FF_PIXEL_PALETTE,
+	.depth = 8,
+      },
+};
+
+void
+avcodec_get_chroma_sub_sample (int pix_fmt, int *h_shift, int *v_shift)
+{
+  *h_shift = pix_fmt_info[pix_fmt].x_chroma_shift;	/* pix_fmt is not range-checked */
+  *v_shift = pix_fmt_info[pix_fmt].y_chroma_shift;	/* both shifts are log2 subsampling factors */
+}
+
+const char *
+avcodec_get_pix_fmt_name (int pix_fmt)
+{
+  /* name from the format table, or "???" for an out-of-range format */
+  if (pix_fmt >= 0 && pix_fmt < PIX_FMT_NB)
+    return pix_fmt_info[pix_fmt].name;
+  return "???";
+}
+
+enum PixelFormat
+avcodec_get_pix_fmt (const char *name)
+{
+  int i;
+  /* returns PIX_FMT_NB when no entry matches; unnamed (NULL) slots are skipped */
+  for (i = 0; i < PIX_FMT_NB; i++)
+    if (pix_fmt_info[i].name && !strcmp (pix_fmt_info[i].name, name))
+      break;
+  return i;
+}
+
+/* Set picture->data/linesize for an image stored at 'ptr'; returns its size in bytes, or -1 */
+int
+avpicture_fill (AVPicture * picture, uint8_t * ptr,
+    int pix_fmt, int width, int height)
+{
+  int size, w2, h2, size2;
+  PixFmtInfo *pinfo;
+  /* avpicture_get_size() calls this with ptr == NULL just to obtain the size */
+  pinfo = &pix_fmt_info[pix_fmt];
+  size = width * height;
+  switch (pix_fmt) {
+    case PIX_FMT_YUV420P:
+    case PIX_FMT_YUV422P:
+    case PIX_FMT_YUV444P:
+    case PIX_FMT_YUV410P:
+    case PIX_FMT_YUV411P:
+    case PIX_FMT_YUVJ420P:
+    case PIX_FMT_YUVJ422P:
+    case PIX_FMT_YUVJ444P:
+      w2 = (width + (1 << pinfo->x_chroma_shift) - 1) >> pinfo->x_chroma_shift;	/* chroma width, rounded up */
+      h2 = (height + (1 << pinfo->y_chroma_shift) - 1) >> pinfo->y_chroma_shift;	/* chroma height, rounded up */
+      size2 = w2 * h2;
+      picture->data[0] = ptr;
+      picture->data[1] = picture->data[0] + size;
+      picture->data[2] = picture->data[1] + size2;
+      picture->linesize[0] = width;
+      picture->linesize[1] = w2;
+      picture->linesize[2] = w2;
+      return size + 2 * size2;
+    case PIX_FMT_RGB24:
+    case PIX_FMT_BGR24:
+      picture->data[0] = ptr;
+      picture->data[1] = NULL;
+      picture->data[2] = NULL;
+      picture->linesize[0] = width * 3;
+      return size * 3;
+    case PIX_FMT_RGBA32:
+      picture->data[0] = ptr;
+      picture->data[1] = NULL;
+      picture->data[2] = NULL;
+      picture->linesize[0] = width * 4;
+      return size * 4;
+    case PIX_FMT_RGB555:
+    case PIX_FMT_RGB565:
+    case PIX_FMT_YUV422:
+      picture->data[0] = ptr;
+      picture->data[1] = NULL;
+      picture->data[2] = NULL;
+      picture->linesize[0] = width * 2;
+      return size * 2;
+    case PIX_FMT_GRAY8:
+      picture->data[0] = ptr;
+      picture->data[1] = NULL;
+      picture->data[2] = NULL;
+      picture->linesize[0] = width;
+      return size;
+    case PIX_FMT_MONOWHITE:
+    case PIX_FMT_MONOBLACK:
+      picture->data[0] = ptr;
+      picture->data[1] = NULL;
+      picture->data[2] = NULL;
+      picture->linesize[0] = (width + 7) >> 3;	/* 8 pixels per byte, rounded up */
+      return picture->linesize[0] * height;
+    case PIX_FMT_PAL8:
+      size2 = (size + 3) & ~3;	/* index plane size rounded up to a multiple of 4 */
+      picture->data[0] = ptr;
+      picture->data[1] = ptr + size2;	/* palette is stored here as 256 32 bit words */
+      picture->data[2] = NULL;
+      picture->linesize[0] = width;
+      picture->linesize[1] = 4;
+      return size2 + 256 * 4;
+    default:
+      picture->data[0] = NULL;
+      picture->data[1] = NULL;
+      picture->data[2] = NULL;
+      picture->data[3] = NULL;
+      return -1;
+  }
+}
+
+/**
+ * Serialize the planes of 'src' into the contiguous buffer 'dest'.
+ *
+ * Rows are read using src->linesize[] and written back to back, without any
+ * padding between rows.  For palettized formats the 256 32-bit palette
+ * entries found in src->data[1] are appended after the pixel data, at the
+ * next offset (counted from the start of 'dest') that is a multiple of 4.
+ *
+ * Returns the size reported by avpicture_get_size() for this format and
+ * these dimensions, or -1 if that size is negative (avpicture_fill() does
+ * not handle the format) or does not fit into 'dest_size' bytes.
+ *
+ * NOTE(review): chroma plane dimensions are truncated here (width >> shift)
+ * while avpicture_fill() rounds them up; for odd dimensions fewer bytes than
+ * the returned size are written -- confirm which behavior callers expect.
+ */
+int
+avpicture_layout (const AVPicture * src, int pix_fmt, int width, int height,
+    unsigned char *dest, int dest_size)
+{
+  PixFmtInfo *pf = &pix_fmt_info[pix_fmt];
+  int i, j, w, h, data_planes;
+  const unsigned char *s;
+  unsigned char *dest_start = dest;
+  int size = avpicture_get_size (pix_fmt, width, height);
+
+  /* a negative size is the error return of avpicture_get_size(); without
+     this test it would slip through the "too small" check below */
+  if (size < 0 || size > dest_size)
+    return -1;
+
+  if (pf->pixel_type == FF_PIXEL_PACKED || pf->pixel_type == FF_PIXEL_PALETTE) {
+    /* single plane: work out the number of bytes per row */
+    if (pix_fmt == PIX_FMT_YUV422 || pix_fmt == PIX_FMT_RGB565 ||
+        pix_fmt == PIX_FMT_RGB555)
+      w = width * 2;
+    else if (pix_fmt == PIX_FMT_PAL8)
+      w = width;
+    else
+      w = width * (pf->depth * pf->nb_channels / 8);
+
+    data_planes = 1;
+    h = height;
+  } else {
+    data_planes = pf->nb_channels;
+    w = width;
+    h = height;
+  }
+
+  for (i = 0; i < data_planes; i++) {
+    /* from the second plane on, use the subsampled chroma dimensions */
+    if (i == 1) {
+      w = width >> pf->x_chroma_shift;
+      h = height >> pf->y_chroma_shift;
+    }
+    s = src->data[i];
+    for (j = 0; j < h; j++) {
+      memcpy (dest, s, w);
+      dest += w;
+      s += src->linesize[i];
+    }
+  }
+
+  if (pf->pixel_type == FF_PIXEL_PALETTE) {
+    /* align the offset inside the buffer rather than the absolute address,
+       so the palette position does not depend on how 'dest' itself happens
+       to be aligned and never moves past the size checked above */
+    size_t pal_offset = ((size_t) (dest - dest_start) + 3) & ~(size_t) 3;
+
+    memcpy (dest_start + pal_offset, src->data[1], 256 * 4);
+  }
+
+  return size;
+}
+
+/**
+ * Return the byte count avpicture_fill() reports for a picture of the given
+ * format and dimensions.  The layout is computed on a scratch AVPicture with
+ * a NULL buffer pointer; whatever avpicture_fill() returns (including its
+ * -1 error value) is passed through unchanged.
+ */
+int
+avpicture_get_size (int pix_fmt, int width, int height)
+{
+  AVPicture scratch;
+  int nbytes;
+
+  nbytes = avpicture_fill (&scratch, NULL, pix_fmt, width, height);
+  return nbytes;
+}
+
+/**
+ * Estimate what is lost when converting src_pix_fmt into dst_pix_fmt.
+ *
+ * The result is a bitwise OR of FF_LOSS_* flags; 0 means that none of the
+ * checks below fired.  'has_alpha' is only consulted for FF_LOSS_ALPHA: the
+ * flag is set when it is non-zero, the source format has alpha and the
+ * destination format has none.
+ */
+int
+avcodec_get_pix_fmt_loss (int dst_pix_fmt, int src_pix_fmt, int has_alpha)
+{
+  const PixFmtInfo *from = &pix_fmt_info[src_pix_fmt];
+  const PixFmtInfo *to = &pix_fmt_info[dst_pix_fmt];
+  int flags = 0;
+  int same_colorspace;
+
+  /* fewer bits per component; RGB565 -> RGB555 is special-cased */
+  if (to->depth < from->depth)
+    flags |= FF_LOSS_DEPTH;
+  else if (dst_pix_fmt == PIX_FMT_RGB555 && src_pix_fmt == PIX_FMT_RGB565)
+    flags |= FF_LOSS_DEPTH;
+
+  /* destination chroma is subsampled more than the source in x or y */
+  if (to->x_chroma_shift > from->x_chroma_shift)
+    flags |= FF_LOSS_RESOLUTION;
+  else if (to->y_chroma_shift > from->y_chroma_shift)
+    flags |= FF_LOSS_RESOLUTION;
+
+  /* which source color types are accepted by the destination color type
+     without counting as a colorspace change */
+  if (to->color_type == FF_COLOR_RGB) {
+    same_colorspace = from->color_type == FF_COLOR_RGB ||
+        from->color_type == FF_COLOR_GRAY;
+  } else if (to->color_type == FF_COLOR_GRAY) {
+    same_colorspace = from->color_type == FF_COLOR_GRAY;
+  } else if (to->color_type == FF_COLOR_YUV) {
+    same_colorspace = from->color_type == FF_COLOR_YUV;
+  } else if (to->color_type == FF_COLOR_YUV_JPEG) {
+    same_colorspace = from->color_type == FF_COLOR_YUV_JPEG ||
+        from->color_type == FF_COLOR_YUV ||
+        from->color_type == FF_COLOR_GRAY;
+  } else {
+    /* fail safe test for any other color type */
+    same_colorspace = from->color_type == to->color_type;
+  }
+  if (!same_colorspace)
+    flags |= FF_LOSS_COLORSPACE;
+
+  /* going to gray from anything that is not gray drops the chroma */
+  if (to->color_type == FF_COLOR_GRAY && from->color_type != FF_COLOR_GRAY)
+    flags |= FF_LOSS_CHROMA;
+
+  if (has_alpha && from->is_alpha && !to->is_alpha)
+    flags |= FF_LOSS_ALPHA;
+
+  /* a palettized destination quantizes colors unless the source is already
+     palettized or gray */
+  if (to->pixel_type == FF_PIXEL_PALETTE &&
+      from->pixel_type != FF_PIXEL_PALETTE &&
+      from->color_type != FF_COLOR_GRAY)
+    flags |= FF_LOSS_COLORQUANT;
+
+  return flags;
+}
+
+/* Average number of bits one pixel occupies in the given format, or -1 when
+   the format's pixel_type is none of packed, planar or palette. */
+static int
+avg_bits_per_pixel (int pix_fmt)
+{
+  const PixFmtInfo *info = &pix_fmt_info[pix_fmt];
+
+  if (info->pixel_type == FF_PIXEL_PALETTE)
+    return 8;
+
+  if (info->pixel_type == FF_PIXEL_PACKED) {
+    /* these three packed formats always take two bytes per pixel */
+    if (pix_fmt == PIX_FMT_YUV422 || pix_fmt == PIX_FMT_RGB565 ||
+        pix_fmt == PIX_FMT_RGB555)
+      return 16;
+    return info->depth * info->nb_channels;
+  }
+
+  if (info->pixel_type == FF_PIXEL_PLANAR) {
+    if (info->x_chroma_shift == 0 && info->y_chroma_shift == 0)
+      return info->depth * info->nb_channels;
+    /* one full-resolution plane plus two planes scaled down by the
+       combined chroma shift */
+    return info->depth + ((2 * info->depth) >>
+        (info->x_chroma_shift + info->y_chroma_shift));
+  }
+
+  return -1;
+}
+
+/* Among the formats whose bit is set in pix_fmt_mask, pick the one with the
+   fewest average bits per pixel whose conversion loss from src_pix_fmt,
+   restricted to loss_mask, is zero.  Ties keep the lowest format index.
+   Returns -1 when no format qualifies. */
+static int
+avcodec_find_best_pix_fmt1 (int pix_fmt_mask,
+    int src_pix_fmt, int has_alpha, int loss_mask)
+{
+  int best_fmt = -1;
+  int best_bits = 0x7fffffff;
+  int fmt;
+
+  for (fmt = 0; fmt < PIX_FMT_NB; fmt++) {
+    int bits;
+
+    if (!(pix_fmt_mask & (1 << fmt)))
+      continue;                 /* candidate not offered by the caller */
+    if (avcodec_get_pix_fmt_loss (fmt, src_pix_fmt, has_alpha) & loss_mask)
+      continue;                 /* loses something we still care about */
+
+    bits = avg_bits_per_pixel (fmt);
+    if (bits < best_bits) {
+      best_bits = bits;
+      best_fmt = fmt;
+    }
+  }
+  return best_fmt;
+}
+
+/**
+ * Find the best pixel format to convert to.
+ *
+ * The candidates in pix_fmt_mask are searched several times, each pass
+ * ignoring more kinds of loss than the previous one, and the first pass
+ * that yields a format wins.  If loss_ptr is not NULL it receives the full
+ * (unmasked) loss flags of the chosen conversion.
+ * Returns -1 if none found.
+ */
+int
+avcodec_find_best_pix_fmt (int pix_fmt_mask, int src_pix_fmt,
+    int has_alpha, int *loss_ptr)
+{
+  /* masks applied to the loss flags, from strictest to "accept anything";
+     the trailing 0 also terminates the search */
+  static const int loss_mask_order[] = {
+    ~0,                         /* no loss first */
+    ~FF_LOSS_ALPHA,
+    ~FF_LOSS_RESOLUTION,
+    ~(FF_LOSS_COLORSPACE | FF_LOSS_RESOLUTION),
+    ~FF_LOSS_COLORQUANT,
+    ~FF_LOSS_DEPTH,
+    0,
+  };
+  int pass;
+
+  for (pass = 0;; pass++) {
+    const int mask = loss_mask_order[pass];
+    const int chosen = avcodec_find_best_pix_fmt1 (pix_fmt_mask, src_pix_fmt,
+        has_alpha, mask);
+
+    if (chosen >= 0) {
+      if (loss_ptr)
+        *loss_ptr =
+            avcodec_get_pix_fmt_loss (chosen, src_pix_fmt, has_alpha);
+      return chosen;
+    }
+    if (mask == 0)
+      return -1;                /* even the most permissive pass failed */
+  }
+}
+
+/* Copy 'height' rows of 'width' bytes from src to dst.  dst_wrap and
+   src_wrap are the distances in bytes between the starts of consecutive
+   rows in each buffer.  Nothing is copied when height <= 0. */
+static void
+img_copy_plane (uint8_t * dst, int dst_wrap,
+    const uint8_t * src, int src_wrap, int width, int height)
+{
+  int rows_left = height;
+
+  while (rows_left > 0) {
+    memcpy (dst, src, width);
+    src += src_wrap;
+    dst += dst_wrap;
+    rows_left--;
+  }
+}
+
+/**
+ * Copy image 'src' to 'dst'.
+ *
+ * Both pictures use the format 'pix_fmt' and the given dimensions; each
+ * side's own linesize[] is honoured.  Formats whose pixel_type is neither
+ * packed, planar nor palette are left untouched.
+ */
+void
+img_copy (AVPicture * dst, const AVPicture * src,
+    int pix_fmt, int width, int height)
+{
+  PixFmtInfo *info = &pix_fmt_info[pix_fmt];
+  int plane, row_bytes, bits_per_pixel;
+
+  if (info->pixel_type == FF_PIXEL_PACKED) {
+    if (pix_fmt == PIX_FMT_YUV422 || pix_fmt == PIX_FMT_RGB565 ||
+        pix_fmt == PIX_FMT_RGB555)
+      bits_per_pixel = 16;
+    else
+      bits_per_pixel = info->depth * info->nb_channels;
+
+    /* round the row length up to whole bytes */
+    row_bytes = (width * bits_per_pixel + 7) >> 3;
+    img_copy_plane (dst->data[0], dst->linesize[0],
+        src->data[0], src->linesize[0], row_bytes, height);
+  } else if (info->pixel_type == FF_PIXEL_PLANAR) {
+    for (plane = 0; plane < info->nb_channels; plane++) {
+      int plane_w = width;
+      int plane_h = height;
+
+      /* planes 1 and 2 are stored at the subsampled chroma size */
+      if (plane == 1 || plane == 2) {
+        plane_w >>= info->x_chroma_shift;
+        plane_h >>= info->y_chroma_shift;
+      }
+      row_bytes = (plane_w * info->depth + 7) >> 3;
+      img_copy_plane (dst->data[plane], dst->linesize[plane],
+          src->data[plane], src->linesize[plane], row_bytes, plane_h);
+    }
+  } else if (info->pixel_type == FF_PIXEL_PALETTE) {
+    /* one byte per pixel of indices ... */
+    img_copy_plane (dst->data[0], dst->linesize[0],
+        src->data[0], src->linesize[0], width, height);
+    /* ... followed by the palette, copied as 256 rows of 4 bytes */
+    img_copy_plane (dst->data[1], dst->linesize[1],
+        src->data[1], src->linesize[1], 4, 256);
+  }
+}
+
+/* XXX: totally non optimized */
+
+/* Convert a packed 4:2:2 picture (src->data[0], bytes ordered Y0 Cb Y1 Cr
+   per pixel pair) into three separate planes: luma in dst->data[0], Cb in
+   dst->data[1] and Cr in dst->data[2].
+
+   Source rows are handled two at a time.  Luma is copied from both rows,
+   while Cb/Cr are taken from the first row of each pair only, so the chroma
+   planes get one row per two source rows.  No averaging is done. */
+static void
+yuv422_to_yuv420p (AVPicture * dst, const AVPicture * src,
+    int width, int height)
+{
+  const uint8_t *p, *p1;
+  uint8_t *lum, *cr, *cb, *lum1, *cr1, *cb1;
+  int w;
+
+  /* the *1 pointers track the start of the current row in each plane */
+  p1 = src->data[0];
+  lum1 = dst->data[0];
+  cb1 = dst->data[1];
+  cr1 = dst->data[2];
+
+  for (; height >= 1; height -= 2) {
+    /* first row of the pair: luma and chroma */
+    p = p1;
+    lum = lum1;
+    cb = cb1;
+    cr = cr1;
+    for (w = width; w >= 2; w -= 2) {
+      lum[0] = p[0];
+      cb[0] = p[1];
+      lum[1] = p[2];
+      cr[0] = p[3];
+      p += 4;
+      lum += 2;
+      cb++;
+      cr++;
+    }
+    /* odd width: one luma sample left; the chroma for it is still read
+       from bytes 1 and 3 of the last four-byte group */
+    if (w) {
+      lum[0] = p[0];
+      cb[0] = p[1];
+      cr[0] = p[3];
+      cb++;
+      cr++;
+    }
+    p1 += src->linesize[0];
+    lum1 += dst->linesize[0];
+    /* second row of the pair (absent when height is odd and this is the
+       last iteration): luma only */
+    if (height > 1) {
+      p = p1;
+      lum = lum1;
+      for (w = width; w >= 2; w -= 2) {
+	lum[0] = p[0];
+	lum[1] = p[2];
+	p += 4;
+	lum += 2;
+      }
+      if (w) {
+	lum[0] = p[0];
+      }
+      p1 += src->linesize[0];
+      lum1 += dst->linesize[0];
+    }
+    /* chroma planes advance one row per pair of source rows */
+    cb1 += dst->linesize[1];
+    cr1 += dst->linesize[2];
+  }
+}
+
+/* Split packed 4:2:2 rows (bytes ordered Y0 Cb Y1 Cr) into separate Y, Cb
+   and Cr planes.  Only complete pixel pairs are converted; a trailing odd
+   column is left untouched. */
+static void
+yuv422_to_yuv422p (AVPicture * dst, const AVPicture * src,
+    int width, int height)
+{
+  const uint8_t *packed_row = src->data[0];
+  uint8_t *y_row = dst->data[0];
+  uint8_t *cb_row = dst->data[1];
+  uint8_t *cr_row = dst->data[2];
+  const int pairs = width / 2;
+  int row, k;
+
+  for (row = 0; row < height; row++) {
+    for (k = 0; k < pairs; k++) {
+      const uint8_t *px = packed_row + 4 * k;
+
+      y_row[2 * k] = px[0];
+      cb_row[k] = px[1];
+      y_row[2 * k + 1] = px[2];
+      cr_row[k] = px[3];
+    }
+    packed_row += src->linesize[0];
+    y_row += dst->linesize[0];
+    cb_row += dst->linesize[1];
+    cr_row += dst->linesize[2];
+  }
+}
+
+/* Interleave separate Y, Cb and Cr planes into packed 4:2:2 rows (bytes
+   ordered Y0 Cb Y1 Cr).  Only complete pixel pairs are converted; a
+   trailing odd column is left untouched. */
+static void
+yuv422p_to_yuv422 (AVPicture * dst, const AVPicture * src,
+    int width, int height)
+{
+  uint8_t *packed_row = dst->data[0];
+  const uint8_t *y_row = src->data[0];
+  const uint8_t *cb_row = src->data[1];
+  const uint8_t *cr_row = src->data[2];
+  const int pairs = width / 2;
+  int row, k;
+
+  for (row = 0; row < height; row++) {
+    for (k = 0; k < pairs; k++) {
+      uint8_t *px = packed_row + 4 * k;
+
+      px[0] = y_row[2 * k];
+      px[1] = cb_row[k];
+      px[2] = y_row[2 * k + 1];
+      px[3] = cr_row[k];
+    }
+    packed_row += dst->linesize[0];
+    y_row += src->linesize[0];
+    cb_row += src->linesize[1];
+    cr_row += src->linesize[2];
+  }
+}
+
+/* Fixed-point helpers: values carry SCALEBITS fractional bits. */
+#define SCALEBITS 10
+/* 0.5 in that fixed-point representation, added before a right shift by
+   SCALEBITS to round instead of truncate */
+#define ONE_HALF  (1 << (SCALEBITS - 1))
+/* convert a floating point constant to fixed point; the "+ 0.5" rounds
+   non-negative constants to the nearest integer */
+#define FIX(x)	  ((int) ((x) * (1<<SCALEBITS) + 0.5))
+
+/* First half of a YUV -> RGB conversion for the "CCIR" sample range: from
+   one chroma pair compute the per-channel terms r_add, g_add and b_add.
+   The chroma coefficients are scaled by 255/224.  The macro assigns to cb,
+   cr, r_add, g_add and b_add, which must be declared by the code using it. */
+#define YUV_TO_RGB1_CCIR(cb1, cr1)\
+{\
+    cb = (cb1) - 128;\
+    cr = (cr1) - 128;\
+    r_add = FIX(1.40200*255.0/224.0) * cr + ONE_HALF;\
+    g_add = - FIX(0.34414*255.0/224.0) * cb - FIX(0.71414*255.0/224.0) * cr + \
+            ONE_HALF;\
+    b_add = FIX(1.77200*255.0/224.0) * cb + ONE_HALF;\
+}
+
+/* Second half: combine one luma sample (offset by 16, scaled by 255/219)
+   with the terms computed by YUV_TO_RGB1_CCIR and store the result in r, g
+   and b.  Results go through the lookup table cm, which, like y, must
+   exist in the using scope (img_convert_init() sets cm to
+   cropTbl + MAX_NEG_CROP; presumably a clamping table -- confirm). */
+#define YUV_TO_RGB2_CCIR(r, g, b, y1)\
+{\
+    y = ((y1) - 16) * FIX(255.0/219.0);\
+    r = cm[(y + r_add) >> SCALEBITS];\
+    g = cm[(y + g_add) >> SCALEBITS];\
+    b = cm[(y + b_add) >> SCALEBITS];\
+}
+
+/* Same two steps without the 255/224 and 255/219 range scaling and
+   without the luma offset of 16. */
+#define YUV_TO_RGB1(cb1, cr1)\
+{\
+    cb = (cb1) - 128;\
+    cr = (cr1) - 128;\
+    r_add = FIX(1.40200) * cr + ONE_HALF;\
+    g_add = - FIX(0.34414) * cb - FIX(0.71414) * cr + ONE_HALF;\
+    b_add = FIX(1.77200) * cb + ONE_HALF;\
+}
+
+#define YUV_TO_RGB2(r, g, b, y1)\
+{\
+    y = (y1) << SCALEBITS;\
+    r = cm[(y + r_add) >> SCALEBITS];\
+    g = cm[(y + g_add) >> SCALEBITS];\
+    b = cm[(y + b_add) >> SCALEBITS];\
+}
+
+/* luma: (y - 16) * 255/219, rounded, then looked up through cm */
+#define Y_CCIR_TO_JPEG(y)\
+ cm[((y) * FIX(255.0/219.0) + (ONE_HALF - 16 * FIX(255.0/219.0))) >> SCALEBITS]
+
+/* luma: y * 219/255 + 16, rounded; no cm lookup is applied */
+#define Y_JPEG_TO_CCIR(y)\
+ (((y) * FIX(219.0/255.0) + (ONE_HALF + (16 << SCALEBITS))) >> SCALEBITS)
+
+/* chroma: (c - 128) * 127/112 + 128, rounded, then looked up through cm */
+#define C_CCIR_TO_JPEG(y)\
+ cm[(((y) - 128) * FIX(127.0/112.0) + (ONE_HALF + (128 << SCALEBITS))) >> SCALEBITS]
+/* Chroma sample conversion: (y - 128) * 112/127 + 128 in fixed point with
+   rounding, limited to a minimum of 16.
+   NOTE: the clamp is really necessary! */
+static inline int
+C_JPEG_TO_CCIR (int y)
+{
+  const int offset = ONE_HALF + (128 << SCALEBITS);
+  const int scaled = (y - 128) * FIX (112.0 / 127.0) + offset;
+  const int result = scaled >> SCALEBITS;
+
+  return (result < 16) ? 16 : result;
+}
+
+
+/* RGB -> luma using the weights 0.299 / 0.587 / 0.114, rounded. */
+#define RGB_TO_Y(r, g, b) \
+((FIX(0.29900) * (r) + FIX(0.58700) * (g) + \
+  FIX(0.11400) * (b) + ONE_HALF) >> SCALEBITS)
+
+/* RGB -> chroma, centered on 128.  'shift' adds extra right-shift bits on
+   top of SCALEBITS (presumably so that r1/g1/b1 may be sums over 2^shift
+   pixels -- confirm against the callers).  Note that r1, g1, b1 and shift
+   are not parenthesized in the expansion, so pass simple expressions. */
+#define RGB_TO_U(r1, g1, b1, shift)\
+(((- FIX(0.16874) * r1 - FIX(0.33126) * g1 +         \
+     FIX(0.50000) * b1 + (ONE_HALF << shift) - 1) >> (SCALEBITS + shift)) + 128)
+
+#define RGB_TO_V(r1, g1, b1, shift)\
+(((FIX(0.50000) * r1 - FIX(0.41869) * g1 -           \
+   FIX(0.08131) * b1 + (ONE_HALF << shift) - 1) >> (SCALEBITS + shift)) + 128)
+
+/* "CCIR" variants: luma weights are scaled by 219/255 and offset by 16,
+   chroma weights are scaled by 224/255. */
+#define RGB_TO_Y_CCIR(r, g, b) \
+((FIX(0.29900*219.0/255.0) * (r) + FIX(0.58700*219.0/255.0) * (g) + \
+  FIX(0.11400*219.0/255.0) * (b) + (ONE_HALF + (16 << SCALEBITS))) >> SCALEBITS)
+
+#define RGB_TO_U_CCIR(r1, g1, b1, shift)\
+(((- FIX(0.16874*224.0/255.0) * r1 - FIX(0.33126*224.0/255.0) * g1 +         \
+     FIX(0.50000*224.0/255.0) * b1 + (ONE_HALF << shift) - 1) >> (SCALEBITS + shift)) + 128)
+
+#define RGB_TO_V_CCIR(r1, g1, b1, shift)\
+(((FIX(0.50000*224.0/255.0) * r1 - FIX(0.41869*224.0/255.0) * g1 -           \
+   FIX(0.08131*224.0/255.0) * b1 + (ONE_HALF << shift) - 1) >> (SCALEBITS + shift)) + 128)
+
+/* Lookup tables between the two 8-bit sample ranges, indexed by the input
+   sample value; filled in by img_convert_init(). */
+static uint8_t y_ccir_to_jpeg[256];
+static uint8_t y_jpeg_to_ccir[256];
+static uint8_t c_ccir_to_jpeg[256];
+static uint8_t c_jpeg_to_ccir[256];
+
+/* Fill the four conversion tables, one entry per possible 8-bit input. */
+static void
+img_convert_init (void)
+{
+  /* the *_CCIR_TO_JPEG macros index a table that has to be named 'cm' */
+  uint8_t *cm = cropTbl + MAX_NEG_CROP;
+  int sample = 0;
+
+  while (sample < 256) {
+    y_ccir_to_jpeg[sample] = Y_CCIR_TO_JPEG (sample);
+    y_jpeg_to_ccir[sample] = Y_JPEG_TO_CCIR (sample);
+    c_ccir_to_jpeg[sample] = C_CCIR_TO_JPEG (sample);
+    c_jpeg_to_ccir[sample] = C_JPEG_TO_CCIR (sample);
+    sample++;
+  }
+}
+
+/* Map every byte of a width x height plane through the lookup table
+   'table1' (indexed by the source byte) and store the result in dst.
+   dst_wrap and src_wrap are the row strides in bytes. */
+static void
+img_apply_table (uint8_t * dst, int dst_wrap,
+    const uint8_t * src, int src_wrap,
+    int width, int height, const uint8_t * table1)
+{
+  int row, col;
+
+  for (row = 0; row < height; row++) {
+    const uint8_t *in = src;
+    uint8_t *out = dst;
+
+    for (col = 0; col < width; col++)
+      out[col] = table1[in[col]];
+
+    src += src_wrap;
+    dst += dst_wrap;
+  }
+}
+
+/* XXX: use generic filter ? */
+/* XXX: in most cases, the sampling position is incorrect */
+
+/* 4x1 -> 1x1: each output byte is the rounded average of four horizontally
+   adjacent source bytes.  width and height are those of the destination. */
+static void
+shrink41 (uint8_t * dst, int dst_wrap,
+    const uint8_t * src, int src_wrap, int width, int height)
+{
+  int row, col;
+
+  for (row = 0; row < height; row++) {
+    for (col = 0; col < width; col++) {
+      const uint8_t *in = src + 4 * col;
+
+      dst[col] = (in[0] + in[1] + in[2] + in[3] + 2) >> 2;
+    }
+    src += src_wrap;
+    dst += dst_wrap;
+  }
+}
+
+/* 2x1 -> 1x1: each output byte is the truncated average of two horizontally
+   adjacent source bytes.  width and height are those of the destination. */
+static void
+shrink21 (uint8_t * dst, int dst_wrap,
+    const uint8_t * src, int src_wrap, int width, int height)
+{
+  int row, col;
+
+  for (row = 0; row < height; row++) {
+    for (col = 0; col < width; col++) {
+      const uint8_t *in = src + 2 * col;
+
+      dst[col] = (in[0] + in[1]) >> 1;
+    }
+    src += src_wrap;
+    dst += dst_wrap;
+  }
+}
+
+/* 1x2 -> 1x1: each output byte is the truncated average of two vertically
+   adjacent source bytes; two source rows are consumed per output row.
+   width and height are those of the destination. */
+static void
+shrink12 (uint8_t * dst, int dst_wrap,
+    const uint8_t * src, int src_wrap, int width, int height)
+{
+  int row, col;
+
+  for (row = 0; row < height; row++) {
+    const uint8_t *upper = src;
+    const uint8_t *lower = src + src_wrap;
+
+    for (col = 0; col < width; col++)
+      dst[col] = (upper[col] + lower[col]) >> 1;
+
+    src += 2 * src_wrap;
+    dst += dst_wrap;
+  }
+}
+
+/* 2x2 -> 1x1: each output byte is the rounded average of a 2x2 block of
+   source bytes; two source rows are consumed per output row.  width and
+   height are those of the destination. */
+static void
+shrink22 (uint8_t * dst, int dst_wrap,
+    const uint8_t * src, int src_wrap, int width, int height)
+{
+  int row, col;
+
+  for (row = 0; row < height; row++) {
+    const uint8_t *upper = src;
+    const uint8_t *lower = src + src_wrap;
+
+    for (col = 0; col < width; col++) {
+      const int x = 2 * col;
+
+      dst[col] = (upper[x] + upper[x + 1] + lower[x] + lower[x + 1] + 2) >> 2;
+    }
+    src += 2 * src_wrap;
+    dst += dst_wrap;
+  }
+}
+
+/* 4x4 -> 1x1: each output byte is the rounded average of a 4x4 block of
+   source bytes; four source rows are consumed per output row.  width and
+   height are those of the destination. */
+static void
+shrink44 (uint8_t * dst, int dst_wrap,
+    const uint8_t * src, int src_wrap, int width, int height)
+{
+  int row, col, r, c;
+
+  for (row = 0; row < height; row++) {
+    for (col = 0; col < width; col++) {
+      int sum = 8;              /* rounding term for the division by 16 */
+
+      for (r = 0; r < 4; r++) {
+        const uint8_t *in = src + r * src_wrap + 4 * col;
+
+        for (c = 0; c < 4; c++)
+          sum += in[c];
+      }
+      dst[col] = sum >> 4;
+    }
+    src += 4 * src_wrap;
+    dst += dst_wrap;
+  }
+}
+
+/* Horizontally double one row: every source byte is written twice.
+   'width' is the number of destination bytes; when it is not a multiple of
+   two, the last destination byte gets a single copy of the next source
+   byte. */
+static void
+grow21_line (uint8_t * dst, const uint8_t * src, int width)
+{
+  const uint8_t *in = src;
+  uint8_t *out = dst;
+  int left = width;
+
+  while (left >= 2) {
+    const uint8_t v = *in++;
+
+    out[0] = v;
+    out[1] = v;
+    out += 2;
+    left -= 2;
+  }
+  /* leftover column of an odd width */
+  if (left) {
+    out[0] = in[0];
+  }
+}
+
+/* Horizontally quadruple one row: every source byte is written four times.
+   'width' is the number of destination bytes; only complete groups of four
+   are written, so up to three trailing destination bytes stay untouched. */
+static void
+grow41_line (uint8_t * dst, const uint8_t * src, int width)
+{
+  const uint8_t *in = src;
+  uint8_t *out = dst;
+  int left, k;
+
+  for (left = width; left >= 4; left -= 4) {
+    const int v = *in++;
+
+    for (k = 0; k < 4; k++)
+      out[k] = v;
+    out += 4;
+  }
+}
+
+/* 1x1 -> 2x1: double every row horizontally.  width and height are those
+   of the destination; one source row is consumed per destination row. */
+static void
+grow21 (uint8_t * dst, int dst_wrap,
+    const uint8_t * src, int src_wrap, int width, int height)
+{
+  int rows_left = height;
+
+  while (rows_left > 0) {
+    grow21_line (dst, src, width);
+    dst += dst_wrap;
+    src += src_wrap;
+    rows_left--;
+  }
+}
+
+/* 1x1 -> 2x2: double the picture in both directions.  width and height are
+   those of the destination.  The source advances one row whenever the
+   count of destination rows still to write is odd, so each source row is
+   normally used for two consecutive destination rows. */
+static void
+grow22 (uint8_t * dst, int dst_wrap,
+    const uint8_t * src, int src_wrap, int width, int height)
+{
+  int rows_left = height;
+
+  while (rows_left > 0) {
+    grow21_line (dst, src, width);
+    if (rows_left % 2 != 0)
+      src += src_wrap;
+    dst += dst_wrap;
+    rows_left--;
+  }
+}
+
+/* 1x1 -> 4x1: quadruple every row horizontally.  width and height are
+   those of the destination; one source row is consumed per destination
+   row. */
+static void
+grow41 (uint8_t * dst, int dst_wrap,
+    const uint8_t * src, int src_wrap, int width, int height)
+{
+  int rows_left = height;
+
+  while (rows_left > 0) {
+    grow41_line (dst, src, width);
+    dst += dst_wrap;
+    src += src_wrap;
+    rows_left--;
+  }
+}
+
+/* 1x1 -> 4x4: quadruple the picture in both directions.  width and height
+   are those of the destination.  The source advances one row whenever the
+   count of destination rows still to write is 1 modulo 4. */
+static void
+grow44 (uint8_t * dst, int dst_wrap,
+    const uint8_t * src, int src_wrap, int width, int height)
+{
+  int rows_left = height;
+
+  while (rows_left > 0) {
+    grow41_line (dst, src, width);
+    if ((rows_left & 3) == 1)
+      src += src_wrap;
+    dst += dst_wrap;
+    rows_left--;
+  }
+}
+
+/* 1x2 -> 2x1: average two vertically adjacent source bytes (truncating)
+   and write the result twice side by side.  width and height are those of
+   the destination; two source rows are consumed per destination row and
+   only width / 2 source columns are read. */
+static void
+conv411 (uint8_t * dst, int dst_wrap,
+    const uint8_t * src, int src_wrap, int width, int height)
+{
+  const int src_cols = width >> 1;
+  int row, col;
+
+  for (row = 0; row < height; row++) {
+    const uint8_t *upper = src;
+    const uint8_t *lower = src + src_wrap;
+
+    for (col = 0; col < src_cols; col++) {
+      const int avg = (upper[col] + lower[col]) >> 1;
+
+      dst[2 * col] = avg;
+      dst[2 * col + 1] = avg;
+    }
+    src += src_wrap * 2;
+    dst += dst_wrap;
+  }
+}
+
+/* XXX: add jpeg quantize code */
+
+/* first palette index after the 6x6x6 color cube */
+#define TRANSP_INDEX (6*6*6)
+
+/* Map an RGB triple to its index in the 6x6x6 color cube: every component
+   is reduced to a level (value / 47, taken modulo 6) and the three levels
+   are combined as r * 36 + g * 6 + b.
+   this is maybe slow, but allows for extensions */
+static inline unsigned char
+gif_clut_index (uint8_t r, uint8_t g, uint8_t b)
+{
+  const int r_level = (r / 47) % 6;
+  const int g_level = (g / 47) % 6;
+  const int b_level = (b / 47) % 6;
+
+  return (r_level * 6 * 6 + g_level * 6 + b_level);
+}
+
+/* Fill 'palette' (treated as 256 32-bit words) with a fixed palette:
+   - entries 0..215: the 6x6x6 color cube, ordered r-major then g then b,
+     each stored as 0xff in bits 24-31 followed by r, g, b in bits 16-23,
+     8-15 and 0-7;
+   - if has_alpha is non-zero, entry 216 is 0;
+   - all remaining entries are 0xff000000.
+   The caller must supply storage that may be accessed as uint32_t. */
+static void
+build_rgb_palette (uint8_t * palette, int has_alpha)
+{
+  static const uint8_t pal_value[6] = { 0x00, 0x33, 0x66, 0x99, 0xcc, 0xff };
+  uint32_t *pal = (uint32_t *) palette;
+  int i = 0;
+  int r, g, b;
+
+  for (r = 0; r < 6; r++) {
+    for (g = 0; g < 6; g++) {
+      for (b = 0; b < 6; b++) {
+        /* build the word in unsigned arithmetic: "0xff << 24" on a signed
+           int shifts into the sign bit, which is undefined behavior */
+        pal[i++] = 0xff000000u | ((uint32_t) pal_value[r] << 16) |
+            ((uint32_t) pal_value[g] << 8) | pal_value[b];
+      }
+    }
+  }
+  if (has_alpha)
+    pal[i++] = 0;
+  while (i < 256)
+    pal[i++] = 0xff000000;
+}
+
+/* copy bit n to bits 0 ... n - 1; the result is limited to the low 8 bits
+   of 'a' (bits 8 and above are cleared) */
+static inline unsigned int
+bitcopy_n (unsigned int a, int n)
+{
+  const int low_bits = (1 << n) - 1;
+  const unsigned int kept = a & (0xff & ~low_bits);
+  unsigned int fill = 0;
+
+  if ((a >> n) & 1)
+    fill = low_bits;
+  return kept | fill;
+}
+
+/* rgb555 handling */
+
+/* Parameters for the following inclusion of imgconvert_template.h.
+   NOTE(review): the template presumably expands RGB_NAME, RGB_IN, RGBA_IN,
+   RGBA_OUT and BPP into the *rgb555* conversion routines referenced by
+   convert_table, and presumably #undefs them afterwards since they are
+   redefined below without an #undef here -- confirm against the header. */
+#define RGB_NAME rgb555
+
+/* Read the 16-bit pixel at s and expand its three 5-bit fields (red at
+   bits 10-14, green at bits 5-9, blue at bits 0-4) to 8 bits each;
+   bitcopy_n() fills the three low bits with a copy of bit 3. */
+#define RGB_IN(r, g, b, s)\
+{\
+    unsigned int v = ((const uint16_t *)(s))[0];\
+    r = bitcopy_n(v >> (10 - 3), 3);\
+    g = bitcopy_n(v >> (5 - 3), 3);\
+    b = bitcopy_n(v << 3, 3);\
+}
+
+/* Same as RGB_IN, plus alpha: 0xff when bit 15 of the pixel is set,
+   0 otherwise. */
+#define RGBA_IN(r, g, b, a, s)\
+{\
+    unsigned int v = ((const uint16_t *)(s))[0];\
+    r = bitcopy_n(v >> (10 - 3), 3);\
+    g = bitcopy_n(v >> (5 - 3), 3);\
+    b = bitcopy_n(v << 3, 3);\
+    a = (-(v >> 15)) & 0xff;\
+}
+
+/* Pack 8-bit r, g, b into 5 bits each and store bit 7 of a in bit 15. */
+#define RGBA_OUT(d, r, g, b, a)\
+{\
+    ((uint16_t *)(d))[0] = ((r >> 3) << 10) | ((g >> 3) << 5) | (b >> 3) | \
+                           ((a << 8) & 0x8000);\
+}
+
+/* bytes per pixel */
+#define BPP 2
+
+#include "imgconvert_template.h"
+
+/* rgb565 handling */
+
+/* Parameters for the following inclusion of imgconvert_template.h
+   (NOTE(review): presumably consumed and #undef'd by the template --
+   confirm against the header). */
+#define RGB_NAME rgb565
+
+/* Read the 16-bit pixel at s: red is the 5-bit field at bits 11-15, green
+   the 6-bit field at bits 5-10, blue the 5-bit field at bits 0-4.  Each is
+   expanded to 8 bits, with bitcopy_n() filling the vacated low bits. */
+#define RGB_IN(r, g, b, s)\
+{\
+    unsigned int v = ((const uint16_t *)(s))[0];\
+    r = bitcopy_n(v >> (11 - 3), 3);\
+    g = bitcopy_n(v >> (5 - 2), 2);\
+    b = bitcopy_n(v << 3, 3);\
+}
+
+/* Pack 8-bit r, g, b into a 5-6-5 16-bit pixel at d. */
+#define RGB_OUT(d, r, g, b)\
+{\
+    ((uint16_t *)(d))[0] = ((r >> 3) << 11) | ((g >> 2) << 5) | (b >> 3);\
+}
+
+/* bytes per pixel */
+#define BPP 2
+
+#include "imgconvert_template.h"
+
+/* bgr24 handling */
+
+/* Parameters for the following inclusion of imgconvert_template.h. */
+#define RGB_NAME bgr24
+
+/* Three bytes per pixel, stored in the order blue, green, red. */
+#define RGB_IN(r, g, b, s)\
+{\
+    b = (s)[0];\
+    g = (s)[1];\
+    r = (s)[2];\
+}
+
+#define RGB_OUT(d, r, g, b)\
+{\
+    (d)[0] = b;\
+    (d)[1] = g;\
+    (d)[2] = r;\
+}
+
+/* bytes per pixel */
+#define BPP 3
+
+#include "imgconvert_template.h"
+
+/* NOTE(review): explicit cleanup here, unlike after the 16-bit sections;
+   presumably the template leaves these defined for this configuration --
+   confirm against imgconvert_template.h. */
+#undef RGB_IN
+#undef RGB_OUT
+#undef BPP
+
+/* rgb24 handling */
+
+/* Parameters for the following inclusion of imgconvert_template.h. */
+#define RGB_NAME rgb24
+/* NOTE(review): presumably tested by the template to enable code that only
+   exists for this format -- confirm against imgconvert_template.h. */
+#define FMT_RGB24
+
+/* Three bytes per pixel, stored in the order red, green, blue. */
+#define RGB_IN(r, g, b, s)\
+{\
+    r = (s)[0];\
+    g = (s)[1];\
+    b = (s)[2];\
+}
+
+#define RGB_OUT(d, r, g, b)\
+{\
+    (d)[0] = r;\
+    (d)[1] = g;\
+    (d)[2] = b;\
+}
+
+/* bytes per pixel */
+#define BPP 3
+
+#include "imgconvert_template.h"
+
+/* rgba32 handling */
+
+/* Parameters for the following inclusion of imgconvert_template.h. */
+#define RGB_NAME rgba32
+/* NOTE(review): presumably tested by the template to enable code that only
+   exists for this format -- confirm against imgconvert_template.h. */
+#define FMT_RGBA32
+
+/* One pixel is a 32-bit word read in native byte order: alpha in bits
+   24-31, red in bits 16-23, green in bits 8-15, blue in bits 0-7.
+   RGB_IN ignores the alpha bits. */
+#define RGB_IN(r, g, b, s)\
+{\
+    unsigned int v = ((const uint32_t *)(s))[0];\
+    r = (v >> 16) & 0xff;\
+    g = (v >> 8) & 0xff;\
+    b = v & 0xff;\
+}
+
+#define RGBA_IN(r, g, b, a, s)\
+{\
+    unsigned int v = ((const uint32_t *)(s))[0];\
+    a = (v >> 24) & 0xff;\
+    r = (v >> 16) & 0xff;\
+    g = (v >> 8) & 0xff;\
+    b = v & 0xff;\
+}
+
+/* Note: the arguments are not masked or parenthesized, so they are
+   expected to be simple expressions already limited to 8 bits. */
+#define RGBA_OUT(d, r, g, b, a)\
+{\
+    ((uint32_t *)(d))[0] = (a << 24) | (r << 16) | (g << 8) | b;\
+}
+
+/* bytes per pixel */
+#define BPP 4
+
+#include "imgconvert_template.h"
+
+/* Expand a 1 bit-per-pixel picture to one byte per pixel.  Every source
+   byte is XORed with xor_mask first; then each bit, most significant bit
+   first, becomes 0xff if set and 0x00 if clear.  A source row occupies
+   (width + 7) / 8 bytes. */
+static void
+mono_to_gray (AVPicture * dst, const AVPicture * src,
+    int width, int height, int xor_mask)
+{
+  const unsigned char *p;
+  unsigned char *q;
+  int v, dst_wrap, src_wrap;
+  int y, w;
+
+  /* the *_wrap values are the padding to skip at the end of each row, not
+     full strides: the pointers already advance while pixels are processed */
+  p = src->data[0];
+  src_wrap = src->linesize[0] - ((width + 7) >> 3);
+
+  q = dst->data[0];
+  dst_wrap = dst->linesize[0] - width;
+  for (y = 0; y < height; y++) {
+    w = width;
+    /* whole source bytes: eight output pixels each; negating a 0/1 value
+       and storing it in an unsigned char gives 0x00/0xff */
+    while (w >= 8) {
+      v = *p++ ^ xor_mask;
+      q[0] = -(v >> 7);
+      q[1] = -((v >> 6) & 1);
+      q[2] = -((v >> 5) & 1);
+      q[3] = -((v >> 4) & 1);
+      q[4] = -((v >> 3) & 1);
+      q[5] = -((v >> 2) & 1);
+      q[6] = -((v >> 1) & 1);
+      q[7] = -((v >> 0) & 1);
+      w -= 8;
+      q += 8;
+    }
+    /* last, partially used source byte of the row: take the remaining
+       pixels from the top bits */
+    if (w > 0) {
+      v = *p++ ^ xor_mask;
+      do {
+	q[0] = -((v >> 7) & 1);
+	q++;
+	v <<= 1;
+      } while (--w);
+    }
+    p += src_wrap;
+    q += dst_wrap;
+  }
+}
+
+/* PIX_FMT_MONOWHITE -> PIX_FMT_GRAY8 entry of convert_table: every source
+   byte is inverted (XOR 0xff) before its bits are expanded. */
+static void
+monowhite_to_gray (AVPicture * dst, const AVPicture * src,
+    int width, int height)
+{
+  const int invert_bits = 0xff;
+
+  mono_to_gray (dst, src, width, height, invert_bits);
+}
+
+/* PIX_FMT_MONOBLACK -> PIX_FMT_GRAY8 entry of convert_table: source bits
+   are expanded as they are (XOR mask 0). */
+static void
+monoblack_to_gray (AVPicture * dst, const AVPicture * src,
+    int width, int height)
+{
+  const int keep_bits = 0x00;
+
+  mono_to_gray (dst, src, width, height, keep_bits);
+}
+
+/* Reduce a one byte-per-pixel picture to 1 bit per pixel.  The top bit of
+   every source byte (i.e. value >= 128) becomes one output bit, packed
+   most significant bit first; every output byte is XORed with xor_mask
+   before it is stored.  A destination row occupies (width + 7) / 8
+   bytes. */
+static void
+gray_to_mono (AVPicture * dst, const AVPicture * src,
+    int width, int height, int xor_mask)
+{
+  int n;
+  const uint8_t *s;
+  uint8_t *d;
+  int j, b, v, n1, src_wrap, dst_wrap, y;
+
+  /* the *_wrap values are the padding to skip at the end of each row, not
+     full strides: the pointers already advance while pixels are processed */
+  s = src->data[0];
+  src_wrap = src->linesize[0] - width;
+
+  d = dst->data[0];
+  dst_wrap = dst->linesize[0] - ((width + 7) >> 3);
+
+  for (y = 0; y < height; y++) {
+    n = width;
+    /* complete output bytes: eight source pixels each */
+    while (n >= 8) {
+      v = 0;
+      for (j = 0; j < 8; j++) {
+	b = s[0];
+	s++;
+	v = (v << 1) | (b >> 7);
+      }
+      d[0] = v ^ xor_mask;
+      d++;
+      n -= 8;
+    }
+    /* remaining 1..7 pixels: collect them, then shift them up so they
+       occupy the top bits of the last byte; the unused low bits are 0
+       before the XOR */
+    if (n > 0) {
+      n1 = n;
+      v = 0;
+      while (n > 0) {
+	b = s[0];
+	s++;
+	v = (v << 1) | (b >> 7);
+	n--;
+      }
+      d[0] = (v << (8 - (n1 & 7))) ^ xor_mask;
+      d++;
+    }
+    s += src_wrap;
+    d += dst_wrap;
+  }
+}
+
+/* PIX_FMT_GRAY8 -> PIX_FMT_MONOWHITE entry of convert_table: the packed
+   bits are inverted (XOR 0xff) before being stored. */
+static void
+gray_to_monowhite (AVPicture * dst, const AVPicture * src,
+    int width, int height)
+{
+  const int invert_bits = 0xff;
+
+  gray_to_mono (dst, src, width, height, invert_bits);
+}
+
+/* PIX_FMT_GRAY8 -> PIX_FMT_MONOBLACK entry of convert_table: the packed
+   bits are stored as they are (XOR mask 0). */
+static void
+gray_to_monoblack (AVPicture * dst, const AVPicture * src,
+    int width, int height)
+{
+  const int keep_bits = 0x00;
+
+  gray_to_mono (dst, src, width, height, keep_bits);
+}
+
+/* One cell of convert_table: the routine that converts a width x height
+   picture from one specific pixel format to another, or NULL when no
+   direct routine exists for that pair. */
+typedef struct ConvertEntry
+{
+  void (*convert) (AVPicture * dst,
+      const AVPicture * src, int width, int height);
+}
+ConvertEntry;
+
+/* Add each new conversion function in this table. In order to be able
+   to convert from any format to any format, the following constraints
+   must be satisfied:
+
+   - all FF_COLOR_RGB formats must convert to and from PIX_FMT_RGB24
+
+   - all FF_COLOR_GRAY formats must convert to and from PIX_FMT_GRAY8
+
+   - all FF_COLOR_RGB formats with alpha must convert to and from PIX_FMT_RGBA32
+
+   - PIX_FMT_YUV444P and PIX_FMT_YUVJ444P must convert to and from
+     PIX_FMT_RGB24.
+
+   - PIX_FMT_YUV422 must convert to and from PIX_FMT_YUV422P.
+
+   The other conversion functions are just optimisations for common cases.
+
+   The table is indexed as convert_table[source format][destination format];
+   pairs that are not listed are zero-initialized, i.e. have a NULL
+   'convert' pointer.
+*/
+static ConvertEntry convert_table[PIX_FMT_NB][PIX_FMT_NB] = {
+  [PIX_FMT_YUV420P] = {
+	[PIX_FMT_RGB555] = {
+	    .convert = yuv420p_to_rgb555},
+	[PIX_FMT_RGB565] = {
+	    .convert = yuv420p_to_rgb565},
+	[PIX_FMT_BGR24] = {
+	    .convert = yuv420p_to_bgr24},
+	[PIX_FMT_RGB24] = {
+	    .convert = yuv420p_to_rgb24},
+	[PIX_FMT_RGBA32] = {
+	    .convert = yuv420p_to_rgba32},
+      },
+  [PIX_FMT_YUV422P] = {
+	[PIX_FMT_YUV422] = {
+	      .convert = yuv422p_to_yuv422,
+	    },
+      },
+  [PIX_FMT_YUV444P] = {
+	[PIX_FMT_RGB24] = {
+	    .convert = yuv444p_to_rgb24},
+      },
+  [PIX_FMT_YUVJ420P] = {
+	[PIX_FMT_RGB555] = {
+	    .convert = yuvj420p_to_rgb555},
+	[PIX_FMT_RGB565] = {
+	    .convert = yuvj420p_to_rgb565},
+	[PIX_FMT_BGR24] = {
+	    .convert = yuvj420p_to_bgr24},
+	[PIX_FMT_RGB24] = {
+	    .convert = yuvj420p_to_rgb24},
+	[PIX_FMT_RGBA32] = {
+	    .convert = yuvj420p_to_rgba32},
+      },
+  [PIX_FMT_YUVJ444P] = {
+	[PIX_FMT_RGB24] = {
+	    .convert = yuvj444p_to_rgb24},
+      },
+  [PIX_FMT_YUV422] = {
+	[PIX_FMT_YUV420P] = {
+	      .convert = yuv422_to_yuv420p,
+	    },
+	[PIX_FMT_YUV422P] = {
+	      .convert = yuv422_to_yuv422p,
+	    },
+      },
+
+  [PIX_FMT_RGB24] = {
+	[PIX_FMT_YUV420P] = {
+	    .convert = rgb24_to_yuv420p},
+	[PIX_FMT_RGB565] = {
+	    .convert = rgb24_to_rgb565},
+	[PIX_FMT_RGB555] = {
+	    .convert = rgb24_to_rgb555},
+	[PIX_FMT_RGBA32] = {
+	    .convert = rgb24_to_rgba32},
+	[PIX_FMT_BGR24] = {
+	    .convert = rgb24_to_bgr24},
+	[PIX_FMT_GRAY8] = {
+	    .convert = rgb24_to_gray},
+	[PIX_FMT_PAL8] = {
+	    .convert = rgb24_to_pal8},
+	[PIX_FMT_YUV444P] = {
+	    .convert = rgb24_to_yuv444p},
+	[PIX_FMT_YUVJ420P] = {
+	    .convert = rgb24_to_yuvj420p},
+	[PIX_FMT_YUVJ444P] = {
+	    .convert = rgb24_to_yuvj444p},
+      },
+  [PIX_FMT_RGBA32] = {
+	[PIX_FMT_RGB24] = {
+	    .convert = rgba32_to_rgb24},
+	[PIX_FMT_RGB555] = {
+	    .convert = rgba32_to_rgb555},
+	[PIX_FMT_PAL8] = {
+	    .convert = rgba32_to_pal8},
+	[PIX_FMT_YUV420P] = {
+	    .convert = rgba32_to_yuv420p},
+	[PIX_FMT_GRAY8] = {
+	    .convert = rgba32_to_gray},
+      },
+  [PIX_FMT_BGR24] = {
+	[PIX_FMT_RGB24] = {
+	    .convert = bgr24_to_rgb24},
+	[PIX_FMT_YUV420P] = {
+	    .convert = bgr24_to_yuv420p},
+	[PIX_FMT_GRAY8] = {
+	    .convert = bgr24_to_gray},
+      },
+  [PIX_FMT_RGB555] = {
+	[PIX_FMT_RGB24] = {
+	    .convert = rgb555_to_rgb24},
+	[PIX_FMT_RGBA32] = {
+	    .convert = rgb555_to_rgba32},
+	[PIX_FMT_YUV420P] = {
+	    .convert = rgb555_to_yuv420p},
+	[PIX_FMT_GRAY8] = {
+	    .convert = rgb555_to_gray},
+      },
+  [PIX_FMT_RGB565] = {
+	[PIX_FMT_RGB24] = {
+	    .convert = rgb565_to_rgb24},
+	[PIX_FMT_YUV420P] = {
+	    .convert = rgb565_to_yuv420p},
+	[PIX_FMT_GRAY8] = {
+	    .convert = rgb565_to_gray},
+      },
+  [PIX_FMT_GRAY8] = {
+	[PIX_FMT_RGB555] = {
+	    .convert = gray_to_rgb555},
+	[PIX_FMT_RGB565] = {
+	    .convert = gray_to_rgb565},
+	[PIX_FMT_RGB24] = {
+	    .convert = gray_to_rgb24},
+	[PIX_FMT_BGR24] = {
+	    .convert = gray_to_bgr24},
+	[PIX_FMT_RGBA32] = {
+	    .convert = gray_to_rgba32},
+	[PIX_FMT_MONOWHITE] = {
+	    .convert = gray_to_monowhite},
+	[PIX_FMT_MONOBLACK] = {
+	    .convert = gray_to_monoblack},
+      },
+  [PIX_FMT_MONOWHITE] = {
+	[PIX_FMT_GRAY8] = {
+	    .convert = monowhite_to_gray},
+      },
+  [PIX_FMT_MONOBLACK] = {
+	[PIX_FMT_GRAY8] = {
+	    .convert = monoblack_to_gray},
+      },
+  [PIX_FMT_PAL8] = {
+	[PIX_FMT_RGB555] = {
+	    .convert = pal8_to_rgb555},
+	[PIX_FMT_RGB565] = {
+	    .convert = pal8_to_rgb565},
+	[PIX_FMT_BGR24] = {
+	    .convert = pal8_to_bgr24},
+	[PIX_FMT_RGB24] = {
+	    .convert = pal8_to_rgb24},
+	[PIX_FMT_RGBA32] = {
+	    .convert = pal8_to_rgba32},
+      },
+};
+
+/**
+ * Allocate a buffer for a picture of the given format and dimensions with
+ * av_malloc() and set up 'picture' to point into it via avpicture_fill().
+ *
+ * Returns 0 on success.  If the size cannot be determined or the
+ * allocation fails, 'picture' is zeroed and -1 is returned.
+ */
+int
+avpicture_alloc (AVPicture * picture, int pix_fmt, int width, int height)
+{
+  int size;
+  void *ptr;
+
+  size = avpicture_get_size (pix_fmt, width, height);
+  /* avpicture_get_size() passes on avpicture_fill()'s result, which is -1
+     for formats it does not handle.  Stored in an unsigned variable that
+     turned into a request for almost 4 GB which, if it succeeded, leaked
+     the buffer and still reported success. */
+  if (size < 0)
+    goto fail;
+  ptr = av_malloc (size);
+  if (!ptr)
+    goto fail;
+  avpicture_fill (picture, ptr, pix_fmt, width, height);
+  return 0;
+fail:
+  memset (picture, 0, sizeof (AVPicture));
+  return -1;
+}
+
+/* Release a picture's buffer by handing picture->data[0] to av_free().
+   The fields of 'picture' themselves are not modified. */
+void
+avpicture_free (AVPicture * picture)
+{
+  void *buffer = picture->data[0];
+
+  av_free (buffer);
+}
+
+/* return 1 if the format is planar and its color type is FF_COLOR_YUV or
+   FF_COLOR_YUV_JPEG, 0 otherwise */
+static inline int
+is_yuv_planar (PixFmtInfo * ps)
+{
+  if (ps->color_type != FF_COLOR_YUV && ps->color_type != FF_COLOR_YUV_JPEG)
+    return 0;
+  return ps->pixel_type == FF_PIXEL_PLANAR;
+}
+
+/* XXX: always use linesize. Return -1 if not supported */
+int
+img_convert (AVPicture * dst, int dst_pix_fmt,
+    const AVPicture * src, int src_pix_fmt, int src_width, int src_height)
+{
+  static int inited;
+  int i, ret, dst_width, dst_height, int_pix_fmt;
+  PixFmtInfo *src_pix, *dst_pix;
+  ConvertEntry *ce;
+  AVPicture tmp1, *tmp = &tmp1;
+
+  if (src_pix_fmt < 0 || src_pix_fmt >= PIX_FMT_NB ||
+      dst_pix_fmt < 0 || dst_pix_fmt >= PIX_FMT_NB)
+    return -1;
+  if (src_width <= 0 || src_height <= 0)
+    return 0;
+
+  if (!inited) {
+    inited = 1;
+    img_convert_init ();
+  }
+
+  dst_width = src_width;
+  dst_height = src_height;
+
+  dst_pix = &pix_fmt_info[dst_pix_fmt];
+  src_pix = &pix_fmt_info[src_pix_fmt];
+  if (src_pix_fmt == dst_pix_fmt) {
+    /* no conversion needed: just copy */
+    img_copy (dst, src, dst_pix_fmt, dst_width, dst_height);
+    return 0;
+  }
+
+  ce = &convert_table[src_pix_fmt][dst_pix_fmt];
+  if (ce->convert) {
+    /* specific convertion routine */
+    ce->convert (dst, src, dst_width, dst_height);
+    return 0;
+  }
+
+  /* gray to YUV */
+  if (is_yuv_planar (dst_pix) && src_pix_fmt == PIX_FMT_GRAY8) {
+    int w, h, y;
+    uint8_t *d;
+
+    if (dst_pix->color_type == FF_COLOR_YUV_JPEG) {
+      img_copy_plane (dst->data[0], dst->linesize[0],
+	  src->data[0], src->linesize[0], dst_width, dst_height);
+    } else {
+      img_apply_table (dst->data[0], dst->linesize[0],
+	  src->data[0], src->linesize[0],
+	  dst_width, dst_height, y_jpeg_to_ccir);
+    }
+    /* fill U and V with 128 */
+    w = dst_width;
+    h = dst_height;
+    w >>= dst_pix->x_chroma_shift;
+    h >>= dst_pix->y_chroma_shift;
+    for (i = 1; i <= 2; i++) {
+      d = dst->data[i];
+      for (y = 0; y < h; y++) {
+	memset (d, 128, w);
+	d += dst->linesize[i];
+      }
+    }
+    return 0;
+  }
+
+  /* YUV to gray */
+  if (is_yuv_planar (src_pix) && dst_pix_fmt == PIX_FMT_GRAY8) {
+    if (src_pix->color_type == FF_COLOR_YUV_JPEG) {
+      img_copy_plane (dst->data[0], dst->linesize[0],
+	  src->data[0], src->linesize[0], dst_width, dst_height);
+    } else {
+      img_apply_table (dst->data[0], dst->linesize[0],
+	  src->data[0], src->linesize[0],
+	  dst_width, dst_height, y_ccir_to_jpeg);
+    }
+    return 0;
+  }
+
+  /* YUV to YUV planar */
+  if (is_yuv_planar (dst_pix) && is_yuv_planar (src_pix)) {
+    int x_shift, y_shift, w, h, xy_shift;
+    void (*resize_func) (uint8_t * dst, int dst_wrap,
+	const uint8_t * src, int src_wrap, int width, int height);
+
+    /* compute chroma size of the smallest dimensions */
+    w = dst_width;
+    h = dst_height;
+    if (dst_pix->x_chroma_shift >= src_pix->x_chroma_shift)
+      w >>= dst_pix->x_chroma_shift;
+    else
+      w >>= src_pix->x_chroma_shift;
+    if (dst_pix->y_chroma_shift >= src_pix->y_chroma_shift)
+      h >>= dst_pix->y_chroma_shift;
+    else
+      h >>= src_pix->y_chroma_shift;
+
+    x_shift = (dst_pix->x_chroma_shift - src_pix->x_chroma_shift);
+    y_shift = (dst_pix->y_chroma_shift - src_pix->y_chroma_shift);
+    xy_shift = ((x_shift & 0xf) << 4) | (y_shift & 0xf);
+    /* there must be filters for conversion at least from and to
+       YUV444 format */
+    switch (xy_shift) {
+      case 0x00:
+	resize_func = img_copy_plane;
+	break;
+      case 0x10:
+	resize_func = shrink21;
+	break;
+      case 0x20:
+	resize_func = shrink41;
+	break;
+      case 0x01:
+	resize_func = shrink12;
+	break;
+      case 0x11:
+	resize_func = shrink22;
+	break;
+      case 0x22:
+	resize_func = shrink44;
+	break;
+      case 0xf0:
+	resize_func = grow21;
+	break;
+      case 0xe0:
+	resize_func = grow41;
+	break;
+      case 0xff:
+	resize_func = grow22;
+	break;
+      case 0xee:
+	resize_func = grow44;
+	break;
+      case 0xf1:
+	resize_func = conv411;
+	break;
+      default:
+	/* currently not handled */
+	goto no_chroma_filter;
+    }
+
+    img_copy_plane (dst->data[0], dst->linesize[0],
+	src->data[0], src->linesize[0], dst_width, dst_height);
+
+    for (i = 1; i <= 2; i++)
+      resize_func (dst->data[i], dst->linesize[i],
+	  src->data[i], src->linesize[i],
+	  dst_width >> dst_pix->x_chroma_shift,
+	  dst_height >> dst_pix->y_chroma_shift);
+    /* if yuv color space conversion is needed, we do it here on
+       the destination image */
+    if (dst_pix->color_type != src_pix->color_type) {
+      const uint8_t *y_table, *c_table;
+
+      if (dst_pix->color_type == FF_COLOR_YUV) {
+	y_table = y_jpeg_to_ccir;
+	c_table = c_jpeg_to_ccir;
+      } else {
+	y_table = y_ccir_to_jpeg;
+	c_table = c_ccir_to_jpeg;
+      }
+      img_apply_table (dst->data[0], dst->linesize[0],
+	  dst->data[0], dst->linesize[0], dst_width, dst_height, y_table);
+
+      for (i = 1; i <= 2; i++)
+	img_apply_table (dst->data[i], dst->linesize[i],
+	    dst->data[i], dst->linesize[i],
+	    dst_width >> dst_pix->x_chroma_shift,
+	    dst_height >> dst_pix->y_chroma_shift, c_table);
+    }
+    return 0;
+  }
+no_chroma_filter:
+
+  /* try to use an intermediate format */
+  if (src_pix_fmt == PIX_FMT_YUV422 || dst_pix_fmt == PIX_FMT_YUV422) {
+    /* specific case: convert to YUV422P first */
+    int_pix_fmt = PIX_FMT_YUV422P;
+  } else if ((src_pix->color_type == FF_COLOR_GRAY &&
+	  src_pix_fmt != PIX_FMT_GRAY8) ||
+      (dst_pix->color_type == FF_COLOR_GRAY && dst_pix_fmt != PIX_FMT_GRAY8)) {
+    /* gray8 is the normalized format */
+    int_pix_fmt = PIX_FMT_GRAY8;
+  } else if ((is_yuv_planar (src_pix) &&
+	  src_pix_fmt != PIX_FMT_YUV444P && src_pix_fmt != PIX_FMT_YUVJ444P)) {
+    /* yuv444 is the normalized format */
+    if (src_pix->color_type == FF_COLOR_YUV_JPEG)
+      int_pix_fmt = PIX_FMT_YUVJ444P;
+    else
+      int_pix_fmt = PIX_FMT_YUV444P;
+  } else if ((is_yuv_planar (dst_pix) &&
+	  dst_pix_fmt != PIX_FMT_YUV444P && dst_pix_fmt != PIX_FMT_YUVJ444P)) {
+    /* yuv444 is the normalized format */
+    if (dst_pix->color_type == FF_COLOR_YUV_JPEG)
+      int_pix_fmt = PIX_FMT_YUVJ444P;
+    else
+      int_pix_fmt = PIX_FMT_YUV444P;
+  } else {
+    /* the two formats are rgb or gray8 or yuv[j]444p */
+    if (src_pix->is_alpha && dst_pix->is_alpha)
+      int_pix_fmt = PIX_FMT_RGBA32;
+    else
+      int_pix_fmt = PIX_FMT_RGB24;
+  }
+  if (avpicture_alloc (tmp, int_pix_fmt, dst_width, dst_height) < 0)
+    return -1;
+  ret = -1;
+  if (img_convert (tmp, int_pix_fmt,
+	  src, src_pix_fmt, src_width, src_height) < 0)
+    goto fail1;
+  if (img_convert (dst, dst_pix_fmt,
+	  tmp, int_pix_fmt, dst_width, dst_height) < 0)
+    goto fail1;
+  ret = 0;
+fail1:
+  avpicture_free (tmp);
+  return ret;
+}
+
+/* Tell which kinds of transparency a PAL8 picture really uses: returns an
+   ored mask of FF_ALPHA_TRANSP (an alpha of 0x00 was found) and
+   FF_ALPHA_SEMI_TRANSP (an alpha that is neither 0x00 nor 0xff was found).
+   NOTE: every pixel is visited so that the answer is exact. */
+static int
+get_alpha_info_pal8 (const AVPicture * src, int width, int height)
+{
+  uint32_t *pal = (uint32_t *) src->data[1];
+  const unsigned char *row = src->data[0];
+  int flags = 0;
+  int col, line;
+
+  /* rows are src->linesize[0] bytes apart, one palette index per pixel */
+  for (line = 0; line < height; line++, row += src->linesize[0]) {
+    for (col = 0; col < width; col++) {
+      /* the alpha value is the top byte of the 32 bit palette entry */
+      unsigned int alpha = pal[row[col]] >> 24;
+
+      if (alpha == 0x00)
+	flags |= FF_ALPHA_TRANSP;
+      else if (alpha != 0xff)
+	flags |= FF_ALPHA_SEMI_TRANSP;
+    }
+  }
+  return flags;
+}
+
+/**
+ * Tell if the width x height image src (format pix_fmt) has transparent alpha.
+ * @return ored mask of FF_ALPHA_xxx constants, 0 if none or pix_fmt is invalid
+ */
+int
+img_get_alpha_info (const AVPicture * src, int pix_fmt, int width, int height)
+{
+  int ret;
+
+  /* same range check as img_convert(): never index pix_fmt_info blindly */
+  if (pix_fmt < 0 || pix_fmt >= PIX_FMT_NB)
+    return 0;
+  if (!pix_fmt_info[pix_fmt].is_alpha)
+    return 0;			/* no alpha can be represented in format */
+  switch (pix_fmt) {
+    case PIX_FMT_RGBA32:
+      ret = get_alpha_info_rgba32 (src, width, height);
+      break;
+    case PIX_FMT_RGB555:
+      ret = get_alpha_info_rgb555 (src, width, height);
+      break;
+    case PIX_FMT_PAL8:
+      ret = get_alpha_info_pal8 (src, width, height);
+      break;
+    default:
+      /* we do not know, so everything is indicated */
+      ret = FF_ALPHA_TRANSP | FF_ALPHA_SEMI_TRANSP;
+      break;
+  }
+  return ret;
+}
+
+#ifdef HAVE_MMX
+#define DEINT_INPLACE_LINE_LUM \
+                    movd_m2r(lum_m4[0],mm0);\
+                    movd_m2r(lum_m3[0],mm1);\
+                    movd_m2r(lum_m2[0],mm2);\
+                    movd_m2r(lum_m1[0],mm3);\
+                    movd_m2r(lum[0],mm4);\
+                    punpcklbw_r2r(mm7,mm0);\
+                    movd_r2m(mm2,lum_m4[0]);\
+                    punpcklbw_r2r(mm7,mm1);\
+                    punpcklbw_r2r(mm7,mm2);\
+                    punpcklbw_r2r(mm7,mm3);\
+                    punpcklbw_r2r(mm7,mm4);\
+                    paddw_r2r(mm3,mm1);\
+                    psllw_i2r(1,mm2);\
+                    paddw_r2r(mm4,mm0);\
+                    psllw_i2r(2,mm1);\
+                    paddw_r2r(mm6,mm2);\
+                    paddw_r2r(mm2,mm1);\
+                    psubusw_r2r(mm0,mm1);\
+                    psrlw_i2r(3,mm1);\
+                    packuswb_r2r(mm7,mm1);\
+                    movd_r2m(mm1,lum_m2[0]);
+
+#define DEINT_LINE_LUM \
+                    movd_m2r(lum_m4[0],mm0);\
+                    movd_m2r(lum_m3[0],mm1);\
+                    movd_m2r(lum_m2[0],mm2);\
+                    movd_m2r(lum_m1[0],mm3);\
+                    movd_m2r(lum[0],mm4);\
+                    punpcklbw_r2r(mm7,mm0);\
+                    punpcklbw_r2r(mm7,mm1);\
+                    punpcklbw_r2r(mm7,mm2);\
+                    punpcklbw_r2r(mm7,mm3);\
+                    punpcklbw_r2r(mm7,mm4);\
+                    paddw_r2r(mm3,mm1);\
+                    psllw_i2r(1,mm2);\
+                    paddw_r2r(mm4,mm0);\
+                    psllw_i2r(2,mm1);\
+                    paddw_r2r(mm6,mm2);\
+                    paddw_r2r(mm2,mm1);\
+                    psubusw_r2r(mm0,mm1);\
+                    psrlw_i2r(3,mm1);\
+                    packuswb_r2r(mm7,mm1);\
+                    movd_r2m(mm1,dst[0]);
+#endif
+
+/* Write to dst the size pixels of one line filtered vertically from five
+   source lines.  filter parameters: [-1 4 2 4 -1] // 8 */
+static void
+deinterlace_line (uint8_t * dst,
+    const uint8_t * lum_m4, const uint8_t * lum_m3,
+    const uint8_t * lum_m2, const uint8_t * lum_m1,
+    const uint8_t * lum, int size)
+{
+  uint8_t *cm = cropTbl + MAX_NEG_CROP;
+  int sum;
+#ifdef HAVE_MMX
+  {
+    mmx_t rounder;
+
+    rounder.uw[0] = 4;
+    rounder.uw[1] = 4;
+    rounder.uw[2] = 4;
+    rounder.uw[3] = 4;
+    pxor_r2r (mm7, mm7);
+    movq_m2r (rounder, mm6);
+  }
+  /* 4 pixels per iteration; the plain C loop below finishes the rest */
+  for (; size > 3; size -= 4) {
+    DEINT_LINE_LUM
+    lum_m4 += 4;
+    lum_m3 += 4;
+    lum_m2 += 4;
+    lum_m1 += 4;
+    lum += 4;
+    dst += 4;
+  }
+#endif
+  /* plain C: the whole line without MMX, else the size % 4 pixels left over */
+  for (; size > 0; size--) {
+    sum = -lum_m4[0];
+    sum += lum_m3[0] << 2;
+    sum += lum_m2[0] << 1;
+    sum += lum_m1[0] << 2;
+    sum += -lum[0];
+    dst[0] = cm[(sum + 4) >> 3];
+    lum_m4++;
+    lum_m3++;
+    lum_m2++;
+    lum_m1++;
+    lum++;
+    dst++;
+  }
+}
+/* In-place deinterlace_line(): lum_m4 receives the old lum_m2 line, which
+   is then overwritten with the filtered result. */
+static void
+deinterlace_line_inplace (uint8_t * lum_m4, uint8_t * lum_m3, uint8_t * lum_m2,
+    uint8_t * lum_m1, uint8_t * lum, int size)
+{
+  uint8_t *cm = cropTbl + MAX_NEG_CROP;
+  int sum;
+#ifdef HAVE_MMX
+  {
+    mmx_t rounder;
+
+    rounder.uw[0] = 4;
+    rounder.uw[1] = 4;
+    rounder.uw[2] = 4;
+    rounder.uw[3] = 4;
+    pxor_r2r (mm7, mm7);
+    movq_m2r (rounder, mm6);
+  }
+  for (; size > 3; size -= 4) {
+    DEINT_INPLACE_LINE_LUM
+    lum_m4 += 4;
+    lum_m3 += 4;
+    lum_m2 += 4;
+    lum_m1 += 4;
+    lum += 4;
+  }
+#endif
+  /* plain C: the whole line without MMX, else the size % 4 pixels left over */
+  for (; size > 0; size--) {
+    sum = -lum_m4[0];
+    sum += lum_m3[0] << 2;
+    sum += lum_m2[0] << 1;
+    lum_m4[0] = lum_m2[0];
+    sum += lum_m1[0] << 2;
+    sum += -lum[0];
+    lum_m2[0] = cm[(sum + 4) >> 3];
+    lum_m4++;
+    lum_m3++;
+    lum_m2++;
+    lum_m1++;
+    lum++;
+  }
+}
+
+/* deinterlacing : 2 temporal taps, 3 spatial taps linear filter. Even (top
+   field) lines of src1 are copied to dst as is; each odd (bottom field) line
+   is rebuilt by deinterlace_line() from the five source lines around it. */
+static void
+deinterlace_bottom_field (uint8_t * dst, int dst_wrap,
+    const uint8_t * src1, int src_wrap, int width, int height)
+{
+  const uint8_t *src_m2, *src_m1, *src_0, *src_p1, *src_p2;
+  int y;
+
+  src_m2 = src1;                /* no line above line 0: use line 0 itself */
+  src_m1 = src1;
+  src_0 = &src_m1[src_wrap];
+  src_p1 = &src_0[src_wrap];
+  src_p2 = &src_p1[src_wrap];
+  for (y = 0; y < (height - 2); y += 2) {
+    memcpy (dst, src_m1, width);        /* even line: plain copy */
+    dst += dst_wrap;
+    deinterlace_line (dst, src_m2, src_m1, src_0, src_p1, src_p2, width);
+    src_m2 = src_0;             /* slide the five line window down by two */
+    src_m1 = src_p1;
+    src_0 = src_p2;
+    src_p1 += 2 * src_wrap;
+    src_p2 += 2 * src_wrap;
+    dst += dst_wrap;
+  }
+  memcpy (dst, src_m1, width);
+  dst += dst_wrap;
+  /* do last line: nothing lies below it, so src_0 replaces the missing lines */
+  deinterlace_line (dst, src_m2, src_m1, src_0, src_0, src_0, width);
+}
+
+/* In-place variant of deinterlace_bottom_field() for the plane at src1. */
+static void
+deinterlace_bottom_field_inplace (uint8_t * src1, int src_wrap,
+    int width, int height)
+{
+  uint8_t *src_m1, *src_0, *src_p1, *src_p2;
+  uint8_t *buf = (uint8_t *) av_malloc (width);	/* unfiltered copy of the line rewritten last */
+  int y;
+
+  if (!buf)
+    return;			/* out of memory: leave the plane as it is */
+  src_m1 = src1;
+  memcpy (buf, src_m1, width);	/* at the start buf stands for line 0 */
+  src_0 = &src_m1[src_wrap];
+  src_p1 = &src_0[src_wrap];
+  src_p2 = &src_p1[src_wrap];
+  for (y = 0; y < (height - 2); y += 2) {
+    deinterlace_line_inplace (buf, src_m1, src_0, src_p1, src_p2, width);
+    src_m1 = src_p1;
+    src_0 = src_p2;
+    src_p1 += 2 * src_wrap;
+    src_p2 += 2 * src_wrap;
+  }
+  deinterlace_line_inplace (buf, src_m1, src_0, src_0, src_0, width);	/* do last line */
+  av_free (buf);
+}
+
+
+/* deinterlace src into dst, in place if both are the same picture: returns 0,
+   or -1 if the pixel format or the dimensions are not supported */
+int
+avpicture_deinterlace (AVPicture * dst, const AVPicture * src,
+    int pix_fmt, int width, int height)
+{
+  int plane, w, h, x_shift, y_shift = 0;
+
+  /* log2 of the horizontal and vertical size reduction of planes 1 and 2 */
+  switch (pix_fmt) {
+    case PIX_FMT_YUV444P:
+      x_shift = 0;
+      break;
+    case PIX_FMT_YUV422P:
+      x_shift = 1;
+      break;
+    case PIX_FMT_YUV420P:
+      x_shift = y_shift = 1;
+      break;
+    case PIX_FMT_YUV411P:
+      x_shift = 2;
+      break;
+    default:
+      return -1;
+  }
+  if ((width & 3) || (height & 3))
+    return -1;
+
+  for (plane = 0; plane < 3; plane++) {
+    /* plane 0 keeps the full size, the two others use the reduced one */
+    w = plane ? width >> x_shift : width;
+    h = plane ? height >> y_shift : height;
+    if (src != dst)
+      deinterlace_bottom_field (dst->data[plane], dst->linesize[plane],
+	  src->data[plane], src->linesize[plane], w, h);
+    else
+      deinterlace_bottom_field_inplace (dst->data[plane],
+	  dst->linesize[plane], w, h);
+  }
+#ifdef HAVE_MMX
+  emms ();
+#endif
+  return 0;
+}
+
+#undef FIX
diff --git a/gst/ffmpegcolorspace/imgconvert_template.h b/gst/ffmpegcolorspace/imgconvert_template.h
new file mode 100644
index 0000000000..cd5a7313c3
--- /dev/null
+++ b/gst/ffmpegcolorspace/imgconvert_template.h
@@ -0,0 +1,857 @@
+/*
+ * Templates for image conversion routines
+ * Copyright (c) 2001, 2002, 2003 Fabrice Bellard.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#ifndef RGB_OUT
+#define RGB_OUT(d, r, g, b) RGBA_OUT(d, r, g, b, 0xff)
+#endif
+
+static void glue(yuv420p_to_, RGB_NAME)(AVPicture *dst, const AVPicture *src,
+                                        int width, int height)
+{
+    const uint8_t *y1_ptr, *y2_ptr, *cb_ptr, *cr_ptr;
+    uint8_t *d, *d1, *d2;
+    int w, y, cb, cr, r_add, g_add, b_add, width2;
+    uint8_t *cm = cropTbl + MAX_NEG_CROP;
+    unsigned int r, g, b;
+    /* planar YUV 4:2:0 to RGB_NAME with the _CCIR macros: one cb/cr pair per 2x2 pixels */
+    d = dst->data[0];
+    y1_ptr = src->data[0];
+    cb_ptr = src->data[1];
+    cr_ptr = src->data[2];
+    width2 = (width + 1) >> 1; /* chroma samples consumed per row */
+    for(;height >= 2; height -= 2) {
+        d1 = d;
+        d2 = d + dst->linesize[0];
+        y2_ptr = y1_ptr + src->linesize[0];
+        for(w = width; w >= 2; w -= 2) {
+            YUV_TO_RGB1_CCIR(cb_ptr[0], cr_ptr[0]);
+            /* output 4 pixels */
+            YUV_TO_RGB2_CCIR(r, g, b, y1_ptr[0]);
+            RGB_OUT(d1, r, g, b);
+
+            YUV_TO_RGB2_CCIR(r, g, b, y1_ptr[1]);
+            RGB_OUT(d1 + BPP, r, g, b);
+
+            YUV_TO_RGB2_CCIR(r, g, b, y2_ptr[0]);
+            RGB_OUT(d2, r, g, b);
+
+            YUV_TO_RGB2_CCIR(r, g, b, y2_ptr[1]);
+            RGB_OUT(d2 + BPP, r, g, b);
+
+            d1 += 2 * BPP;
+            d2 += 2 * BPP;
+
+            y1_ptr += 2;
+            y2_ptr += 2;
+            cb_ptr++;
+            cr_ptr++;
+        }
+        /* handle odd width */
+        if (w) {
+            YUV_TO_RGB1_CCIR(cb_ptr[0], cr_ptr[0]);
+            YUV_TO_RGB2_CCIR(r, g, b, y1_ptr[0]);
+            RGB_OUT(d1, r, g, b);
+
+            YUV_TO_RGB2_CCIR(r, g, b, y2_ptr[0]);
+            RGB_OUT(d2, r, g, b);
+            d1 += BPP;
+            d2 += BPP;
+            y1_ptr++;
+            y2_ptr++;
+            cb_ptr++;
+            cr_ptr++;
+        }
+        d += 2 * dst->linesize[0];
+        y1_ptr += 2 * src->linesize[0] - width; /* skip the row read through y2_ptr */
+        cb_ptr += src->linesize[1] - width2;
+        cr_ptr += src->linesize[2] - width2;
+    }
+    /* handle odd height */
+    if (height) {
+        d1 = d;
+        for(w = width; w >= 2; w -= 2) {
+            YUV_TO_RGB1_CCIR(cb_ptr[0], cr_ptr[0]);
+            /* output 2 pixels */
+            YUV_TO_RGB2_CCIR(r, g, b, y1_ptr[0]);
+            RGB_OUT(d1, r, g, b);
+
+            YUV_TO_RGB2_CCIR(r, g, b, y1_ptr[1]);
+            RGB_OUT(d1 + BPP, r, g, b);
+
+            d1 += 2 * BPP;
+
+            y1_ptr += 2;
+            cb_ptr++;
+            cr_ptr++;
+        }
+        /* handle odd width */
+        if (w) {
+            YUV_TO_RGB1_CCIR(cb_ptr[0], cr_ptr[0]);
+            /* output 1 pixel */
+            YUV_TO_RGB2_CCIR(r, g, b, y1_ptr[0]);
+            RGB_OUT(d1, r, g, b);
+            d1 += BPP;
+
+            y1_ptr++;
+            cb_ptr++;
+            cr_ptr++;
+        }
+    }
+}
+
+static void glue(yuvj420p_to_, RGB_NAME)(AVPicture *dst, const AVPicture *src,
+                                         int width, int height)
+{
+    const uint8_t *y1_ptr, *y2_ptr, *cb_ptr, *cr_ptr;
+    uint8_t *d, *d1, *d2;
+    int w, y, cb, cr, r_add, g_add, b_add, width2;
+    uint8_t *cm = cropTbl + MAX_NEG_CROP;
+    unsigned int r, g, b;
+    /* planar YUV 4:2:0 to RGB_NAME with the non-CCIR macros: one cb/cr pair per 2x2 pixels */
+    d = dst->data[0];
+    y1_ptr = src->data[0];
+    cb_ptr = src->data[1];
+    cr_ptr = src->data[2];
+    width2 = (width + 1) >> 1; /* chroma samples consumed per row */
+    for(;height >= 2; height -= 2) {
+        d1 = d;
+        d2 = d + dst->linesize[0];
+        y2_ptr = y1_ptr + src->linesize[0];
+        for(w = width; w >= 2; w -= 2) {
+            YUV_TO_RGB1(cb_ptr[0], cr_ptr[0]);
+            /* output 4 pixels */
+            YUV_TO_RGB2(r, g, b, y1_ptr[0]);
+            RGB_OUT(d1, r, g, b);
+
+            YUV_TO_RGB2(r, g, b, y1_ptr[1]);
+            RGB_OUT(d1 + BPP, r, g, b);
+
+            YUV_TO_RGB2(r, g, b, y2_ptr[0]);
+            RGB_OUT(d2, r, g, b);
+
+            YUV_TO_RGB2(r, g, b, y2_ptr[1]);
+            RGB_OUT(d2 + BPP, r, g, b);
+
+            d1 += 2 * BPP;
+            d2 += 2 * BPP;
+
+            y1_ptr += 2;
+            y2_ptr += 2;
+            cb_ptr++;
+            cr_ptr++;
+        }
+        /* handle odd width */
+        if (w) {
+            YUV_TO_RGB1(cb_ptr[0], cr_ptr[0]);
+            YUV_TO_RGB2(r, g, b, y1_ptr[0]);
+            RGB_OUT(d1, r, g, b);
+
+            YUV_TO_RGB2(r, g, b, y2_ptr[0]);
+            RGB_OUT(d2, r, g, b);
+            d1 += BPP;
+            d2 += BPP;
+            y1_ptr++;
+            y2_ptr++;
+            cb_ptr++;
+            cr_ptr++;
+        }
+        d += 2 * dst->linesize[0];
+        y1_ptr += 2 * src->linesize[0] - width; /* skip the row read through y2_ptr */
+        cb_ptr += src->linesize[1] - width2;
+        cr_ptr += src->linesize[2] - width2;
+    }
+    /* handle odd height */
+    if (height) {
+        d1 = d;
+        for(w = width; w >= 2; w -= 2) {
+            YUV_TO_RGB1(cb_ptr[0], cr_ptr[0]);
+            /* output 2 pixels */
+            YUV_TO_RGB2(r, g, b, y1_ptr[0]);
+            RGB_OUT(d1, r, g, b);
+
+            YUV_TO_RGB2(r, g, b, y1_ptr[1]);
+            RGB_OUT(d1 + BPP, r, g, b);
+
+            d1 += 2 * BPP;
+
+            y1_ptr += 2;
+            cb_ptr++;
+            cr_ptr++;
+        }
+        /* handle odd width */
+        if (w) {
+            YUV_TO_RGB1(cb_ptr[0], cr_ptr[0]);
+            /* output 1 pixel */
+            YUV_TO_RGB2(r, g, b, y1_ptr[0]);
+            RGB_OUT(d1, r, g, b);
+            d1 += BPP;
+
+            y1_ptr++;
+            cb_ptr++;
+            cr_ptr++;
+        }
+    }
+}
+
+static void glue(RGB_NAME, _to_yuv420p)(AVPicture *dst, const AVPicture *src,
+                                        int width, int height)
+{
+    int wrap, wrap3, width2;
+    int r, g, b, r1, g1, b1, w;
+    uint8_t *lum, *cb, *cr;
+    const uint8_t *p;
+
+    lum = dst->data[0];
+    cb = dst->data[1];
+    cr = dst->data[2];
+
+    width2 = (width + 1) >> 1;
+    wrap = dst->linesize[0];
+    wrap3 = src->linesize[0];
+    p = src->data[0];
+    for(;height>=2;height -= 2) {
+        for(w = width; w >= 2; w -= 2) {
+            RGB_IN(r, g, b, p);
+            r1 = r;
+            g1 = g;
+            b1 = b;
+            lum[0] = RGB_TO_Y_CCIR(r, g, b);
+
+            RGB_IN(r, g, b, p + BPP);
+            r1 += r;
+            g1 += g;
+            b1 += b;
+            lum[1] = RGB_TO_Y_CCIR(r, g, b);
+            p += wrap3;
+            lum += wrap;
+
+            RGB_IN(r, g, b, p);
+            r1 += r;
+            g1 += g;
+            b1 += b;
+            lum[0] = RGB_TO_Y_CCIR(r, g, b);
+
+            RGB_IN(r, g, b, p + BPP);
+            r1 += r;
+            g1 += g;
+            b1 += b;
+            lum[1] = RGB_TO_Y_CCIR(r, g, b);
+
+            cb[0] = RGB_TO_U_CCIR(r1, g1, b1, 2);
+            cr[0] = RGB_TO_V_CCIR(r1, g1, b1, 2);
+
+            cb++;
+            cr++;
+            p += -wrap3 + 2 * BPP;
+            lum += -wrap + 2;
+        }
+        if (w) {
+            RGB_IN(r, g, b, p);
+            r1 = r;
+            g1 = g;
+            b1 = b;
+            lum[0] = RGB_TO_Y_CCIR(r, g, b);
+            p += wrap3;
+            lum += wrap;
+            RGB_IN(r, g, b, p);
+            r1 += r;
+            g1 += g;
+            b1 += b;
+            lum[0] = RGB_TO_Y_CCIR(r, g, b);
+            cb[0] = RGB_TO_U_CCIR(r1, g1, b1, 1);
+            cr[0] = RGB_TO_V_CCIR(r1, g1, b1, 1);
+            cb++;
+            cr++;
+            p += -wrap3 + BPP;
+            lum += -wrap + 1;
+        }
+        p += wrap3 + (wrap3 - width * BPP);
+        lum += wrap + (wrap - width);
+        cb += dst->linesize[1] - width2;
+        cr += dst->linesize[2] - width2;
+    }
+    /* handle odd height */
+    if (height) {
+        for(w = width; w >= 2; w -= 2) {
+            RGB_IN(r, g, b, p);
+            r1 = r;
+            g1 = g;
+            b1 = b;
+            lum[0] = RGB_TO_Y_CCIR(r, g, b);
+
+            RGB_IN(r, g, b, p + BPP);
+            r1 += r;
+            g1 += g;
+            b1 += b;
+            lum[1] = RGB_TO_Y_CCIR(r, g, b);
+            cb[0] = RGB_TO_U_CCIR(r1, g1, b1, 1);
+            cr[0] = RGB_TO_V_CCIR(r1, g1, b1, 1);
+            cb++;
+            cr++;
+            p += 2 * BPP;
+           lum += 2;
+        }
+        if (w) {
+            RGB_IN(r, g, b, p);
+            lum[0] = RGB_TO_Y_CCIR(r, g, b);
+            cb[0] = RGB_TO_U_CCIR(r, g, b, 0);
+            cr[0] = RGB_TO_V_CCIR(r, g, b, 0);
+        }
+    }
+}
+
+static void glue(RGB_NAME, _to_gray)(AVPicture *dst, const AVPicture *src,
+                                     int width, int height)
+{
+    const unsigned char *p;
+    unsigned char *q;
+    int r, g, b, dst_wrap, src_wrap;
+    int x, y;
+
+    p = src->data[0];
+    src_wrap = src->linesize[0] - BPP * width;
+
+    q = dst->data[0];
+    dst_wrap = dst->linesize[0] - width;
+
+    for(y=0;y<height;y++) {
+        for(x=0;x<width;x++) {
+            RGB_IN(r, g, b, p);
+            q[0] = RGB_TO_Y(r, g, b);
+            q++;
+            p += BPP;
+        }
+        p += src_wrap;
+        q += dst_wrap;
+    }
+}
+
+static void glue(gray_to_, RGB_NAME)(AVPicture *dst, const AVPicture *src,
+                                     int width, int height)
+{
+    const unsigned char *p;
+    unsigned char *q;
+    int r, dst_wrap, src_wrap;
+    int x, y;
+
+    p = src->data[0];
+    src_wrap = src->linesize[0] - width;
+
+    q = dst->data[0];
+    dst_wrap = dst->linesize[0] - BPP * width;
+
+    for(y=0;y<height;y++) {
+        for(x=0;x<width;x++) {
+            r = p[0];
+            RGB_OUT(q, r, r, r);
+            q += BPP;
+            p ++;
+        }
+        p += src_wrap;
+        q += dst_wrap;
+    }
+}
+
+static void glue(pal8_to_, RGB_NAME)(AVPicture *dst, const AVPicture *src,
+                                     int width, int height)
+{
+    const unsigned char *p;
+    unsigned char *q;
+    int r, g, b, dst_wrap, src_wrap;
+    int x, y;
+    uint32_t v;
+    const uint32_t *palette;
+
+    p = src->data[0];
+    src_wrap = src->linesize[0] - width;
+    palette = (uint32_t *)src->data[1];
+
+    q = dst->data[0];
+    dst_wrap = dst->linesize[0] - BPP * width;
+
+    for(y=0;y<height;y++) {
+        for(x=0;x<width;x++) {
+            v = palette[p[0]];
+            r = (v >> 16) & 0xff;
+            g = (v >> 8) & 0xff;
+            b = (v) & 0xff;
+#ifdef RGBA_OUT
+            {
+                int a;
+                a = (v >> 24) & 0xff;
+                RGBA_OUT(q, r, g, b, a);
+            }
+#else
+            RGB_OUT(q, r, g, b);
+#endif
+            q += BPP;
+            p ++;
+        }
+        p += src_wrap;
+        q += dst_wrap;
+    }
+}
+
+#if !defined(FMT_RGBA32) && defined(RGBA_OUT)
+/* alpha support */
+
+static void glue(rgba32_to_, RGB_NAME)(AVPicture *dst, const AVPicture *src,
+                                      int width, int height)
+{
+    const uint8_t *s;
+    uint8_t *d;
+    int src_wrap, dst_wrap, j, y;
+    unsigned int v, r, g, b, a;
+
+    s = src->data[0];
+    src_wrap = src->linesize[0] - width * 4;
+
+    d = dst->data[0];
+    dst_wrap = dst->linesize[0] - width * BPP;
+
+    for(y=0;y<height;y++) {
+        for(j = 0;j < width; j++) {
+            v = ((const uint32_t *)(s))[0];
+            a = (v >> 24) & 0xff;
+            r = (v >> 16) & 0xff;
+            g = (v >> 8) & 0xff;
+            b = v & 0xff;
+            RGBA_OUT(d, r, g, b, a);
+            s += 4;
+            d += BPP;
+        }
+        s += src_wrap;
+        d += dst_wrap;
+    }
+}
+
+static void glue(RGB_NAME, _to_rgba32)(AVPicture *dst, const AVPicture *src,
+                                       int width, int height)
+{
+    const uint8_t *s;
+    uint8_t *d;
+    int src_wrap, dst_wrap, j, y;
+    unsigned int r, g, b, a;
+
+    s = src->data[0];
+    src_wrap = src->linesize[0] - width * BPP;
+
+    d = dst->data[0];
+    dst_wrap = dst->linesize[0] - width * 4;
+
+    for(y=0;y<height;y++) {
+        for(j = 0;j < width; j++) {
+            RGBA_IN(r, g, b, a, s);
+            ((uint32_t *)(d))[0] = (a << 24) | (r << 16) | (g << 8) | b;
+            d += 4;
+            s += BPP;
+        }
+        s += src_wrap;
+        d += dst_wrap;
+    }
+}
+
+#endif /* !defined(FMT_RGBA32) && defined(RGBA_OUT) */
+
+#ifndef FMT_RGB24
+
+static void glue(rgb24_to_, RGB_NAME)(AVPicture *dst, const AVPicture *src,
+                                      int width, int height)
+{
+    const uint8_t *s;
+    uint8_t *d;
+    int src_wrap, dst_wrap, j, y;
+    unsigned int r, g, b;
+
+    s = src->data[0];
+    src_wrap = src->linesize[0] - width * 3;
+
+    d = dst->data[0];
+    dst_wrap = dst->linesize[0] - width * BPP;
+
+    for(y=0;y<height;y++) {
+        for(j = 0;j < width; j++) {
+            r = s[0];
+            g = s[1];
+            b = s[2];
+            RGB_OUT(d, r, g, b);
+            s += 3;
+            d += BPP;
+        }
+        s += src_wrap;
+        d += dst_wrap;
+    }
+}
+
+static void glue(RGB_NAME, _to_rgb24)(AVPicture *dst, const AVPicture *src,
+                                      int width, int height)
+{
+    const uint8_t *s;
+    uint8_t *d;
+    int src_wrap, dst_wrap, j, y;
+    unsigned int r, g, b;
+
+    /* convert a packed RGB_NAME image (BPP bytes per pixel) to 3 byte r, g, b */
+    s = src->data[0];
+    src_wrap = src->linesize[0] - width * BPP; /* bytes left after a source row */
+    d = dst->data[0];
+    dst_wrap = dst->linesize[0] - width * 3; /* bytes left after a dest row */
+
+    for(y=0;y<height;y++) {
+        for(j = 0;j < width; j++) {
+            RGB_IN(r, g, b, s);
+            d[0] = r;
+            d[1] = g;
+            d[2] = b;
+            d += 3;
+            s += BPP;
+        }
+        s += src_wrap;
+        d += dst_wrap;
+    }
+}
+
+#endif /* !FMT_RGB24 */
+
+#ifdef FMT_RGB24
+
+static void yuv444p_to_rgb24(AVPicture *dst, const AVPicture *src,
+                             int width, int height)
+{
+    const uint8_t *y1_ptr, *cb_ptr, *cr_ptr;
+    uint8_t *d, *d1;
+    int w, y, cb, cr, r_add, g_add, b_add;
+    uint8_t *cm = cropTbl + MAX_NEG_CROP;
+    unsigned int r, g, b;
+
+    d = dst->data[0];
+    y1_ptr = src->data[0];
+    cb_ptr = src->data[1];
+    cr_ptr = src->data[2];
+    for(;height > 0; height --) {
+        d1 = d;
+        for(w = width; w > 0; w--) {
+            YUV_TO_RGB1_CCIR(cb_ptr[0], cr_ptr[0]);
+
+            YUV_TO_RGB2_CCIR(r, g, b, y1_ptr[0]);
+            RGB_OUT(d1, r, g, b);
+            d1 += BPP;
+
+            y1_ptr++;
+            cb_ptr++;
+            cr_ptr++;
+        }
+        d += dst->linesize[0];
+        y1_ptr += src->linesize[0] - width;
+        cb_ptr += src->linesize[1] - width;
+        cr_ptr += src->linesize[2] - width;
+    }
+}
+
+static void yuvj444p_to_rgb24(AVPicture *dst, const AVPicture *src,
+                              int width, int height)
+{
+    const uint8_t *y1_ptr, *cb_ptr, *cr_ptr;
+    uint8_t *d, *d1;
+    int w, y, cb, cr, r_add, g_add, b_add;
+    uint8_t *cm = cropTbl + MAX_NEG_CROP;
+    unsigned int r, g, b;
+
+    d = dst->data[0];
+    y1_ptr = src->data[0];
+    cb_ptr = src->data[1];
+    cr_ptr = src->data[2];
+    for(;height > 0; height --) {
+        d1 = d;
+        for(w = width; w > 0; w--) {
+            YUV_TO_RGB1(cb_ptr[0], cr_ptr[0]);
+
+            YUV_TO_RGB2(r, g, b, y1_ptr[0]);
+            RGB_OUT(d1, r, g, b);
+            d1 += BPP;
+
+            y1_ptr++;
+            cb_ptr++;
+            cr_ptr++;
+        }
+        d += dst->linesize[0];
+        y1_ptr += src->linesize[0] - width;
+        cb_ptr += src->linesize[1] - width;
+        cr_ptr += src->linesize[2] - width;
+    }
+}
+
+static void rgb24_to_yuv444p(AVPicture *dst, const AVPicture *src,
+                             int width, int height)
+{
+    int src_wrap, x, y;
+    int r, g, b;
+    uint8_t *lum, *cb, *cr;
+    const uint8_t *p;
+
+    lum = dst->data[0];
+    cb = dst->data[1];
+    cr = dst->data[2];
+
+    src_wrap = src->linesize[0] - width * BPP;
+    p = src->data[0];
+    for(y=0;y<height;y++) {
+        for(x=0;x<width;x++) {
+            RGB_IN(r, g, b, p);
+            lum[0] = RGB_TO_Y_CCIR(r, g, b);
+            cb[0] = RGB_TO_U_CCIR(r, g, b, 0);
+            cr[0] = RGB_TO_V_CCIR(r, g, b, 0);
+            p += BPP;
+            cb++;
+            cr++;
+            lum++;
+        }
+        p += src_wrap;
+        lum += dst->linesize[0] - width;
+        cb += dst->linesize[1] - width;
+        cr += dst->linesize[2] - width;
+    }
+}
+
+static void rgb24_to_yuvj420p(AVPicture *dst, const AVPicture *src,
+                              int width, int height)
+{
+    int wrap, wrap3, width2;
+    int r, g, b, r1, g1, b1, w;
+    uint8_t *lum, *cb, *cr;
+    const uint8_t *p;
+
+    lum = dst->data[0];
+    cb = dst->data[1];
+    cr = dst->data[2];
+
+    width2 = (width + 1) >> 1;
+    wrap = dst->linesize[0];
+    wrap3 = src->linesize[0];
+    p = src->data[0];
+    for(;height>=2;height -= 2) {
+        for(w = width; w >= 2; w -= 2) {
+            RGB_IN(r, g, b, p);
+            r1 = r;
+            g1 = g;
+            b1 = b;
+            lum[0] = RGB_TO_Y(r, g, b);
+
+            RGB_IN(r, g, b, p + BPP);
+            r1 += r;
+            g1 += g;
+            b1 += b;
+            lum[1] = RGB_TO_Y(r, g, b);
+            p += wrap3;
+            lum += wrap;
+
+            RGB_IN(r, g, b, p);
+            r1 += r;
+            g1 += g;
+            b1 += b;
+            lum[0] = RGB_TO_Y(r, g, b);
+
+            RGB_IN(r, g, b, p + BPP);
+            r1 += r;
+            g1 += g;
+            b1 += b;
+            lum[1] = RGB_TO_Y(r, g, b);
+
+            cb[0] = RGB_TO_U(r1, g1, b1, 2);
+            cr[0] = RGB_TO_V(r1, g1, b1, 2);
+
+            cb++;
+            cr++;
+            p += -wrap3 + 2 * BPP;
+            lum += -wrap + 2;
+        }
+        if (w) {
+            RGB_IN(r, g, b, p);
+            r1 = r;
+            g1 = g;
+            b1 = b;
+            lum[0] = RGB_TO_Y(r, g, b);
+            p += wrap3;
+            lum += wrap;
+            RGB_IN(r, g, b, p);
+            r1 += r;
+            g1 += g;
+            b1 += b;
+            lum[0] = RGB_TO_Y(r, g, b);
+            cb[0] = RGB_TO_U(r1, g1, b1, 1);
+            cr[0] = RGB_TO_V(r1, g1, b1, 1);
+            cb++;
+            cr++;
+            p += -wrap3 + BPP;
+            lum += -wrap + 1;
+        }
+        p += wrap3 + (wrap3 - width * BPP);
+        lum += wrap + (wrap - width);
+        cb += dst->linesize[1] - width2;
+        cr += dst->linesize[2] - width2;
+    }
+    /* handle odd height */
+    if (height) {
+        for(w = width; w >= 2; w -= 2) {
+            RGB_IN(r, g, b, p);
+            r1 = r;
+            g1 = g;
+            b1 = b;
+            lum[0] = RGB_TO_Y(r, g, b);
+
+            RGB_IN(r, g, b, p + BPP);
+            r1 += r;
+            g1 += g;
+            b1 += b;
+            lum[1] = RGB_TO_Y(r, g, b);
+            cb[0] = RGB_TO_U(r1, g1, b1, 1);
+            cr[0] = RGB_TO_V(r1, g1, b1, 1);
+            cb++;
+            cr++;
+            p += 2 * BPP;
+           lum += 2;
+        }
+        if (w) {
+            RGB_IN(r, g, b, p);
+            lum[0] = RGB_TO_Y(r, g, b);
+            cb[0] = RGB_TO_U(r, g, b, 0);
+            cr[0] = RGB_TO_V(r, g, b, 0);
+        }
+    }
+}
+
+static void rgb24_to_yuvj444p(AVPicture *dst, const AVPicture *src,
+                              int width, int height)
+{
+    int src_wrap, x, y;
+    int r, g, b;
+    uint8_t *lum, *cb, *cr;
+    const uint8_t *p;
+
+    lum = dst->data[0];
+    cb = dst->data[1];
+    cr = dst->data[2];
+
+    src_wrap = src->linesize[0] - width * BPP;
+    p = src->data[0];
+    for(y=0;y<height;y++) {
+        for(x=0;x<width;x++) {
+            RGB_IN(r, g, b, p);
+            lum[0] = RGB_TO_Y(r, g, b);
+            cb[0] = RGB_TO_U(r, g, b, 0);
+            cr[0] = RGB_TO_V(r, g, b, 0);
+            p += BPP;
+            cb++;
+            cr++;
+            lum++;
+        }
+        p += src_wrap;
+        lum += dst->linesize[0] - width;
+        cb += dst->linesize[1] - width;
+        cr += dst->linesize[2] - width;
+    }
+}
+
+#endif /* FMT_RGB24 */
+
+#if defined(FMT_RGB24) || defined(FMT_RGBA32)
+
+static void glue(RGB_NAME, _to_pal8)(AVPicture *dst, const AVPicture *src,
+                                     int width, int height)
+{
+    const unsigned char *p;
+    unsigned char *q;
+    int dst_wrap, src_wrap;
+    int x, y, has_alpha;
+    unsigned int r, g, b;
+
+    p = src->data[0];
+    src_wrap = src->linesize[0] - BPP * width;
+
+    q = dst->data[0];
+    dst_wrap = dst->linesize[0] - width;
+    has_alpha = 0;
+    
+    for(y=0;y<height;y++) {
+        for(x=0;x<width;x++) {
+#ifdef RGBA_IN
+            {
+                unsigned int a;
+                RGBA_IN(r, g, b, a, p);
+                /* crude approximation for alpha ! */
+                if (a < 0x80) {
+                    has_alpha = 1;
+                    q[0] = TRANSP_INDEX;
+                } else {
+                    q[0] = gif_clut_index(r, g, b);
+                }
+            }
+#else
+            RGB_IN(r, g, b, p);
+            q[0] = gif_clut_index(r, g, b);
+#endif
+            q++;
+            p += BPP;
+        }
+        p += src_wrap;
+        q += dst_wrap;
+    }
+
+    build_rgb_palette(dst->data[1], has_alpha);
+}
+
+#endif /* defined(FMT_RGB24) || defined(FMT_RGBA32) */
+        
+#ifdef RGBA_IN
+
+static int glue(get_alpha_info_, RGB_NAME)(const AVPicture *src,
+					   int width, int height)
+{
+    const unsigned char *p;
+    int src_wrap, ret, x, y;
+    unsigned int r, g, b, a;
+
+    p = src->data[0];
+    src_wrap = src->linesize[0] - BPP * width;
+    ret = 0;
+    for(y=0;y<height;y++) {
+        for(x=0;x<width;x++) {
+            RGBA_IN(r, g, b, a, p);
+            if (a == 0x00) {
+                ret |= FF_ALPHA_TRANSP;
+            } else if (a != 0xff) {
+                ret |= FF_ALPHA_SEMI_TRANSP;
+            }
+            p += BPP;
+        }
+        p += src_wrap;
+    }
+    return ret;
+}
+
+#endif /* RGBA_IN */
+
+#undef RGB_IN
+#undef RGBA_IN
+#undef RGB_OUT
+#undef RGBA_OUT
+#undef BPP
+#undef RGB_NAME
+#undef FMT_RGB24
+#undef FMT_RGBA32
diff --git a/gst/ffmpegcolorspace/mem.c b/gst/ffmpegcolorspace/mem.c
new file mode 100644
index 0000000000..58862c418e
--- /dev/null
+++ b/gst/ffmpegcolorspace/mem.c
@@ -0,0 +1,106 @@
+/*
+ * default memory allocator for libavcodec
+ * Copyright (c) 2002 Fabrice Bellard.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+/**
+ * @file mem.c
+ * default memory allocator for libavcodec.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "avcodec.h"
+
+/* here we can use OS-dependent allocation functions */
+#undef malloc
+#undef free
+#undef realloc
+
+#ifdef HAVE_MALLOC_H
+#include <malloc.h>
+#endif
+
+/* you can redefine av_malloc and av_free in your project to use your
+   memory allocator. You do not need to suppress this file because the
+   linker will do it automatically */
+
+/**
+ * Allocate size bytes with alignment suitable for all memory accesses
+ * (including vectors if available on the CPU).  av_malloc(0) returns a
+ * non-NULL pointer; NULL is only returned when the allocation fails.
+ */
+void *
+av_malloc (unsigned int size)
+{
+  void *ptr;
+
+  /* malloc(0) may legally return NULL; request one byte instead so the
+     documented av_malloc(0) contract holds on every libc */
+  if (size == 0)
+    size = 1;
+
+#if defined (HAVE_MEMALIGN)
+  ptr = memalign (16, size);
+  /* Why not 64?  Ideally the alignment would match the cache line:
+     4 for 386, 16 for 486, 32 for 586, PPro - k6-III,
+     64 for K7 (maybe for P3 too), because L1 and L2 caches are aligned
+     on those values.  But I don't want to code such logic here!  */
+  /* Why 16?
+     because some cpus need alignment, for example SSE2 on P4, & most RISC cpus
+     it will just trigger an exception and the unaligned load will be done in the
+     exception handler or it will just segfault (SSE2 on P4)
+     Why not larger? because i didnt see a difference in benchmarks ...
+   */
+  /* benchmarks with p3
+     memalign(64)+1          3071,3051,3032
+     memalign(64)+2          3051,3032,3041
+     memalign(64)+4          2911,2896,2915
+     memalign(64)+8          2545,2554,2550
+     memalign(64)+16         2543,2572,2563
+     memalign(64)+32         2546,2545,2571
+     memalign(64)+64         2570,2533,2558
+
+     btw, malloc seems to do 8 byte alignment by default here
+   */
+#else
+  ptr = malloc (size);
+#endif
+  return ptr;
+}
+
+/**
+ * Thin wrapper around realloc(): ptr == NULL with size > 0 acts like
+ * malloc(size).  With size == 0, glibc frees ptr and returns NULL; ISO C
+ * leaves realloc(ptr, 0) implementation-defined, so do not rely on it.
+ */
+void *
+av_realloc (void *ptr, unsigned int size)
+{
+  return realloc (ptr, size);
+}
+
+/* Release a block obtained from av_malloc() or av_realloc().
+   NOTE: ptr = NULL is explicitly allowed */
+void
+av_free (void *ptr)
+{
+  /* ISO C defines free(NULL) as a no-op, so no NULL test is needed */
+  free (ptr);
+}
diff --git a/gst/ffmpegcolorspace/mmx.h b/gst/ffmpegcolorspace/mmx.h
new file mode 100644
index 0000000000..7e94cfd9b1
--- /dev/null
+++ b/gst/ffmpegcolorspace/mmx.h
@@ -0,0 +1,243 @@
+/*
+ * mmx.h
+ * Copyright (C) 1997-2001 H. Dietz and R. Fisher
+ */
+#ifndef AVCODEC_I386MMX_H
+#define AVCODEC_I386MMX_H
+
+/*
+ * The type of a value that fits in an MMX register: a union giving
+ * alternative views of the same storage (note that long long constant
+ * values MUST be suffixed by LL and unsigned long long values by ULL,
+ * lest they be truncated by the compiler)
+ */
+typedef	union {
+	long long		q;	/* Quadword (64-bit) value */
+	unsigned long long	uq;	/* Unsigned Quadword */
+	int			d[2];	/* 2 Doubleword (32-bit) values */
+	unsigned int		ud[2];	/* 2 Unsigned Doubleword */
+	short			w[4];	/* 4 Word (16-bit) values */
+	unsigned short		uw[4];	/* 4 Unsigned Word */
+	char			b[8];	/* 8 Byte (8-bit) values */
+	unsigned char		ub[8];	/* 8 Unsigned Byte */
+	float			s[2];	/* 2 Single-precision (32-bit) values */
+} mmx_t;	/* On an 8-byte (64-bit) boundary */
+
+/* Immediate-to-register form: emits "op <imm>, %reg"; imm is an "i" input operand. */
+#define	mmx_i2r(op,imm,reg) \
+	__asm__ __volatile__ (#op " %0, %%" #reg \
+			      : /* nothing */ \
+			      : "i" (imm) )
+/* Memory-to-register form: emits "op <mem>, %reg"; mem is an "m" input operand. */
+#define	mmx_m2r(op,mem,reg) \
+	__asm__ __volatile__ (#op " %0, %%" #reg \
+			      : /* nothing */ \
+			      : "m" (mem))
+/* Register-to-memory form: emits "op %reg, <mem>"; mem is an "=m" output operand. */
+#define	mmx_r2m(op,reg,mem) \
+	__asm__ __volatile__ (#op " %%" #reg ", %0" \
+			      : "=m" (mem) \
+			      : /* nothing */ )
+/* Register-to-register form: emits "op %regs, %regd"; no operand list is given, hence the single '%'. */
+#define	mmx_r2r(op,regs,regd) \
+	__asm__ __volatile__ (#op " %" #regs ", %" #regd)
+
+/* Emits the bare "emms" instruction (no operands). */
+#define	emms() __asm__ __volatile__ ("emms")
+/* Per-instruction wrappers: <insn>_m2r(var,reg), <insn>_r2m(reg,var), <insn>_r2r(regs,regd) and <insn>_i2r(imm,reg) expand to the generic mmx_* macros. */
+#define	movd_m2r(var,reg)	mmx_m2r (movd, var, reg)
+#define	movd_r2m(reg,var)	mmx_r2m (movd, reg, var)
+#define	movd_r2r(regs,regd)	mmx_r2r (movd, regs, regd)
+
+#define	movq_m2r(var,reg)	mmx_m2r (movq, var, reg)
+#define	movq_r2m(reg,var)	mmx_r2m (movq, reg, var)
+#define	movq_r2r(regs,regd)	mmx_r2r (movq, regs, regd)
+
+#define	packssdw_m2r(var,reg)	mmx_m2r (packssdw, var, reg)
+#define	packssdw_r2r(regs,regd) mmx_r2r (packssdw, regs, regd)
+#define	packsswb_m2r(var,reg)	mmx_m2r (packsswb, var, reg)
+#define	packsswb_r2r(regs,regd) mmx_r2r (packsswb, regs, regd)
+
+#define	packuswb_m2r(var,reg)	mmx_m2r (packuswb, var, reg)
+#define	packuswb_r2r(regs,regd) mmx_r2r (packuswb, regs, regd)
+
+#define	paddb_m2r(var,reg)	mmx_m2r (paddb, var, reg)
+#define	paddb_r2r(regs,regd)	mmx_r2r (paddb, regs, regd)
+#define	paddd_m2r(var,reg)	mmx_m2r (paddd, var, reg)
+#define	paddd_r2r(regs,regd)	mmx_r2r (paddd, regs, regd)
+#define	paddw_m2r(var,reg)	mmx_m2r (paddw, var, reg)
+#define	paddw_r2r(regs,regd)	mmx_r2r (paddw, regs, regd)
+
+#define	paddsb_m2r(var,reg)	mmx_m2r (paddsb, var, reg)
+#define	paddsb_r2r(regs,regd)	mmx_r2r (paddsb, regs, regd)
+#define	paddsw_m2r(var,reg)	mmx_m2r (paddsw, var, reg)
+#define	paddsw_r2r(regs,regd)	mmx_r2r (paddsw, regs, regd)
+
+#define	paddusb_m2r(var,reg)	mmx_m2r (paddusb, var, reg)
+#define	paddusb_r2r(regs,regd)	mmx_r2r (paddusb, regs, regd)
+#define	paddusw_m2r(var,reg)	mmx_m2r (paddusw, var, reg)
+#define	paddusw_r2r(regs,regd)	mmx_r2r (paddusw, regs, regd)
+
+#define	pand_m2r(var,reg)	mmx_m2r (pand, var, reg)
+#define	pand_r2r(regs,regd)	mmx_r2r (pand, regs, regd)
+
+#define	pandn_m2r(var,reg)	mmx_m2r (pandn, var, reg)
+#define	pandn_r2r(regs,regd)	mmx_r2r (pandn, regs, regd)
+
+#define	pcmpeqb_m2r(var,reg)	mmx_m2r (pcmpeqb, var, reg)
+#define	pcmpeqb_r2r(regs,regd)	mmx_r2r (pcmpeqb, regs, regd)
+#define	pcmpeqd_m2r(var,reg)	mmx_m2r (pcmpeqd, var, reg)
+#define	pcmpeqd_r2r(regs,regd)	mmx_r2r (pcmpeqd, regs, regd)
+#define	pcmpeqw_m2r(var,reg)	mmx_m2r (pcmpeqw, var, reg)
+#define	pcmpeqw_r2r(regs,regd)	mmx_r2r (pcmpeqw, regs, regd)
+
+#define	pcmpgtb_m2r(var,reg)	mmx_m2r (pcmpgtb, var, reg)
+#define	pcmpgtb_r2r(regs,regd)	mmx_r2r (pcmpgtb, regs, regd)
+#define	pcmpgtd_m2r(var,reg)	mmx_m2r (pcmpgtd, var, reg)
+#define	pcmpgtd_r2r(regs,regd)	mmx_r2r (pcmpgtd, regs, regd)
+#define	pcmpgtw_m2r(var,reg)	mmx_m2r (pcmpgtw, var, reg)
+#define	pcmpgtw_r2r(regs,regd)	mmx_r2r (pcmpgtw, regs, regd)
+
+#define	pmaddwd_m2r(var,reg)	mmx_m2r (pmaddwd, var, reg)
+#define	pmaddwd_r2r(regs,regd)	mmx_r2r (pmaddwd, regs, regd)
+
+#define	pmulhw_m2r(var,reg)	mmx_m2r (pmulhw, var, reg)
+#define	pmulhw_r2r(regs,regd)	mmx_r2r (pmulhw, regs, regd)
+
+#define	pmullw_m2r(var,reg)	mmx_m2r (pmullw, var, reg)
+#define	pmullw_r2r(regs,regd)	mmx_r2r (pmullw, regs, regd)
+
+#define	por_m2r(var,reg)	mmx_m2r (por, var, reg)
+#define	por_r2r(regs,regd)	mmx_r2r (por, regs, regd)
+
+#define	pslld_i2r(imm,reg)	mmx_i2r (pslld, imm, reg)
+#define	pslld_m2r(var,reg)	mmx_m2r (pslld, var, reg)
+#define	pslld_r2r(regs,regd)	mmx_r2r (pslld, regs, regd)
+#define	psllq_i2r(imm,reg)	mmx_i2r (psllq, imm, reg)
+#define	psllq_m2r(var,reg)	mmx_m2r (psllq, var, reg)
+#define	psllq_r2r(regs,regd)	mmx_r2r (psllq, regs, regd)
+#define	psllw_i2r(imm,reg)	mmx_i2r (psllw, imm, reg)
+#define	psllw_m2r(var,reg)	mmx_m2r (psllw, var, reg)
+#define	psllw_r2r(regs,regd)	mmx_r2r (psllw, regs, regd)
+
+#define	psrad_i2r(imm,reg)	mmx_i2r (psrad, imm, reg)
+#define	psrad_m2r(var,reg)	mmx_m2r (psrad, var, reg)
+#define	psrad_r2r(regs,regd)	mmx_r2r (psrad, regs, regd)
+#define	psraw_i2r(imm,reg)	mmx_i2r (psraw, imm, reg)
+#define	psraw_m2r(var,reg)	mmx_m2r (psraw, var, reg)
+#define	psraw_r2r(regs,regd)	mmx_r2r (psraw, regs, regd)
+
+#define	psrld_i2r(imm,reg)	mmx_i2r (psrld, imm, reg)
+#define	psrld_m2r(var,reg)	mmx_m2r (psrld, var, reg)
+#define	psrld_r2r(regs,regd)	mmx_r2r (psrld, regs, regd)
+#define	psrlq_i2r(imm,reg)	mmx_i2r (psrlq, imm, reg)
+#define	psrlq_m2r(var,reg)	mmx_m2r (psrlq, var, reg)
+#define	psrlq_r2r(regs,regd)	mmx_r2r (psrlq, regs, regd)
+#define	psrlw_i2r(imm,reg)	mmx_i2r (psrlw, imm, reg)
+#define	psrlw_m2r(var,reg)	mmx_m2r (psrlw, var, reg)
+#define	psrlw_r2r(regs,regd)	mmx_r2r (psrlw, regs, regd)
+
+#define	psubb_m2r(var,reg)	mmx_m2r (psubb, var, reg)
+#define	psubb_r2r(regs,regd)	mmx_r2r (psubb, regs, regd)
+#define	psubd_m2r(var,reg)	mmx_m2r (psubd, var, reg)
+#define	psubd_r2r(regs,regd)	mmx_r2r (psubd, regs, regd)
+#define	psubw_m2r(var,reg)	mmx_m2r (psubw, var, reg)
+#define	psubw_r2r(regs,regd)	mmx_r2r (psubw, regs, regd)
+
+#define	psubsb_m2r(var,reg)	mmx_m2r (psubsb, var, reg)
+#define	psubsb_r2r(regs,regd)	mmx_r2r (psubsb, regs, regd)
+#define	psubsw_m2r(var,reg)	mmx_m2r (psubsw, var, reg)
+#define	psubsw_r2r(regs,regd)	mmx_r2r (psubsw, regs, regd)
+
+#define	psubusb_m2r(var,reg)	mmx_m2r (psubusb, var, reg)
+#define	psubusb_r2r(regs,regd)	mmx_r2r (psubusb, regs, regd)
+#define	psubusw_m2r(var,reg)	mmx_m2r (psubusw, var, reg)
+#define	psubusw_r2r(regs,regd)	mmx_r2r (psubusw, regs, regd)
+
+#define	punpckhbw_m2r(var,reg)		mmx_m2r (punpckhbw, var, reg)
+#define	punpckhbw_r2r(regs,regd)	mmx_r2r (punpckhbw, regs, regd)
+#define	punpckhdq_m2r(var,reg)		mmx_m2r (punpckhdq, var, reg)
+#define	punpckhdq_r2r(regs,regd)	mmx_r2r (punpckhdq, regs, regd)
+#define	punpckhwd_m2r(var,reg)		mmx_m2r (punpckhwd, var, reg)
+#define	punpckhwd_r2r(regs,regd)	mmx_r2r (punpckhwd, regs, regd)
+
+#define	punpcklbw_m2r(var,reg) 		mmx_m2r (punpcklbw, var, reg)
+#define	punpcklbw_r2r(regs,regd)	mmx_r2r (punpcklbw, regs, regd)
+#define	punpckldq_m2r(var,reg)		mmx_m2r (punpckldq, var, reg)
+#define	punpckldq_r2r(regs,regd)	mmx_r2r (punpckldq, regs, regd)
+#define	punpcklwd_m2r(var,reg)		mmx_m2r (punpcklwd, var, reg)
+#define	punpcklwd_r2r(regs,regd)	mmx_r2r (punpcklwd, regs, regd)
+
+#define	pxor_m2r(var,reg)	mmx_m2r (pxor, var, reg)
+#define	pxor_r2r(regs,regd)	mmx_r2r (pxor, regs, regd)
+
+
+/* 3DNOW extensions */
+/* pavgusb in its memory-to-register and register-to-register forms */
+#define pavgusb_m2r(var,reg)	mmx_m2r (pavgusb, var, reg)
+#define pavgusb_r2r(regs,regd)	mmx_r2r (pavgusb, regs, regd)
+
+
+/* AMD MMX extensions - also available in intel SSE */
+
+/* Forms taking an extra immediate: mmx_m2ri emits "op <imm>, <mem>, %reg", mmx_r2ri emits "op <imm>, %regs, %regd"; operands use the "X" constraint. */
+#define mmx_m2ri(op,mem,reg,imm) \
+        __asm__ __volatile__ (#op " %1, %0, %%" #reg \
+                              : /* nothing */ \
+                              : "X" (mem), "X" (imm))
+#define mmx_r2ri(op,regs,regd,imm) \
+        __asm__ __volatile__ (#op " %0, %%" #regs ", %%" #regd \
+                              : /* nothing */ \
+                              : "X" (imm) )
+/* Emits "prefetch<hint> <mem>": hint is pasted directly onto the mnemonic (see prefetcht0 .. prefetchnta). */
+#define	mmx_fetch(mem,hint) \
+	__asm__ __volatile__ ("prefetch" #hint " %0" \
+			      : /* nothing */ \
+			      : "X" (mem))
+
+/* Two register operands and no immediate, so this goes through mmx_r2r (mmx_r2ri needs a 4th argument).  NOTE(review): confirm which operand is the mask. */
+#define	maskmovq(regs,maskreg)		mmx_r2r (maskmovq, regs, maskreg)
+/* Wrappers for the extension instructions, built on the generic mmx_* forms */
+#define	movntq_r2m(mmreg,var)		mmx_r2m (movntq, mmreg, var)
+
+#define	pavgb_m2r(var,reg)		mmx_m2r (pavgb, var, reg)
+#define	pavgb_r2r(regs,regd)		mmx_r2r (pavgb, regs, regd)
+#define	pavgw_m2r(var,reg)		mmx_m2r (pavgw, var, reg)
+#define	pavgw_r2r(regs,regd)		mmx_r2r (pavgw, regs, regd)
+
+#define	pextrw_r2r(mmreg,reg,imm)	mmx_r2ri (pextrw, mmreg, reg, imm)
+
+#define	pinsrw_r2r(reg,mmreg,imm)	mmx_r2ri (pinsrw, reg, mmreg, imm)
+
+#define	pmaxsw_m2r(var,reg)		mmx_m2r (pmaxsw, var, reg)
+#define	pmaxsw_r2r(regs,regd)		mmx_r2r (pmaxsw, regs, regd)
+
+#define	pmaxub_m2r(var,reg)		mmx_m2r (pmaxub, var, reg)
+#define	pmaxub_r2r(regs,regd)		mmx_r2r (pmaxub, regs, regd)
+
+#define	pminsw_m2r(var,reg)		mmx_m2r (pminsw, var, reg)
+#define	pminsw_r2r(regs,regd)		mmx_r2r (pminsw, regs, regd)
+
+#define	pminub_m2r(var,reg)		mmx_m2r (pminub, var, reg)
+#define	pminub_r2r(regs,regd)		mmx_r2r (pminub, regs, regd)
+/* Emits "pmovmskb %mmreg, %reg" (no operand list, hence the single '%'); movmskps is a different, SSE/XMM instruction. */
+#define	pmovmskb(mmreg,reg) \
+	__asm__ __volatile__ ("pmovmskb %" #mmreg ", %" #reg)
+/* Remaining extension wrappers: pmulhuw, the four prefetch hints (via mmx_fetch), psadbw, pshufw (immediate forms) and sfence */
+#define	pmulhuw_m2r(var,reg)		mmx_m2r (pmulhuw, var, reg)
+#define	pmulhuw_r2r(regs,regd)		mmx_r2r (pmulhuw, regs, regd)
+
+#define	prefetcht0(mem)			mmx_fetch (mem, t0)
+#define	prefetcht1(mem)			mmx_fetch (mem, t1)
+#define	prefetcht2(mem)			mmx_fetch (mem, t2)
+#define	prefetchnta(mem)		mmx_fetch (mem, nta)
+
+#define	psadbw_m2r(var,reg)		mmx_m2r (psadbw, var, reg)
+#define	psadbw_r2r(regs,regd)		mmx_r2r (psadbw, regs, regd)
+
+#define	pshufw_m2r(var,reg,imm)		mmx_m2ri(pshufw, var, reg, imm)
+#define	pshufw_r2r(regs,regd,imm)	mmx_r2ri(pshufw, regs, regd, imm)
+
+#define	sfence() __asm__ __volatile__ ("sfence\n\t")
+
+#endif /* AVCODEC_I386MMX_H */
diff --git a/gst/ffmpegcolorspace/utils.c b/gst/ffmpegcolorspace/utils.c
new file mode 100644
index 0000000000..d776fcdbfb
--- /dev/null
+++ b/gst/ffmpegcolorspace/utils.c
@@ -0,0 +1,45 @@
+/*
+ * utils for libavcodec
+ * Copyright (c) 2001 Fabrice Bellard.
+ * Copyright (c) 2003 Michel Bardiaux for the av_log API
+ * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+/**
+ * @file utils.c
+ * utils.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "avcodec.h"
+#include "dsputil.h"
+
+/* One-time library setup: must be called before any other functions.
+   Only the first call does work; later calls return immediately. */
+void
+avcodec_init (void)
+{
+  static int setup_done = 0;
+
+  if (!setup_done) {
+    setup_done = 1;             /* mark first, then run the static init */
+    dsputil_static_init ();
+  }
+}