gstreamer/libs/colorspace/yuv2rgb.c
Wim Taymans 5b0ba06f28 Added MMX optimized yuv2rgb (AlienSong now plays back at only 6% CPU)
Original commit message from CVS:
Added MMX optimized yuv2rgb (AlienSong now plays back at only 6% CPU)
Added mpeg1 picture skipping and fixed a buffer overflow.
Added a system clock. The audiosink can now adjust the clock.
Fixed incorrect behaviour on 8, 15, 16, 24 and 32 bits displays.
Cleanup of the videosink, it now uses the color conversion library when
needed.
2000-04-09 21:36:56 +00:00

1058 lines
34 KiB
C

/* Gnome-Streamer
* Copyright (C) <1999> Erik Walthinsen <omega@cse.ogi.edu>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 02111-1307, USA.
*/
#include "config.h"
#include <math.h>
#include <stdlib.h>
//#define DEBUG_ENABLED
#include <gst/gst.h>
#include <gstcolorspace.h>
#ifdef HAVE_LIBMMX
#include "mmx.h"
#endif
#include "yuv2rgb.h"
/*
 * Forward declarations.
 *
 * Buffer-level converters: each takes a source buffer plus conversion
 * parameters and returns the buffer holding (or accompanying) the result.
 */
static GstBuffer *gst_colorspace_yuv422P_to_rgb32(GstBuffer *src, GstColorSpaceParameters *params);
static GstBuffer *gst_colorspace_yuv422P_to_bgr32(GstBuffer *src, GstColorSpaceParameters *params);
static GstBuffer *gst_colorspace_yuv422P_to_rgb24(GstBuffer *src, GstColorSpaceParameters *params);
static GstBuffer *gst_colorspace_yuv422P_to_bgr24(GstBuffer *src, GstColorSpaceParameters *params);
static GstBuffer *gst_colorspace_yuv422P_to_rgb16(GstBuffer *src, GstColorSpaceParameters *params);
#ifdef HAVE_LIBMMX
/* The MMX wrappers are only defined when libmmx is available. */
static GstBuffer *gst_colorspace_yuv422P_to_bgr32_mmx(GstBuffer *src, GstColorSpaceParameters *params);
static GstBuffer *gst_colorspace_yuv422P_to_bgr16_mmx(GstBuffer *src, GstColorSpaceParameters *params);
#endif

/*
 * Pixel loops.  The last two arguments are the row count followed by the
 * column count, matching the definitions below and the callers, which pass
 * params->height and then params->width.
 */
static void gst_colorspace_yuv_to_rgb16(GstColorSpaceYUVTables *tables,
                                        unsigned char *lum,
                                        unsigned char *cr,
                                        unsigned char *cb,
                                        unsigned char *out,
                                        int rows, int cols);
static void gst_colorspace_yuv_to_rgb24(GstColorSpaceYUVTables *tables,
                                        unsigned char *lum,
                                        unsigned char *cr,
                                        unsigned char *cb,
                                        unsigned char *out,
                                        int rows, int cols);
static void gst_colorspace_yuv_to_rgb32(GstColorSpaceYUVTables *tables,
                                        unsigned char *lum,
                                        unsigned char *cr,
                                        unsigned char *cb,
                                        unsigned char *out,
                                        int rows, int cols);
#ifdef HAVE_LIBMMX
static void gst_colorspace_yuv_to_bgr32_mmx(GstColorSpaceYUVTables *tables,
                                            unsigned char *lum,
                                            unsigned char *cr,
                                            unsigned char *cb,
                                            unsigned char *out,
                                            int rows, int cols);
static void gst_colorspace_yuv_to_bgr16_mmx(GstColorSpaceYUVTables *tables,
                                            unsigned char *lum,
                                            unsigned char *cr,
                                            unsigned char *cb,
                                            unsigned char *out,
                                            int rows, int cols);
#endif

/* Builds the lookup tables used by the non-MMX pixel loops. */
static GstColorSpaceYUVTables *gst_colorspace_init_yuv(long depth,
                                                       long red_mask, long green_mask, long blue_mask);
/*
 * Look up the conversion routine for a (source, destination) colorspace pair.
 *
 * Only GST_COLORSPACE_YUV422P sources are handled.  When the library is
 * built with HAVE_LIBMMX the MMX routines are used for BGR32 and BGR565;
 * otherwise the table-driven routines are returned for those formats, so
 * the file also builds and links without libmmx.
 *
 * Returns the converter, or NULL (after printing a message) when the
 * combination is not implemented.
 */
GstColorSpaceConverter gst_colorspace_yuv2rgb_get_converter(GstColorSpace src, GstColorSpace dest) {
  DEBUG("gst_colorspace_yuv2rgb_get_converter %d\n", dest);

  switch (src) {
    case GST_COLORSPACE_YUV422P:
      switch (dest) {
        case GST_COLORSPACE_BGR32:
#ifdef HAVE_LIBMMX
          return gst_colorspace_yuv422P_to_bgr32_mmx;
#else
          return gst_colorspace_yuv422P_to_bgr32;
#endif
        case GST_COLORSPACE_RGB32:
          return gst_colorspace_yuv422P_to_rgb32;
        case GST_COLORSPACE_RGB24:
          return gst_colorspace_yuv422P_to_rgb24;
        case GST_COLORSPACE_BGR24:
          return gst_colorspace_yuv422P_to_bgr24;
        case GST_COLORSPACE_RGB555:
        case GST_COLORSPACE_RGB565:
        case GST_COLORSPACE_BGR555:
          return gst_colorspace_yuv422P_to_rgb16;
        case GST_COLORSPACE_BGR565:
#ifdef HAVE_LIBMMX
          return gst_colorspace_yuv422P_to_bgr16_mmx;
#else
          /* same table-driven path the other 16-bit formats use */
          return gst_colorspace_yuv422P_to_rgb16;
#endif
        default:
          break;
      }
      break;
    default:
      break;
  }

  g_print("gst_colorspace_yuv2rgb not implemented\n");
  return NULL;
}
/*
 * Convert a planar YUV buffer to packed 32-bit pixels using lookup tables
 * built for the masks R=0xFF0000, G=0x00FF00, B=0x0000FF.
 *
 * The source data is read as a width*height luminance plane followed by two
 * chroma planes; the second chroma plane starts (width*height)/4 bytes after
 * the first.
 *
 * When params->outbuf is set the pixels are written there and src is
 * returned.  Otherwise a new buffer of width*height*4 bytes is allocated,
 * src is unreferenced and the new buffer is returned.
 */
static GstBuffer *gst_colorspace_yuv422P_to_bgr32(GstBuffer *src, GstColorSpaceParameters *params) {
  static GstColorSpaceYUVTables *bgr32_tables = NULL;
  GstBuffer *fresh = NULL;
  guchar *dest;
  int npixels;

  DEBUG("gst_colorspace_yuv422P_to_bgr32\n");
  g_return_val_if_fail(params != NULL, NULL);

  /* the tables are built once, on first use */
  if (!bgr32_tables)
    bgr32_tables = gst_colorspace_init_yuv(32, 0xFF0000, 0x00FF00, 0x0000FF);

  npixels = params->width * params->height;

  if (params->outbuf != NULL) {
    dest = params->outbuf;
  } else {
    fresh = gst_buffer_new();
    dest = GST_BUFFER_DATA(fresh) = g_malloc(npixels * 4);
    GST_BUFFER_SIZE(fresh) = npixels * 4;
  }

  gst_colorspace_yuv_to_rgb32(bgr32_tables,
                              GST_BUFFER_DATA(src),                             /* Y plane */
                              GST_BUFFER_DATA(src) + npixels,                   /* cr plane */
                              GST_BUFFER_DATA(src) + npixels + (npixels >> 2),  /* cb plane */
                              dest,
                              params->height,
                              params->width);

  if (fresh == NULL)
    return src;

  /* the converted data lives in the new buffer; the source is released */
  gst_buffer_unref(src);
  return fresh;
}
/*
 * Convert a planar YUV buffer to packed 32-bit pixels using lookup tables
 * built for the masks R=0x0000FF, G=0x00FF00, B=0xFF0000.
 *
 * The source data is read as a width*height luminance plane followed by two
 * chroma planes; the second chroma plane starts (width*height)/4 bytes after
 * the first.
 *
 * When params->outbuf is set the pixels are written there and src is
 * returned.  Otherwise a new buffer of width*height*4 bytes is allocated,
 * src is unreferenced and the new buffer is returned.
 */
static GstBuffer *gst_colorspace_yuv422P_to_rgb32(GstBuffer *src, GstColorSpaceParameters *params) {
  static GstColorSpaceYUVTables *rgb32_tables = NULL;
  GstBuffer *fresh = NULL;
  guchar *dest;
  int npixels;

  DEBUG("gst_colorspace_yuv422P_to_rgb32\n");
  g_return_val_if_fail(params != NULL, NULL);

  /* the tables are built once, on first use */
  if (!rgb32_tables)
    rgb32_tables = gst_colorspace_init_yuv(32, 0x0000FF, 0x00FF00, 0xFF0000);

  npixels = params->width * params->height;

  if (params->outbuf != NULL) {
    dest = params->outbuf;
  } else {
    fresh = gst_buffer_new();
    dest = GST_BUFFER_DATA(fresh) = g_malloc(npixels * 4);
    GST_BUFFER_SIZE(fresh) = npixels * 4;
  }

  gst_colorspace_yuv_to_rgb32(rgb32_tables,
                              GST_BUFFER_DATA(src),                             /* Y plane */
                              GST_BUFFER_DATA(src) + npixels,                   /* cr plane */
                              GST_BUFFER_DATA(src) + npixels + (npixels >> 2),  /* cb plane */
                              dest,
                              params->height,
                              params->width);

  if (fresh == NULL)
    return src;

  /* the converted data lives in the new buffer; the source is released */
  gst_buffer_unref(src);
  return fresh;
}
/*
 * Convert a planar YUV buffer to packed 24-bit pixels using lookup tables
 * built for the masks R=0xFF0000, G=0x00FF00, B=0x0000FF.
 *
 * The source data is read as a width*height luminance plane followed by two
 * chroma planes; the second chroma plane starts (width*height)/4 bytes after
 * the first.
 *
 * When params->outbuf is set the pixels are written there and src is
 * returned.  Otherwise a new buffer of width*height*3 bytes is allocated,
 * src is unreferenced and the new buffer is returned.
 */
static GstBuffer *gst_colorspace_yuv422P_to_bgr24(GstBuffer *src, GstColorSpaceParameters *params) {
  static GstColorSpaceYUVTables *bgr24_tables = NULL;
  GstBuffer *fresh = NULL;
  guchar *dest;
  int npixels;

  DEBUG("gst_colorspace_yuv422P_to_bgr24\n");
  g_return_val_if_fail(params != NULL, NULL);

  /* the tables are built once, on first use */
  if (!bgr24_tables)
    bgr24_tables = gst_colorspace_init_yuv(24, 0xFF0000, 0x00FF00, 0x0000FF);

  npixels = params->width * params->height;

  if (params->outbuf != NULL) {
    dest = params->outbuf;
  } else {
    fresh = gst_buffer_new();
    dest = GST_BUFFER_DATA(fresh) = g_malloc(npixels * 3);
    GST_BUFFER_SIZE(fresh) = npixels * 3;
  }

  gst_colorspace_yuv_to_rgb24(bgr24_tables,
                              GST_BUFFER_DATA(src),                             /* Y plane */
                              GST_BUFFER_DATA(src) + npixels,                   /* cr plane */
                              GST_BUFFER_DATA(src) + npixels + (npixels >> 2),  /* cb plane */
                              dest,
                              params->height,
                              params->width);

  if (fresh == NULL)
    return src;

  /* the converted data lives in the new buffer; the source is released */
  gst_buffer_unref(src);
  return fresh;
}
/*
 * Convert a planar YUV buffer to packed 24-bit pixels using lookup tables
 * built for the masks R=0x0000FF, G=0x00FF00, B=0xFF0000.
 *
 * The source data is read as a width*height luminance plane followed by two
 * chroma planes; the second chroma plane starts (width*height)/4 bytes after
 * the first.
 *
 * When params->outbuf is set the pixels are written there and src is
 * returned.  Otherwise a new buffer of width*height*3 bytes is allocated,
 * src is unreferenced and the new buffer is returned.
 */
static GstBuffer *gst_colorspace_yuv422P_to_rgb24(GstBuffer *src, GstColorSpaceParameters *params) {
  static GstColorSpaceYUVTables *rgb24_tables = NULL;
  GstBuffer *fresh = NULL;
  guchar *dest;
  int npixels;

  DEBUG("gst_colorspace_yuv422P_to_rgb24\n");
  g_return_val_if_fail(params != NULL, NULL);

  /* the tables are built once, on first use */
  if (!rgb24_tables)
    rgb24_tables = gst_colorspace_init_yuv(24, 0x0000FF, 0x00FF00, 0xFF0000);

  npixels = params->width * params->height;

  if (params->outbuf != NULL) {
    dest = params->outbuf;
  } else {
    fresh = gst_buffer_new();
    dest = GST_BUFFER_DATA(fresh) = g_malloc(npixels * 3);
    GST_BUFFER_SIZE(fresh) = npixels * 3;
  }

  gst_colorspace_yuv_to_rgb24(rgb24_tables,
                              GST_BUFFER_DATA(src),                             /* Y plane */
                              GST_BUFFER_DATA(src) + npixels,                   /* cr plane */
                              GST_BUFFER_DATA(src) + npixels + (npixels >> 2),  /* cb plane */
                              dest,
                              params->height,
                              params->width);

  if (fresh == NULL)
    return src;

  /* the converted data lives in the new buffer; the source is released */
  gst_buffer_unref(src);
  return fresh;
}
static GstBuffer *gst_colorspace_yuv422P_to_rgb16(GstBuffer *src, GstColorSpaceParameters *params) {
static GstColorSpaceYUVTables *color_tables = NULL;
int size;
DEBUG("gst_colorspace_yuv422P_to_rgb16\n");
g_return_val_if_fail(params != NULL, NULL);
g_return_val_if_fail(params->visual != NULL, NULL);
if (color_tables == NULL) {
color_tables = gst_colorspace_init_yuv(16, params->visual->red_mask, params->visual->green_mask, params->visual->blue_mask);
}
size = params->width * params->height;
gst_colorspace_yuv_to_rgb16(color_tables,
GST_BUFFER_DATA(src), // Y component
GST_BUFFER_DATA(src)+size, // cr component
GST_BUFFER_DATA(src)+size+(size>>2), // cb component
params->outbuf,
params->height,
params->width);
return src;
}
#ifdef HAVE_LIBMMX
/* Per-16-bit-word channel masks used by the 16-bit MMX pixel loop. */
static mmx_t MMX16_redmask = (mmx_t)(long long)0xf800f800f800f800LL; // top 5 bits of each word (f800h x4)
static mmx_t MMX16_grnmask = (mmx_t)(long long)0x07e007e007e007e0LL; // middle 6 bits of each word (07e0h x4)
/*
 * Convert a planar YUV buffer to packed 32-bit pixels with the MMX pixel
 * loop (no lookup tables are needed, so NULL is passed for them).
 *
 * The source data is read as a width*height luminance plane followed by two
 * chroma planes; the second chroma plane starts (width*height)/4 bytes after
 * the first.
 *
 * When params->outbuf is set the pixels are written there and src is
 * returned.  Otherwise a new buffer of width*height*4 bytes is allocated,
 * src is unreferenced and the new buffer is returned.
 */
static GstBuffer *gst_colorspace_yuv422P_to_bgr32_mmx(GstBuffer *src, GstColorSpaceParameters *params) {
  int size;
  GstBuffer *buf = NULL;
  guchar *out;

  /* the trace message now names this function (it used to say rgb32_mmx) */
  DEBUG("gst_colorspace_yuv422P_to_bgr32_mmx\n");
  g_return_val_if_fail(params != NULL, NULL);

  size = params->width * params->height;

  if (params->outbuf == NULL) {
    buf = gst_buffer_new();
    out = GST_BUFFER_DATA(buf) = g_malloc(size * 4);
    GST_BUFFER_SIZE(buf) = size * 4;
  }
  else out = params->outbuf;

  gst_colorspace_yuv_to_bgr32_mmx(NULL,
                                  GST_BUFFER_DATA(src),                  /* Y component */
                                  GST_BUFFER_DATA(src)+size,             /* cr component */
                                  GST_BUFFER_DATA(src)+size+(size>>2),   /* cb component */
                                  out,
                                  params->height,
                                  params->width);

  if (buf) {
    gst_buffer_unref(src);
    return buf;
  }
  else return src;
}
/*
 * Convert a planar YUV buffer to packed 16-bit pixels with the MMX pixel
 * loop (no lookup tables are needed, so NULL is passed for them).
 *
 * The source data is read as a width*height luminance plane followed by two
 * chroma planes; the second chroma plane starts (width*height)/4 bytes after
 * the first.
 *
 * When params->outbuf is set the pixels are written there and src is
 * returned.  Otherwise, as in the 32-bit MMX converter, a new buffer of
 * width*height*2 bytes is allocated, src is unreferenced and the new buffer
 * is returned (previously a NULL outbuf was handed straight to the pixel
 * loop).
 */
static GstBuffer *gst_colorspace_yuv422P_to_bgr16_mmx(GstBuffer *src, GstColorSpaceParameters *params) {
  int size;
  GstBuffer *buf = NULL;
  guchar *out;

  DEBUG("gst_colorspace_yuv422P_to_bgr16_mmx \n");
  g_return_val_if_fail(params != NULL, NULL);

  size = params->width * params->height;

  if (params->outbuf == NULL) {
    buf = gst_buffer_new();
    out = GST_BUFFER_DATA(buf) = g_malloc(size * 2);
    GST_BUFFER_SIZE(buf) = size * 2;
  }
  else out = params->outbuf;

  gst_colorspace_yuv_to_bgr16_mmx(NULL,
                                  GST_BUFFER_DATA(src),                  /* Y component */
                                  GST_BUFFER_DATA(src)+size,             /* cr component */
                                  GST_BUFFER_DATA(src)+size+(size>>2),   /* cb component */
                                  out,
                                  params->height,
                                  params->width);

  if (buf) {
    gst_buffer_unref(src);
    return buf;
  }
  else return src;
}
#endif
/*
 * Count the 1 bits in a longword.
 * One loop iteration per bit position up to the highest set bit; meant for
 * set-up code only.
 */
static int
number_of_bits_set(unsigned long a)
{
  int ones = 0;

  while (a != 0) {
    ones += (int)(a & 1);
    a >>= 1;
  }
  return ones;
}
/*
 * Shift the 0s in the least significant end out of the longword, i.e.
 * return the value moved right until its lowest set bit is bit 0.
 * Zero is returned unchanged.
 *
 * The earlier version shifted by a single position only, so a value such
 * as 0xF800 came back as 0x7C00 instead of 0x1F.
 *
 * Low performance, do not call often.
 */
static unsigned long
shifted_down(unsigned long a)
{
  if (a == 0)
    return 0;

  while ((a & 1) == 0)
    a >>= 1;

  return a;
}
/*
 * Count the 0 bits at the most significant end of a longword.
 * A zero argument yields the full width of unsigned long in bits.
 * Assumes 8-bit chars and a two's complement long (the top bit is tested
 * through the sign of the value).  Set-up code only.
 */
static int
free_bits_at_top(unsigned long a)
{
  int zeros = 0;

  if (a == 0)
    return sizeof(unsigned long) * 8;

  /* move the value up until its highest set bit reaches the sign position */
  while (((long)a) >= 0l) {
    a <<= 1;
    zeros++;
  }
  return zeros;
}
/*
 * Count the 0 bits at the least significant end of a longword.
 * A zero argument yields the full width of unsigned long in bits
 * (assuming 8-bit chars).  Set-up code only.
 */
static int
free_bits_at_bottom(unsigned long a)
{
  int zeros = 0;

  if (a == 0)
    return sizeof(unsigned long) * 8;

  /* drop low bits until a set bit reaches position 0 */
  while ((((long)a) & 1l) == 0) {
    a >>= 1;
    zeros++;
  }
  return zeros;
}
/*
 * Fill one 768-entry component-to-pixel table for a channel mask.
 *
 * Entries 256..511 map an 8-bit component value to its bits inside the
 * pixel: the value is reduced to the number of bits set in the mask and
 * shifted up to the mask's position.  For depths other than 24 and 32 the
 * pixel is also duplicated into the next 16 bits (and, with SIXTYFOUR_BIT,
 * 32-bit pixels are duplicated into the upper 32 bits).  Entries 0..255
 * and 512..767 repeat the lowest and highest mapped values, so an
 * out-of-range component needs no explicit clamping.
 */
static void
gst_colorspace_fill_pix_table(long *table, long mask, long depth)
{
  int drop = 8 - number_of_bits_set(mask);
  int shift = free_bits_at_bottom(mask);
  int level;

  for (level = 0; level < 256; level++) {
    long pix = level >> drop;

    pix <<= shift;
    if (depth != 32 && depth != 24)
      pix |= pix << 16;
#ifdef SIXTYFOUR_BIT
    if (depth == 32)
      pix |= pix << 32;
#endif
    table[level + 256] = pix;
  }

  for (level = 0; level < 256; level++) {
    table[level] = table[256];
    table[level + 512] = table[511];
  }
}

/*
 *--------------------------------------------------------------
 *
 * gst_colorspace_init_yuv --
 *
 * Builds the lookup tables that replace the multiplications of the
 * YUV to RGB conversion: a luminance table, four chroma contribution
 * tables indexed by the raw chroma byte, and three tables turning a
 * red/green/blue component into its bits in the output pixel.
 *
 * Results:
 * A newly allocated GstColorSpaceYUVTables.  The r_2_pix/g_2_pix/
 * b_2_pix members point 256 entries into their 768-entry arrays so
 * that indices from -256 to 511 are valid.
 *
 * Side effects:
 * Allocates memory; prints a message and exits the process when one
 * of the table allocations fails.
 *
 *--------------------------------------------------------------
 */
static GstColorSpaceYUVTables *
gst_colorspace_init_yuv(long depth, long red_mask, long green_mask, long blue_mask)
{
  GstColorSpaceYUVTables *tables = g_malloc(sizeof(GstColorSpaceYUVTables));
  int *lum_lut, *cr_to_r, *cr_to_g, *cb_to_g, *cb_to_b;
  long *red_pix, *green_pix, *blue_pix;
  int level;

  lum_lut = tables->L_tab = (int *)malloc(256*sizeof(int));
  cr_to_r = tables->Cr_r_tab = (int *)malloc(256*sizeof(int));
  cr_to_g = tables->Cr_g_tab = (int *)malloc(256*sizeof(int));
  cb_to_g = tables->Cb_g_tab = (int *)malloc(256*sizeof(int));
  cb_to_b = tables->Cb_b_tab = (int *)malloc(256*sizeof(int));
  red_pix = (long *)malloc(768*sizeof(long));
  green_pix = (long *)malloc(768*sizeof(long));
  blue_pix = (long *)malloc(768*sizeof(long));

  if (!lum_lut || !cr_to_r || !cr_to_g || !cb_to_g || !cb_to_b ||
      !red_pix || !green_pix || !blue_pix) {
    fprintf(stderr, "Could not get enough memory in InitColorDither\n");
    exit(1);
  }

  for (level = 0; level < 256; level++) {
    /* chroma bytes are centred on 128 */
    int centred = level - 128;

    lum_lut[level] = level;
    cr_to_r[level] = (0.419/0.299) * centred;
    cr_to_g[level] = -(0.299/0.419) * centred;
    cb_to_g[level] = -(0.114/0.331) * centred;
    cb_to_b[level] = (0.587/0.331) * centred;
  }

  gst_colorspace_fill_pix_table(red_pix, red_mask, depth);
  gst_colorspace_fill_pix_table(green_pix, green_mask, depth);
  gst_colorspace_fill_pix_table(blue_pix, blue_mask, depth);

  tables->r_2_pix = red_pix + 256;
  tables->g_2_pix = green_pix + 256;
  tables->b_2_pix = blue_pix + 256;

  return tables;
}
/*
 *--------------------------------------------------------------
 *
 * gst_colorspace_yuv_to_rgb16 --
 *
 * Converts a planar YUV image into 16 bit pixels through the
 * lookup tables.  Two output rows are produced per pass; every
 * cr/cb sample is shared by a 2x2 block of luminance samples.
 *
 * Results:
 * None.
 *
 * Side effects:
 * Writes (rows/2)*2 rows of (cols/2)*2 16-bit pixels to out.
 *
 *--------------------------------------------------------------
 */
static void
gst_colorspace_yuv_to_rgb16(GstColorSpaceYUVTables *tables,
                            unsigned char *lum,
                            unsigned char *cr,
                            unsigned char *cb,
                            unsigned char *out,
                            int rows, int cols)
{
  unsigned short *top = (unsigned short *)out;
  unsigned short *bottom = top + cols;
  unsigned char *lum_below = lum + cols;
  int half_cols = cols >> 1;
  int row_pairs = rows >> 1;

  while (row_pairs) {
    int col_pairs = half_cols;

    while (col_pairs) {
      int v = *cr++;
      int u = *cb++;
      int red_off = tables->Cr_r_tab[v];
      int green_off = tables->Cr_g_tab[v] + tables->Cb_g_tab[u];
      int blue_off = tables->Cb_b_tab[u];
      int luma;

      /* two pixels of the upper row */
      luma = tables->L_tab[(int) *lum++];
      *top++ = (tables->r_2_pix[luma + red_off] |
                tables->g_2_pix[luma + green_off] |
                tables->b_2_pix[luma + blue_off]);
      luma = tables->L_tab[(int) *lum++];
      *top++ = (tables->r_2_pix[luma + red_off] |
                tables->g_2_pix[luma + green_off] |
                tables->b_2_pix[luma + blue_off]);

      /* the two pixels below them, using the same chroma */
      luma = tables->L_tab[(int) *lum_below++];
      *bottom++ = (tables->r_2_pix[luma + red_off] |
                   tables->g_2_pix[luma + green_off] |
                   tables->b_2_pix[luma + blue_off]);
      luma = tables->L_tab[(int) *lum_below++];
      *bottom++ = (tables->r_2_pix[luma + red_off] |
                   tables->g_2_pix[luma + green_off] |
                   tables->b_2_pix[luma + blue_off]);

      col_pairs--;
    }

    /*
     * The lower-row pointers now sit at the start of the next row
     * pair: that becomes the new upper row, and the lower-row
     * pointers move one row further down.
     */
    lum = lum_below;
    top = bottom;
    lum_below += cols;
    bottom += cols;

    row_pairs--;
  }
}
/*
 * Store the low three bytes of a packed pixel value, least significant
 * byte first.
 */
static void
gst_colorspace_put_pixel24(unsigned char *dst, unsigned long pixel)
{
  dst[0] = (unsigned char)(pixel & 0xff);
  dst[1] = (unsigned char)((pixel >> 8) & 0xff);
  dst[2] = (unsigned char)((pixel >> 16) & 0xff);
}

/*
 * Converts a planar YUV image into packed 24 bit pixels (3 bytes per
 * pixel) through the lookup tables.  Two output rows are produced per
 * pass; every cr/cb sample is shared by a 2x2 block of luminance samples.
 *
 * Each pixel is written as bits 0-7, 8-15 and 16-23 of the table value, in
 * that order.  This is the byte sequence the previous implementation
 * produced on little-endian machines, but it is obtained with shifts
 * instead of storing through an int pointer into a char array, which
 * violated the aliasing/alignment rules and depended on host byte order.
 *
 * tables: lookup tables from gst_colorspace_init_yuv()
 * lum:    luminance plane, cols bytes per row
 * cr, cb: chroma planes, one byte of each consumed per 2x2 pixel block
 * out:    destination, cols*3 bytes per row
 * rows, cols: image height and width in pixels
 */
static void
gst_colorspace_yuv_to_rgb24(GstColorSpaceYUVTables *tables,
                            unsigned char *lum,
                            unsigned char *cr,
                            unsigned char *cb,
                            unsigned char *out,
                            int rows, int cols)
{
  int L, CR, CB;
  unsigned char *row1, *row2;
  unsigned char *lum2;
  int x, y;
  int cr_r;
  int crb_g;
  int cb_b;
  int cols_2 = cols>>1;
  int cols_3 = cols*3;

  row1 = out;
  row2 = row1 + cols_3;
  lum2 = lum + cols;

  for (y=rows>>1; y; y--) {
    for (x=cols_2; x; x--) {
      CR = *cr++;
      CB = *cb++;
      cr_r = tables->Cr_r_tab[CR];
      crb_g = tables->Cr_g_tab[CR] + tables->Cb_g_tab[CB];
      cb_b = tables->Cb_b_tab[CB];

      L = tables->L_tab[(int) *lum++];
      gst_colorspace_put_pixel24(row1, tables->r_2_pix[L+cr_r] | tables->g_2_pix[L+crb_g] | tables->b_2_pix[L+cb_b]);
      row1 += 3;

      L = tables->L_tab[(int) *lum++];
      gst_colorspace_put_pixel24(row1, tables->r_2_pix[L+cr_r] | tables->g_2_pix[L+crb_g] | tables->b_2_pix[L+cb_b]);
      row1 += 3;

      /*
       * Now, do second row.
       */
      L = tables->L_tab[(int) *lum2++];
      gst_colorspace_put_pixel24(row2, tables->r_2_pix[L+cr_r] | tables->g_2_pix[L+crb_g] | tables->b_2_pix[L+cb_b]);
      row2 += 3;

      L = tables->L_tab[(int) *lum2++];
      gst_colorspace_put_pixel24(row2, tables->r_2_pix[L+cr_r] | tables->g_2_pix[L+crb_g] | tables->b_2_pix[L+cb_b]);
      row2 += 3;
    }
    /*
     * The second-row pointers are at the start of the next row pair;
     * make that the first row and move the second row one further down.
     */
    lum = lum2;
    row1 = row2;
    lum2 += cols;
    row2 += cols_3;
  }
}
/*
 *--------------------------------------------------------------
 *
 * gst_colorspace_yuv_to_rgb32 --
 *
 * Converts a planar YUV image into 32 bit pixels (or 24-bit
 * non-packed) through the lookup tables.  Same scheme as the
 * 16 bit routine, with 4-byte instead of 2-byte pixels: two
 * output rows per pass, one cr/cb sample per 2x2 block of
 * luminance samples.
 *
 * Results:
 * None.
 *
 * Side effects:
 * Writes (rows/2)*2 rows of (cols/2)*2 32-bit pixels to out.
 *
 *--------------------------------------------------------------
 */
static void
gst_colorspace_yuv_to_rgb32(GstColorSpaceYUVTables *tables,
                            unsigned char *lum,
                            unsigned char *cr,
                            unsigned char *cb,
                            unsigned char *out,
                            int rows, int cols)
{
  unsigned int *top = (guint32 *)out;
  unsigned int *bottom = top + cols;
  unsigned char *lum_below = lum + cols;
  int half_cols = cols >> 1;
  int row_pairs = rows >> 1;

  while (row_pairs) {
    int col_pairs = half_cols;

    while (col_pairs) {
      int v = *cr++;
      int u = *cb++;
      int red_off = tables->Cr_r_tab[v];
      int green_off = tables->Cr_g_tab[v] + tables->Cb_g_tab[u];
      int blue_off = tables->Cb_b_tab[u];
      int luma;

      /* two pixels of the upper row */
      luma = tables->L_tab[(int) *lum++];
      *top++ = (tables->r_2_pix[luma + red_off] |
                tables->g_2_pix[luma + green_off] |
                tables->b_2_pix[luma + blue_off]);
      luma = tables->L_tab[(int) *lum++];
      *top++ = (tables->r_2_pix[luma + red_off] |
                tables->g_2_pix[luma + green_off] |
                tables->b_2_pix[luma + blue_off]);

      /* the two pixels below them, using the same chroma */
      luma = tables->L_tab[(int) *lum_below++];
      *bottom++ = (tables->r_2_pix[luma + red_off] |
                   tables->g_2_pix[luma + green_off] |
                   tables->b_2_pix[luma + blue_off]);
      luma = tables->L_tab[(int) *lum_below++];
      *bottom++ = (tables->r_2_pix[luma + red_off] |
                   tables->g_2_pix[luma + green_off] |
                   tables->b_2_pix[luma + blue_off]);

      col_pairs--;
    }

    /* advance both row cursors to the next pair of rows */
    lum = lum_below;
    top = bottom;
    lum_below += cols;
    bottom += cols;

    row_pairs--;
  }
}
#ifdef HAVE_LIBMMX
/*
 * Constants for the MMX pixel loops; every value is one 16-bit pattern
 * repeated in all four words of the quadword.
 */
static mmx_t MMX_80w = (mmx_t)(long long)0x0080008000800080LL; // 128 per word, subtracted from the chroma samples
static mmx_t MMX_00FFw = (mmx_t)(long long)0x00ff00ff00ff00ffLL; // keeps the low byte of each word
static mmx_t MMX_FF00w = (mmx_t)(long long)0xff00ff00ff00ff00LL; // keeps the high byte of each word
/* Coefficients of the 16-bit loop; its results are shifted right by 6 afterwards. */
static mmx_t MMX16_Vredcoeff = (mmx_t)(long long)0x0066006600660066LL; // 0066h = 102
static mmx_t MMX16_Ublucoeff = (mmx_t)(long long)0x0081008100810081LL; // 0081h = 129
static mmx_t MMX16_Ugrncoeff = (mmx_t)(long long)0xffe8ffe8ffe8ffe8LL; // ffe8h = -24 as a signed word
static mmx_t MMX16_Vgrncoeff = (mmx_t)(long long)0xffcdffcdffcdffcdLL; // ffcdh = -51 as a signed word
static mmx_t MMX16_Ycoeff = (mmx_t)(long long)0x004a004a004a004aLL; // 004ah = 74
/* Coefficients of the 32-bit loop; its products are shifted right by 6 afterwards. */
static mmx_t MMX32_Vredcoeff = (mmx_t)(long long)0x0059005900590059LL; // 0059h = 89
static mmx_t MMX32_Ubluecoeff = (mmx_t)(long long)0x0072007200720072LL; // 0072h = 114
static mmx_t MMX32_Ugrncoeff = (mmx_t)(long long)0xffeaffeaffeaffeaLL; // ffeah = -22 as a signed word
static mmx_t MMX32_Vgrncoeff = (mmx_t)(long long)0xffd2ffd2ffd2ffd2LL; // ffd2h = -46 as a signed word
/*
 * MMX conversion of a planar YUV image to 16-bit pixels.
 *
 * Every inner iteration reads 8 luminance bytes from the current row and
 * 8 from the row below (lum+cols), 4 cb and 4 cr bytes, and stores 8
 * pixels to the current output row and 8 to the row below (row1+cols).
 * Each pixel is assembled as: red masked with f800h, green shifted left
 * by 3 and masked with 07e0h, blue masked with f800h and shifted right
 * by 11.
 *
 * tables is not used.  rows is consumed two at a time (rows>>1) and cols
 * eight at a time (cols>>3).
 * NOTE(review): presumably cols must be a multiple of 8 and rows even;
 * leftover columns are not handled here - confirm with the callers.
 */
static void
gst_colorspace_yuv_to_bgr16_mmx(tables, lum, cr, cb, out, rows, cols)
GstColorSpaceYUVTables *tables;
unsigned char *lum;
unsigned char *cr;
unsigned char *cb;
unsigned char *out;
int cols, rows;
{
unsigned short *row1 = (unsigned short* )out; // 16 bit target (one unsigned short per pixel)
int cols8 = cols>>3;
int y, x;
for (y=rows>>1; y; y--) {
for (x=cols8; x; x--) {
movd_m2r(*(mmx_t *)cb, mm0); // 4 Cb 0 0 0 0 u3 u2 u1 u0
pxor_r2r(mm7, mm7);
movd_m2r(*(mmx_t *)cr, mm1); // 4 Cr 0 0 0 0 v3 v2 v1 v0
punpcklbw_r2r(mm7, mm0); // 4 W cb 0 u3 0 u2 0 u1 0 u0
punpcklbw_r2r(mm7, mm1); // 4 W cr 0 v3 0 v2 0 v1 0 v0
psubw_m2r(MMX_80w, mm0);
psubw_m2r(MMX_80w, mm1);
movq_r2r(mm0, mm2); // Cb 0 u3 0 u2 0 u1 0 u0
movq_r2r(mm1, mm3); // Cr
pmullw_m2r(MMX16_Ugrncoeff, mm2); // Cb2green 0 R3 0 R2 0 R1 0 R0
movq_m2r(*(mmx_t *)lum, mm6); // L1 l7 L6 L5 L4 L3 L2 L1 L0
pmullw_m2r(MMX16_Ublucoeff, mm0); // Cb2blue
pand_m2r(MMX_00FFw, mm6); // L1 00 L6 00 L4 00 L2 00 L0
pmullw_m2r(MMX16_Vgrncoeff, mm3); // Cr2green
movq_m2r(*(mmx_t *)lum, mm7); // L2
pmullw_m2r(MMX16_Vredcoeff, mm1); // Cr2red
// "psubw MMX_10w, %%mm6\n"
psrlw_i2r(8, mm7); // L2 00 L7 00 L5 00 L3 00 L1
pmullw_m2r(MMX16_Ycoeff, mm6); // lum1
// "psubw MMX_10w, %%mm7\n" // L2
paddw_r2r(mm3, mm2); // Cb2green + Cr2green == green
pmullw_m2r(MMX16_Ycoeff, mm7); // lum2
movq_r2r(mm6, mm4); // lum1
paddw_r2r(mm0, mm6); // lum1 +blue 00 B6 00 B4 00 B2 00 B0
movq_r2r(mm4, mm5); // lum1
paddw_r2r(mm1, mm4); // lum1 +red 00 R6 00 R4 00 R2 00 R0
paddw_r2r(mm2, mm5); // lum1 +green 00 G6 00 G4 00 G2 00 G0
psraw_i2r(6, mm4); // R1 0 .. 64
movq_r2r(mm7, mm3); // lum2 00 L7 00 L5 00 L3 00 L1
psraw_i2r(6, mm5); // G1 - .. +
paddw_r2r(mm0, mm7); // Lum2 +blue 00 B7 00 B5 00 B3 00 B1
psraw_i2r(6, mm6); // B1 0 .. 64
packuswb_r2r(mm4, mm4); // R1 R1
packuswb_r2r(mm5, mm5); // G1 G1
packuswb_r2r(mm6, mm6); // B1 B1
punpcklbw_r2r(mm4, mm4);
punpcklbw_r2r(mm5, mm5);
pand_m2r(MMX16_redmask, mm4);
psllw_i2r(3, mm5); // GREEN 1
punpcklbw_r2r(mm6, mm6);
pand_m2r(MMX16_grnmask, mm5);
pand_m2r(MMX16_redmask, mm6);
por_r2r(mm5, mm4); //
psrlw_i2r(11, mm6); // BLUE 1
movq_r2r(mm3, mm5); // lum2
paddw_r2r(mm1, mm3); // lum2 +red 00 R7 00 R5 00 R3 00 R1
paddw_r2r(mm2, mm5); // lum2 +green 00 G7 00 G5 00 G3 00 G1
psraw_i2r(6, mm3); // R2
por_r2r(mm6, mm4); // MM4
psraw_i2r(6, mm5); // G2
movq_m2r(*(mmx_t *)(lum+cols), mm6); // L3 load lum2
psraw_i2r(6, mm7);
packuswb_r2r(mm3, mm3);
packuswb_r2r(mm5, mm5);
packuswb_r2r(mm7, mm7);
pand_m2r(MMX_00FFw, mm6); // L3
punpcklbw_r2r(mm3, mm3);
// "psubw MMX_10w, %%mm6\n" // L3
punpcklbw_r2r(mm5, mm5);
pmullw_m2r(MMX16_Ycoeff, mm6); // lum3
punpcklbw_r2r(mm7, mm7);
psllw_i2r(3, mm5); // GREEN 2
pand_m2r(MMX16_redmask, mm7);
pand_m2r(MMX16_redmask, mm3);
psrlw_i2r(11, mm7); // BLUE 2
pand_m2r(MMX16_grnmask, mm5);
por_r2r(mm7, mm3);
movq_m2r(*(mmx_t *)(lum+cols), mm7); // L4 load lum2
por_r2r(mm5, mm3); //
psrlw_i2r(8, mm7); // L4
movq_r2r(mm4, mm5);
// "psubw MMX_10w, %%mm7\n" // L4
punpcklwd_r2r(mm3, mm4);
pmullw_m2r(MMX16_Ycoeff, mm7); // lum4
punpckhwd_r2r(mm3, mm5);
movq_r2m(mm4, *(row1)); // write row1
movq_r2m(mm5, *(row1+4)); // write row1
movq_r2r(mm6, mm4); // Lum3
paddw_r2r(mm0, mm6); // Lum3 +blue
movq_r2r(mm4, mm5); // Lum3
paddw_r2r(mm1, mm4); // Lum3 +red
paddw_r2r(mm2, mm5); // Lum3 +green
psraw_i2r(6, mm4);
movq_r2r(mm7, mm3); // Lum4
psraw_i2r(6, mm5);
paddw_r2r(mm0, mm7); // Lum4 +blue
psraw_i2r(6, mm6); // Lum3 +blue
movq_r2r(mm3, mm0); // Lum4
packuswb_r2r(mm4, mm4);
paddw_r2r(mm1, mm3); // Lum4 +red
packuswb_r2r(mm5, mm5);
paddw_r2r(mm2, mm0); // Lum4 +green
packuswb_r2r(mm6, mm6);
punpcklbw_r2r(mm4, mm4);
punpcklbw_r2r(mm5, mm5);
punpcklbw_r2r(mm6, mm6);
psllw_i2r(3, mm5); // GREEN 3
pand_m2r(MMX16_redmask, mm4);
psraw_i2r(6, mm3); // psr 6
psraw_i2r(6, mm0);
pand_m2r(MMX16_redmask, mm6); // BLUE
pand_m2r(MMX16_grnmask, mm5);
psrlw_i2r(11, mm6); // BLUE 3
por_r2r(mm5, mm4);
psraw_i2r(6, mm7);
por_r2r(mm6, mm4);
packuswb_r2r(mm3, mm3);
packuswb_r2r(mm0, mm0);
packuswb_r2r(mm7, mm7);
punpcklbw_r2r(mm3, mm3);
punpcklbw_r2r(mm0, mm0);
punpcklbw_r2r(mm7, mm7);
pand_m2r(MMX16_redmask, mm3);
pand_m2r(MMX16_redmask, mm7); // BLUE
psllw_i2r(3, mm0); // GREEN 4
psrlw_i2r(11, mm7);
pand_m2r(MMX16_grnmask, mm0);
por_r2r(mm7, mm3);
por_r2r(mm0, mm3);
movq_r2r(mm4, mm5);
punpcklwd_r2r(mm3, mm4);
punpckhwd_r2r(mm3, mm5);
movq_r2m(mm4, *(row1+cols)); // write row2
movq_r2m(mm5, *(row1+cols+4)); // write row2
lum+=8;
cr+=4;
cb+=4;
row1 +=8;
}
// both rows of the pair are done: skip over the row that was handled as "row below"
lum += cols;
row1 += cols;
}
// leave MMX state so that floating point code can run again
emms();
}
/*
 * MMX conversion of a planar YUV image to 32-bit pixels.
 *
 * Every inner iteration reads 4 luminance bytes from the current row and
 * from the row below (lum+cols), 2 cr and 2 cb bytes, and stores 4 pixels
 * to the current output row (row1, row1+2) and 4 to the row below
 * (row1+cols, row1+cols+2).  Per the register-layout comments below, each
 * stored 32-bit pixel is 0 R G B from the most to the least significant
 * byte.
 *
 * tables is not used.  rows is consumed two at a time (rows>>1) and cols
 * four at a time (cols>>2).
 * NOTE(review): presumably cols must be a multiple of 4 and rows even;
 * leftover columns are not handled here - confirm with the callers.
 */
static void
gst_colorspace_yuv_to_bgr32_mmx(tables, lum, cr, cb, out, rows, cols)
GstColorSpaceYUVTables *tables;
unsigned char *lum;
unsigned char *cr;
unsigned char *cb;
unsigned char *out;
int cols, rows;
{
guint32 *row1 = (guint32 *)out; // 32 bit target
int cols4 = cols>>2;
int y, x;
for (y=rows>>1; y; y--) {
for (x=cols4; x; x--) {
// create Cr (result in mm1)
movd_m2r(*(mmx_t *)cr, mm1); // 0 0 0 0 v3 v2 v1 v0
pxor_r2r(mm7, mm7); // 00 00 00 00 00 00 00 00
movd_m2r(*(mmx_t *)lum, mm2); // 0 0 0 0 l3 l2 l1 l0
punpcklbw_r2r(mm7, mm1); // 0 v3 0 v2 00 v1 00 v0
punpckldq_r2r(mm1, mm1); // 00 v1 00 v0 00 v1 00 v0
psubw_m2r(MMX_80w, mm1); // mm1-128:r1 r1 r0 r0 r1 r1 r0 r0
// create Cr_g (result in mm0)
movq_r2r(mm1, mm0); // r1 r1 r0 r0 r1 r1 r0 r0
pmullw_m2r(MMX32_Vgrncoeff, mm0); // Cr * MMX32_Vgrncoeff (-46 decimal)
pmullw_m2r(MMX32_Vredcoeff, mm1); // Cr * MMX32_Vredcoeff (89 decimal)
psraw_i2r(6, mm0); // Cr_g = Cr_g/64
psraw_i2r(6, mm1); // red=red/64
// create L1 L2 (result in mm2,mm4)
// L2=lum+cols
movq_m2r(*(mmx_t *)(lum+cols),mm3); // loads 8 bytes; only the low 4 (L3 L2 L1 L0) are used below
punpckldq_r2r(mm3, mm2); // L3 L2 L1 L0 l3 l2 l1 l0
movq_r2r(mm2, mm4); // L3 L2 L1 L0 l3 l2 l1 l0
pand_m2r(MMX_FF00w, mm2); // L3 0 L1 0 l3 0 l1 0
pand_m2r(MMX_00FFw, mm4); // 0 L2 0 L0 0 l2 0 l0
psrlw_i2r(8, mm2); // 0 L3 0 L1 0 l3 0 l1
// create R (result in mm6)
movq_r2r(mm2, mm5); // 0 L3 0 L1 0 l3 0 l1
movq_r2r(mm4, mm6); // 0 L2 0 L0 0 l2 0 l0
paddsw_r2r(mm1, mm5); // lum1+red:x R3 x R1 x r3 x r1
paddsw_r2r(mm1, mm6); // lum1+red:x R2 x R0 x r2 x r0
packuswb_r2r(mm5, mm5); // R3 R1 r3 r1 R3 R1 r3 r1
packuswb_r2r(mm6, mm6); // R2 R0 r2 r0 R2 R0 r2 r0
pxor_r2r(mm7, mm7); // 00 00 00 00 00 00 00 00
punpcklbw_r2r(mm5, mm6); // R3 R2 R1 R0 r3 r2 r1 r0
// create Cb (result in mm1)
movd_m2r(*(mmx_t *)cb, mm1); // 0 0 0 0 u3 u2 u1 u0
punpcklbw_r2r(mm7, mm1); // 0 u3 0 u2 00 u1 00 u0
punpckldq_r2r(mm1, mm1); // 00 u1 00 u0 00 u1 00 u0
psubw_m2r(MMX_80w, mm1); // mm1-128:u1 u1 u0 u0 u1 u1 u0 u0
// create Cb_g (result in mm5)
movq_r2r(mm1, mm5); // u1 u1 u0 u0 u1 u1 u0 u0
pmullw_m2r(MMX32_Ugrncoeff, mm5); // Cb * MMX32_Ugrncoeff (-22 decimal)
pmullw_m2r(MMX32_Ubluecoeff, mm1); // Cb * MMX32_Ubluecoeff (114 decimal)
psraw_i2r(6, mm5); // Cb_g = Cb_g/64
psraw_i2r(6, mm1); // blue=blue/64
// create G (result in mm7)
movq_r2r(mm2, mm3); // 0 L3 0 L1 0 l3 0 l1
movq_r2r(mm4, mm7); // 0 L2 0 L0 0 l2 0 l0
paddsw_r2r(mm5, mm3); // lum1+Cb_g:x G3t x G1t x g3t x g1t
paddsw_r2r(mm5, mm7); // lum1+Cb_g:x G2t x G0t x g2t x g0t
paddsw_r2r(mm0, mm3); // lum1+Cr_g:x G3 x G1 x g3 x g1
paddsw_r2r(mm0, mm7); // lum1+Cr_g:x G2 x G0 x g2 x g0
packuswb_r2r(mm3, mm3); // G3 G1 g3 g1 G3 G1 g3 g1
packuswb_r2r(mm7, mm7); // G2 G0 g2 g0 G2 G0 g2 g0
punpcklbw_r2r(mm3, mm7); // G3 G2 G1 G0 g3 g2 g1 g0
// create B (result in mm5)
movq_r2r(mm2, mm3); // 0 L3 0 L1 0 l3 0 l1
movq_r2r(mm4, mm5); // 0 L2 0 L0 0 l2 0 l0
paddsw_r2r(mm1, mm3); // lum1+blue:x B3 x B1 x b3 x b1
paddsw_r2r(mm1, mm5); // lum1+blue:x B2 x B0 x b2 x b0
packuswb_r2r(mm3, mm3); // B3 B1 b3 b1 B3 B1 b3 b1
packuswb_r2r(mm5, mm5); // B2 B0 b2 b0 B2 B0 b2 b0
punpcklbw_r2r(mm3, mm5); // B3 B2 B1 B0 b3 b2 b1 b0
// fill destination row1 (needed are mm6=Rr,mm7=Gg,mm5=Bb)
pxor_r2r(mm2, mm2); // 0 0 0 0 0 0 0 0
pxor_r2r(mm4, mm4); // 0 0 0 0 0 0 0 0
movq_r2r(mm6, mm1); // R3 R2 R1 R0 r3 r2 r1 r0
movq_r2r(mm5, mm3); // B3 B2 B1 B0 b3 b2 b1 b0
// process lower lum
punpcklbw_r2r(mm4, mm1); // 0 r3 0 r2 0 r1 0 r0
punpcklbw_r2r(mm4, mm3); // 0 b3 0 b2 0 b1 0 b0
movq_r2r(mm1, mm2); // 0 r3 0 r2 0 r1 0 r0
movq_r2r(mm3, mm0); // 0 b3 0 b2 0 b1 0 b0
punpcklwd_r2r(mm1, mm3); // 0 r1 0 b1 0 r0 0 b0
punpckhwd_r2r(mm2, mm0); // 0 r3 0 b3 0 r2 0 b2
pxor_r2r(mm2, mm2); // 0 0 0 0 0 0 0 0
movq_r2r(mm7, mm1); // G3 G2 G1 G0 g3 g2 g1 g0
punpcklbw_r2r(mm1, mm2); // g3 0 g2 0 g1 0 g0 0
punpcklwd_r2r(mm4, mm2); // 0 0 g1 0 0 0 g0 0
por_r2r(mm3, mm2); // 0 r1 g1 b1 0 r0 g0 b0
movq_r2m(mm2, *(mmx_t *)row1); // wrote out ! row1
pxor_r2r(mm2, mm2); // 0 0 0 0 0 0 0 0
punpcklbw_r2r(mm1, mm4); // g3 0 g2 0 g1 0 g0 0
punpckhwd_r2r(mm2, mm4); // 0 0 g3 0 0 0 g2 0
por_r2r(mm0, mm4); // 0 r3 g3 b3 0 r2 g2 b2
movq_r2m(mm4, *(mmx_t *)(row1+2)); // wrote out ! row1
// fill destination row2 (needed are mm6=Rr,mm7=Gg,mm5=Bb)
// this can be done "destructive"
pxor_r2r(mm2, mm2); // 0 0 0 0 0 0 0 0
punpckhbw_r2r(mm2, mm6); // 0 R3 0 R2 0 R1 0 R0
punpckhbw_r2r(mm1, mm5); // G3 B3 G2 B2 G1 B1 G0 B0
movq_r2r(mm5, mm1); // G3 B3 G2 B2 G1 B1 G0 B0
punpcklwd_r2r(mm6, mm1); // 0 R1 G1 B1 0 R0 G0 B0
movq_r2m(mm1, *(mmx_t *)(row1+cols)); // wrote out ! row2
punpckhwd_r2r(mm6, mm5); // 0 R3 G3 B3 0 R2 G2 B2
movq_r2m(mm5, *(mmx_t *)(row1+cols+2)); // wrote out ! row2
lum+=4;
cr+=2;
cb+=2;
row1 +=4;
}
// both rows of the pair are done: skip over the row that was handled as "row below"
lum += cols;
row1 += cols;
}
// leave MMX state so that floating point code can run again
emms();
}
#endif