cog: Improvements in colorspace and scaler

Add fast paths for YUV->YUV conversions and Orc code for all.
Use Orc for horizontal resampling.
This commit is contained in:
David Schleef 2010-08-13 21:54:54 -07:00
parent 785fb87caf
commit 05992323b6
3 changed files with 947 additions and 49 deletions

View file

@ -630,19 +630,14 @@ cog_virt_frame_render_resample_horiz_1tap (CogFrame * frame, void *_dest,
{
uint8_t *dest = _dest;
uint8_t *src;
int j;
int n_src;
int scale = frame->param1;
int acc;
n_src = frame->virt_frame1->components[component].width;
src = cog_virt_frame_get_line (frame->virt_frame1, component, i);
acc = 0;
for (j = 0; j < frame->components[component].width; j++) {
dest[j] = src[(acc >> 8)];
acc += scale;
}
cogorc_resample_horiz_1tap (dest, src, 0, scale,
frame->components[component].width);
}
static void
@ -651,45 +646,14 @@ cog_virt_frame_render_resample_horiz_2tap (CogFrame * frame, void *_dest,
{
uint8_t *dest = _dest;
uint8_t *src;
int j;
int n_src;
int scale = frame->param1;
int acc;
n_src = frame->virt_frame1->components[component].width;
src = cog_virt_frame_get_line (frame->virt_frame1, component, i);
acc = 0;
for (j = 0; j < frame->components[component].width - 2; j++) {
int src_i;
int y;
int z;
src_i = acc >> 8;
y = acc & 255;
z = 128;
z += (256 - y) * src[src_i + 0];
z += y * src[src_i + 1];
z >>= 8;
dest[j] = CLAMP (z, 0, 255);
acc += scale;
}
for (; j < frame->components[component].width; j++) {
int src_i;
int y;
int z;
src_i = acc >> 8;
y = acc & 255;
z = 128;
z += (256 - y) * src[CLAMP (src_i + 0, 0, n_src - 1)];
z += y * src[CLAMP (src_i + 1, 0, n_src - 1)];
z >>= 8;
dest[j] = CLAMP (z, 0, 255);
acc += scale;
}
cogorc_resample_horiz_2tap (dest, src, 0, scale,
frame->components[component].width);
}
static void
@ -712,8 +676,8 @@ cog_virt_frame_render_resample_horiz_4tap (CogFrame * frame, void *_dest,
int y;
int z;
src_i = acc >> 8;
y = acc & 255;
src_i = acc >> 16;
y = (acc >> 8) & 255;
z = 32;
z += cog_resample_table_4tap[y][0] * src[CLAMP (src_i - 1, 0, n_src - 1)];
@ -729,8 +693,8 @@ cog_virt_frame_render_resample_horiz_4tap (CogFrame * frame, void *_dest,
int y;
int z;
src_i = acc >> 8;
y = acc & 255;
src_i = acc >> 16;
y = (acc >> 8) & 255;
z = 32;
z += cog_resample_table_4tap[y][0] * src[src_i - 1];
@ -746,8 +710,8 @@ cog_virt_frame_render_resample_horiz_4tap (CogFrame * frame, void *_dest,
int y;
int z;
src_i = acc >> 8;
y = acc & 255;
src_i = acc >> 16;
y = (acc >> 8) & 255;
z = 32;
z += cog_resample_table_4tap[y][0] * src[CLAMP (src_i - 1, 0, n_src - 1)];
@ -775,7 +739,7 @@ cog_virt_frame_new_horiz_resample (CogFrame * vf, int width, int n_taps)
virt_frame->render_line = cog_virt_frame_render_resample_horiz_4tap;
}
virt_frame->param1 = 256 * vf->width / width;
virt_frame->param1 = 65536 * vf->width / width;
return virt_frame;
}
@ -1429,7 +1393,6 @@ cog_virt_frame_new_color_matrix_YCbCr_to_RGB (CogFrame * vf,
CogColorMatrix color_matrix, int bits)
{
CogFrame *virt_frame;
//int *matrix = frame->virt_priv2;
virt_frame = cog_frame_new_virtual (NULL, COG_FRAME_FORMAT_U8_444,
vf->width, vf->height);

View file

@ -37,6 +37,7 @@
#include <math.h>
#include <cog/cogvirtframe.h>
#include "gstcogutils.h"
#include "gstcogorc.h"
#define GST_TYPE_COGCOLORSPACE \
(gst_cogcolorspace_get_type())
@ -383,6 +384,425 @@ gst_cogcolorspace_caps_get_chroma_site (GstCaps * caps)
return COG_CHROMA_SITE_MPEG2;
}
/* I420 -> YUY2 fast path.  Output rows are produced in even/odd pairs: each
 * call gets the two destination rows, the two matching luma rows and the
 * single U and V rows (index row/2) that the pair shares.  The element count
 * is the number of pixel pairs per row, rounded up.
 * NOTE(review): with an odd dest->height the final pass still addresses row
 * `height`; presumably frames are padded or always even -- confirm. */
static void
convert_I420_YUY2 (CogFrame * dest, CogFrame * src)
{
  const int n_pairs = (dest->width + 1) / 2;
  int row, chroma_row;

  for (row = 0, chroma_row = 0; row < dest->height; row += 2, chroma_row++) {
    void *out_top = COG_FRAME_DATA_GET_LINE (&dest->components[0], row);
    void *out_bottom = COG_FRAME_DATA_GET_LINE (&dest->components[0], row + 1);
    void *y_top = COG_FRAME_DATA_GET_LINE (&src->components[0], row);
    void *y_bottom = COG_FRAME_DATA_GET_LINE (&src->components[0], row + 1);
    void *u_line = COG_FRAME_DATA_GET_LINE (&src->components[1], chroma_row);
    void *v_line = COG_FRAME_DATA_GET_LINE (&src->components[2], chroma_row);

    cogorc_convert_I420_YUY2 (out_top, out_bottom, y_top, y_bottom,
        u_line, v_line, n_pairs);
  }
}
/* I420 -> UYVY fast path.  Same row-pair walk as the YUY2 variant: two
 * destination rows and two luma rows per call, sharing the U/V rows at
 * index row/2; the count passed down is pixel pairs per row, rounded up.
 * NOTE(review): an odd dest->height makes the last pass touch row `height`;
 * confirm that callers guarantee this is addressable. */
static void
convert_I420_UYVY (CogFrame * dest, CogFrame * src)
{
  const int n_pairs = (dest->width + 1) / 2;
  int row = 0;
  int chroma_row = 0;

  while (row < dest->height) {
    cogorc_convert_I420_UYVY (
        COG_FRAME_DATA_GET_LINE (&dest->components[0], row),
        COG_FRAME_DATA_GET_LINE (&dest->components[0], row + 1),
        COG_FRAME_DATA_GET_LINE (&src->components[0], row),
        COG_FRAME_DATA_GET_LINE (&src->components[0], row + 1),
        COG_FRAME_DATA_GET_LINE (&src->components[1], chroma_row),
        COG_FRAME_DATA_GET_LINE (&src->components[2], chroma_row),
        n_pairs);
    row += 2;
    chroma_row++;
  }
}
/* I420 -> AYUV fast path.  Walks the frame two rows at a time; both rows of
 * a pair read the same U and V rows (index row/2).  Unlike the packed 4:2:2
 * variants, the element count here is the full destination width.
 * NOTE(review): an odd dest->height makes the last pass touch row `height`;
 * confirm that callers guarantee this is addressable. */
static void
convert_I420_AYUV (CogFrame * dest, CogFrame * src)
{
  const int n_pixels = dest->width;
  int row, chroma_row;

  for (row = 0, chroma_row = 0; row < dest->height; row += 2, chroma_row++) {
    void *out_top = COG_FRAME_DATA_GET_LINE (&dest->components[0], row);
    void *out_bottom = COG_FRAME_DATA_GET_LINE (&dest->components[0], row + 1);
    void *y_top = COG_FRAME_DATA_GET_LINE (&src->components[0], row);
    void *y_bottom = COG_FRAME_DATA_GET_LINE (&src->components[0], row + 1);
    void *u_line = COG_FRAME_DATA_GET_LINE (&src->components[1], chroma_row);
    void *v_line = COG_FRAME_DATA_GET_LINE (&src->components[2], chroma_row);

    cogorc_convert_I420_AYUV (out_top, out_bottom, y_top, y_bottom,
        u_line, v_line, n_pixels);
  }
}
/* I420 -> Y42B fast path.
 *
 * The luma plane is copied unchanged.  Each chroma plane keeps its width but
 * doubles its number of rows: every source chroma row is written to an
 * even/odd pair of destination rows.  That is expressed by giving the Orc
 * kernel two destination pointers (row 0 and row 1 of the SAME plane), both
 * advancing by twice the plane's stride.
 *
 * The previous version passed row 1 of component 2 as the second destination
 * of the U-plane call, so the odd rows of the destination U plane were never
 * written.  Running both planes through one loop keeps the plane index
 * consistent across all arguments.
 *
 * NOTE(review): the row count is dest->height / 2 (rounded down), so with an
 * odd height the last chroma row of the destination is not written here. */
static void
convert_I420_Y42B (CogFrame * dest, CogFrame * src)
{
  int plane;

  cogorc_memcpy_2d (dest->components[0].data, dest->components[0].stride,
      src->components[0].data, src->components[0].stride,
      dest->width, dest->height);

  for (plane = 1; plane <= 2; plane++) {
    cogorc_planar_chroma_420_422 (dest->components[plane].data,
        2 * dest->components[plane].stride,
        COG_FRAME_DATA_GET_LINE (dest->components + plane, 1),
        2 * dest->components[plane].stride,
        src->components[plane].data, src->components[plane].stride,
        (dest->width + 1) / 2, dest->height / 2);
  }
}
/* I420 -> Y444 fast path.  Luma is copied as is.  For each chroma plane the
 * kernel is given row 0 and row 1 of the destination plane, both stepping by
 * twice the stride, so one source row feeds two destination rows; the counts
 * passed are the rounded-up half width and half height of the destination. */
static void
convert_I420_Y444 (CogFrame * dest, CogFrame * src)
{
  const int half_width = (dest->width + 1) / 2;
  const int half_height = (dest->height + 1) / 2;
  int plane;

  cogorc_memcpy_2d (dest->components[0].data, dest->components[0].stride,
      src->components[0].data, src->components[0].stride,
      dest->width, dest->height);

  for (plane = 1; plane <= 2; plane++) {
    cogorc_planar_chroma_420_444 (dest->components[plane].data,
        2 * dest->components[plane].stride,
        COG_FRAME_DATA_GET_LINE (&dest->components[plane], 1),
        2 * dest->components[plane].stride,
        src->components[plane].data, src->components[plane].stride,
        half_width, half_height);
  }
}
/* YUY2 -> I420 fast path.  Two packed source rows are consumed per pass and
 * produce two luma rows plus one U row and one V row (index row/2).  The
 * count passed down is pixel pairs per row, rounded up.
 * NOTE(review): an odd dest->height makes the last pass touch row `height`;
 * confirm that callers guarantee this is addressable. */
static void
convert_YUY2_I420 (CogFrame * dest, CogFrame * src)
{
  const int n_pairs = (dest->width + 1) / 2;
  int row, chroma_row;

  for (row = 0, chroma_row = 0; row < dest->height; row += 2, chroma_row++) {
    void *y_top = COG_FRAME_DATA_GET_LINE (&dest->components[0], row);
    void *y_bottom = COG_FRAME_DATA_GET_LINE (&dest->components[0], row + 1);
    void *u_line = COG_FRAME_DATA_GET_LINE (&dest->components[1], chroma_row);
    void *v_line = COG_FRAME_DATA_GET_LINE (&dest->components[2], chroma_row);
    void *in_top = COG_FRAME_DATA_GET_LINE (&src->components[0], row);
    void *in_bottom = COG_FRAME_DATA_GET_LINE (&src->components[0], row + 1);

    cogorc_convert_YUY2_I420 (y_top, y_bottom, u_line, v_line,
        in_top, in_bottom, n_pairs);
  }
}
/* YUY2 -> AYUV fast path: one 2-D kernel call over the whole frame.  The
 * per-row count is pixel pairs (width rounded up to even, halved). */
static void
convert_YUY2_AYUV (CogFrame * dest, CogFrame * src)
{
  const int n_pairs = (dest->width + 1) / 2;
  const int n_rows = dest->height;

  cogorc_convert_YUY2_AYUV (
      dest->components[0].data, dest->components[0].stride,
      src->components[0].data, src->components[0].stride,
      n_pairs, n_rows);
}
/* YUY2 -> Y42B fast path: one 2-D kernel call that fills the three
 * destination planes from the single packed source plane.  The per-row
 * count is pixel pairs, rounded up. */
static void
convert_YUY2_Y42B (CogFrame * dest, CogFrame * src)
{
  const int n_pairs = (dest->width + 1) / 2;
  const int n_rows = dest->height;

  cogorc_convert_YUY2_Y42B (
      dest->components[0].data, dest->components[0].stride,
      dest->components[1].data, dest->components[1].stride,
      dest->components[2].data, dest->components[2].stride,
      src->components[0].data, src->components[0].stride,
      n_pairs, n_rows);
}
/* YUY2 -> Y444 fast path: one 2-D kernel call that fills the three
 * destination planes from the single packed source plane.  The per-row
 * count is pixel pairs, rounded up. */
static void
convert_YUY2_Y444 (CogFrame * dest, CogFrame * src)
{
  const int n_pairs = (dest->width + 1) / 2;
  const int n_rows = dest->height;

  cogorc_convert_YUY2_Y444 (
      dest->components[0].data, dest->components[0].stride,
      dest->components[1].data, dest->components[1].stride,
      dest->components[2].data, dest->components[2].stride,
      src->components[0].data, src->components[0].stride,
      n_pairs, n_rows);
}
/* UYVY -> I420 fast path.  Two packed source rows are consumed per pass and
 * produce two luma rows plus one U row and one V row (index row/2).  The
 * count passed down is pixel pairs per row, rounded up.
 * NOTE(review): an odd dest->height makes the last pass touch row `height`;
 * confirm that callers guarantee this is addressable. */
static void
convert_UYVY_I420 (CogFrame * dest, CogFrame * src)
{
  const int n_pairs = (dest->width + 1) / 2;
  int row = 0;
  int chroma_row = 0;

  while (row < dest->height) {
    cogorc_convert_UYVY_I420 (
        COG_FRAME_DATA_GET_LINE (&dest->components[0], row),
        COG_FRAME_DATA_GET_LINE (&dest->components[0], row + 1),
        COG_FRAME_DATA_GET_LINE (&dest->components[1], chroma_row),
        COG_FRAME_DATA_GET_LINE (&dest->components[2], chroma_row),
        COG_FRAME_DATA_GET_LINE (&src->components[0], row),
        COG_FRAME_DATA_GET_LINE (&src->components[0], row + 1),
        n_pairs);
    row += 2;
    chroma_row++;
  }
}
/* UYVY -> AYUV fast path: one 2-D kernel call over the whole frame.  The
 * per-row count is pixel pairs, rounded up. */
static void
convert_UYVY_AYUV (CogFrame * dest, CogFrame * src)
{
  const int n_pairs = (dest->width + 1) / 2;
  const int n_rows = dest->height;

  cogorc_convert_UYVY_AYUV (
      dest->components[0].data, dest->components[0].stride,
      src->components[0].data, src->components[0].stride,
      n_pairs, n_rows);
}
/* UYVY <-> YUY2 fast path: one 2-D kernel call over the whole frame, with
 * a per-row count of pixel pairs (rounded up).  The dispatch table also
 * registers this function for the YUY2 -> UYVY direction. */
static void
convert_UYVY_YUY2 (CogFrame * dest, CogFrame * src)
{
  const int n_pairs = (dest->width + 1) / 2;
  const int n_rows = dest->height;

  cogorc_convert_UYVY_YUY2 (
      dest->components[0].data, dest->components[0].stride,
      src->components[0].data, src->components[0].stride,
      n_pairs, n_rows);
}
/* UYVY -> Y42B fast path: one 2-D kernel call that fills the three
 * destination planes from the single packed source plane.  The per-row
 * count is pixel pairs, rounded up. */
static void
convert_UYVY_Y42B (CogFrame * dest, CogFrame * src)
{
  const int n_pairs = (dest->width + 1) / 2;
  const int n_rows = dest->height;

  cogorc_convert_UYVY_Y42B (
      dest->components[0].data, dest->components[0].stride,
      dest->components[1].data, dest->components[1].stride,
      dest->components[2].data, dest->components[2].stride,
      src->components[0].data, src->components[0].stride,
      n_pairs, n_rows);
}
/* UYVY -> Y444 fast path: one 2-D kernel call that fills the three
 * destination planes from the single packed source plane.  The per-row
 * count is pixel pairs, rounded up. */
static void
convert_UYVY_Y444 (CogFrame * dest, CogFrame * src)
{
  const int n_pairs = (dest->width + 1) / 2;
  const int n_rows = dest->height;

  cogorc_convert_UYVY_Y444 (
      dest->components[0].data, dest->components[0].stride,
      dest->components[1].data, dest->components[1].stride,
      dest->components[2].data, dest->components[2].stride,
      src->components[0].data, src->components[0].stride,
      n_pairs, n_rows);
}
/* AYUV -> I420 fast path: one 2-D kernel call.  The kernel receives rows 0
 * and 1 of the destination luma plane (each stepping by two luma strides),
 * the U and V planes, and rows 0 and 1 of the packed source.  Counts are
 * half the destination width and height, rounded down. */
static void
convert_AYUV_I420 (CogFrame * dest, CogFrame * src)
{
  const int luma_pair_stride = 2 * dest->components[0].stride;
  /* FIXME why not 2* ? */
  const int src_stride = src->components[0].stride;

  cogorc_convert_AYUV_I420 (
      COG_FRAME_DATA_GET_LINE (&dest->components[0], 0), luma_pair_stride,
      COG_FRAME_DATA_GET_LINE (&dest->components[0], 1), luma_pair_stride,
      dest->components[1].data, dest->components[1].stride,
      dest->components[2].data, dest->components[2].stride,
      COG_FRAME_DATA_GET_LINE (&src->components[0], 0), src_stride,
      COG_FRAME_DATA_GET_LINE (&src->components[0], 1), src_stride,
      dest->width / 2, dest->height / 2);
}
/* AYUV -> YUY2 fast path: one 2-D kernel call over the whole frame.  Note
 * that the per-row pixel-pair count is width / 2, rounded DOWN. */
static void
convert_AYUV_YUY2 (CogFrame * dest, CogFrame * src)
{
  const int n_pairs = dest->width / 2;
  const int n_rows = dest->height;

  cogorc_convert_AYUV_YUY2 (
      dest->components[0].data, dest->components[0].stride,
      src->components[0].data, src->components[0].stride,
      n_pairs, n_rows);
}
/* AYUV -> UYVY fast path: one 2-D kernel call over the whole frame.  Note
 * that the per-row pixel-pair count is width / 2, rounded DOWN. */
static void
convert_AYUV_UYVY (CogFrame * dest, CogFrame * src)
{
  const int n_pairs = dest->width / 2;
  const int n_rows = dest->height;

  cogorc_convert_AYUV_UYVY (
      dest->components[0].data, dest->components[0].stride,
      src->components[0].data, src->components[0].stride,
      n_pairs, n_rows);
}
/* AYUV -> Y42B fast path: one 2-D kernel call that fills the three
 * destination planes from the packed source.  The per-row count is pixel
 * pairs, rounded up. */
static void
convert_AYUV_Y42B (CogFrame * dest, CogFrame * src)
{
  const int n_pairs = (dest->width + 1) / 2;
  const int n_rows = dest->height;

  cogorc_convert_AYUV_Y42B (
      dest->components[0].data, dest->components[0].stride,
      dest->components[1].data, dest->components[1].stride,
      dest->components[2].data, dest->components[2].stride,
      src->components[0].data, src->components[0].stride,
      n_pairs, n_rows);
}
/* AYUV -> Y444 fast path: one 2-D kernel call that fills the three
 * destination planes from the packed source, one element per pixel
 * (full width, full height). */
static void
convert_AYUV_Y444 (CogFrame * dest, CogFrame * src)
{
  const int n_pixels = dest->width;
  const int n_rows = dest->height;

  cogorc_convert_AYUV_Y444 (
      dest->components[0].data, dest->components[0].stride,
      dest->components[1].data, dest->components[1].stride,
      dest->components[2].data, dest->components[2].stride,
      src->components[0].data, src->components[0].stride,
      n_pixels, n_rows);
}
/* Y42B -> I420 fast path.  Luma is copied unchanged.  Each chroma plane is
 * reduced vertically: the kernel reads source rows 0 and 1 of the plane,
 * both stepping by twice the source stride, and writes one destination row
 * per pair.  Counts are the rounded-up half width and half height. */
static void
convert_Y42B_I420 (CogFrame * dest, CogFrame * src)
{
  const int half_width = (dest->width + 1) / 2;
  const int half_height = (dest->height + 1) / 2;
  int plane;

  cogorc_memcpy_2d (dest->components[0].data, dest->components[0].stride,
      src->components[0].data, src->components[0].stride,
      dest->width, dest->height);

  for (plane = 1; plane <= 2; plane++) {
    cogorc_planar_chroma_422_420 (dest->components[plane].data,
        dest->components[plane].stride,
        src->components[plane].data,
        2 * src->components[plane].stride,
        COG_FRAME_DATA_GET_LINE (&src->components[plane], 1),
        2 * src->components[plane].stride,
        half_width, half_height);
  }
}
/* Y42B -> Y444 fast path.  Luma is copied unchanged; each chroma plane is
 * run through the 422->444 kernel over every row, with a per-row count of
 * the rounded-up half width. */
static void
convert_Y42B_Y444 (CogFrame * dest, CogFrame * src)
{
  const int half_width = (dest->width + 1) / 2;
  int plane;

  cogorc_memcpy_2d (dest->components[0].data, dest->components[0].stride,
      src->components[0].data, src->components[0].stride,
      dest->width, dest->height);

  for (plane = 1; plane <= 2; plane++) {
    cogorc_planar_chroma_422_444 (dest->components[plane].data,
        dest->components[plane].stride,
        src->components[plane].data, src->components[plane].stride,
        half_width, dest->height);
  }
}
/* Y42B -> YUY2 fast path: one 2-D kernel call that reads the three source
 * planes and writes the single packed destination plane.  The per-row
 * count is pixel pairs, rounded up. */
static void
convert_Y42B_YUY2 (CogFrame * dest, CogFrame * src)
{
  const int n_pairs = (dest->width + 1) / 2;
  const int n_rows = dest->height;

  cogorc_convert_Y42B_YUY2 (
      dest->components[0].data, dest->components[0].stride,
      src->components[0].data, src->components[0].stride,
      src->components[1].data, src->components[1].stride,
      src->components[2].data, src->components[2].stride,
      n_pairs, n_rows);
}
/* Y42B -> UYVY fast path: one 2-D kernel call that reads the three source
 * planes and writes the single packed destination plane.  The per-row
 * count is pixel pairs, rounded up. */
static void
convert_Y42B_UYVY (CogFrame * dest, CogFrame * src)
{
  const int n_pairs = (dest->width + 1) / 2;
  const int n_rows = dest->height;

  cogorc_convert_Y42B_UYVY (
      dest->components[0].data, dest->components[0].stride,
      src->components[0].data, src->components[0].stride,
      src->components[1].data, src->components[1].stride,
      src->components[2].data, src->components[2].stride,
      n_pairs, n_rows);
}
/* Y42B -> AYUV fast path: one 2-D kernel call that reads the three source
 * planes and writes the packed destination.
 * NOTE(review): the pixel-pair count here is width / 2 rounded DOWN, whereas
 * the other Y42B converters round up -- confirm which is intended. */
static void
convert_Y42B_AYUV (CogFrame * dest, CogFrame * src)
{
  const int n_pairs = (dest->width) / 2;
  const int n_rows = dest->height;

  cogorc_convert_Y42B_AYUV (
      dest->components[0].data, dest->components[0].stride,
      src->components[0].data, src->components[0].stride,
      src->components[1].data, src->components[1].stride,
      src->components[2].data, src->components[2].stride,
      n_pairs, n_rows);
}
/* Y444 -> I420 fast path.  Luma is copied unchanged.  Each chroma plane is
 * reduced in both directions: the kernel reads source rows 0 and 1 of the
 * plane, both stepping by twice the source stride, and writes one
 * destination row per pair.  Counts are the rounded-up half width and half
 * height of the destination. */
static void
convert_Y444_I420 (CogFrame * dest, CogFrame * src)
{
  const int half_width = (dest->width + 1) / 2;
  const int half_height = (dest->height + 1) / 2;
  int plane;

  cogorc_memcpy_2d (dest->components[0].data, dest->components[0].stride,
      src->components[0].data, src->components[0].stride,
      dest->width, dest->height);

  for (plane = 1; plane <= 2; plane++) {
    cogorc_planar_chroma_444_420 (dest->components[plane].data,
        dest->components[plane].stride,
        src->components[plane].data,
        2 * src->components[plane].stride,
        COG_FRAME_DATA_GET_LINE (&src->components[plane], 1),
        2 * src->components[plane].stride,
        half_width, half_height);
  }
}
/* Y444 -> Y42B fast path.  Luma is copied unchanged; each chroma plane is
 * run through the 444->422 kernel over every row, with a per-row count of
 * the rounded-up half width. */
static void
convert_Y444_Y42B (CogFrame * dest, CogFrame * src)
{
  const int half_width = (dest->width + 1) / 2;
  int plane;

  cogorc_memcpy_2d (dest->components[0].data, dest->components[0].stride,
      src->components[0].data, src->components[0].stride,
      dest->width, dest->height);

  for (plane = 1; plane <= 2; plane++) {
    cogorc_planar_chroma_444_422 (dest->components[plane].data,
        dest->components[plane].stride,
        src->components[plane].data, src->components[plane].stride,
        half_width, dest->height);
  }
}
/* Y444 -> YUY2 fast path: one 2-D kernel call that reads the three source
 * planes and writes the single packed destination plane.  The per-row
 * count is pixel pairs, rounded up. */
static void
convert_Y444_YUY2 (CogFrame * dest, CogFrame * src)
{
  const int n_pairs = (dest->width + 1) / 2;
  const int n_rows = dest->height;

  cogorc_convert_Y444_YUY2 (
      dest->components[0].data, dest->components[0].stride,
      src->components[0].data, src->components[0].stride,
      src->components[1].data, src->components[1].stride,
      src->components[2].data, src->components[2].stride,
      n_pairs, n_rows);
}
/* Y444 -> UYVY fast path: one 2-D kernel call that reads the three source
 * planes and writes the single packed destination plane.  The per-row
 * count is pixel pairs, rounded up. */
static void
convert_Y444_UYVY (CogFrame * dest, CogFrame * src)
{
  const int n_pairs = (dest->width + 1) / 2;
  const int n_rows = dest->height;

  cogorc_convert_Y444_UYVY (
      dest->components[0].data, dest->components[0].stride,
      src->components[0].data, src->components[0].stride,
      src->components[1].data, src->components[1].stride,
      src->components[2].data, src->components[2].stride,
      n_pairs, n_rows);
}
/* Y444 -> AYUV fast path: one 2-D kernel call that reads the three source
 * planes and writes the packed destination, one element per pixel
 * (full width, full height). */
static void
convert_Y444_AYUV (CogFrame * dest, CogFrame * src)
{
  const int n_pixels = dest->width;
  const int n_rows = dest->height;

  cogorc_convert_Y444_AYUV (
      dest->components[0].data, dest->components[0].stride,
      src->components[0].data, src->components[0].stride,
      src->components[1].data, src->components[1].stride,
      src->components[2].data, src->components[2].stride,
      n_pixels, n_rows);
}
/* One entry of the fast-path dispatch table: a direct converter for a
 * specific (input format, output format) pair.  The transform function
 * scans the table for an entry whose two format fields both match and then
 * calls `convert` with the output frame first and the input frame second. */
typedef struct
{
/* format of the incoming buffer (table entries use GST_VIDEO_FORMAT_* values) */
uint32_t in_format;
/* format of the outgoing buffer (table entries use GST_VIDEO_FORMAT_* values) */
uint32_t out_format;
/* converter to run; writes into dest, reads from src */
void (*convert) (CogFrame * dest, CogFrame * src);
} CogColorspaceTransform;
/* Fast-path dispatch table: every ordered pair of distinct formats among
 * I420, YUY2, UYVY, AYUV, Y42B and Y444 mapped to its direct converter.
 * The table is only ever read (linear search by in/out format), so it is
 * declared const. */
static const CogColorspaceTransform transforms[] = {
  {GST_VIDEO_FORMAT_I420, GST_VIDEO_FORMAT_YUY2, convert_I420_YUY2},
  {GST_VIDEO_FORMAT_I420, GST_VIDEO_FORMAT_UYVY, convert_I420_UYVY},
  {GST_VIDEO_FORMAT_I420, GST_VIDEO_FORMAT_AYUV, convert_I420_AYUV},
  {GST_VIDEO_FORMAT_I420, GST_VIDEO_FORMAT_Y42B, convert_I420_Y42B},
  {GST_VIDEO_FORMAT_I420, GST_VIDEO_FORMAT_Y444, convert_I420_Y444},

  {GST_VIDEO_FORMAT_YUY2, GST_VIDEO_FORMAT_I420, convert_YUY2_I420},
  /* alias: the UYVY->YUY2 converter is reused for the opposite direction */
  {GST_VIDEO_FORMAT_YUY2, GST_VIDEO_FORMAT_UYVY, convert_UYVY_YUY2},
  {GST_VIDEO_FORMAT_YUY2, GST_VIDEO_FORMAT_AYUV, convert_YUY2_AYUV},
  {GST_VIDEO_FORMAT_YUY2, GST_VIDEO_FORMAT_Y42B, convert_YUY2_Y42B},
  {GST_VIDEO_FORMAT_YUY2, GST_VIDEO_FORMAT_Y444, convert_YUY2_Y444},

  {GST_VIDEO_FORMAT_UYVY, GST_VIDEO_FORMAT_I420, convert_UYVY_I420},
  {GST_VIDEO_FORMAT_UYVY, GST_VIDEO_FORMAT_YUY2, convert_UYVY_YUY2},
  {GST_VIDEO_FORMAT_UYVY, GST_VIDEO_FORMAT_AYUV, convert_UYVY_AYUV},
  {GST_VIDEO_FORMAT_UYVY, GST_VIDEO_FORMAT_Y42B, convert_UYVY_Y42B},
  {GST_VIDEO_FORMAT_UYVY, GST_VIDEO_FORMAT_Y444, convert_UYVY_Y444},

  {GST_VIDEO_FORMAT_AYUV, GST_VIDEO_FORMAT_I420, convert_AYUV_I420},
  {GST_VIDEO_FORMAT_AYUV, GST_VIDEO_FORMAT_YUY2, convert_AYUV_YUY2},
  {GST_VIDEO_FORMAT_AYUV, GST_VIDEO_FORMAT_UYVY, convert_AYUV_UYVY},
  {GST_VIDEO_FORMAT_AYUV, GST_VIDEO_FORMAT_Y42B, convert_AYUV_Y42B},
  {GST_VIDEO_FORMAT_AYUV, GST_VIDEO_FORMAT_Y444, convert_AYUV_Y444},

  {GST_VIDEO_FORMAT_Y42B, GST_VIDEO_FORMAT_I420, convert_Y42B_I420},
  {GST_VIDEO_FORMAT_Y42B, GST_VIDEO_FORMAT_YUY2, convert_Y42B_YUY2},
  {GST_VIDEO_FORMAT_Y42B, GST_VIDEO_FORMAT_UYVY, convert_Y42B_UYVY},
  {GST_VIDEO_FORMAT_Y42B, GST_VIDEO_FORMAT_AYUV, convert_Y42B_AYUV},
  {GST_VIDEO_FORMAT_Y42B, GST_VIDEO_FORMAT_Y444, convert_Y42B_Y444},

  {GST_VIDEO_FORMAT_Y444, GST_VIDEO_FORMAT_I420, convert_Y444_I420},
  {GST_VIDEO_FORMAT_Y444, GST_VIDEO_FORMAT_YUY2, convert_Y444_YUY2},
  {GST_VIDEO_FORMAT_Y444, GST_VIDEO_FORMAT_UYVY, convert_Y444_UYVY},
  {GST_VIDEO_FORMAT_Y444, GST_VIDEO_FORMAT_AYUV, convert_Y444_AYUV},
  {GST_VIDEO_FORMAT_Y444, GST_VIDEO_FORMAT_Y42B, convert_Y444_Y42B},
};
static GstFlowReturn
gst_cogcolorspace_transform (GstBaseTransform * base_transform,
GstBuffer * inbuf, GstBuffer * outbuf)
@ -421,6 +841,28 @@ gst_cogcolorspace_transform (GstBaseTransform * base_transform,
out_frame = gst_cog_buffer_wrap (gst_buffer_ref (outbuf),
out_format, width, height);
if (in_format == out_format) {
memcpy (GST_BUFFER_DATA (outbuf), GST_BUFFER_DATA (inbuf),
GST_BUFFER_SIZE (outbuf));
}
{
int i;
for (i = 0; i < sizeof (transforms) / sizeof (transforms[0]); i++) {
if (transforms[i].in_format == in_format &&
transforms[i].out_format == out_format) {
transforms[i].convert (out_frame, frame);
cog_frame_unref (frame);
cog_frame_unref (out_frame);
return GST_FLOW_OK;
}
}
GST_ERROR ("no match");
}
switch (out_format) {
case GST_VIDEO_FORMAT_YUY2:
case GST_VIDEO_FORMAT_UYVY:

View file

@ -1,4 +1,11 @@
# 2-D byte copy: every element of the source region is copied unchanged to
# the destination.  The C callers use it to copy a luma plane, passing
# pointer/stride pairs plus a width and a height.
.function cogorc_memcpy_2d
.flags 2d
.dest 1 d1
.source 1 s1
copyb d1, s1
.function cogorc_downsample_horiz_cosite_1tap
.dest 1 d1
@ -492,7 +499,7 @@ convubw t2, s2
mullw t2, t2, p2
addw t1, t1, t2
shruw t1, t1, 8
convuuswb d1, t1
convsuswb d1, t1
.function cogorc_combine4_u8
@ -558,5 +565,491 @@ select0wb d1, t1
select1lw t1, s1
select1wb d1, t1
# Horizontal 1-tap resample of one line of bytes, done entirely by the
# ldresnearb load.  The C caller passes 0 for p1 and the frame's scale
# factor for p2 -- presumably start position and per-output step in fixed
# point; confirm against the Orc opcode reference.
.function cogorc_resample_horiz_1tap
.dest 1 d1
.source 1 s1
.param 4 p1
.param 4 p2
ldresnearb d1, s1, p1, p2
# Horizontal 2-tap resample of one line of bytes, done entirely by the
# ldreslinb load.  The C caller passes 0 for p1 and the frame's scale
# factor for p2 -- presumably start position and per-output step in fixed
# point; confirm against the Orc opcode reference.
.function cogorc_resample_horiz_2tap
.dest 1 d1
.source 1 s1
.param 4 p1
.param 4 p2
ldreslinb d1, s1, p1, p2
# Planar -> packed, two rows at once.  One element is a pixel pair: two luma
# bytes from each of y1/y2 and one byte each from u and v.  u and v are
# merged into a chroma word that is shared by both output rows, then merged
# with each row's luma with the chroma operand first.
.function cogorc_convert_I420_UYVY
.dest 4 d1
.dest 4 d2
.source 2 y1
.source 2 y2
.source 1 u
.source 1 v
.temp 2 uv
mergebw uv, u, v
x2 mergebw d1, uv, y1
x2 mergebw d2, uv, y2
# Planar -> packed, two rows at once.  One element is a pixel pair: two luma
# bytes from each of y1/y2 and one byte each from u and v.  u and v are
# merged into a chroma word that is shared by both output rows, then merged
# with each row's luma with the luma operand first.
.function cogorc_convert_I420_YUY2
.dest 4 d1
.dest 4 d2
.source 2 y1
.source 2 y2
.source 1 u
.source 1 v
.temp 2 uv
mergebw uv, u, v
x2 mergebw d1, y1, uv
x2 mergebw d2, y2, uv
# Planar -> 4-byte packed, two rows at once, one pixel per element.  The
# constant 255 is merged with each luma byte (constant first) and that word
# is merged with the u/v word (luma word first).  Both output rows reuse the
# same u/v word.
# u and v are read with loadupdb -- presumably so that each chroma byte
# serves two consecutive pixels; confirm against the Orc opcode reference.
.function cogorc_convert_I420_AYUV
.dest 4 d1
.dest 4 d2
.source 1 y1
.source 1 y2
.source 1 u
.source 1 v
.const 1 c255 255
.temp 2 uv
.temp 2 ay
.temp 1 tu
.temp 1 tv
loadupdb tu, u
loadupdb tv, v
mergebw uv, tu, tv
mergebw ay, c255, y1
mergewl d1, ay, uv
mergebw ay, c255, y2
mergewl d2, ay, uv
# Packed -> planar, two rows at once, one pixel pair per element.  Each
# packed row is split into a luma word (stored to y1 / y2) and a chroma
# word; the two rows' chroma words are averaged byte-wise and the result is
# split into the v and u outputs.
.function cogorc_convert_YUY2_I420
.dest 2 y1
.dest 2 y2
.dest 1 u
.dest 1 v
.source 4 yuv1
.source 4 yuv2
.temp 2 t1
.temp 2 t2
.temp 2 ty
x2 splitwb t1, ty, yuv1
storew y1, ty
x2 splitwb t2, ty, yuv2
storew y2, ty
x2 avgub t1, t1, t2
splitwb v, u, t1
# 2-D packed 4:2:2 reorder: swapw is applied to both 16-bit halves of every
# 4-byte element.  The operation is its own inverse, which is why the C
# dispatch table uses it for both directions.
.function cogorc_convert_UYVY_YUY2
.flags 2d
.dest 4 yuy2
.source 4 uyvy
x2 swapw yuy2, uyvy
# 2-D chroma row doubling: every source byte is copied to both destinations.
# The C caller points d1 and d2 at two consecutive rows of the output plane
# and steps each by twice the plane stride.
.function cogorc_planar_chroma_420_422
.flags 2d
.dest 1 d1
.dest 1 d2
.source 1 s
copyb d1, s
copyb d2, s
# 2-D chroma doubling in both directions: each source byte is replicated
# into a 2-byte word (splatbw) and that word is stored to both destinations,
# which the C caller points at two consecutive output rows.
.function cogorc_planar_chroma_420_444
.flags 2d
.dest 2 d1
.dest 2 d2
.source 1 s
.temp 2 t
splatbw t, s
storew d1, t
storew d2, t
# 2-D horizontal chroma doubling: each source byte is replicated into a
# 2-byte word (splatbw) and stored.
.function cogorc_planar_chroma_422_444
.flags 2d
.dest 2 d1
.source 1 s
.temp 2 t
splatbw t, s
storew d1, t
# 2-D horizontal chroma halving: each 2-byte source word is split into its
# two bytes, which are averaged (avgub) into one output byte.
.function cogorc_planar_chroma_444_422
.flags 2d
.dest 1 d
.source 2 s
.temp 1 t1
.temp 1 t2
splitwb t1, t2, s
avgub d, t1, t2
# 2-D chroma halving in both directions: the two source rows are averaged
# byte-wise first, then the two bytes of the resulting word are averaged
# into one output byte.
.function cogorc_planar_chroma_444_420
.flags 2d
.dest 1 d
.source 2 s1
.source 2 s2
.temp 2 t
.temp 1 t1
.temp 1 t2
x2 avgub t, s1, s2
splitwb t1, t2, t
avgub d, t1, t2
# 2-D vertical chroma halving: each output byte is the avgub of the
# corresponding bytes of two source rows.
.function cogorc_planar_chroma_422_420
.flags 2d
.dest 1 d
.source 1 s1
.source 1 s2
avgub d, s1, s2
# 2-D packed 4:2:2 -> AYUV, one pixel pair (4 bytes in, 8 bytes out) per
# element.  The element is split into a chroma word and a luma word; 0xff is
# merged in front of each luma byte, the chroma word is duplicated so both
# pixels carry the same U/V, and the two halves are merged per pixel.
.function cogorc_convert_YUY2_AYUV
.flags 2d
.dest 8 ayuv
.source 4 yuy2
.const 2 c255 0xff
.temp 2 yy
.temp 2 uv
.temp 4 ayay
.temp 4 uvuv
x2 splitwb uv, yy, yuy2
x2 mergebw ayay, c255, yy
mergewl uvuv, uv, uv
x2 mergewl ayuv, ayay, uvuv
# 2-D packed 4:2:2 -> AYUV, one pixel pair (4 bytes in, 8 bytes out) per
# element.  Same as the YUY2 variant except that the splitwb outputs are
# swapped, because luma and chroma sit in the opposite byte of each word.
.function cogorc_convert_UYVY_AYUV
.flags 2d
.dest 8 ayuv
.source 4 uyvy
.const 2 c255 0xff
.temp 2 yy
.temp 2 uv
.temp 4 ayay
.temp 4 uvuv
x2 splitwb yy, uv, uyvy
x2 mergebw ayay, c255, yy
mergewl uvuv, uv, uv
x2 mergewl ayuv, ayay, uvuv
# 2-D packed -> planar 4:2:2, one pixel pair per element: the element is
# split into a luma word (written to y) and a chroma word, which is split
# again into the v and u planes.
.function cogorc_convert_YUY2_Y42B
.flags 2d
.dest 2 y
.dest 1 u
.dest 1 v
.source 4 yuy2
.temp 2 uv
x2 splitwb uv, y, yuy2
splitwb v, u, uv
# 2-D packed -> planar 4:2:2, one pixel pair per element.  Same as the YUY2
# variant with the first splitwb's outputs swapped (luma is the other byte
# of each word).
.function cogorc_convert_UYVY_Y42B
.flags 2d
.dest 2 y
.dest 1 u
.dest 1 v
.source 4 uyvy
.temp 2 uv
x2 splitwb y, uv, uyvy
splitwb v, u, uv
# 2-D packed 4:2:2 -> planar 4:4:4, one pixel pair per element: luma word is
# written straight to y; the chroma word is split into single u and v bytes,
# each replicated (splatbw) so both pixels of the pair get the same value.
.function cogorc_convert_YUY2_Y444
.flags 2d
.dest 2 y
.dest 2 uu
.dest 2 vv
.source 4 yuy2
.temp 2 uv
.temp 1 u
.temp 1 v
x2 splitwb uv, y, yuy2
splitwb v, u, uv
splatbw uu, u
splatbw vv, v
# 2-D packed 4:2:2 -> planar 4:4:4, one pixel pair per element.  Same as the
# YUY2 variant with the first splitwb's outputs swapped; u and v are
# replicated (splatbw) so both pixels of the pair get the same value.
.function cogorc_convert_UYVY_Y444
.flags 2d
.dest 2 y
.dest 2 uu
.dest 2 vv
.source 4 uyvy
.temp 2 uv
.temp 1 u
.temp 1 v
x2 splitwb y, uv, uyvy
splitwb v, u, uv
splatbw uu, u
splatbw vv, v
# Packed -> planar, two rows at once, one pixel pair per element.  Each
# packed row is split into a luma word (stored to y1 / y2) and a chroma
# word; the two rows' chroma words are averaged byte-wise and the result is
# split into the v and u outputs.  Mirrors the YUY2 variant with the
# splitwb outputs swapped.
.function cogorc_convert_UYVY_I420
.dest 2 y1
.dest 2 y2
.dest 1 u
.dest 1 v
.source 4 yuv1
.source 4 yuv2
.temp 2 t1
.temp 2 t2
.temp 2 ty
x2 splitwb ty, t1, yuv1
storew y1, ty
x2 splitwb ty, t2, yuv2
storew y2, ty
x2 avgub t1, t1, t2
splitwb v, u, t1
# 2-D AYUV -> planar 4:2:0.  One element is a 2x2 pixel block: two pixels
# (8 bytes) from each of two source rows.  Per row, the pixels are split
# into chroma words and alpha/luma words, and the luma byte of each
# alpha/luma word is written to that row's y output.  The chroma of the two
# rows is averaged byte-wise, separated into a u pair and a v pair, and each
# pair is averaged again into the single u and v output bytes.
.function cogorc_convert_AYUV_I420
.flags 2d
.dest 2 y1
.dest 2 y2
.dest 1 u
.dest 1 v
.source 8 ayuv1
.source 8 ayuv2
.temp 4 ay
.temp 4 uv1
.temp 4 uv2
.temp 4 uv
.temp 2 uu
.temp 2 vv
.temp 1 t1
.temp 1 t2
x2 splitlw uv1, ay, ayuv1
x2 select1wb y1, ay
x2 splitlw uv2, ay, ayuv2
x2 select1wb y2, ay
x4 avgub uv, uv1, uv2
x2 splitwb vv, uu, uv
splitwb t1, t2, uu
avgub u, t1, t2
splitwb t1, t2, vv
avgub v, t1, t2
# 2-D AYUV -> packed 4:2:2, one pixel pair (8 bytes in, 4 bytes out) per
# element.  The two pixels' chroma words are averaged byte-wise into one
# chroma word, the luma bytes are picked out of the alpha/luma words, and
# the output is merged with the luma operand first.  Alpha is discarded.
.function cogorc_convert_AYUV_YUY2
.flags 2d
.dest 4 yuy2
.source 8 ayuv
.temp 2 yy
.temp 2 uv1
.temp 2 uv2
.temp 4 ayay
.temp 4 uvuv
x2 splitlw uvuv, ayay, ayuv
splitlw uv1, uv2, uvuv
x2 avgub uv1, uv1, uv2
x2 select1wb yy, ayay
x2 mergebw yuy2, yy, uv1
# 2-D AYUV -> packed 4:2:2, one pixel pair (8 bytes in, 4 bytes out) per
# element.  Identical to the AYUV->YUY2 program except that the final merge
# puts the chroma operand first.  Alpha is discarded.
# NOTE(review): the destination variable is named "yuy2" although this
# function's merge order differs from cogorc_convert_AYUV_YUY2; the name
# looks like a copy-paste leftover.
.function cogorc_convert_AYUV_UYVY
.flags 2d
.dest 4 yuy2
.source 8 ayuv
.temp 2 yy
.temp 2 uv1
.temp 2 uv2
.temp 4 ayay
.temp 4 uvuv
x2 splitlw uvuv, ayay, ayuv
splitlw uv1, uv2, uvuv
x2 avgub uv1, uv1, uv2
x2 select1wb yy, ayay
x2 mergebw yuy2, uv1, yy
# 2-D AYUV -> planar 4:2:2, one pixel pair per element.  The two pixels'
# chroma words are averaged byte-wise and split into the v and u planes;
# the luma bytes are picked out of the alpha/luma words.  Alpha is
# discarded.
.function cogorc_convert_AYUV_Y42B
.flags 2d
.dest 2 y
.dest 1 u
.dest 1 v
.source 8 ayuv
.temp 4 ayay
.temp 4 uvuv
.temp 2 uv1
.temp 2 uv2
x2 splitlw uvuv, ayay, ayuv
splitlw uv1, uv2, uvuv
x2 avgub uv1, uv1, uv2
splitwb v, u, uv1
x2 select1wb y, ayay
# 2-D AYUV -> planar 4:4:4, one pixel per element and no averaging: the
# pixel is split into a chroma word and an alpha/luma word; chroma is split
# into v and u, and the luma byte is selected from the alpha/luma word.
# Alpha is discarded.
.function cogorc_convert_AYUV_Y444
.flags 2d
.dest 1 y
.dest 1 u
.dest 1 v
.source 4 ayuv
.temp 2 ay
.temp 2 uv
splitlw uv, ay, ayuv
splitwb v, u, uv
select1wb y, ay
# 2-D planar -> packed 4:2:2, one pixel pair per element: u and v are
# merged into a chroma word, which is merged with the two luma bytes with
# the luma operand first.
.function cogorc_convert_Y42B_YUY2
.flags 2d
.dest 4 yuy2
.source 2 y
.source 1 u
.source 1 v
.temp 2 uv
mergebw uv, u, v
x2 mergebw yuy2, y, uv
# 2-D planar -> packed 4:2:2, one pixel pair per element: u and v are
# merged into a chroma word, which is merged with the two luma bytes with
# the chroma operand first.
.function cogorc_convert_Y42B_UYVY
.flags 2d
.dest 4 uyvy
.source 2 y
.source 1 u
.source 1 v
.temp 2 uv
mergebw uv, u, v
x2 mergebw uyvy, uv, y
# 2-D planar 4:2:2 -> AYUV, one pixel pair (8 bytes out) per element.  The
# constant 255 is merged in front of each luma byte; the single u/v word is
# duplicated so both pixels carry the same chroma; the halves are then
# merged per pixel.
# NOTE(review): the temp "ay" is declared but never used in this program.
.function cogorc_convert_Y42B_AYUV
.flags 2d
.dest 8 ayuv
.source 2 yy
.source 1 u
.source 1 v
.const 1 c255 255
.temp 2 uv
.temp 2 ay
.temp 4 uvuv
.temp 4 ayay
mergebw uv, u, v
x2 mergebw ayay, c255, yy
mergewl uvuv, uv, uv
x2 mergewl ayuv, ayay, uvuv
# 2-D planar 4:4:4 -> packed 4:2:2, one pixel pair per element.  Each
# pixel's u and v are merged into a chroma word; the two pixels' chroma
# words are averaged byte-wise; the result is merged with the two luma
# bytes with the luma operand first.
.function cogorc_convert_Y444_YUY2
.flags 2d
.dest 4 yuy2
.source 2 y
.source 2 u
.source 2 v
.temp 2 uv
.temp 4 uvuv
.temp 2 uv1
.temp 2 uv2
x2 mergebw uvuv, u, v
splitlw uv1, uv2, uvuv
x2 avgub uv, uv1, uv2
x2 mergebw yuy2, y, uv
# 2-D planar 4:4:4 -> packed 4:2:2, one pixel pair per element.  Each
# pixel's u and v are merged into a chroma word; the two pixels' chroma
# words are averaged byte-wise; the result is merged with the two luma
# bytes with the chroma operand first.
.function cogorc_convert_Y444_UYVY
.flags 2d
.dest 4 uyvy
.source 2 y
.source 2 u
.source 2 v
.temp 2 uv
.temp 4 uvuv
.temp 2 uv1
.temp 2 uv2
x2 mergebw uvuv, u, v
splitlw uv1, uv2, uvuv
x2 avgub uv, uv1, uv2
x2 mergebw uyvy, uv, y
# 2-D planar 4:4:4 -> AYUV, one pixel per element: u and v are merged into
# a chroma word, the constant 255 is merged in front of the luma byte, and
# the two words are merged with the alpha/luma word first.
.function cogorc_convert_Y444_AYUV
.flags 2d
.dest 4 ayuv
.source 1 yy
.source 1 u
.source 1 v
.const 1 c255 255
.temp 2 uv
.temp 2 ay
mergebw uv, u, v
mergebw ay, c255, yy
mergewl ayuv, ay, uv