deinterlace: remove assembly code in favor of orc

This commit is contained in:
David Schleef 2010-08-23 02:50:36 -07:00
parent 02196850dd
commit f35d546c71
2 changed files with 1 additions and 486 deletions

View file

@ -86,192 +86,6 @@ deinterlace_scanline_linear_planar_v_c (GstDeinterlaceSimpleMethod * self,
self->parent.row_stride[2]);
}
#ifdef BUILD_X86_ASM
#include "mmx.h"
/*
 * Interpolate one scanline as the per-byte average of two source lines,
 * using MMX for the bulk of the data.
 *
 * self: not used by this function.
 * out:  destination line; `size` bytes are written.
 * bot:  first source line; `size` bytes are read.
 * top:  second source line; `size` bytes are read.
 * size: number of bytes to process.
 *
 * The vector loops compute (a >> 1) + (b >> 1) per byte, the scalar tail
 * computes (a + b) >> 1.
 */
static void
deinterlace_scanline_linear_mmx (GstDeinterlaceSimpleMethod * self,
    guint8 * out, const guint8 * bot, const guint8 * top, gint size)
{
  /* psrlw shifts whole 16-bit words, so bit 0 of each high byte would slide
   * into bit 7 of the low byte.  Clearing that bit first keeps the two bytes
   * of a word independent (avoids shifting chroma into luma). */
  const mmx_t shiftmask = { 0xfefffefffefffeffULL };
  int i;

  /* Main loop: 32 bytes (four quadwords) per iteration. */
  for (i = size / 32; i; --i) {
    movq_m2r (*bot, mm0);
    movq_m2r (*top, mm1);
    movq_m2r (*(bot + 8), mm2);
    movq_m2r (*(top + 8), mm3);
    movq_m2r (*(bot + 16), mm4);
    movq_m2r (*(top + 16), mm5);
    movq_m2r (*(bot + 24), mm6);
    movq_m2r (*(top + 24), mm7);
    pand_m2r (shiftmask, mm0);
    pand_m2r (shiftmask, mm1);
    pand_m2r (shiftmask, mm2);
    pand_m2r (shiftmask, mm3);
    pand_m2r (shiftmask, mm4);
    pand_m2r (shiftmask, mm5);
    pand_m2r (shiftmask, mm6);
    pand_m2r (shiftmask, mm7);
    psrlw_i2r (1, mm0);
    psrlw_i2r (1, mm1);
    psrlw_i2r (1, mm2);
    psrlw_i2r (1, mm3);
    psrlw_i2r (1, mm4);
    psrlw_i2r (1, mm5);
    psrlw_i2r (1, mm6);
    psrlw_i2r (1, mm7);
    paddb_r2r (mm1, mm0);
    paddb_r2r (mm3, mm2);
    paddb_r2r (mm5, mm4);
    paddb_r2r (mm7, mm6);
    movq_r2m (mm0, *out);
    movq_r2m (mm2, *(out + 8));
    movq_r2m (mm4, *(out + 16));
    movq_r2m (mm6, *(out + 24));
    out += 32;
    top += 32;
    bot += 32;
  }

  /* Up to 31 bytes are left; handle them one quadword at a time. */
  size = (size & 0x1f);
  for (i = size / 8; i; --i) {
    movq_m2r (*bot, mm0);
    movq_m2r (*top, mm1);
    pand_m2r (shiftmask, mm0);
    pand_m2r (shiftmask, mm1);
    psrlw_i2r (1, mm0);
    psrlw_i2r (1, mm1);
    paddb_r2r (mm1, mm0);
    movq_r2m (mm0, *out);
    out += 8;
    top += 8;
    bot += 8;
  }
  emms ();

  /* The quadword loop consumed every full group of 8 bytes, so only the low
   * three bits of the count remain.  Masking with 0xf here would re-process
   * 8 bytes whenever bit 3 of the count is set and run past the end of the
   * buffers. */
  size = size & 0x7;
  /* Handle last few pixels. */
  for (i = size; i; --i) {
    *out++ = ((*top++) + (*bot++)) >> 1;
  }
}
/* Packed formats, MMX path: average scanlines->t0 and scanlines->b0 into
 * `out` over row_stride[0] bytes. */
static void
deinterlace_scanline_linear_packed_mmx (GstDeinterlaceSimpleMethod * self,
    guint8 * out, const GstDeinterlaceScanlineData * scanlines)
{
  const gint n_bytes = self->parent.row_stride[0];

  deinterlace_scanline_linear_mmx (self, out,
      scanlines->t0, scanlines->b0, n_bytes);
}
/* Planar formats, plane index 0 (Y, per the function name), MMX path:
 * average scanlines->t0 and scanlines->b0 into `out` over row_stride[0]
 * bytes. */
static void
deinterlace_scanline_linear_planar_y_mmx (GstDeinterlaceSimpleMethod * self,
    guint8 * out, const GstDeinterlaceScanlineData * scanlines)
{
  const gint n_bytes = self->parent.row_stride[0];

  deinterlace_scanline_linear_mmx (self, out,
      scanlines->t0, scanlines->b0, n_bytes);
}
/* Planar formats, plane index 1 (U, per the function name), MMX path:
 * average scanlines->t0 and scanlines->b0 into `out` over row_stride[1]
 * bytes. */
static void
deinterlace_scanline_linear_planar_u_mmx (GstDeinterlaceSimpleMethod * self,
    guint8 * out, const GstDeinterlaceScanlineData * scanlines)
{
  const gint n_bytes = self->parent.row_stride[1];

  deinterlace_scanline_linear_mmx (self, out,
      scanlines->t0, scanlines->b0, n_bytes);
}
/* Planar formats, plane index 2 (V, per the function name), MMX path:
 * average scanlines->t0 and scanlines->b0 into `out` over row_stride[2]
 * bytes. */
static void
deinterlace_scanline_linear_planar_v_mmx (GstDeinterlaceSimpleMethod * self,
    guint8 * out, const GstDeinterlaceScanlineData * scanlines)
{
  const gint n_bytes = self->parent.row_stride[2];

  deinterlace_scanline_linear_mmx (self, out,
      scanlines->t0, scanlines->b0, n_bytes);
}
#include "sse.h"
/*
 * Interpolate one scanline as the per-byte average of two source lines,
 * using the MMXEXT pavgb instruction and movntq stores for the bulk of
 * the data.
 *
 * self: not used by this function.
 * out:  destination line; `size` bytes are written.
 * bot:  first source line; `size` bytes are read.
 * top:  second source line; `size` bytes are read.
 * size: number of bytes to process.
 *
 * NOTE(review): the vector part stores with movntq and no store fence is
 * issued here; confirm whether callers rely on ordering of these stores.
 */
static void
deinterlace_scanline_linear_mmxext (GstDeinterlaceSimpleMethod * self,
    guint8 * out, const guint8 * bot, const guint8 * top, gint size)
{
  gint i;

  /* Main loop: 32 bytes (four quadwords) per iteration. */
  for (i = size / 32; i; --i) {
    movq_m2r (*bot, mm0);
    movq_m2r (*top, mm1);
    movq_m2r (*(bot + 8), mm2);
    movq_m2r (*(top + 8), mm3);
    movq_m2r (*(bot + 16), mm4);
    movq_m2r (*(top + 16), mm5);
    movq_m2r (*(bot + 24), mm6);
    movq_m2r (*(top + 24), mm7);
    pavgb_r2r (mm1, mm0);
    pavgb_r2r (mm3, mm2);
    pavgb_r2r (mm5, mm4);
    pavgb_r2r (mm7, mm6);
    movntq_r2m (mm0, *out);
    movntq_r2m (mm2, *(out + 8));
    movntq_r2m (mm4, *(out + 16));
    movntq_r2m (mm6, *(out + 24));
    out += 32;
    top += 32;
    bot += 32;
  }

  /* Up to 31 bytes are left; handle them one quadword at a time. */
  size = (size & 0x1f);
  for (i = size / 8; i; --i) {
    movq_m2r (*bot, mm0);
    movq_m2r (*top, mm1);
    pavgb_r2r (mm1, mm0);
    movntq_r2m (mm0, *out);
    out += 8;
    top += 8;
    bot += 8;
  }
  emms ();

  /* The quadword loop consumed every full group of 8 bytes, so only the low
   * three bits of the count remain.  Masking with 0xf here would re-process
   * 8 bytes whenever bit 3 of the count is set and run past the end of the
   * buffers. */
  size = size & 0x7;
  /* Handle last few pixels. */
  for (i = size; i; --i) {
    *out++ = ((*top++) + (*bot++)) >> 1;
  }
}
/* Packed formats, MMXEXT path: average scanlines->t0 and scanlines->b0 into
 * `out` over row_stride[0] bytes. */
static void
deinterlace_scanline_linear_packed_mmxext (GstDeinterlaceSimpleMethod * self,
    guint8 * out, const GstDeinterlaceScanlineData * scanlines)
{
  const gint n_bytes = self->parent.row_stride[0];

  deinterlace_scanline_linear_mmxext (self, out,
      scanlines->t0, scanlines->b0, n_bytes);
}
/* Planar formats, plane index 0 (Y, per the function name), MMXEXT path:
 * average scanlines->t0 and scanlines->b0 into `out` over row_stride[0]
 * bytes. */
static void
deinterlace_scanline_linear_planar_y_mmxext (GstDeinterlaceSimpleMethod * self,
    guint8 * out, const GstDeinterlaceScanlineData * scanlines)
{
  const gint n_bytes = self->parent.row_stride[0];

  deinterlace_scanline_linear_mmxext (self, out,
      scanlines->t0, scanlines->b0, n_bytes);
}
/* Planar formats, plane index 1 (U, per the function name), MMXEXT path:
 * average scanlines->t0 and scanlines->b0 into `out` over row_stride[1]
 * bytes. */
static void
deinterlace_scanline_linear_planar_u_mmxext (GstDeinterlaceSimpleMethod * self,
    guint8 * out, const GstDeinterlaceScanlineData * scanlines)
{
  const gint n_bytes = self->parent.row_stride[1];

  deinterlace_scanline_linear_mmxext (self, out,
      scanlines->t0, scanlines->b0, n_bytes);
}
/* Planar formats, plane index 2 (V, per the function name), MMXEXT path:
 * average scanlines->t0 and scanlines->b0 into `out` over row_stride[2]
 * bytes. */
static void
deinterlace_scanline_linear_planar_v_mmxext (GstDeinterlaceSimpleMethod * self,
    guint8 * out, const GstDeinterlaceScanlineData * scanlines)
{
  const gint n_bytes = self->parent.row_stride[2];

  deinterlace_scanline_linear_mmxext (self, out,
      scanlines->t0, scanlines->b0, n_bytes);
}
#endif
G_DEFINE_TYPE (GstDeinterlaceMethodLinear, gst_deinterlace_method_linear,
GST_TYPE_DEINTERLACE_SIMPLE_METHOD);
@ -282,10 +96,6 @@ gst_deinterlace_method_linear_class_init (GstDeinterlaceMethodLinearClass *
GstDeinterlaceMethodClass *dim_class = (GstDeinterlaceMethodClass *) klass;
GstDeinterlaceSimpleMethodClass *dism_class =
(GstDeinterlaceSimpleMethodClass *) klass;
#ifdef BUILD_X86_ASM
guint cpu_flags =
orc_target_get_default_flags (orc_target_get_by_name ("mmx"));
#endif
dim_class->fields_required = 1;
dim_class->name = "Television: Full resolution";
@ -309,63 +119,6 @@ gst_deinterlace_method_linear_class_init (GstDeinterlaceMethodLinearClass *
dism_class->interpolate_scanline_planar_v =
deinterlace_scanline_linear_planar_v_c;
#ifdef BUILD_X86_ASM
if (cpu_flags & ORC_TARGET_MMX_MMXEXT) {
dism_class->interpolate_scanline_ayuv =
deinterlace_scanline_linear_packed_mmxext;
dism_class->interpolate_scanline_yuy2 =
deinterlace_scanline_linear_packed_mmxext;
dism_class->interpolate_scanline_yvyu =
deinterlace_scanline_linear_packed_mmxext;
dism_class->interpolate_scanline_uyvy =
deinterlace_scanline_linear_packed_mmxext;
dism_class->interpolate_scanline_argb =
deinterlace_scanline_linear_packed_mmxext;
dism_class->interpolate_scanline_abgr =
deinterlace_scanline_linear_packed_mmxext;
dism_class->interpolate_scanline_rgba =
deinterlace_scanline_linear_packed_mmxext;
dism_class->interpolate_scanline_bgra =
deinterlace_scanline_linear_packed_mmxext;
dism_class->interpolate_scanline_rgb =
deinterlace_scanline_linear_packed_mmxext;
dism_class->interpolate_scanline_bgr =
deinterlace_scanline_linear_packed_mmxext;
dism_class->interpolate_scanline_planar_y =
deinterlace_scanline_linear_planar_y_mmxext;
dism_class->interpolate_scanline_planar_u =
deinterlace_scanline_linear_planar_u_mmxext;
dism_class->interpolate_scanline_planar_v =
deinterlace_scanline_linear_planar_v_mmxext;
} else if (cpu_flags & ORC_TARGET_MMX_MMX) {
dism_class->interpolate_scanline_ayuv =
deinterlace_scanline_linear_packed_mmx;
dism_class->interpolate_scanline_yuy2 =
deinterlace_scanline_linear_packed_mmx;
dism_class->interpolate_scanline_yvyu =
deinterlace_scanline_linear_packed_mmx;
dism_class->interpolate_scanline_uyvy =
deinterlace_scanline_linear_packed_mmx;
dism_class->interpolate_scanline_argb =
deinterlace_scanline_linear_packed_mmx;
dism_class->interpolate_scanline_abgr =
deinterlace_scanline_linear_packed_mmx;
dism_class->interpolate_scanline_rgba =
deinterlace_scanline_linear_packed_mmx;
dism_class->interpolate_scanline_bgra =
deinterlace_scanline_linear_packed_mmx;
dism_class->interpolate_scanline_rgb =
deinterlace_scanline_linear_packed_mmx;
dism_class->interpolate_scanline_bgr =
deinterlace_scanline_linear_packed_mmx;
dism_class->interpolate_scanline_planar_y =
deinterlace_scanline_linear_planar_y_mmx;
dism_class->interpolate_scanline_planar_u =
deinterlace_scanline_linear_planar_u_mmx;
dism_class->interpolate_scanline_planar_v =
deinterlace_scanline_linear_planar_v_mmx;
}
#endif
}
static void

View file

@ -96,9 +96,7 @@ deinterlace_scanline_linear_blend2_c (GstDeinterlaceSimpleMethod * self,
guint8 * out, const guint8 * m0, const guint8 * t1, const guint8 * b1,
gint size)
{
while (size--) {
*out++ = (*t1++ + *b1++ + (*m0++ << 1)) >> 2;
}
deinterlace_line_linear_blend (out, t1, b1, m0, size);
}
static void
@ -133,178 +131,6 @@ deinterlace_scanline_linear_blend2_planar_v_c (GstDeinterlaceSimpleMethod *
scanlines->b1, self->parent.row_stride[2]);
}
#ifdef BUILD_X86_ASM
#include "mmx.h"
/*
 * Blend three source lines into one output line, MMX version.
 *
 * The scalar tail at the bottom computes, per byte,
 *   out = (t0 + b0 + 2 * m1) >> 2
 * and the MMX loop is written to produce the same weighting, eight bytes
 * per iteration.
 *
 * self: not used by this function.
 * out:  destination line; `size` bytes are written.
 * t0, b0, m1: source lines; `size` bytes are read from each.
 * size: number of bytes to process.
 */
static inline void
deinterlace_scanline_linear_blend_mmx (GstDeinterlaceSimpleMethod * self,
guint8 * out, const guint8 * t0, const guint8 * b0, const guint8 * m1,
gint size)
{
gint i;
/* Split the work: i groups of 8 bytes for MMX, the rest for the C tail. */
i = size / 8;
size -= i * 8;
/* mm7 = 0, used below as the zero operand when widening bytes to words. */
pxor_r2r (mm7, mm7);
while (i--) {
/* Load two 4-byte halves from each of the three lines. */
movd_m2r (*t0, mm0);
movd_m2r (*b0, mm1);
movd_m2r (*m1, mm2);
movd_m2r (*(t0 + 4), mm3);
movd_m2r (*(b0 + 4), mm4);
movd_m2r (*(m1 + 4), mm5);
/* Widen bytes to 16-bit words so the sum below cannot overflow. */
punpcklbw_r2r (mm7, mm0);
punpcklbw_r2r (mm7, mm1);
punpcklbw_r2r (mm7, mm2);
punpcklbw_r2r (mm7, mm3);
punpcklbw_r2r (mm7, mm4);
punpcklbw_r2r (mm7, mm5);
/* m1 * 2 */
psllw_i2r (1, mm2);
psllw_i2r (1, mm5);
/* + t0 */
paddw_r2r (mm0, mm2);
paddw_r2r (mm3, mm5);
/* + b0 */
paddw_r2r (mm1, mm2);
paddw_r2r (mm4, mm5);
/* / 4 */
psrlw_i2r (2, mm2);
psrlw_i2r (2, mm5);
/* Narrow back to bytes and store the two 4-byte halves. */
packuswb_r2r (mm2, mm2);
packuswb_r2r (mm5, mm5);
movd_r2m (mm2, *out);
movd_r2m (mm5, *(out + 4));
out += 8;
t0 += 8;
b0 += 8;
m1 += 8;
}
emms ();
/* Scalar tail for the remaining size % 8 bytes. */
while (size--) {
*out++ = (*t0++ + *b0++ + (*m1++ << 1)) >> 2;
}
}
/* Packed formats, MMX path: blend scanlines->t0, ->b0 and ->m1 into `out`
 * over row_stride[0] bytes. */
static void
deinterlace_scanline_linear_blend_packed_mmx (GstDeinterlaceSimpleMethod * self,
    guint8 * out, const GstDeinterlaceScanlineData * scanlines)
{
  const gint n_bytes = self->parent.row_stride[0];

  deinterlace_scanline_linear_blend_mmx (self, out,
      scanlines->t0, scanlines->b0, scanlines->m1, n_bytes);
}
/* Planar formats, plane index 0 (Y, per the function name), MMX path:
 * blend scanlines->t0, ->b0 and ->m1 into `out` over row_stride[0] bytes. */
static void
deinterlace_scanline_linear_blend_planar_y_mmx (GstDeinterlaceSimpleMethod *
    self, guint8 * out, const GstDeinterlaceScanlineData * scanlines)
{
  const gint n_bytes = self->parent.row_stride[0];

  deinterlace_scanline_linear_blend_mmx (self, out,
      scanlines->t0, scanlines->b0, scanlines->m1, n_bytes);
}
/* Planar formats, plane index 1 (U, per the function name), MMX path:
 * blend scanlines->t0, ->b0 and ->m1 into `out` over row_stride[1] bytes. */
static void
deinterlace_scanline_linear_blend_planar_u_mmx (GstDeinterlaceSimpleMethod *
    self, guint8 * out, const GstDeinterlaceScanlineData * scanlines)
{
  const gint n_bytes = self->parent.row_stride[1];

  deinterlace_scanline_linear_blend_mmx (self, out,
      scanlines->t0, scanlines->b0, scanlines->m1, n_bytes);
}
/* Planar formats, plane index 2 (V, per the function name), MMX path:
 * blend scanlines->t0, ->b0 and ->m1 into `out` over row_stride[2] bytes. */
static void
deinterlace_scanline_linear_blend_planar_v_mmx (GstDeinterlaceSimpleMethod *
    self, guint8 * out, const GstDeinterlaceScanlineData * scanlines)
{
  const gint n_bytes = self->parent.row_stride[2];

  deinterlace_scanline_linear_blend_mmx (self, out,
      scanlines->t0, scanlines->b0, scanlines->m1, n_bytes);
}
/*
 * Blend three source lines into one output line, MMX version, with the
 * double weight on m0.
 *
 * The scalar tail at the bottom computes, per byte,
 *   out = (t1 + b1 + 2 * m0) >> 2
 * and the MMX loop is written to produce the same weighting, eight bytes
 * per iteration.
 *
 * self: not used by this function.
 * out:  destination line; `size` bytes are written.
 * m0, t1, b1: source lines; `size` bytes are read from each.
 * size: number of bytes to process.
 */
static inline void
deinterlace_scanline_linear_blend2_mmx (GstDeinterlaceSimpleMethod * self,
guint8 * out, const guint8 * m0, const guint8 * t1, const guint8 * b1,
gint size)
{
gint i;
/* Split the work: i groups of 8 bytes for MMX, the rest for the C tail. */
i = size / 8;
size -= i * 8;
/* mm7 = 0, used below as the zero operand when widening bytes to words. */
pxor_r2r (mm7, mm7);
while (i--) {
/* Load two 4-byte halves from each of the three lines. */
movd_m2r (*t1, mm0);
movd_m2r (*b1, mm1);
movd_m2r (*m0, mm2);
movd_m2r (*(t1 + 4), mm3);
movd_m2r (*(b1 + 4), mm4);
movd_m2r (*(m0 + 4), mm5);
/* Widen bytes to 16-bit words so the sum below cannot overflow. */
punpcklbw_r2r (mm7, mm0);
punpcklbw_r2r (mm7, mm1);
punpcklbw_r2r (mm7, mm2);
punpcklbw_r2r (mm7, mm3);
punpcklbw_r2r (mm7, mm4);
punpcklbw_r2r (mm7, mm5);
/* m0 * 2 */
psllw_i2r (1, mm2);
psllw_i2r (1, mm5);
/* + t1 */
paddw_r2r (mm0, mm2);
paddw_r2r (mm3, mm5);
/* + b1 */
paddw_r2r (mm1, mm2);
paddw_r2r (mm4, mm5);
/* / 4 */
psrlw_i2r (2, mm2);
psrlw_i2r (2, mm5);
/* Narrow back to bytes and store the two 4-byte halves. */
packuswb_r2r (mm2, mm2);
packuswb_r2r (mm5, mm5);
movd_r2m (mm2, *out);
movd_r2m (mm5, *(out + 4));
out += 8;
t1 += 8;
b1 += 8;
m0 += 8;
}
emms ();
/* Scalar tail for the remaining size % 8 bytes. */
while (size--) {
*out++ = (*t1++ + *b1++ + (*m0++ << 1)) >> 2;
}
}
/* Packed formats, MMX path: blend scanlines->m0, ->t1 and ->b1 into `out`
 * over row_stride[0] bytes. */
static void
deinterlace_scanline_linear_blend2_packed_mmx (GstDeinterlaceSimpleMethod *
    self, guint8 * out, const GstDeinterlaceScanlineData * scanlines)
{
  const gint n_bytes = self->parent.row_stride[0];

  deinterlace_scanline_linear_blend2_mmx (self, out,
      scanlines->m0, scanlines->t1, scanlines->b1, n_bytes);
}
/* Planar formats, plane index 0 (Y, per the function name), MMX path:
 * blend scanlines->m0, ->t1 and ->b1 into `out` over row_stride[0] bytes. */
static void
deinterlace_scanline_linear_blend2_planar_y_mmx (GstDeinterlaceSimpleMethod *
    self, guint8 * out, const GstDeinterlaceScanlineData * scanlines)
{
  const gint n_bytes = self->parent.row_stride[0];

  deinterlace_scanline_linear_blend2_mmx (self, out,
      scanlines->m0, scanlines->t1, scanlines->b1, n_bytes);
}
/* Planar formats, plane index 1 (U, per the function name), MMX path:
 * blend scanlines->m0, ->t1 and ->b1 into `out` over row_stride[1] bytes. */
static void
deinterlace_scanline_linear_blend2_planar_u_mmx (GstDeinterlaceSimpleMethod *
    self, guint8 * out, const GstDeinterlaceScanlineData * scanlines)
{
  const gint n_bytes = self->parent.row_stride[1];

  deinterlace_scanline_linear_blend2_mmx (self, out,
      scanlines->m0, scanlines->t1, scanlines->b1, n_bytes);
}
/* Planar formats, plane index 2 (V, per the function name), MMX path:
 * blend scanlines->m0, ->t1 and ->b1 into `out` over row_stride[2] bytes. */
static void
deinterlace_scanline_linear_blend2_planar_v_mmx (GstDeinterlaceSimpleMethod *
    self, guint8 * out, const GstDeinterlaceScanlineData * scanlines)
{
  const gint n_bytes = self->parent.row_stride[2];

  deinterlace_scanline_linear_blend2_mmx (self, out,
      scanlines->m0, scanlines->t1, scanlines->b1, n_bytes);
}
#endif
G_DEFINE_TYPE (GstDeinterlaceMethodLinearBlend,
gst_deinterlace_method_linear_blend, GST_TYPE_DEINTERLACE_SIMPLE_METHOD);
@ -315,10 +141,6 @@ static void
GstDeinterlaceMethodClass *dim_class = (GstDeinterlaceMethodClass *) klass;
GstDeinterlaceSimpleMethodClass *dism_class =
(GstDeinterlaceSimpleMethodClass *) klass;
#ifdef BUILD_X86_ASM
guint cpu_flags =
orc_target_get_default_flags (orc_target_get_by_name ("mmx"));
#endif
dim_class->fields_required = 2;
dim_class->name = "Blur: Temporal";
@ -371,66 +193,6 @@ static void
dism_class->copy_scanline_planar_v =
deinterlace_scanline_linear_blend2_planar_v_c;
#ifdef BUILD_X86_ASM
if (cpu_flags & ORC_TARGET_MMX_MMX) {
dism_class->interpolate_scanline_yuy2 =
deinterlace_scanline_linear_blend_packed_mmx;
dism_class->interpolate_scanline_yvyu =
deinterlace_scanline_linear_blend_packed_mmx;
dism_class->interpolate_scanline_uyvy =
deinterlace_scanline_linear_blend_packed_mmx;
dism_class->interpolate_scanline_ayuv =
deinterlace_scanline_linear_blend_packed_mmx;
dism_class->interpolate_scanline_argb =
deinterlace_scanline_linear_blend_packed_mmx;
dism_class->interpolate_scanline_rgba =
deinterlace_scanline_linear_blend_packed_mmx;
dism_class->interpolate_scanline_abgr =
deinterlace_scanline_linear_blend_packed_mmx;
dism_class->interpolate_scanline_bgra =
deinterlace_scanline_linear_blend_packed_mmx;
dism_class->interpolate_scanline_rgb =
deinterlace_scanline_linear_blend_packed_mmx;
dism_class->interpolate_scanline_bgr =
deinterlace_scanline_linear_blend_packed_mmx;
dism_class->interpolate_scanline_planar_y =
deinterlace_scanline_linear_blend_planar_y_mmx;
dism_class->interpolate_scanline_planar_u =
deinterlace_scanline_linear_blend_planar_u_mmx;
dism_class->interpolate_scanline_planar_v =
deinterlace_scanline_linear_blend_planar_v_mmx;
dism_class->copy_scanline_ayuv =
deinterlace_scanline_linear_blend2_packed_mmx;
dism_class->copy_scanline_yuy2 =
deinterlace_scanline_linear_blend2_packed_mmx;
dism_class->copy_scanline_yvyu =
deinterlace_scanline_linear_blend2_packed_mmx;
dism_class->copy_scanline_uyvy =
deinterlace_scanline_linear_blend2_packed_mmx;
dism_class->copy_scanline_argb =
deinterlace_scanline_linear_blend2_packed_mmx;
dism_class->copy_scanline_abgr =
deinterlace_scanline_linear_blend2_packed_mmx;
dism_class->copy_scanline_rgba =
deinterlace_scanline_linear_blend2_packed_mmx;
dism_class->copy_scanline_bgra =
deinterlace_scanline_linear_blend2_packed_mmx;
dism_class->copy_scanline_rgb =
deinterlace_scanline_linear_blend2_packed_mmx;
dism_class->copy_scanline_bgr =
deinterlace_scanline_linear_blend2_packed_mmx;
dism_class->copy_scanline_planar_y =
deinterlace_scanline_linear_blend2_planar_y_mmx;
dism_class->copy_scanline_planar_u =
deinterlace_scanline_linear_blend2_planar_u_mmx;
dism_class->copy_scanline_planar_v =
deinterlace_scanline_linear_blend2_planar_v_mmx;
}
#endif
}
static void