gstreamer/gst/deinterlace2/tvtime/greedy.c
Sebastian Dröge 26cb95316c Disable the tomsmocomp algorithm for this release as it's buggy and has no C implementation yet.
Original commit message from CVS:
* configure.ac:
* gst/deinterlace2/Makefile.am:
* gst/deinterlace2/gstdeinterlace2.c:
(gst_deinterlace2_methods_get_type), (gst_deinterlace2_set_method),
(gst_deinterlace2_class_init), (gst_deinterlace2_init):
* gst/deinterlace2/gstdeinterlace2.h:
* gst/deinterlace2/tvtime/greedy.c:
(gst_deinterlace_method_greedy_l_class_init):
* gst/deinterlace2/tvtime/greedyh.c:
(gst_deinterlace_method_greedy_h_class_init):
* gst/deinterlace2/tvtime/vfir.c:
(gst_deinterlace_method_vfir_class_init):
Disable the tomsmocomp algorithm for this release as it's buggy
and has no C implementation yet.
Build the deinterlace2 plugin on all architectures but still mark it
as experimental.
Build the x86 inline assembly only if GCC inline assembly is supported
and only on x86 or amd64. Fixes bug #543286.
2008-07-18 08:34:06 +00:00

488 lines
15 KiB
C

/*
*
* GStreamer
* Copyright (c) 2000 Tom Barry All rights reserved.
* mmx.h port copyright (c) 2002 Billy Biggs <vektor@dumbterm.net>.
*
* Copyright (C) 2008 Sebastian Dröge <slomo@collabora.co.uk>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 02111-1307, USA.
*/
/*
* Relicensed for GStreamer from GPL to LGPL with permit from Tom Barry
* and Billy Biggs.
* See: http://bugzilla.gnome.org/show_bug.cgi?id=163578
*/
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
#include "_stdint.h"
#include "gstdeinterlace2.h"
#include <string.h>
#define GST_TYPE_DEINTERLACE_METHOD_GREEDY_L (gst_deinterlace_method_greedy_l_get_type ())
#define GST_IS_DEINTERLACE_METHOD_GREEDY_L(obj) (G_TYPE_CHECK_INSTANCE_TYPE ((obj), GST_TYPE_DEINTERLACE_METHOD_GREEDY_L))
#define GST_IS_DEINTERLACE_METHOD_GREEDY_L_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE ((klass), GST_TYPE_DEINTERLACE_METHOD_GREEDY_L))
#define GST_DEINTERLACE_METHOD_GREEDY_L_GET_CLASS(obj) (G_TYPE_INSTANCE_GET_CLASS ((obj), GST_TYPE_DEINTERLACE_METHOD_GREEDY_L, GstDeinterlaceMethodGreedyLClass))
#define GST_DEINTERLACE_METHOD_GREEDY_L(obj) (G_TYPE_CHECK_INSTANCE_CAST ((obj), GST_TYPE_DEINTERLACE_METHOD_GREEDY_L, GstDeinterlaceMethodGreedyL))
#define GST_DEINTERLACE_METHOD_GREEDY_L_CLASS(klass) (G_TYPE_CHECK_CLASS_CAST ((klass), GST_TYPE_DEINTERLACE_METHOD_GREEDY_L, GstDeinterlaceMethodGreedyLClass))
#define GST_DEINTERLACE_METHOD_GREEDY_L_CAST(obj) ((GstDeinterlaceMethodGreedyL*)(obj))
GType gst_deinterlace_method_greedy_l_get_type (void);
typedef struct
{
GstDeinterlaceMethod parent;
guint max_comb;
} GstDeinterlaceMethodGreedyL;
typedef struct
{
GstDeinterlaceMethodClass parent_class;
void (*scanline) (GstDeinterlaceMethodGreedyL * self, uint8_t * L2,
uint8_t * L1, uint8_t * L3, uint8_t * L2P, uint8_t * Dest, int size);
} GstDeinterlaceMethodGreedyLClass;
// This is a simple lightweight DeInterlace method that uses little CPU time
// but gives very good results for low or intermedite motion.
// It defers frames by one field, but that does not seem to produce noticeable
// lip sync problems.
//
// The method used is to take either the older or newer weave pixel depending
// upon which give the smaller comb factor, and then clip to avoid large damage
// when wrong.
//
// I'd intended this to be part of a larger more elaborate method added to
// Blended Clip but this give too good results for the CPU to ignore here.
static inline void
deinterlace_greedy_packed422_scanline_c (GstDeinterlaceMethodGreedyL * self,
uint8_t * m0, uint8_t * t1,
uint8_t * b1, uint8_t * m2, uint8_t * output, int width)
{
int avg, l2_diff, lp2_diff, max, min, best;
guint max_comb = self->max_comb;
// L2 == m0
// L1 == t1
// L3 == b1
// LP2 == m2
while (width--) {
avg = (*t1 + *b1) / 2;
l2_diff = ABS (*m0 - avg);
lp2_diff = ABS (*m2 - avg);
if (l2_diff > lp2_diff)
best = *m2;
else
best = *m0;
max = MAX (*t1, *b1);
min = MIN (*t1, *b1);
if (max < 256 - max_comb)
max += max_comb;
else
max = 255;
if (min > max_comb)
min -= max_comb;
else
min = 0;
*output = CLAMP (best, min, max);
// Advance to the next set of pixels.
output += 1;
m0 += 1;
t1 += 1;
b1 += 1;
m2 += 1;
}
}
#ifdef BUILD_X86_ASM
#include "mmx.h"
static void
deinterlace_greedy_packed422_scanline_mmx (GstDeinterlaceMethodGreedyL * self,
uint8_t * m0, uint8_t * t1,
uint8_t * b1, uint8_t * m2, uint8_t * output, int width)
{
mmx_t MaxComb;
mmx_t ShiftMask;
// How badly do we let it weave? 0-255
MaxComb.ub[0] = self->max_comb;
MaxComb.ub[1] = self->max_comb;
MaxComb.ub[2] = self->max_comb;
MaxComb.ub[3] = self->max_comb;
MaxComb.ub[4] = self->max_comb;
MaxComb.ub[5] = self->max_comb;
MaxComb.ub[6] = self->max_comb;
MaxComb.ub[7] = self->max_comb;
ShiftMask.ub[0] = 0x7f;
ShiftMask.ub[1] = 0x7f;
ShiftMask.ub[2] = 0x7f;
ShiftMask.ub[3] = 0x7f;
ShiftMask.ub[4] = 0x7f;
ShiftMask.ub[5] = 0x7f;
ShiftMask.ub[6] = 0x7f;
ShiftMask.ub[7] = 0x7f;
// L2 == m0
// L1 == t1
// L3 == b1
// LP2 == m2
movq_m2r (MaxComb, mm6);
for (; width > 7; width -= 8) {
movq_m2r (*t1, mm1); // L1
movq_m2r (*m0, mm2); // L2
movq_m2r (*b1, mm3); // L3
movq_m2r (*m2, mm0); // LP2
// average L1 and L3 leave result in mm4
movq_r2r (mm1, mm4); // L1
movq_r2r (mm3, mm5); // L3
psrlw_i2r (1, mm4); // L1/2
pand_m2r (ShiftMask, mm4);
psrlw_i2r (1, mm5); // L3/2
pand_m2r (ShiftMask, mm5);
paddusb_r2r (mm5, mm4); // (L1 + L3) / 2
// get abs value of possible L2 comb
movq_r2r (mm2, mm7); // L2
psubusb_r2r (mm4, mm7); // L2 - avg
movq_r2r (mm4, mm5); // avg
psubusb_r2r (mm2, mm5); // avg - L2
por_r2r (mm7, mm5); // abs(avg-L2)
// get abs value of possible LP2 comb
movq_r2r (mm0, mm7); // LP2
psubusb_r2r (mm4, mm7); // LP2 - avg
psubusb_r2r (mm0, mm4); // avg - LP2
por_r2r (mm7, mm4); // abs(avg-LP2)
// use L2 or LP2 depending upon which makes smaller comb
psubusb_r2r (mm5, mm4); // see if it goes to zero
psubusb_r2r (mm5, mm5); // 0
pcmpeqb_r2r (mm5, mm4); // if (mm4=0) then FF else 0
pcmpeqb_r2r (mm4, mm5); // opposite of mm4
// if Comb(LP2) <= Comb(L2) then mm4=ff, mm5=0 else mm4=0, mm5 = 55
pand_r2r (mm2, mm5); // use L2 if mm5 == ff, else 0
pand_r2r (mm0, mm4); // use LP2 if mm4 = ff, else 0
por_r2r (mm5, mm4); // may the best win
// Now lets clip our chosen value to be not outside of the range
// of the high/low range L1-L3 by more than abs(L1-L3)
// This allows some comb but limits the damages and also allows more
// detail than a boring oversmoothed clip.
movq_r2r (mm1, mm2); // copy L1
psubusb_r2r (mm3, mm2); // - L3, with saturation
paddusb_r2r (mm3, mm2); // now = Max(L1,L3)
pcmpeqb_r2r (mm7, mm7); // all ffffffff
psubusb_r2r (mm1, mm7); // - L1
paddusb_r2r (mm7, mm3); // add, may sat at fff..
psubusb_r2r (mm7, mm3); // now = Min(L1,L3)
// allow the value to be above the high or below the low by amt of MaxComb
paddusb_r2r (mm6, mm2); // increase max by diff
psubusb_r2r (mm6, mm3); // lower min by diff
psubusb_r2r (mm3, mm4); // best - Min
paddusb_r2r (mm3, mm4); // now = Max(best,Min(L1,L3)
pcmpeqb_r2r (mm7, mm7); // all ffffffff
psubusb_r2r (mm4, mm7); // - Max(best,Min(best,L3)
paddusb_r2r (mm7, mm2); // add may sat at FFF..
psubusb_r2r (mm7, mm2); // now = Min( Max(best, Min(L1,L3), L2 )=L2 clipped
movq_r2m (mm2, *output); // move in our clipped best
// Advance to the next set of pixels.
output += 8;
m0 += 8;
t1 += 8;
b1 += 8;
m2 += 8;
}
emms ();
if (width > 0)
deinterlace_greedy_packed422_scanline_c (self, m0, t1, b1, m2, output,
width);
}
#include "sse.h"
static void
deinterlace_greedy_packed422_scanline_mmxext (GstDeinterlaceMethodGreedyL *
self, uint8_t * m0, uint8_t * t1, uint8_t * b1, uint8_t * m2,
uint8_t * output, int width)
{
mmx_t MaxComb;
// How badly do we let it weave? 0-255
MaxComb.ub[0] = self->max_comb;
MaxComb.ub[1] = self->max_comb;
MaxComb.ub[2] = self->max_comb;
MaxComb.ub[3] = self->max_comb;
MaxComb.ub[4] = self->max_comb;
MaxComb.ub[5] = self->max_comb;
MaxComb.ub[6] = self->max_comb;
MaxComb.ub[7] = self->max_comb;
// L2 == m0
// L1 == t1
// L3 == b1
// LP2 == m2
movq_m2r (MaxComb, mm6);
for (; width > 7; width -= 8) {
movq_m2r (*t1, mm1); // L1
movq_m2r (*m0, mm2); // L2
movq_m2r (*b1, mm3); // L3
movq_m2r (*m2, mm0); // LP2
// average L1 and L3 leave result in mm4
movq_r2r (mm1, mm4); // L1
pavgb_r2r (mm3, mm4); // (L1 + L3)/2
// get abs value of possible L2 comb
movq_r2r (mm2, mm7); // L2
psubusb_r2r (mm4, mm7); // L2 - avg
movq_r2r (mm4, mm5); // avg
psubusb_r2r (mm2, mm5); // avg - L2
por_r2r (mm7, mm5); // abs(avg-L2)
// get abs value of possible LP2 comb
movq_r2r (mm0, mm7); // LP2
psubusb_r2r (mm4, mm7); // LP2 - avg
psubusb_r2r (mm0, mm4); // avg - LP2
por_r2r (mm7, mm4); // abs(avg-LP2)
// use L2 or LP2 depending upon which makes smaller comb
psubusb_r2r (mm5, mm4); // see if it goes to zero
pxor_r2r (mm5, mm5); // 0
pcmpeqb_r2r (mm5, mm4); // if (mm4=0) then FF else 0
pcmpeqb_r2r (mm4, mm5); // opposite of mm4
// if Comb(LP2) <= Comb(L2) then mm4=ff, mm5=0 else mm4=0, mm5 = 55
pand_r2r (mm2, mm5); // use L2 if mm5 == ff, else 0
pand_r2r (mm0, mm4); // use LP2 if mm4 = ff, else 0
por_r2r (mm5, mm4); // may the best win
// Now lets clip our chosen value to be not outside of the range
// of the high/low range L1-L3 by more than abs(L1-L3)
// This allows some comb but limits the damages and also allows more
// detail than a boring oversmoothed clip.
movq_r2r (mm1, mm2); // copy L1
pmaxub_r2r (mm3, mm2); // now = Max(L1,L3)
pminub_r2r (mm1, mm3); // now = Min(L1,L3)
// allow the value to be above the high or below the low by amt of MaxComb
paddusb_r2r (mm6, mm2); // increase max by diff
psubusb_r2r (mm6, mm3); // lower min by diff
pmaxub_r2r (mm3, mm4); // now = Max(best,Min(L1,L3)
pminub_r2r (mm4, mm2); // now = Min( Max(best, Min(L1,L3)), L2 )=L2 clipped
movq_r2m (mm2, *output); // move in our clipped best
// Advance to the next set of pixels.
output += 8;
m0 += 8;
t1 += 8;
b1 += 8;
m2 += 8;
}
emms ();
if (width > 0)
deinterlace_greedy_packed422_scanline_c (self, m0, t1, b1, m2, output,
width);
}
#endif
static void
deinterlace_frame_di_greedy (GstDeinterlaceMethod * d_method,
GstDeinterlace2 * object)
{
GstDeinterlaceMethodGreedyL *self =
GST_DEINTERLACE_METHOD_GREEDY_L (d_method);
GstDeinterlaceMethodGreedyLClass *klass =
GST_DEINTERLACE_METHOD_GREEDY_L_GET_CLASS (self);
int InfoIsOdd = 0;
int Line;
unsigned int Pitch = object->field_stride;
unsigned char *L1; // ptr to Line1, of 3
unsigned char *L2; // ptr to Line2, the weave line
unsigned char *L3; // ptr to Line3
unsigned char *L2P; // ptr to prev Line2
unsigned char *Dest = GST_BUFFER_DATA (object->out_buf);
// copy first even line no matter what, and the first odd line if we're
// processing an EVEN field. (note diff from other deint rtns.)
if (object->field_history[object->history_count - 1].flags ==
PICTURE_INTERLACED_BOTTOM) {
InfoIsOdd = 1;
L1 = GST_BUFFER_DATA (object->field_history[object->history_count - 2].buf);
L2 = GST_BUFFER_DATA (object->field_history[object->history_count - 1].buf);
L3 = L1 + Pitch;
L2P =
GST_BUFFER_DATA (object->field_history[object->history_count - 3].buf);
// copy first even line
memcpy (Dest, L1, object->line_length);
Dest += object->output_stride;
} else {
InfoIsOdd = 0;
L1 = GST_BUFFER_DATA (object->field_history[object->history_count - 2].buf);
L2 = GST_BUFFER_DATA (object->field_history[object->history_count -
1].buf) + Pitch;
L3 = L1 + Pitch;
L2P =
GST_BUFFER_DATA (object->field_history[object->history_count - 3].buf) +
Pitch;
// copy first even line
memcpy (Dest, GST_BUFFER_DATA (object->field_history[0].buf),
object->line_length);
Dest += object->output_stride;
// then first odd line
memcpy (Dest, L1, object->line_length);
Dest += object->output_stride;
}
for (Line = 0; Line < (object->field_height - 1); ++Line) {
klass->scanline (self, L2, L1, L3, L2P, Dest, object->line_length);
Dest += object->output_stride;
memcpy (Dest, L3, object->line_length);
Dest += object->output_stride;
L1 += Pitch;
L2 += Pitch;
L3 += Pitch;
L2P += Pitch;
}
if (InfoIsOdd) {
memcpy (Dest, L2, object->line_length);
}
}
G_DEFINE_TYPE (GstDeinterlaceMethodGreedyL, gst_deinterlace_method_greedy_l,
GST_TYPE_DEINTERLACE_METHOD);
enum
{
ARG_0,
ARG_MAX_COMB
};
static void
gst_deinterlace_method_greedy_l_set_property (GObject * object, guint prop_id,
const GValue * value, GParamSpec * pspec)
{
GstDeinterlaceMethodGreedyL *self = GST_DEINTERLACE_METHOD_GREEDY_L (object);
switch (prop_id) {
case ARG_MAX_COMB:
self->max_comb = g_value_get_uint (value);
break;
default:
G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
}
}
static void
gst_deinterlace_method_greedy_l_get_property (GObject * object, guint prop_id,
GValue * value, GParamSpec * pspec)
{
GstDeinterlaceMethodGreedyL *self = GST_DEINTERLACE_METHOD_GREEDY_L (object);
switch (prop_id) {
case ARG_MAX_COMB:
g_value_set_uint (value, self->max_comb);
break;
default:
G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
}
}
static void
gst_deinterlace_method_greedy_l_class_init (GstDeinterlaceMethodGreedyLClass *
klass)
{
GstDeinterlaceMethodClass *dim_class = (GstDeinterlaceMethodClass *) klass;
GObjectClass *gobject_class = (GObjectClass *) klass;
#ifdef BUILD_X86_ASM
guint cpu_flags = oil_cpu_get_flags ();
#endif
gobject_class->set_property = gst_deinterlace_method_greedy_l_set_property;
gobject_class->get_property = gst_deinterlace_method_greedy_l_get_property;
g_object_class_install_property (gobject_class, ARG_MAX_COMB,
g_param_spec_uint ("max-comb",
"Max comb",
"Max Comb", 0, 255, 15, G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS)
);
dim_class->fields_required = 4;
dim_class->deinterlace_frame = deinterlace_frame_di_greedy;
dim_class->name = "Motion Adaptive: Simple Detection";
dim_class->nick = "greedyl";
dim_class->latency = 1;
#ifdef BUILD_X86_ASM
if (cpu_flags & OIL_IMPL_FLAG_MMXEXT) {
klass->scanline = deinterlace_greedy_packed422_scanline_mmxext;
} else if (cpu_flags & OIL_IMPL_FLAG_MMX) {
klass->scanline = deinterlace_greedy_packed422_scanline_mmx;
} else {
klass->scanline = deinterlace_greedy_packed422_scanline_c;
}
#else
klass->scanline = deinterlace_greedy_packed422_scanline_c;
#endif
}
static void
gst_deinterlace_method_greedy_l_init (GstDeinterlaceMethodGreedyL * self)
{
self->max_comb = 15;
}