/* * * GStreamer * Copyright (c) 2000 Tom Barry All rights reserved. * mmx.h port copyright (c) 2002 Billy Biggs . * * Copyright (C) 2008,2010 Sebastian Dröge * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Library General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Library General Public License for more details. * * You should have received a copy of the GNU Library General Public * License along with this library; if not, write to the * Free Software Foundation, Inc., 59 Temple Place - Suite 330, * Boston, MA 02111-1307, USA. */ /* * Relicensed for GStreamer from GPL to LGPL with permit from Tom Barry * and Billy Biggs. * See: http://bugzilla.gnome.org/show_bug.cgi?id=163578 */ #ifdef HAVE_CONFIG_H # include "config.h" #endif #include "gstdeinterlacemethod.h" #include #ifdef HAVE_ORC #include #endif #include "tvtime.h" #define GST_TYPE_DEINTERLACE_METHOD_GREEDY_L (gst_deinterlace_method_greedy_l_get_type ()) #define GST_IS_DEINTERLACE_METHOD_GREEDY_L(obj) (G_TYPE_CHECK_INSTANCE_TYPE ((obj), GST_TYPE_DEINTERLACE_METHOD_GREEDY_L)) #define GST_IS_DEINTERLACE_METHOD_GREEDY_L_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE ((klass), GST_TYPE_DEINTERLACE_METHOD_GREEDY_L)) #define GST_DEINTERLACE_METHOD_GREEDY_L_GET_CLASS(obj) (G_TYPE_INSTANCE_GET_CLASS ((obj), GST_TYPE_DEINTERLACE_METHOD_GREEDY_L, GstDeinterlaceMethodGreedyLClass)) #define GST_DEINTERLACE_METHOD_GREEDY_L(obj) (G_TYPE_CHECK_INSTANCE_CAST ((obj), GST_TYPE_DEINTERLACE_METHOD_GREEDY_L, GstDeinterlaceMethodGreedyL)) #define GST_DEINTERLACE_METHOD_GREEDY_L_CLASS(klass) (G_TYPE_CHECK_CLASS_CAST ((klass), GST_TYPE_DEINTERLACE_METHOD_GREEDY_L, GstDeinterlaceMethodGreedyLClass)) #define GST_DEINTERLACE_METHOD_GREEDY_L_CAST(obj) ((GstDeinterlaceMethodGreedyL*)(obj)) GType gst_deinterlace_method_greedy_l_get_type (void); typedef struct { GstDeinterlaceMethod parent; guint max_comb; } GstDeinterlaceMethodGreedyL; typedef void (*GreedyLScanlineFunction) (GstDeinterlaceMethodGreedyL * self, const guint8 * L2, const guint8 * L1, const guint8 * L3, const guint8 * L2P, guint8 * Dest, gint width); typedef struct { GstDeinterlaceMethodClass parent_class; GreedyLScanlineFunction scanline; } GstDeinterlaceMethodGreedyLClass; // This is a simple lightweight DeInterlace method that uses little CPU time // but gives very good results for low or intermedite motion. // It defers frames by one field, but that does not seem to produce noticeable // lip sync problems. // // The method used is to take either the older or newer weave pixel depending // upon which give the smaller comb factor, and then clip to avoid large damage // when wrong. // // I'd intended this to be part of a larger more elaborate method added to // Blended Clip but this give too good results for the CPU to ignore here. static inline void deinterlace_greedy_scanline_c (GstDeinterlaceMethodGreedyL * self, const guint8 * m0, const guint8 * t1, const guint8 * b1, const guint8 * m2, guint8 * output, gint width) { gint avg, l2_diff, lp2_diff, max, min, best; guint max_comb = self->max_comb; // L2 == m0 // L1 == t1 // L3 == b1 // LP2 == m2 while (width--) { avg = (*t1 + *b1) / 2; l2_diff = ABS (*m0 - avg); lp2_diff = ABS (*m2 - avg); if (l2_diff > lp2_diff) best = *m2; else best = *m0; max = MAX (*t1, *b1); min = MIN (*t1, *b1); if (max < 256 - max_comb) max += max_comb; else max = 255; if (min > max_comb) min -= max_comb; else min = 0; *output = CLAMP (best, min, max); // Advance to the next set of pixels. output += 1; m0 += 1; t1 += 1; b1 += 1; m2 += 1; } } static inline void deinterlace_greedy_scanline_orc (GstDeinterlaceMethodGreedyL * self, const guint8 * m0, const guint8 * t1, const guint8 * b1, const guint8 * m2, guint8 * output, gint width) { deinterlace_line_greedy (output, m0, t1, b1, m2, self->max_comb, width); } #ifdef BUILD_X86_ASM #include "mmx.h" static void deinterlace_greedy_scanline_mmx (GstDeinterlaceMethodGreedyL * self, const guint8 * m0, const guint8 * t1, const guint8 * b1, const guint8 * m2, guint8 * output, gint width) { mmx_t MaxComb; mmx_t ShiftMask; // How badly do we let it weave? 0-255 MaxComb.ub[0] = self->max_comb; MaxComb.ub[1] = self->max_comb; MaxComb.ub[2] = self->max_comb; MaxComb.ub[3] = self->max_comb; MaxComb.ub[4] = self->max_comb; MaxComb.ub[5] = self->max_comb; MaxComb.ub[6] = self->max_comb; MaxComb.ub[7] = self->max_comb; ShiftMask.ub[0] = 0x7f; ShiftMask.ub[1] = 0x7f; ShiftMask.ub[2] = 0x7f; ShiftMask.ub[3] = 0x7f; ShiftMask.ub[4] = 0x7f; ShiftMask.ub[5] = 0x7f; ShiftMask.ub[6] = 0x7f; ShiftMask.ub[7] = 0x7f; // L2 == m0 // L1 == t1 // L3 == b1 // LP2 == m2 movq_m2r (MaxComb, mm6); for (; width > 7; width -= 8) { movq_m2r (*t1, mm1); // L1 movq_m2r (*m0, mm2); // L2 movq_m2r (*b1, mm3); // L3 movq_m2r (*m2, mm0); // LP2 // average L1 and L3 leave result in mm4 movq_r2r (mm1, mm4); // L1 movq_r2r (mm3, mm5); // L3 psrlw_i2r (1, mm4); // L1/2 pand_m2r (ShiftMask, mm4); psrlw_i2r (1, mm5); // L3/2 pand_m2r (ShiftMask, mm5); paddusb_r2r (mm5, mm4); // (L1 + L3) / 2 // get abs value of possible L2 comb movq_r2r (mm2, mm7); // L2 psubusb_r2r (mm4, mm7); // L2 - avg movq_r2r (mm4, mm5); // avg psubusb_r2r (mm2, mm5); // avg - L2 por_r2r (mm7, mm5); // abs(avg-L2) // get abs value of possible LP2 comb movq_r2r (mm0, mm7); // LP2 psubusb_r2r (mm4, mm7); // LP2 - avg psubusb_r2r (mm0, mm4); // avg - LP2 por_r2r (mm7, mm4); // abs(avg-LP2) // use L2 or LP2 depending upon which makes smaller comb psubusb_r2r (mm5, mm4); // see if it goes to zero psubusb_r2r (mm5, mm5); // 0 pcmpeqb_r2r (mm5, mm4); // if (mm4=0) then FF else 0 pcmpeqb_r2r (mm4, mm5); // opposite of mm4 // if Comb(LP2) <= Comb(L2) then mm4=ff, mm5=0 else mm4=0, mm5 = 55 pand_r2r (mm2, mm5); // use L2 if mm5 == ff, else 0 pand_r2r (mm0, mm4); // use LP2 if mm4 = ff, else 0 por_r2r (mm5, mm4); // may the best win // Now lets clip our chosen value to be not outside of the range // of the high/low range L1-L3 by more than abs(L1-L3) // This allows some comb but limits the damages and also allows more // detail than a boring oversmoothed clip. movq_r2r (mm1, mm2); // copy L1 psubusb_r2r (mm3, mm2); // - L3, with saturation paddusb_r2r (mm3, mm2); // now = Max(L1,L3) pcmpeqb_r2r (mm7, mm7); // all ffffffff psubusb_r2r (mm1, mm7); // - L1 paddusb_r2r (mm7, mm3); // add, may sat at fff.. psubusb_r2r (mm7, mm3); // now = Min(L1,L3) // allow the value to be above the high or below the low by amt of MaxComb paddusb_r2r (mm6, mm2); // increase max by diff psubusb_r2r (mm6, mm3); // lower min by diff psubusb_r2r (mm3, mm4); // best - Min paddusb_r2r (mm3, mm4); // now = Max(best,Min(L1,L3) pcmpeqb_r2r (mm7, mm7); // all ffffffff psubusb_r2r (mm4, mm7); // - Max(best,Min(best,L3) paddusb_r2r (mm7, mm2); // add may sat at FFF.. psubusb_r2r (mm7, mm2); // now = Min( Max(best, Min(L1,L3), L2 )=L2 clipped movq_r2m (mm2, *output); // move in our clipped best // Advance to the next set of pixels. output += 8; m0 += 8; t1 += 8; b1 += 8; m2 += 8; } emms (); if (width > 0) deinterlace_greedy_scanline_c (self, m0, t1, b1, m2, output, width); } #include "sse.h" static void deinterlace_greedy_scanline_mmxext (GstDeinterlaceMethodGreedyL * self, const guint8 * m0, const guint8 * t1, const guint8 * b1, const guint8 * m2, guint8 * output, gint width) { mmx_t MaxComb; // How badly do we let it weave? 0-255 MaxComb.ub[0] = self->max_comb; MaxComb.ub[1] = self->max_comb; MaxComb.ub[2] = self->max_comb; MaxComb.ub[3] = self->max_comb; MaxComb.ub[4] = self->max_comb; MaxComb.ub[5] = self->max_comb; MaxComb.ub[6] = self->max_comb; MaxComb.ub[7] = self->max_comb; // L2 == m0 // L1 == t1 // L3 == b1 // LP2 == m2 movq_m2r (MaxComb, mm6); for (; width > 7; width -= 8) { movq_m2r (*t1, mm1); // L1 movq_m2r (*m0, mm2); // L2 movq_m2r (*b1, mm3); // L3 movq_m2r (*m2, mm0); // LP2 // average L1 and L3 leave result in mm4 movq_r2r (mm1, mm4); // L1 pavgb_r2r (mm3, mm4); // (L1 + L3)/2 // get abs value of possible L2 comb movq_r2r (mm2, mm7); // L2 psubusb_r2r (mm4, mm7); // L2 - avg movq_r2r (mm4, mm5); // avg psubusb_r2r (mm2, mm5); // avg - L2 por_r2r (mm7, mm5); // abs(avg-L2) // get abs value of possible LP2 comb movq_r2r (mm0, mm7); // LP2 psubusb_r2r (mm4, mm7); // LP2 - avg psubusb_r2r (mm0, mm4); // avg - LP2 por_r2r (mm7, mm4); // abs(avg-LP2) // use L2 or LP2 depending upon which makes smaller comb psubusb_r2r (mm5, mm4); // see if it goes to zero pxor_r2r (mm5, mm5); // 0 pcmpeqb_r2r (mm5, mm4); // if (mm4=0) then FF else 0 pcmpeqb_r2r (mm4, mm5); // opposite of mm4 // if Comb(LP2) <= Comb(L2) then mm4=ff, mm5=0 else mm4=0, mm5 = 55 pand_r2r (mm2, mm5); // use L2 if mm5 == ff, else 0 pand_r2r (mm0, mm4); // use LP2 if mm4 = ff, else 0 por_r2r (mm5, mm4); // may the best win // Now lets clip our chosen value to be not outside of the range // of the high/low range L1-L3 by more than abs(L1-L3) // This allows some comb but limits the damages and also allows more // detail than a boring oversmoothed clip. movq_r2r (mm1, mm2); // copy L1 pmaxub_r2r (mm3, mm2); // now = Max(L1,L3) pminub_r2r (mm1, mm3); // now = Min(L1,L3) // allow the value to be above the high or below the low by amt of MaxComb paddusb_r2r (mm6, mm2); // increase max by diff psubusb_r2r (mm6, mm3); // lower min by diff pmaxub_r2r (mm3, mm4); // now = Max(best,Min(L1,L3) pminub_r2r (mm4, mm2); // now = Min( Max(best, Min(L1,L3)), L2 )=L2 clipped movq_r2m (mm2, *output); // move in our clipped best // Advance to the next set of pixels. output += 8; m0 += 8; t1 += 8; b1 += 8; m2 += 8; } emms (); if (width > 0) deinterlace_greedy_scanline_c (self, m0, t1, b1, m2, output, width); } #endif static void deinterlace_frame_di_greedy_packed (GstDeinterlaceMethod * method, const GstDeinterlaceField * history, guint history_count, GstBuffer * outbuf) { GstDeinterlaceMethodGreedyL *self = GST_DEINTERLACE_METHOD_GREEDY_L (method); GstDeinterlaceMethodGreedyLClass *klass = GST_DEINTERLACE_METHOD_GREEDY_L_GET_CLASS (self); gint InfoIsOdd = 0; gint Line; gint RowStride = method->row_stride[0]; gint FieldHeight = method->frame_height / 2; gint Pitch = method->row_stride[0] * 2; const guint8 *L1; // ptr to Line1, of 3 const guint8 *L2; // ptr to Line2, the weave line const guint8 *L3; // ptr to Line3 const guint8 *L2P; // ptr to prev Line2 guint8 *Dest = GST_BUFFER_DATA (outbuf); // copy first even line no matter what, and the first odd line if we're // processing an EVEN field. (note diff from other deint rtns.) if (history[history_count - 1].flags == PICTURE_INTERLACED_BOTTOM) { InfoIsOdd = 1; L1 = GST_BUFFER_DATA (history[history_count - 2].buf); if (history[history_count - 2].flags & PICTURE_INTERLACED_BOTTOM) L1 += RowStride; L2 = GST_BUFFER_DATA (history[history_count - 1].buf); if (history[history_count - 1].flags & PICTURE_INTERLACED_BOTTOM) L2 += RowStride; L3 = L1 + Pitch; L2P = GST_BUFFER_DATA (history[history_count - 3].buf); if (history[history_count - 3].flags & PICTURE_INTERLACED_BOTTOM) L2P += RowStride; // copy first even line memcpy (Dest, L1, RowStride); Dest += RowStride; } else { InfoIsOdd = 0; L1 = GST_BUFFER_DATA (history[history_count - 2].buf); if (history[history_count - 2].flags & PICTURE_INTERLACED_BOTTOM) L1 += RowStride; L2 = GST_BUFFER_DATA (history[history_count - 1].buf) + Pitch; if (history[history_count - 1].flags & PICTURE_INTERLACED_BOTTOM) L2 += RowStride; L3 = L1 + Pitch; L2P = GST_BUFFER_DATA (history[history_count - 3].buf) + Pitch; if (history[history_count - 3].flags & PICTURE_INTERLACED_BOTTOM) L2P += RowStride; // copy first even line memcpy (Dest, L1, RowStride); Dest += RowStride; // then first odd line memcpy (Dest, L1, RowStride); Dest += RowStride; } for (Line = 0; Line < (FieldHeight - 1); ++Line) { klass->scanline (self, L2, L1, L3, L2P, Dest, RowStride); Dest += RowStride; memcpy (Dest, L3, RowStride); Dest += RowStride; L1 += Pitch; L2 += Pitch; L3 += Pitch; L2P += Pitch; } if (InfoIsOdd) { memcpy (Dest, L2, RowStride); } } static void deinterlace_frame_di_greedy_planar_plane (GstDeinterlaceMethodGreedyL * self, const guint8 * L1, const guint8 * L2, const guint8 * L3, const guint8 * L2P, guint8 * Dest, gint RowStride, gint FieldHeight, gint Pitch, gint InfoIsOdd, GreedyLScanlineFunction scanline) { gint Line; // copy first even line no matter what, and the first odd line if we're // processing an EVEN field. (note diff from other deint rtns.) if (InfoIsOdd) { // copy first even line memcpy (Dest, L1, RowStride); Dest += RowStride; } else { // copy first even line memcpy (Dest, L1, RowStride); Dest += RowStride; // then first odd line memcpy (Dest, L1, RowStride); Dest += RowStride; } for (Line = 0; Line < (FieldHeight - 1); ++Line) { scanline (self, L2, L1, L3, L2P, Dest, RowStride); Dest += RowStride; memcpy (Dest, L3, RowStride); Dest += RowStride; L1 += Pitch; L2 += Pitch; L3 += Pitch; L2P += Pitch; } if (InfoIsOdd) { memcpy (Dest, L2, RowStride); } } static void deinterlace_frame_di_greedy_planar (GstDeinterlaceMethod * method, const GstDeinterlaceField * history, guint history_count, GstBuffer * outbuf) { GstDeinterlaceMethodGreedyL *self = GST_DEINTERLACE_METHOD_GREEDY_L (method); GstDeinterlaceMethodGreedyLClass *klass = GST_DEINTERLACE_METHOD_GREEDY_L_GET_CLASS (self); gint InfoIsOdd; gint RowStride; gint FieldHeight; gint Pitch; const guint8 *L1; // ptr to Line1, of 3 const guint8 *L2; // ptr to Line2, the weave line const guint8 *L3; // ptr to Line3 const guint8 *L2P; // ptr to prev Line2 guint8 *Dest; gint i; gint Offset; GreedyLScanlineFunction scanline = klass->scanline; for (i = 0; i < 3; i++) { Offset = method->offset[i]; InfoIsOdd = (history[history_count - 1].flags == PICTURE_INTERLACED_BOTTOM); RowStride = method->row_stride[i]; FieldHeight = method->height[i] / 2; Pitch = method->row_stride[i] * 2; Dest = GST_BUFFER_DATA (outbuf) + Offset; L1 = GST_BUFFER_DATA (history[history_count - 2].buf) + Offset; if (history[history_count - 2].flags & PICTURE_INTERLACED_BOTTOM) L1 += RowStride; L2 = GST_BUFFER_DATA (history[history_count - 1].buf) + Offset; if (history[history_count - 1].flags & PICTURE_INTERLACED_BOTTOM) L2 += RowStride; L3 = L1 + Pitch; L2P = GST_BUFFER_DATA (history[history_count - 3].buf) + Offset; if (history[history_count - 3].flags & PICTURE_INTERLACED_BOTTOM) L2P += RowStride; deinterlace_frame_di_greedy_planar_plane (self, L1, L2, L3, L2P, Dest, RowStride, FieldHeight, Pitch, InfoIsOdd, scanline); } } G_DEFINE_TYPE (GstDeinterlaceMethodGreedyL, gst_deinterlace_method_greedy_l, GST_TYPE_DEINTERLACE_METHOD); enum { PROP_0, PROP_MAX_COMB }; static void gst_deinterlace_method_greedy_l_set_property (GObject * object, guint prop_id, const GValue * value, GParamSpec * pspec) { GstDeinterlaceMethodGreedyL *self = GST_DEINTERLACE_METHOD_GREEDY_L (object); switch (prop_id) { case PROP_MAX_COMB: self->max_comb = g_value_get_uint (value); break; default: G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); } } static void gst_deinterlace_method_greedy_l_get_property (GObject * object, guint prop_id, GValue * value, GParamSpec * pspec) { GstDeinterlaceMethodGreedyL *self = GST_DEINTERLACE_METHOD_GREEDY_L (object); switch (prop_id) { case PROP_MAX_COMB: g_value_set_uint (value, self->max_comb); break; default: G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); } } static void gst_deinterlace_method_greedy_l_class_init (GstDeinterlaceMethodGreedyLClass * klass) { GstDeinterlaceMethodClass *dim_class = (GstDeinterlaceMethodClass *) klass; GObjectClass *gobject_class = (GObjectClass *) klass; #ifdef BUILD_X86_ASM guint cpu_flags = orc_target_get_default_flags (orc_target_get_by_name ("mmx")); #endif gobject_class->set_property = gst_deinterlace_method_greedy_l_set_property; gobject_class->get_property = gst_deinterlace_method_greedy_l_get_property; g_object_class_install_property (gobject_class, PROP_MAX_COMB, g_param_spec_uint ("max-comb", "Max comb", "Max Comb", 0, 255, 15, G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS) ); dim_class->fields_required = 4; dim_class->name = "Motion Adaptive: Simple Detection"; dim_class->nick = "greedyl"; dim_class->latency = 1; dim_class->deinterlace_frame_yuy2 = deinterlace_frame_di_greedy_packed; dim_class->deinterlace_frame_yvyu = deinterlace_frame_di_greedy_packed; dim_class->deinterlace_frame_uyvy = deinterlace_frame_di_greedy_packed; dim_class->deinterlace_frame_y444 = deinterlace_frame_di_greedy_planar; dim_class->deinterlace_frame_y42b = deinterlace_frame_di_greedy_planar; dim_class->deinterlace_frame_i420 = deinterlace_frame_di_greedy_planar; dim_class->deinterlace_frame_yv12 = deinterlace_frame_di_greedy_planar; dim_class->deinterlace_frame_y41b = deinterlace_frame_di_greedy_planar; dim_class->deinterlace_frame_ayuv = deinterlace_frame_di_greedy_planar; dim_class->deinterlace_frame_argb = deinterlace_frame_di_greedy_packed; dim_class->deinterlace_frame_rgba = deinterlace_frame_di_greedy_packed; dim_class->deinterlace_frame_abgr = deinterlace_frame_di_greedy_packed; dim_class->deinterlace_frame_bgra = deinterlace_frame_di_greedy_packed; dim_class->deinterlace_frame_rgb = deinterlace_frame_di_greedy_packed; dim_class->deinterlace_frame_bgr = deinterlace_frame_di_greedy_packed; #ifdef BUILD_X86_ASM if (cpu_flags & ORC_TARGET_MMX_MMXEXT) { klass->scanline = deinterlace_greedy_scanline_mmxext; } else if (cpu_flags & ORC_TARGET_MMX_MMX) { klass->scanline = deinterlace_greedy_scanline_mmx; } else { klass->scanline = deinterlace_greedy_scanline_c; } #else klass->scanline = deinterlace_greedy_scanline_c; klass->scanline = deinterlace_greedy_scanline_orc; #endif } static void gst_deinterlace_method_greedy_l_init (GstDeinterlaceMethodGreedyL * self) { self->max_comb = 15; }