mirror of
https://gitlab.freedesktop.org/gstreamer/gstreamer.git
synced 2025-01-26 17:18:15 +00:00
[MOVED FROM BAD 01/56] gst/deinterlace2/: Add a deinterlacer plugin based on the tvtime/DScaler deinterlacer, which was relicensed to LGPL f...
Original commit message from CVS: Based on a patch by: Martin Eikermann <meiker at upb dot de> * gst/deinterlace2/Makefile.am: * gst/deinterlace2/gstdeinterlace2.c: (gst_deinterlace2_method_get_type), (gst_deinterlace2_fields_get_type), (gst_deinterlace2_field_layout_get_type), (gst_deinterlace2_base_init), (gst_deinterlace2_class_init), (gst_deinterlace2_init), (gst_deinterlace2_set_method), (gst_deinterlace2_set_property), (gst_deinterlace2_get_property), (gst_deinterlace2_finalize), (gst_deinterlace2_pop_history), (gst_deinterlace2_head_history), (gst_deinterlace2_push_history), (gst_deinterlace2_deinterlace_scanlines), (gst_deinterlace2_chain), (gst_deinterlace2_setcaps), (gst_deinterlace2_sink_event), (gst_deinterlace2_change_state), (gst_deinterlace2_src_event), (gst_deinterlace2_src_query), (gst_deinterlace2_src_query_types), (plugin_init): * gst/deinterlace2/gstdeinterlace2.h: * gst/deinterlace2/tvtime/greedy.c: (copy_scanline), (deinterlace_greedy_packed422_scanline_mmxext), (dscaler_greedyl_get_method): * gst/deinterlace2/tvtime/greedyh.asm: * gst/deinterlace2/tvtime/greedyh.c: (deinterlace_frame_di_greedyh), (dscaler_greedyh_get_method), (greedyh_init), (greedyh_filter_mmx), (greedyh_filter_3dnow), (greedyh_filter_sse): * gst/deinterlace2/tvtime/greedyh.h: * gst/deinterlace2/tvtime/greedyhmacros.h: * gst/deinterlace2/tvtime/mmx.h: * gst/deinterlace2/tvtime/plugins.h: * gst/deinterlace2/tvtime/speedtools.h: * gst/deinterlace2/tvtime/speedy.c: (multiply_alpha), (clip255), (comb_factor_packed422_scanline_mmx), (diff_factor_packed422_scanline_c), (diff_factor_packed422_scanline_mmx), (diff_packed422_block8x8_mmx), (diff_packed422_block8x8_c), (packed444_to_packed422_scanline_c), (packed422_to_packed444_scanline_c), (packed422_to_packed444_rec601_scanline_c), (vfilter_chroma_121_packed422_scanline_mmx), (vfilter_chroma_121_packed422_scanline_c), (vfilter_chroma_332_packed422_scanline_mmx), (vfilter_chroma_332_packed422_scanline_c), 
(kill_chroma_packed422_inplace_scanline_mmx), (kill_chroma_packed422_inplace_scanline_c), (invert_colour_packed422_inplace_scanline_mmx), (invert_colour_packed422_inplace_scanline_c), (mirror_packed422_inplace_scanline_c), (interpolate_packed422_scanline_c), (convert_uyvy_to_yuyv_scanline_mmx), (convert_uyvy_to_yuyv_scanline_c), (interpolate_packed422_scanline_mmx), (interpolate_packed422_scanline_mmxext), (blit_colour_packed422_scanline_c), (blit_colour_packed422_scanline_mmx), (blit_colour_packed422_scanline_mmxext), (blit_colour_packed4444_scanline_c), (blit_colour_packed4444_scanline_mmx), (blit_colour_packed4444_scanline_mmxext), (small_memcpy), (speedy_memcpy_c), (speedy_memcpy_mmx), (speedy_memcpy_mmxext), (blit_packed422_scanline_c), (blit_packed422_scanline_mmx), (blit_packed422_scanline_mmxext), (composite_colour4444_alpha_to_packed422_scanline_c), (composite_colour4444_alpha_to_packed422_scanline_mmxext), (composite_packed4444_alpha_to_packed422_scanline_c), (composite_packed4444_alpha_to_packed422_scanline_mmxext), (composite_packed4444_to_packed422_scanline_c), (composite_packed4444_to_packed422_scanline_mmxext), (composite_alphamask_to_packed4444_scanline_c), (composite_alphamask_to_packed4444_scanline_mmxext), (composite_alphamask_alpha_to_packed4444_scanline_c), (premultiply_packed4444_scanline_c), (premultiply_packed4444_scanline_mmxext), (blend_packed422_scanline_c), (blend_packed422_scanline_mmxext), (quarter_blit_vertical_packed422_scanline_mmxext), (quarter_blit_vertical_packed422_scanline_c), (subpix_blit_vertical_packed422_scanline_c), (a8_subpix_blit_scanline_c), (myround), (init_RGB_to_YCbCr_tables), (init_YCbCr_to_RGB_tables), (rgb24_to_packed444_rec601_scanline_c), (rgba32_to_packed4444_rec601_scanline_c), (packed444_to_rgb24_rec601_scanline_c), (packed444_to_nonpremultiplied_packed4444_scanline_c), (aspect_adjust_packed4444_scanline_c), (setup_speedy_calls), (speedy_get_accel): * gst/deinterlace2/tvtime/speedy.h: * 
gst/deinterlace2/tvtime/sse.h: * gst/deinterlace2/tvtime/tomsmocomp.c: (Fieldcopy), (deinterlace_frame_di_tomsmocomp), (dscaler_tomsmocomp_get_method), (tomsmocomp_init), (tomsmocomp_filter_mmx), (tomsmocomp_filter_3dnow), (tomsmocomp_filter_sse): * gst/deinterlace2/tvtime/tomsmocomp.h: * gst/deinterlace2/tvtime/tomsmocomp/SearchLoop0A.inc: * gst/deinterlace2/tvtime/tomsmocomp/SearchLoopBottom.inc: * gst/deinterlace2/tvtime/tomsmocomp/SearchLoopEdgeA.inc: * gst/deinterlace2/tvtime/tomsmocomp/SearchLoopEdgeA8.inc: * gst/deinterlace2/tvtime/tomsmocomp/SearchLoopOddA.inc: * gst/deinterlace2/tvtime/tomsmocomp/SearchLoopOddA2.inc: * gst/deinterlace2/tvtime/tomsmocomp/SearchLoopOddA6.inc: * gst/deinterlace2/tvtime/tomsmocomp/SearchLoopOddAH.inc: * gst/deinterlace2/tvtime/tomsmocomp/SearchLoopOddAH2.inc: * gst/deinterlace2/tvtime/tomsmocomp/SearchLoopTop.inc: * gst/deinterlace2/tvtime/tomsmocomp/SearchLoopVA.inc: * gst/deinterlace2/tvtime/tomsmocomp/SearchLoopVAH.inc: * gst/deinterlace2/tvtime/tomsmocomp/StrangeBob.inc: * gst/deinterlace2/tvtime/tomsmocomp/TomsMoCompAll.inc: * gst/deinterlace2/tvtime/tomsmocomp/TomsMoCompAll2.inc: * gst/deinterlace2/tvtime/tomsmocomp/WierdBob.inc: * gst/deinterlace2/tvtime/vfir.c: (deinterlace_line), (deinterlace_scanline_vfir), (copy_scanline), (dscaler_vfir_get_method): * gst/deinterlace2/tvtime/x86-64_macros.inc: Add a deinterlacer plugin based on the tvtime/DScaler deinterlacer, which was relicensed to LGPL for GStreamer and in theory provides better and faster results than the simple deinterlace element. Fixes bug #163578. Ported to GStreamer 0.10 but still not enabled or included in the build system by default because of bad artefacts caused by a bug somewhere and as it can be only build on x86/amd64 ATM and requires special CFLAGS. Will be fixed soon.
This commit is contained in:
parent
2a362d14c5
commit
0518c150c9
18 changed files with 7411 additions and 0 deletions
46
gst/deinterlace2/Makefile.am
Normal file
46
gst/deinterlace2/Makefile.am
Normal file
|
@ -0,0 +1,46 @@
|
|||
plugin_LTLIBRARIES = libgstdeinterlace2.la
|
||||
|
||||
libgstdeinterlace2_la_SOURCES = \
|
||||
gstdeinterlace2.c \
|
||||
tvtime/greedy.c \
|
||||
tvtime/greedyh.asm \
|
||||
tvtime/greedyh.c \
|
||||
tvtime/speedy.c \
|
||||
tvtime/vfir.c \
|
||||
tvtime/x86-64_macros.inc \
|
||||
tvtime/tomsmocomp.c \
|
||||
tvtime/tomsmocomp/SearchLoop0A.inc \
|
||||
tvtime/tomsmocomp/SearchLoopBottom.inc \
|
||||
tvtime/tomsmocomp/SearchLoopEdgeA8.inc \
|
||||
tvtime/tomsmocomp/SearchLoopEdgeA.inc \
|
||||
tvtime/tomsmocomp/SearchLoopOddA2.inc \
|
||||
tvtime/tomsmocomp/SearchLoopOddA6.inc \
|
||||
tvtime/tomsmocomp/SearchLoopOddAH2.inc \
|
||||
tvtime/tomsmocomp/SearchLoopOddAH.inc \
|
||||
tvtime/tomsmocomp/SearchLoopOddA.inc \
|
||||
tvtime/tomsmocomp/SearchLoopTop.inc \
|
||||
tvtime/tomsmocomp/SearchLoopVAH.inc \
|
||||
tvtime/tomsmocomp/SearchLoopVA.inc \
|
||||
tvtime/tomsmocomp/StrangeBob.inc \
|
||||
tvtime/tomsmocomp/TomsMoCompAll2.inc \
|
||||
tvtime/tomsmocomp/TomsMoCompAll.inc \
|
||||
tvtime/tomsmocomp/tomsmocompmacros.h \
|
||||
tvtime/tomsmocomp/WierdBob.inc
|
||||
|
||||
libgstdeinterlace2_la_CFLAGS = $(GST_CFLAGS) \
|
||||
$(GST_PLUGINS_BASE_CFLAGS) $(GST_BASE_CFLAGS) $(LIBOIL_CFLAGS) -march=athlon-xp
|
||||
libgstdeinterlace2_la_LIBADD = $(GST_LIBS) \
|
||||
$(GST_PLUGINS_BASE_LIBS) -lgstvideo-$(GST_MAJORMINOR) $(GST_BASE_LIBS) $(LIBOIL_LIBS)
|
||||
libgstdeinterlace2_la_LDFLAGS = $(GST_PLUGIN_LDFLAGS)
|
||||
|
||||
noinst_HEADERS = \
|
||||
gstdeinterlace2.h \
|
||||
tvtime/mmx.h \
|
||||
tvtime/sse.h \
|
||||
tvtime/greedyh.h \
|
||||
tvtime/greedyhmacros.h \
|
||||
tvtime/plugins.h \
|
||||
tvtime/speedtools.h \
|
||||
tvtime/speedy.h \
|
||||
tvtime/tomsmocomp.h
|
||||
|
893
gst/deinterlace2/gstdeinterlace2.c
Normal file
893
gst/deinterlace2/gstdeinterlace2.c
Normal file
|
@ -0,0 +1,893 @@
|
|||
/*
|
||||
* GStreamer
|
||||
* Copyright (C) 2005 Martin Eikermann <meiker@upb.de>
|
||||
* Copyright (C) 2008 Sebastian Dröge <slomo@circular-chaos.org>
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Library General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Library General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Library General Public
|
||||
* License along with this library; if not, write to the
|
||||
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
* Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "gstdeinterlace2.h"
|
||||
#include <gst/gst.h>
|
||||
#include <gst/video/video.h>
|
||||
|
||||
#include "tvtime/plugins.h"
|
||||
#include "tvtime/speedy.h"
|
||||
|
||||
GST_DEBUG_CATEGORY_STATIC (deinterlace2_debug);
|
||||
#define GST_CAT_DEFAULT (deinterlace2_debug)
|
||||
|
||||
/* Signal ids. No signals are declared; LAST_SIGNAL is the only entry. */
enum
{
  LAST_SIGNAL
};

/* Property ids dispatched on in gst_deinterlace2_set_property() and
 * gst_deinterlace2_get_property(). ARG_0 is a placeholder for id 0. */
enum
{
  ARG_0 = 0,
  ARG_METHOD,
  ARG_FIELDS,
  ARG_FIELD_LAYOUT
};
|
||||
|
||||
#define GST_TYPE_DEINTERLACE2_METHOD (gst_deinterlace2_method_get_type ())
|
||||
static GType
|
||||
gst_deinterlace2_method_get_type (void)
|
||||
{
|
||||
static GType deinterlace2_method_type = 0;
|
||||
|
||||
static const GEnumValue method_types[] = {
|
||||
{GST_DEINTERLACE2_TOM, "Toms Motion Compensation", "tomsmc"},
|
||||
{GST_DEINTERLACE2_GREEDY_H, "Greedy High Motion", "greedyh"},
|
||||
{GST_DEINTERLACE2_GREEDY_L, "Greedy Low Motion", "greedyl"},
|
||||
{GST_DEINTERLACE2_VFIR, "Vertical Blur", "vfir"},
|
||||
{0, NULL, NULL},
|
||||
};
|
||||
|
||||
if (!deinterlace2_method_type) {
|
||||
deinterlace2_method_type =
|
||||
g_enum_register_static ("GstDeinterlace2Methods", method_types);
|
||||
}
|
||||
return deinterlace2_method_type;
|
||||
}
|
||||
|
||||
#define GST_TYPE_DEINTERLACE2_FIELDS (gst_deinterlace2_fields_get_type ())
|
||||
static GType
|
||||
gst_deinterlace2_fields_get_type (void)
|
||||
{
|
||||
static GType deinterlace2_fields_type = 0;
|
||||
|
||||
static const GEnumValue fields_types[] = {
|
||||
{GST_DEINTERLACE2_ALL, "All fields", "all"},
|
||||
{GST_DEINTERLACE2_TF, "Top fields only", "top"},
|
||||
{GST_DEINTERLACE2_BF, "Bottom fields only", "bottom"},
|
||||
{0, NULL, NULL},
|
||||
};
|
||||
|
||||
if (!deinterlace2_fields_type) {
|
||||
deinterlace2_fields_type =
|
||||
g_enum_register_static ("GstDeinterlace2Fields", fields_types);
|
||||
}
|
||||
return deinterlace2_fields_type;
|
||||
}
|
||||
|
||||
#define GST_TYPE_DEINTERLACE2_FIELD_LAYOUT (gst_deinterlace2_field_layout_get_type ())
|
||||
static GType
|
||||
gst_deinterlace2_field_layout_get_type (void)
|
||||
{
|
||||
static GType deinterlace2_field_layout_type = 0;
|
||||
|
||||
static const GEnumValue field_layout_types[] = {
|
||||
{GST_DEINTERLACE2_LAYOUT_AUTO, "Auto detection", "auto"},
|
||||
{GST_DEINTERLACE2_LAYOUT_TFF, "Top field first", "tff"},
|
||||
{GST_DEINTERLACE2_LAYOUT_BFF, "Bottom field first", "bff"},
|
||||
{0, NULL, NULL},
|
||||
};
|
||||
|
||||
if (!deinterlace2_field_layout_type) {
|
||||
deinterlace2_field_layout_type =
|
||||
g_enum_register_static ("GstDeinterlace2FieldLayout",
|
||||
field_layout_types);
|
||||
}
|
||||
return deinterlace2_field_layout_type;
|
||||
}
|
||||
|
||||
static GstStaticPadTemplate src_templ = GST_STATIC_PAD_TEMPLATE ("src",
|
||||
GST_PAD_SRC,
|
||||
GST_PAD_ALWAYS,
|
||||
GST_STATIC_CAPS (GST_VIDEO_CAPS_YUV ("YUY2"))
|
||||
);
|
||||
|
||||
static GstStaticPadTemplate sink_templ = GST_STATIC_PAD_TEMPLATE ("sink",
|
||||
GST_PAD_SINK,
|
||||
GST_PAD_ALWAYS,
|
||||
GST_STATIC_CAPS (GST_VIDEO_CAPS_YUV ("YUY2"))
|
||||
);
|
||||
|
||||
static void gst_deinterlace2_finalize (GObject * object);
|
||||
|
||||
static void gst_deinterlace2_set_property (GObject * object, guint prop_id,
|
||||
const GValue * value, GParamSpec * pspec);
|
||||
static void gst_deinterlace2_get_property (GObject * object, guint prop_id,
|
||||
GValue * value, GParamSpec * pspec);
|
||||
|
||||
static gboolean gst_deinterlace2_setcaps (GstPad * pad, GstCaps * caps);
|
||||
|
||||
static gboolean gst_deinterlace2_sink_event (GstPad * pad, GstEvent * event);
|
||||
|
||||
static GstFlowReturn gst_deinterlace2_chain (GstPad * pad, GstBuffer * buffer);
|
||||
|
||||
static GstStateChangeReturn gst_deinterlace2_change_state (GstElement * element,
|
||||
GstStateChange transition);
|
||||
|
||||
static gboolean gst_deinterlace2_src_event (GstPad * pad, GstEvent * event);
|
||||
|
||||
static gboolean gst_deinterlace2_src_query (GstPad * pad, GstQuery * query);
|
||||
|
||||
static const GstQueryType *gst_deinterlace2_src_query_types (GstPad * pad);
|
||||
|
||||
static void gst_deinterlace2_deinterlace_scanlines (GstDeinterlace2 * object);
|
||||
|
||||
GST_BOILERPLATE (GstDeinterlace2, gst_deinterlace2, GstElement,
|
||||
GST_TYPE_ELEMENT);
|
||||
|
||||
static void
|
||||
gst_deinterlace2_base_init (gpointer klass)
|
||||
{
|
||||
GstElementClass *element_class = GST_ELEMENT_CLASS (klass);
|
||||
|
||||
gst_element_class_add_pad_template (element_class,
|
||||
gst_static_pad_template_get (&src_templ));
|
||||
gst_element_class_add_pad_template (element_class,
|
||||
gst_static_pad_template_get (&sink_templ));
|
||||
|
||||
gst_element_class_set_details_simple (element_class,
|
||||
"Deinterlacer",
|
||||
"Filter/Video",
|
||||
"Deinterlace Methods ported from DScaler/TvTime",
|
||||
"Martin Eikermann <meiker@upb.de>, "
|
||||
"Sebastian Dröge <slomo@circular-chaos.org>");
|
||||
}
|
||||
|
||||
static void
|
||||
gst_deinterlace2_class_init (GstDeinterlace2Class * klass)
|
||||
{
|
||||
GObjectClass *gobject_class = (GObjectClass *) klass;
|
||||
|
||||
GstElementClass *element_class = (GstElementClass *) klass;
|
||||
|
||||
gobject_class->set_property = gst_deinterlace2_set_property;
|
||||
gobject_class->get_property = gst_deinterlace2_get_property;
|
||||
gobject_class->finalize = gst_deinterlace2_finalize;
|
||||
|
||||
g_object_class_install_property (gobject_class, ARG_METHOD,
|
||||
g_param_spec_enum ("method",
|
||||
"Method",
|
||||
"Deinterlace Method",
|
||||
GST_TYPE_DEINTERLACE2_METHOD,
|
||||
GST_DEINTERLACE2_GREEDY_H, G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS)
|
||||
);
|
||||
|
||||
g_object_class_install_property (gobject_class, ARG_FIELDS,
|
||||
g_param_spec_enum ("fields",
|
||||
"fields",
|
||||
"Fields to use for deinterlacing",
|
||||
GST_TYPE_DEINTERLACE2_FIELDS,
|
||||
GST_DEINTERLACE2_ALL, G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS)
|
||||
);
|
||||
|
||||
|
||||
g_object_class_install_property (gobject_class, ARG_FIELDS,
|
||||
g_param_spec_enum ("tff",
|
||||
"tff",
|
||||
"Deinterlace top field first",
|
||||
GST_TYPE_DEINTERLACE2_FIELD_LAYOUT,
|
||||
GST_DEINTERLACE2_LAYOUT_AUTO,
|
||||
G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS)
|
||||
);
|
||||
|
||||
element_class->change_state =
|
||||
GST_DEBUG_FUNCPTR (gst_deinterlace2_change_state);
|
||||
}
|
||||
|
||||
static void
|
||||
gst_deinterlace2_init (GstDeinterlace2 * object, GstDeinterlace2Class * klass)
|
||||
{
|
||||
object->sinkpad = gst_pad_new_from_static_template (&sink_templ, "sink");
|
||||
gst_pad_set_chain_function (object->sinkpad,
|
||||
GST_DEBUG_FUNCPTR (gst_deinterlace2_chain));
|
||||
gst_pad_set_event_function (object->sinkpad,
|
||||
GST_DEBUG_FUNCPTR (gst_deinterlace2_sink_event));
|
||||
gst_pad_set_setcaps_function (object->sinkpad,
|
||||
GST_DEBUG_FUNCPTR (gst_deinterlace2_setcaps));
|
||||
gst_pad_set_getcaps_function (object->sinkpad,
|
||||
GST_DEBUG_FUNCPTR (gst_pad_proxy_getcaps));
|
||||
gst_element_add_pad (GST_ELEMENT (object), object->sinkpad);
|
||||
|
||||
object->srcpad = gst_pad_new_from_static_template (&src_templ, "src");
|
||||
gst_pad_set_event_function (object->srcpad,
|
||||
GST_DEBUG_FUNCPTR (gst_deinterlace2_src_event));
|
||||
gst_pad_set_query_type_function (object->srcpad,
|
||||
GST_DEBUG_FUNCPTR (gst_deinterlace2_src_query_types));
|
||||
gst_pad_set_query_function (object->srcpad,
|
||||
GST_DEBUG_FUNCPTR (gst_deinterlace2_src_query));
|
||||
gst_pad_set_setcaps_function (object->srcpad,
|
||||
GST_DEBUG_FUNCPTR (gst_deinterlace2_setcaps));
|
||||
gst_pad_set_getcaps_function (object->srcpad,
|
||||
GST_DEBUG_FUNCPTR (gst_pad_proxy_getcaps));
|
||||
gst_element_add_pad (GST_ELEMENT (object), object->srcpad);
|
||||
|
||||
gst_element_no_more_pads (GST_ELEMENT (object));
|
||||
|
||||
object->cpu_feature_flags = oil_cpu_get_flags ();
|
||||
|
||||
setup_speedy_calls (object->cpu_feature_flags, 0);
|
||||
object->pMemcpy = speedy_memcpy;
|
||||
|
||||
object->method = dscaler_tomsmocomp_get_method ();
|
||||
|
||||
object->history_count = 0;
|
||||
|
||||
object->field_layout = GST_DEINTERLACE2_LAYOUT_AUTO;
|
||||
|
||||
object->out_buf = NULL;
|
||||
object->output_stride = 0;
|
||||
object->line_length = 0;
|
||||
object->frame_width = 0;
|
||||
object->frame_height = 0;
|
||||
object->field_height = 0;
|
||||
object->field_stride = 0;
|
||||
|
||||
object->fields = GST_DEINTERLACE2_ALL;
|
||||
|
||||
object->bottom_field = TRUE;
|
||||
}
|
||||
|
||||
static void
|
||||
gst_deinterlace2_set_method (GstDeinterlace2 * object,
|
||||
GstDeinterlace2Methods method)
|
||||
{
|
||||
|
||||
switch (method) {
|
||||
case GST_DEINTERLACE2_TOM:
|
||||
object->method_id = method;
|
||||
object->method = dscaler_tomsmocomp_get_method ();
|
||||
break;
|
||||
case GST_DEINTERLACE2_GREEDY_H:
|
||||
object->method_id = method;
|
||||
object->method = dscaler_greedyh_get_method ();
|
||||
break;
|
||||
case GST_DEINTERLACE2_GREEDY_L:
|
||||
object->method_id = method;
|
||||
object->method = dscaler_greedyl_get_method ();
|
||||
break;
|
||||
case GST_DEINTERLACE2_VFIR:
|
||||
object->method_id = method;
|
||||
object->method = dscaler_vfir_get_method ();
|
||||
break;
|
||||
default:
|
||||
GST_WARNING ("Invalid Deinterlacer Method");
|
||||
}
|
||||
|
||||
|
||||
if (object->method->deinterlace_frame == NULL)
|
||||
object->method->deinterlace_frame = gst_deinterlace2_deinterlace_scanlines;
|
||||
|
||||
/* TODO: if current method requires less fields in the history,
|
||||
pop the diff from field_history.
|
||||
*/
|
||||
|
||||
}
|
||||
|
||||
static void
|
||||
gst_deinterlace2_set_property (GObject * _object, guint prop_id,
|
||||
const GValue * value, GParamSpec * pspec)
|
||||
{
|
||||
GstDeinterlace2 *object;
|
||||
|
||||
g_return_if_fail (GST_IS_DEINTERLACE2 (_object));
|
||||
object = GST_DEINTERLACE2 (_object);
|
||||
|
||||
switch (prop_id) {
|
||||
case ARG_METHOD:
|
||||
gst_deinterlace2_set_method (object, g_value_get_enum (value));
|
||||
break;
|
||||
case ARG_FIELDS:
|
||||
object->fields = g_value_get_enum (value);
|
||||
break;
|
||||
case ARG_FIELD_LAYOUT:
|
||||
object->field_layout = g_value_get_enum (value);
|
||||
break;
|
||||
default:
|
||||
G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static void
|
||||
gst_deinterlace2_get_property (GObject * _object, guint prop_id,
|
||||
GValue * value, GParamSpec * pspec)
|
||||
{
|
||||
GstDeinterlace2 *object;
|
||||
|
||||
g_return_if_fail (GST_IS_DEINTERLACE2 (_object));
|
||||
object = GST_DEINTERLACE2 (_object);
|
||||
|
||||
switch (prop_id) {
|
||||
case ARG_METHOD:
|
||||
g_value_set_enum (value, object->method_id);
|
||||
break;
|
||||
case ARG_FIELDS:
|
||||
g_value_set_enum (value, object->fields);
|
||||
break;
|
||||
case ARG_FIELD_LAYOUT:
|
||||
g_value_set_enum (value, object->field_layout);
|
||||
break;
|
||||
default:
|
||||
G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
gst_deinterlace2_finalize (GObject * object)
|
||||
{
|
||||
G_OBJECT_CLASS (parent_class)->dispose (object);
|
||||
}
|
||||
|
||||
static GstBuffer *
|
||||
gst_deinterlace2_pop_history (GstDeinterlace2 * object)
|
||||
{
|
||||
GstBuffer *buffer = NULL;
|
||||
|
||||
g_assert (object->history_count > 0);
|
||||
|
||||
buffer = object->field_history[object->history_count - 1].buf;
|
||||
|
||||
object->history_count--;
|
||||
GST_DEBUG ("pop, size(history): %d", object->history_count);
|
||||
|
||||
return buffer;
|
||||
}
|
||||
|
||||
#if 0
/* Compiled out. Returns the buffer of the last occupied history entry
 * (index history_count - 1) without removing it. */
static GstBuffer *
gst_deinterlace2_head_history (GstDeinterlace2 * object)
{
  GstBuffer *head = object->field_history[object->history_count - 1].buf;

  return head;
}
#endif
|
||||
|
||||
|
||||
/* invariant: field with smallest timestamp is object->field_history[object->history_count-1]
|
||||
|
||||
*/
|
||||
|
||||
static void
|
||||
gst_deinterlace2_push_history (GstDeinterlace2 * object, GstBuffer * buffer)
|
||||
{
|
||||
int i = 1;
|
||||
|
||||
GstClockTime timestamp;
|
||||
|
||||
GstClockTime field_diff;
|
||||
|
||||
g_assert (object->history_count < MAX_FIELD_HISTORY - 2);
|
||||
|
||||
for (i = MAX_FIELD_HISTORY - 1; i >= 2; i--) {
|
||||
object->field_history[i].buf = object->field_history[i - 2].buf;
|
||||
object->field_history[i].flags = object->field_history[i - 2].flags;
|
||||
}
|
||||
|
||||
if (object->field_layout == GST_DEINTERLACE2_LAYOUT_AUTO) {
|
||||
GST_WARNING ("Could not detect field layout. Assuming top field first.");
|
||||
object->field_layout = GST_DEINTERLACE2_LAYOUT_TFF;
|
||||
}
|
||||
|
||||
|
||||
if (object->field_layout == GST_DEINTERLACE2_LAYOUT_TFF) {
|
||||
GST_DEBUG ("Top field first");
|
||||
object->field_history[0].buf =
|
||||
gst_buffer_create_sub (buffer, object->line_length,
|
||||
GST_BUFFER_SIZE (buffer) - object->line_length);
|
||||
object->field_history[0].flags = PICTURE_INTERLACED_BOTTOM;
|
||||
object->field_history[1].buf = buffer;
|
||||
object->field_history[1].flags = PICTURE_INTERLACED_TOP;
|
||||
} else {
|
||||
GST_DEBUG ("Bottom field first");
|
||||
object->field_history[0].buf = buffer;
|
||||
object->field_history[0].flags = PICTURE_INTERLACED_TOP;
|
||||
object->field_history[1].buf =
|
||||
gst_buffer_create_sub (buffer, object->line_length,
|
||||
GST_BUFFER_SIZE (buffer) - object->line_length);
|
||||
object->field_history[1].flags = PICTURE_INTERLACED_BOTTOM;
|
||||
}
|
||||
|
||||
/* Timestamps are assigned to the field buffers under the assumption that
|
||||
the timestamp of the buffer equals the first fields timestamp */
|
||||
|
||||
timestamp = GST_BUFFER_TIMESTAMP (buffer);
|
||||
field_diff = GST_SECOND / (object->frame_rate_d * 2) / object->frame_rate_n;
|
||||
GST_BUFFER_TIMESTAMP (object->field_history[0].buf) = timestamp + field_diff;
|
||||
GST_BUFFER_TIMESTAMP (object->field_history[1].buf) = timestamp;
|
||||
|
||||
object->history_count += 2;
|
||||
GST_DEBUG ("push, size(history): %d", object->history_count);
|
||||
}
|
||||
|
||||
/* some methods support only deinterlace_/copy_scanline functions.
|
||||
This function calls them in the right manner. */
|
||||
static void
|
||||
gst_deinterlace2_deinterlace_scanlines (GstDeinterlace2 * object)
|
||||
{
|
||||
|
||||
gint line = 1;
|
||||
|
||||
gint cur_field_idx = object->history_count - object->method->fields_required;
|
||||
|
||||
GST_INFO ("cur_field_idx: %d", cur_field_idx);
|
||||
|
||||
guint8 *out_data = GST_BUFFER_DATA (object->out_buf);
|
||||
|
||||
guint8 *cur_field =
|
||||
GST_BUFFER_DATA (object->field_history[cur_field_idx].buf);
|
||||
guint8 *last_field = NULL;
|
||||
|
||||
guint8 *second_last_field = NULL;
|
||||
|
||||
/* method can just handle up to 3 history fields,
|
||||
bcs until now there isn't a plugin (with interp./copy scanline methods)
|
||||
that uses more */
|
||||
g_assert (object->method->fields_required <= 3);
|
||||
|
||||
if (object->method->fields_required >= 2) {
|
||||
last_field = GST_BUFFER_DATA (object->field_history[cur_field_idx + 1].buf);
|
||||
}
|
||||
if (object->method->fields_required >= 3) {
|
||||
second_last_field =
|
||||
GST_BUFFER_DATA (object->field_history[cur_field_idx + 2].buf);
|
||||
}
|
||||
|
||||
if (object->field_history[cur_field_idx].flags == PICTURE_INTERLACED_BOTTOM) {
|
||||
/* double the first scanline of the bottom field */
|
||||
blit_packed422_scanline (out_data, cur_field, object->frame_width);
|
||||
out_data += object->output_stride;
|
||||
}
|
||||
|
||||
blit_packed422_scanline (out_data, cur_field, object->frame_width);
|
||||
out_data += object->output_stride;
|
||||
line++;
|
||||
|
||||
for (; line <= object->field_height;) {
|
||||
deinterlace_scanline_data_t data;
|
||||
|
||||
/* interp. scanline */
|
||||
data.t0 = cur_field;
|
||||
data.b0 = cur_field + object->field_stride;
|
||||
|
||||
if (last_field != NULL) {
|
||||
data.tt1 = last_field;
|
||||
data.m1 = last_field + object->field_stride;
|
||||
data.bb1 = last_field + (object->field_stride * 2);
|
||||
|
||||
last_field += object->field_stride;
|
||||
}
|
||||
|
||||
if (second_last_field != NULL) {
|
||||
data.t2 = second_last_field;
|
||||
data.b2 = second_last_field + object->field_stride;
|
||||
}
|
||||
|
||||
/* set valid data for corner cases */
|
||||
if (line == 2) {
|
||||
data.tt1 = data.bb1;
|
||||
} else if (line == object->field_height) {
|
||||
data.bb1 = data.tt1;
|
||||
}
|
||||
|
||||
object->method->interpolate_scanline (object, &data, out_data);
|
||||
out_data += object->output_stride;
|
||||
|
||||
/* copy a scanline */
|
||||
data.tt0 = cur_field;
|
||||
data.m0 = cur_field + (object->field_stride);
|
||||
data.bb0 = cur_field + (object->field_stride * 2);
|
||||
cur_field += object->field_stride;
|
||||
|
||||
if (last_field != NULL) {
|
||||
data.t1 = last_field;
|
||||
data.b1 = last_field + object->field_stride;
|
||||
}
|
||||
|
||||
if (second_last_field != NULL) {
|
||||
data.tt2 = second_last_field;
|
||||
data.m2 = second_last_field + (object->field_stride);
|
||||
data.bb2 = second_last_field + (object->field_stride * 2);
|
||||
second_last_field += object->field_stride;
|
||||
}
|
||||
|
||||
/* set valid data for corner cases */
|
||||
if (line == object->field_height) {
|
||||
data.bb0 = data.tt0;
|
||||
data.bb2 = data.tt2;
|
||||
data.b1 = data.t1;
|
||||
}
|
||||
|
||||
object->method->copy_scanline (object, &data, out_data);
|
||||
out_data += object->output_stride;
|
||||
line++;
|
||||
}
|
||||
|
||||
if (object->field_history[cur_field_idx].flags == PICTURE_INTERLACED_TOP) {
|
||||
/* double the last scanline of the top field */
|
||||
blit_packed422_scanline (out_data, cur_field, object->frame_width);
|
||||
}
|
||||
}
|
||||
|
||||
static GstFlowReturn
|
||||
gst_deinterlace2_chain (GstPad * pad, GstBuffer * buf)
|
||||
{
|
||||
//GstBuffer *out_buf = NULL;
|
||||
GstDeinterlace2 *object = NULL;
|
||||
|
||||
GstClockTime timestamp;
|
||||
|
||||
//GstFlowReturn ret = GST_FLOW_OK;
|
||||
|
||||
object = GST_DEINTERLACE2 (GST_PAD_PARENT (pad));
|
||||
|
||||
gst_deinterlace2_push_history (object, buf);
|
||||
buf = NULL;
|
||||
|
||||
if (object->method != NULL) {
|
||||
int cur_field_idx = 0;
|
||||
|
||||
/* Not enough fields in the history */
|
||||
if (object->history_count < object->method->fields_required + 1) {
|
||||
/* TODO: do bob or just forward frame */
|
||||
GST_DEBUG ("HistoryCount=%d", object->history_count);
|
||||
return GST_FLOW_OK;
|
||||
}
|
||||
|
||||
if (object->fields == GST_DEINTERLACE2_ALL)
|
||||
GST_DEBUG ("All fields");
|
||||
if (object->fields == GST_DEINTERLACE2_TF)
|
||||
GST_DEBUG ("Top fields");
|
||||
if (object->fields == GST_DEINTERLACE2_BF)
|
||||
GST_DEBUG ("Bottom fields");
|
||||
|
||||
cur_field_idx = object->history_count - object->method->fields_required;
|
||||
|
||||
if ((object->field_history[cur_field_idx].flags == PICTURE_INTERLACED_TOP
|
||||
&& object->fields == GST_DEINTERLACE2_TF) ||
|
||||
object->fields == GST_DEINTERLACE2_ALL) {
|
||||
GST_DEBUG ("deinterlacing top field");
|
||||
|
||||
/* create new buffer */
|
||||
object->out_buf = gst_buffer_new_and_alloc (object->frame_size);
|
||||
gst_buffer_set_caps (object->out_buf, GST_PAD_CAPS (object->srcpad));
|
||||
|
||||
/* do magic calculus */
|
||||
if (object->method->deinterlace_frame != NULL) {
|
||||
object->method->deinterlace_frame (object);
|
||||
|
||||
buf = gst_deinterlace2_pop_history (object);
|
||||
timestamp = GST_BUFFER_TIMESTAMP (buf);
|
||||
gst_buffer_unref (buf);
|
||||
|
||||
GST_BUFFER_TIMESTAMP (object->out_buf) = timestamp;
|
||||
gst_pad_push (object->srcpad, object->out_buf);
|
||||
}
|
||||
}
|
||||
/* no calculation done: remove excess field */
|
||||
else if (object->field_history[cur_field_idx].flags ==
|
||||
PICTURE_INTERLACED_TOP && object->fields == GST_DEINTERLACE2_BF) {
|
||||
GST_DEBUG ("Removing unused top field");
|
||||
buf = gst_deinterlace2_pop_history (object);
|
||||
gst_buffer_unref (buf);
|
||||
}
|
||||
|
||||
cur_field_idx = object->history_count - object->method->fields_required;
|
||||
|
||||
/* deinterlace bottom_field */
|
||||
if ((object->field_history[cur_field_idx].flags == PICTURE_INTERLACED_BOTTOM
|
||||
&& object->fields == GST_DEINTERLACE2_BF) ||
|
||||
object->fields == GST_DEINTERLACE2_ALL) {
|
||||
GST_DEBUG ("deinterlacing bottom field");
|
||||
|
||||
/* create new buffer */
|
||||
object->out_buf = gst_buffer_new_and_alloc (object->frame_size);
|
||||
gst_buffer_set_caps (object->out_buf, GST_PAD_CAPS (object->srcpad));
|
||||
|
||||
/* do magic calculus */
|
||||
if (object->method->deinterlace_frame != NULL) {
|
||||
object->method->deinterlace_frame (object);
|
||||
|
||||
buf = gst_deinterlace2_pop_history (object);
|
||||
timestamp = GST_BUFFER_TIMESTAMP (buf);
|
||||
gst_buffer_unref (buf);
|
||||
|
||||
GST_BUFFER_TIMESTAMP (object->out_buf) = timestamp;
|
||||
gst_pad_push (object->srcpad, object->out_buf);
|
||||
}
|
||||
}
|
||||
/* no calculation done: remove excess field */
|
||||
else if (object->field_history[cur_field_idx].flags ==
|
||||
PICTURE_INTERLACED_BOTTOM && object->fields == GST_DEINTERLACE2_TF) {
|
||||
GST_DEBUG ("Removing unused bottom field");
|
||||
buf = gst_deinterlace2_pop_history (object);
|
||||
gst_buffer_unref (buf);
|
||||
}
|
||||
|
||||
|
||||
} else {
|
||||
object->out_buf = gst_deinterlace2_pop_history (object);
|
||||
gst_pad_push (object->srcpad, object->out_buf);
|
||||
}
|
||||
GST_DEBUG ("----chain end ----\n\n");
|
||||
|
||||
return GST_FLOW_OK;
|
||||
}
|
||||
|
||||
static gboolean
|
||||
gst_deinterlace2_setcaps (GstPad * pad, GstCaps * caps)
|
||||
{
|
||||
gboolean res = TRUE;
|
||||
|
||||
GstDeinterlace2 *object = GST_DEINTERLACE2 (gst_pad_get_parent (pad));
|
||||
|
||||
GstPad *otherpad;
|
||||
|
||||
GstStructure *structure;
|
||||
|
||||
GstVideoFormat fmt;
|
||||
|
||||
guint32 fourcc;
|
||||
|
||||
otherpad = (pad == object->srcpad) ? object->sinkpad : object->srcpad;
|
||||
|
||||
if (!gst_pad_accept_caps (otherpad, caps)
|
||||
|| !gst_pad_set_caps (otherpad, caps))
|
||||
goto caps_not_accepted;
|
||||
|
||||
structure = gst_caps_get_structure (caps, 0);
|
||||
|
||||
res = gst_structure_get_int (structure, "width", &object->frame_width);
|
||||
res &= gst_structure_get_int (structure, "height", &object->frame_height);
|
||||
res &=
|
||||
gst_structure_get_fraction (structure, "framerate", &object->frame_rate_n,
|
||||
&object->frame_rate_d);
|
||||
res &= gst_structure_get_fourcc (structure, "format", &fourcc);
|
||||
/* TODO: get interlaced, field_layout, field_order */
|
||||
if (!res)
|
||||
goto invalid_caps;
|
||||
|
||||
/* TODO: introduce object->field_stride */
|
||||
object->field_height = object->frame_height / 2;
|
||||
|
||||
fmt = gst_video_format_from_fourcc (fourcc);
|
||||
|
||||
/* TODO: only true if fields are subbuffers of interlaced frames,
|
||||
change when the buffer-fields concept has landed */
|
||||
object->field_stride =
|
||||
gst_video_format_get_row_stride (fmt, 0, object->frame_width) * 2;
|
||||
object->output_stride =
|
||||
gst_video_format_get_row_stride (fmt, 0, object->frame_width);
|
||||
|
||||
/* in bytes */
|
||||
object->line_length =
|
||||
gst_video_format_get_row_stride (fmt, 0, object->frame_width);
|
||||
object->frame_size =
|
||||
gst_video_format_get_size (fmt, object->frame_width,
|
||||
object->frame_height);
|
||||
|
||||
GST_DEBUG_OBJECT (object, "Set caps: %" GST_PTR_FORMAT, caps);
|
||||
|
||||
done:
|
||||
|
||||
gst_object_unref (object);
|
||||
return res;
|
||||
|
||||
invalid_caps:
|
||||
res = FALSE;
|
||||
GST_ERROR_OBJECT (object, "Invalid caps: %" GST_PTR_FORMAT, caps);
|
||||
goto done;
|
||||
|
||||
caps_not_accepted:
|
||||
res = FALSE;
|
||||
GST_ERROR_OBJECT (object, "Caps not accepted: %" GST_PTR_FORMAT, caps);
|
||||
goto done;
|
||||
}
|
||||
|
||||
static gboolean
|
||||
gst_deinterlace2_sink_event (GstPad * pad, GstEvent * event)
|
||||
{
|
||||
gboolean res = TRUE;
|
||||
|
||||
GstDeinterlace2 *object = GST_DEINTERLACE2 (gst_pad_get_parent (pad));
|
||||
|
||||
GST_LOG_OBJECT (pad, "received %s event", GST_EVENT_TYPE_NAME (event));
|
||||
|
||||
switch (GST_EVENT_TYPE (event)) {
|
||||
case GST_EVENT_FLUSH_STOP:
|
||||
case GST_EVENT_EOS:
|
||||
case GST_EVENT_NEWSEGMENT:
|
||||
/* TODO: reset history */
|
||||
|
||||
/* fall through */
|
||||
default:
|
||||
res = gst_pad_event_default (pad, event);
|
||||
break;
|
||||
}
|
||||
|
||||
gst_object_unref (object);
|
||||
return res;
|
||||
}
|
||||
|
||||
static GstStateChangeReturn
|
||||
gst_deinterlace2_change_state (GstElement * element, GstStateChange transition)
|
||||
{
|
||||
GstStateChangeReturn ret;
|
||||
|
||||
switch (transition) {
|
||||
case GST_STATE_CHANGE_NULL_TO_READY:
|
||||
break;
|
||||
case GST_STATE_CHANGE_READY_TO_PAUSED:
|
||||
break;
|
||||
case GST_STATE_CHANGE_PAUSED_TO_PLAYING:
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
ret = GST_ELEMENT_CLASS (parent_class)->change_state (element, transition);
|
||||
if (ret != GST_STATE_CHANGE_SUCCESS)
|
||||
return ret;
|
||||
|
||||
switch (transition) {
|
||||
case GST_STATE_CHANGE_PLAYING_TO_PAUSED:
|
||||
break;
|
||||
case GST_STATE_CHANGE_PAUSED_TO_READY:
|
||||
/* TODO: reset history, clean up, etc */
|
||||
break;
|
||||
case GST_STATE_CHANGE_READY_TO_NULL:
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static gboolean
|
||||
gst_deinterlace2_src_event (GstPad * pad, GstEvent * event)
|
||||
{
|
||||
GstDeinterlace2 *object = GST_DEINTERLACE2 (gst_pad_get_parent (pad));
|
||||
|
||||
gboolean res;
|
||||
|
||||
GST_DEBUG_OBJECT (pad, "received %s event", GST_EVENT_TYPE_NAME (event));
|
||||
|
||||
switch (GST_EVENT_TYPE (event)) {
|
||||
default:
|
||||
res = gst_pad_event_default (pad, event);
|
||||
break;
|
||||
}
|
||||
|
||||
gst_object_unref (object);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
static gboolean
|
||||
gst_deinterlace2_src_query (GstPad * pad, GstQuery * query)
|
||||
{
|
||||
GstDeinterlace2 *object = GST_DEINTERLACE2 (gst_pad_get_parent (pad));
|
||||
|
||||
gboolean res = FALSE;
|
||||
|
||||
GST_LOG_OBJECT (object, "%s query", GST_QUERY_TYPE_NAME (query));
|
||||
|
||||
switch (GST_QUERY_TYPE (query)) {
|
||||
case GST_QUERY_LATENCY:
|
||||
{
|
||||
GstClockTime min, max;
|
||||
|
||||
gboolean live;
|
||||
|
||||
GstPad *peer;
|
||||
|
||||
if ((peer = gst_pad_get_peer (object->sinkpad))) {
|
||||
if ((res = gst_pad_query (peer, query))) {
|
||||
GstClockTime latency;
|
||||
|
||||
gst_query_parse_latency (query, &live, &min, &max);
|
||||
|
||||
GST_DEBUG ("Peer latency: min %"
|
||||
GST_TIME_FORMAT " max %" GST_TIME_FORMAT,
|
||||
GST_TIME_ARGS (min), GST_TIME_ARGS (max));
|
||||
|
||||
/* TODO: calculate our own latency from framerate
|
||||
* and object->method->fields_required */
|
||||
/* add our own latency */
|
||||
|
||||
latency =
|
||||
gst_util_uint64_scale (object->method->fields_required *
|
||||
GST_SECOND, object->frame_rate_d, object->frame_rate_n);
|
||||
|
||||
GST_DEBUG ("Our latency: min %" GST_TIME_FORMAT
|
||||
", max %" GST_TIME_FORMAT,
|
||||
GST_TIME_ARGS (latency), GST_TIME_ARGS (latency));
|
||||
|
||||
min += latency;
|
||||
if (max != GST_CLOCK_TIME_NONE)
|
||||
max += latency;
|
||||
else
|
||||
max = latency;
|
||||
|
||||
GST_DEBUG ("Calculated total latency : min %"
|
||||
GST_TIME_FORMAT " max %" GST_TIME_FORMAT,
|
||||
GST_TIME_ARGS (min), GST_TIME_ARGS (max));
|
||||
|
||||
gst_query_set_latency (query, live, min, max);
|
||||
}
|
||||
gst_object_unref (peer);
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
res = gst_pad_query_default (pad, query);
|
||||
break;
|
||||
}
|
||||
|
||||
gst_object_unref (object);
|
||||
return res;
|
||||
}
|
||||
|
||||
static const GstQueryType *
|
||||
gst_deinterlace2_src_query_types (GstPad * pad)
|
||||
{
|
||||
static const GstQueryType types[] = {
|
||||
GST_QUERY_LATENCY,
|
||||
GST_QUERY_NONE
|
||||
};
|
||||
return types;
|
||||
}
|
||||
|
||||
static gboolean
|
||||
plugin_init (GstPlugin * plugin)
|
||||
{
|
||||
GST_DEBUG_CATEGORY_INIT (deinterlace2_debug, "deinterlace2", 0,
|
||||
"Deinterlacer");
|
||||
|
||||
oil_init ();
|
||||
|
||||
if (!gst_element_register (plugin, "deinterlace2", GST_RANK_NONE,
|
||||
GST_TYPE_DEINTERLACE2)) {
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
GST_PLUGIN_DEFINE (GST_VERSION_MAJOR,
|
||||
GST_VERSION_MINOR,
|
||||
"deinterlace2",
|
||||
"Deinterlacer", plugin_init, VERSION, "LGPL", GST_PACKAGE_NAME,
|
||||
GST_PACKAGE_ORIGIN);
|
267
gst/deinterlace2/gstdeinterlace2.h
Normal file
267
gst/deinterlace2/gstdeinterlace2.h
Normal file
|
@ -0,0 +1,267 @@
|
|||
/*
|
||||
* GStreamer
|
||||
* Copyright (C) 2005 Martin Eikermann <meiker@upb.de>
|
||||
* Copyright (C) 2008 Sebastian Dröge <slomo@circular-chaos.org>
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Library General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Library General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Library General Public
|
||||
* License along with this library; if not, write to the
|
||||
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
* Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
|
||||
#ifndef __GST_DEINTERLACE_2_H__
|
||||
#define __GST_DEINTERLACE_2_H__
|
||||
|
||||
#include <liboil/liboil.h>
|
||||
#include <liboil/liboilfunction.h>
|
||||
#include <liboil/liboilcpu.h>
|
||||
|
||||
#include <gst/gst.h>
|
||||
#include <gst/base/gstbasetransform.h>
|
||||
|
||||
G_BEGIN_DECLS
|
||||
|
||||
/*
 * Standard GObject type macros for GstDeinterlace2.
 *
 * The *_CLASS variants operate on class structures: the cast macro must
 * yield a GstDeinterlace2Class pointer, and the check macro must use its own
 * parameter name in the expansion.
 */
#define GST_TYPE_DEINTERLACE2 \
  (gst_deinterlace2_get_type())
#define GST_DEINTERLACE2(obj) \
  (G_TYPE_CHECK_INSTANCE_CAST((obj),GST_TYPE_DEINTERLACE2,GstDeinterlace2))
#define GST_DEINTERLACE2_CLASS(klass) \
  (G_TYPE_CHECK_CLASS_CAST((klass),GST_TYPE_DEINTERLACE2,GstDeinterlace2Class))
#define GST_IS_DEINTERLACE2(obj) \
  (G_TYPE_CHECK_INSTANCE_TYPE((obj),GST_TYPE_DEINTERLACE2))
#define GST_IS_DEINTERLACE2_CLASS(klass) \
  (G_TYPE_CHECK_CLASS_TYPE((klass),GST_TYPE_DEINTERLACE2))
|
||||
|
||||
typedef struct _GstDeinterlace2 GstDeinterlace2;
|
||||
typedef struct _GstDeinterlace2Class GstDeinterlace2Class;
|
||||
|
||||
typedef struct deinterlace_setting_s deinterlace_setting_t;
|
||||
typedef struct deinterlace_method_s deinterlace_method_t;
|
||||
typedef struct deinterlace_scanline_data_s deinterlace_scanline_data_t;
|
||||
typedef struct deinterlace_frame_data_s deinterlace_frame_data_t;
|
||||
|
||||
/**
|
||||
* There are two scanline functions that every deinterlacer plugin
|
||||
* must implement to do its work: one for a 'copy' and one for
|
||||
* an 'interpolate' for the currently active field. This is so that
|
||||
* while plugins may be delaying fields, the external API assumes that
|
||||
* the plugin is completely realtime.
|
||||
*
|
||||
* Each deinterlacing routine can require data from up to four fields.
|
||||
* The most recent field captured is field 0, and increasing numbers go
|
||||
* backwards in time.
|
||||
*/
|
||||
struct deinterlace_scanline_data_s
|
||||
{
|
||||
guint8 *tt0, *t0, *m0, *b0, *bb0;
|
||||
guint8 *tt1, *t1, *m1, *b1, *bb1;
|
||||
guint8 *tt2, *t2, *m2, *b2, *bb2;
|
||||
guint8 *tt3, *t3, *m3, *b3, *bb3;
|
||||
int bottom_field;
|
||||
};
|
||||
|
||||
/**
|
||||
* | t-3 t-2 t-1 t
|
||||
* | Field 3 | Field 2 | Field 1 | Field 0 |
|
||||
* | TT3 | | TT1 | |
|
||||
* | | T2 | | T0 |
|
||||
* | M3 | | M1 | |
|
||||
* | | B2 | | B0 |
|
||||
* | BB3 | | BB1 | |
|
||||
*
|
||||
* While all pointers are passed in, each plugin is only guaranteed for
|
||||
* the ones it indicates it requires (in the fields_required parameter)
|
||||
* to be available.
|
||||
*
|
||||
* Pointers are always to scanlines in the standard packed 4:2:2 format.
|
||||
*/
|
||||
typedef void (*deinterlace_interp_scanline_t) (GstDeinterlace2 * object,
|
||||
deinterlace_scanline_data_t * data, guint8 * output);
|
||||
/**
|
||||
* For the copy scanline, the API is basically the same, except that
|
||||
* we're given a scanline to 'copy'.
|
||||
*
|
||||
* | t-3 t-2 t-1 t
|
||||
* | Field 3 | Field 2 | Field 1 | Field 0 |
|
||||
* | | TT2 | | TT0 |
|
||||
* | T3 | | T1 | |
|
||||
* | | M2 | | M0 |
|
||||
* | B3 | | B1 | |
|
||||
* | | BB2 | | BB0 |
|
||||
*/
|
||||
typedef void (*deinterlace_copy_scanline_t) (GstDeinterlace2 * object,
|
||||
deinterlace_scanline_data_t * data, guint8 * output);
|
||||
|
||||
/**
|
||||
* The frame function is for deinterlacing plugins that can only act
|
||||
* on whole frames, rather than on a scanline at a time.
|
||||
*/
|
||||
struct deinterlace_frame_data_s
|
||||
{
|
||||
guint8 *f0;
|
||||
guint8 *f1;
|
||||
guint8 *f2;
|
||||
guint8 *f3;
|
||||
};
|
||||
|
||||
typedef void (*deinterlace_frame_t) (GstDeinterlace2 * object);
|
||||
|
||||
|
||||
/**
|
||||
* This structure defines the deinterlacer plugin.
|
||||
*/
|
||||
struct deinterlace_method_s
|
||||
{
|
||||
int version;
|
||||
const char *name;
|
||||
const char *short_name;
|
||||
int fields_required;
|
||||
int accelrequired;
|
||||
int doscalerbob;
|
||||
int numsettings;
|
||||
deinterlace_setting_t *settings;
|
||||
int scanlinemode;
|
||||
deinterlace_interp_scanline_t interpolate_scanline;
|
||||
deinterlace_copy_scanline_t copy_scanline;
|
||||
deinterlace_frame_t deinterlace_frame;
|
||||
const char *description[10];
|
||||
};
|
||||
|
||||
/**
|
||||
* Registers a new deinterlace method.
|
||||
*/
|
||||
void register_deinterlace_method (deinterlace_method_t * method);
|
||||
|
||||
/**
|
||||
* Returns how many deinterlacing methods are available.
|
||||
*/
|
||||
int get_num_deinterlace_methods (void);
|
||||
|
||||
/**
|
||||
* Returns the specified method in the list.
|
||||
*/
|
||||
deinterlace_method_t *get_deinterlace_method (int i);
|
||||
|
||||
/**
|
||||
* Builds the usable method list.
|
||||
*/
|
||||
void filter_deinterlace_methods (int accel, int fieldsavailable);
|
||||
|
||||
#define MAX_FIELD_HISTORY 10
|
||||
|
||||
#define PICTURE_PROGRESSIVE 0
|
||||
#define PICTURE_INTERLACED_BOTTOM 1
|
||||
#define PICTURE_INTERLACED_TOP 2
|
||||
#define PICTURE_INTERLACED_MASK (PICTURE_INTERLACED_BOTTOM | PICTURE_INTERLACED_TOP)
|
||||
|
||||
typedef void (MEMCPY_FUNC) (void *pOutput, const void *pInput, size_t nSize);
|
||||
|
||||
typedef struct
|
||||
{
|
||||
/* pointer to the start of data for this field */
|
||||
GstBuffer *buf;
|
||||
/* see PICTURE_ flags */
|
||||
guint flags;
|
||||
} GstPicture;
|
||||
|
||||
typedef enum
|
||||
{
|
||||
GST_DEINTERLACE2_TOM,
|
||||
GST_DEINTERLACE2_GREEDY_H,
|
||||
GST_DEINTERLACE2_GREEDY_L,
|
||||
GST_DEINTERLACE2_VFIR
|
||||
} GstDeinterlace2Methods;
|
||||
|
||||
typedef enum
|
||||
{
|
||||
GST_DEINTERLACE2_ALL, /* All (missing data is interp.) */
|
||||
GST_DEINTERLACE2_TF, /* Top Fields Only */
|
||||
GST_DEINTERLACE2_BF /* Bottom Fields Only */
|
||||
} GstDeinterlace2Fields;
|
||||
|
||||
typedef enum
|
||||
{
|
||||
GST_DEINTERLACE2_LAYOUT_AUTO,
|
||||
GST_DEINTERLACE2_LAYOUT_TFF,
|
||||
GST_DEINTERLACE2_LAYOUT_BFF
|
||||
} GstDeinterlace2FieldLayout;
|
||||
|
||||
struct _GstDeinterlace2
|
||||
{
|
||||
GstElement parent;
|
||||
|
||||
GstPad *srcpad, *sinkpad;
|
||||
|
||||
guint history_count;
|
||||
|
||||
guint cpu_feature_flags;
|
||||
GstDeinterlace2FieldLayout field_layout;
|
||||
|
||||
guint frame_size;
|
||||
gint frame_rate_n, frame_rate_d;
|
||||
|
||||
GstDeinterlace2Fields fields;
|
||||
|
||||
GstDeinterlace2Methods method_id;
|
||||
deinterlace_method_t *method;
|
||||
|
||||
/* The most recent pictures
|
||||
PictureHistory[0] is always the most recent.
|
||||
Pointers are NULL if the picture in question isn't valid, e.g. because
|
||||
the program just started or a picture was skipped.
|
||||
*/
|
||||
GstPicture field_history[MAX_FIELD_HISTORY];
|
||||
|
||||
/* Current overlay buffer pointer. */
|
||||
GstBuffer *out_buf;
|
||||
|
||||
/* Overlay pitch (number of bytes between scanlines). */
|
||||
guint output_stride;
|
||||
|
||||
/* Number of bytes of actual data in each scanline. May be less than
|
||||
output_stride since the overlay's scanlines might have alignment
|
||||
requirements. Generally equal to frame_width * 2.
|
||||
*/
|
||||
guint line_length;
|
||||
|
||||
/* Number of pixels in each scanline. */
|
||||
gint frame_width;
|
||||
|
||||
/* Number of scanlines per frame. */
|
||||
gint frame_height;
|
||||
|
||||
/* Number of scanlines per field. frame_height / 2, mostly for
|
||||
cleanliness so we don't have to keep dividing frame_height by 2.
|
||||
*/
|
||||
gint field_height;
|
||||
|
||||
/* Function pointer to optimized memcpy function */
|
||||
MEMCPY_FUNC *pMemcpy;
|
||||
|
||||
/* distance between lines in image
|
||||
need not match the pixel width
|
||||
*/
|
||||
guint field_stride;
|
||||
|
||||
gboolean bottom_field;
|
||||
};
|
||||
|
||||
struct _GstDeinterlace2Class
|
||||
{
|
||||
GstElementClass parent_class;
|
||||
};
|
||||
|
||||
GType gst_deinterlace2_get_type (void);
|
||||
|
||||
G_END_DECLS
|
||||
#endif /* __GST_DEINTERLACE_2_H__ */
|
207
gst/deinterlace2/tvtime/greedy.c
Normal file
207
gst/deinterlace2/tvtime/greedy.c
Normal file
|
@ -0,0 +1,207 @@
|
|||
/*
|
||||
*
|
||||
* GStreamer
|
||||
* Copyright (c) 2000 Tom Barry All rights reserved.
|
||||
* mmx.h port copyright (c) 2002 Billy Biggs <vektor@dumbterm.net>.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Library General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Library General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Library General Public
|
||||
* License along with this library; if not, write to the
|
||||
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
* Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Relicensed for GStreamer from GPL to LGPL with permit from Tom Barry
|
||||
* and Billy Biggs.
|
||||
* See: http://bugzilla.gnome.org/show_bug.cgi?id=163578
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#if defined (__SVR4) && defined (__sun)
|
||||
# include <sys/int_types.h>
|
||||
#else
|
||||
# include <stdint.h>
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
# include "config.h"
|
||||
#endif
|
||||
|
||||
#include "mmx.h"
|
||||
#include "sse.h"
|
||||
#include "gstdeinterlace2.h"
|
||||
#include "speedtools.h"
|
||||
#include "speedy.h"
|
||||
|
||||
// This is a simple lightweight DeInterlace method that uses little CPU time
|
||||
// but gives very good results for low or intermediate motion.
|
||||
// It defers frames by one field, but that does not seem to produce noticeable
|
||||
// lip sync problems.
|
||||
//
|
||||
// The method used is to take either the older or newer weave pixel depending
|
||||
// upon which gives the smaller comb factor, and then clip to avoid large damage
|
||||
// when wrong.
|
||||
//
|
||||
// I'd intended this to be part of a larger more elaborate method added to
|
||||
// Blended Clip but this gives too good results for the CPU to ignore here.
|
||||
|
||||
static void
|
||||
copy_scanline (GstDeinterlace2 * object,
|
||||
deinterlace_scanline_data_t * data, uint8_t * output)
|
||||
{
|
||||
blit_packed422_scanline (output, data->m1, object->frame_width);
|
||||
}
|
||||
|
||||
static int GreedyMaxComb = 15;
|
||||
|
||||
static void
|
||||
deinterlace_greedy_packed422_scanline_mmxext (GstDeinterlace2 * object,
|
||||
deinterlace_scanline_data_t * data, uint8_t * output)
|
||||
{
|
||||
#ifdef HAVE_CPU_I386
|
||||
mmx_t MaxComb;
|
||||
|
||||
uint8_t *m0 = data->m0;
|
||||
|
||||
uint8_t *t1 = data->t1;
|
||||
|
||||
uint8_t *b1 = data->b1;
|
||||
|
||||
uint8_t *m2 = data->m2;
|
||||
|
||||
int width = object->frame_width;
|
||||
|
||||
// How badly do we let it weave? 0-255
|
||||
MaxComb.ub[0] = GreedyMaxComb;
|
||||
MaxComb.ub[1] = GreedyMaxComb;
|
||||
MaxComb.ub[2] = GreedyMaxComb;
|
||||
MaxComb.ub[3] = GreedyMaxComb;
|
||||
MaxComb.ub[4] = GreedyMaxComb;
|
||||
MaxComb.ub[5] = GreedyMaxComb;
|
||||
MaxComb.ub[6] = GreedyMaxComb;
|
||||
MaxComb.ub[7] = GreedyMaxComb;
|
||||
|
||||
// L2 == m0
|
||||
// L1 == t1
|
||||
// L3 == b1
|
||||
// LP2 == m2
|
||||
|
||||
width /= 4;
|
||||
while (width--) {
|
||||
movq_m2r (*t1, mm1); // L1
|
||||
movq_m2r (*m0, mm2); // L2
|
||||
movq_m2r (*b1, mm3); // L3
|
||||
movq_m2r (*m2, mm0); // LP2
|
||||
|
||||
// average L1 and L3 leave result in mm4
|
||||
movq_r2r (mm1, mm4); // L1
|
||||
pavgb_r2r (mm3, mm4); // (L1 + L3)/2
|
||||
|
||||
|
||||
// get abs value of possible L2 comb
|
||||
movq_r2r (mm2, mm7); // L2
|
||||
psubusb_r2r (mm4, mm7); // L2 - avg
|
||||
movq_r2r (mm4, mm5); // avg
|
||||
psubusb_r2r (mm2, mm5); // avg - L2
|
||||
por_r2r (mm7, mm5); // abs(avg-L2)
|
||||
movq_r2r (mm4, mm6); // copy of avg for later
|
||||
|
||||
|
||||
// get abs value of possible LP2 comb
|
||||
movq_r2r (mm0, mm7); // LP2
|
||||
psubusb_r2r (mm4, mm7); // LP2 - avg
|
||||
psubusb_r2r (mm0, mm4); // avg - LP2
|
||||
por_r2r (mm7, mm4); // abs(avg-LP2)
|
||||
|
||||
// use L2 or LP2 depending upon which makes smaller comb
|
||||
psubusb_r2r (mm5, mm4); // see if it goes to zero
|
||||
psubusb_r2r (mm5, mm5); // 0
|
||||
pcmpeqb_r2r (mm5, mm4); // if (mm4=0) then FF else 0
|
||||
pcmpeqb_r2r (mm4, mm5); // opposite of mm4
|
||||
|
||||
// if Comb(LP2) <= Comb(L2) then mm4=ff, mm5=0 else mm4=0, mm5 = 55
|
||||
pand_r2r (mm2, mm5); // use L2 if mm5 == ff, else 0
|
||||
pand_r2r (mm0, mm4); // use LP2 if mm4 = ff, else 0
|
||||
por_r2r (mm5, mm4); // may the best win
|
||||
|
||||
// Now lets clip our chosen value to be not outside of the range
|
||||
// of the high/low range L1-L3 by more than abs(L1-L3)
|
||||
// This allows some comb but limits the damages and also allows more
|
||||
// detail than a boring oversmoothed clip.
|
||||
|
||||
movq_r2r (mm1, mm2); // copy L1
|
||||
psubusb_r2r (mm3, mm2); // - L3, with saturation
|
||||
paddusb_r2r (mm3, mm2); // now = Max(L1,L3)
|
||||
|
||||
pcmpeqb_r2r (mm7, mm7); // all ffffffff
|
||||
psubusb_r2r (mm1, mm7); // - L1
|
||||
paddusb_r2r (mm7, mm3); // add, may sat at fff..
|
||||
psubusb_r2r (mm7, mm3); // now = Min(L1,L3)
|
||||
|
||||
// allow the value to be above the high or below the low by amt of MaxComb
|
||||
paddusb_m2r (MaxComb, mm2); // increase max by diff
|
||||
psubusb_m2r (MaxComb, mm3); // lower min by diff
|
||||
|
||||
psubusb_r2r (mm3, mm4); // best - Min
|
||||
paddusb_r2r (mm3, mm4); // now = Max(best,Min(L1,L3)
|
||||
|
||||
pcmpeqb_r2r (mm7, mm7); // all ffffffff
|
||||
psubusb_r2r (mm4, mm7); // - Max(best,Min(best,L3)
|
||||
paddusb_r2r (mm7, mm2); // add may sat at FFF..
|
||||
psubusb_r2r (mm7, mm2); // now = Min( Max(best, Min(L1,L3), L2 )=L2 clipped
|
||||
|
||||
movntq_r2m (mm2, *output); // move in our clipped best
|
||||
|
||||
// Advance to the next set of pixels.
|
||||
output += 8;
|
||||
m0 += 8;
|
||||
t1 += 8;
|
||||
b1 += 8;
|
||||
m2 += 8;
|
||||
}
|
||||
sfence ();
|
||||
emms ();
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
static deinterlace_method_t greedyl_method = {
|
||||
0, //DEINTERLACE_PLUGIN_API_VERSION,
|
||||
"Motion Adaptive: Simple Detection",
|
||||
"AdaptiveSimple",
|
||||
3,
|
||||
OIL_IMPL_FLAG_MMXEXT,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
1,
|
||||
copy_scanline,
|
||||
deinterlace_greedy_packed422_scanline_mmxext,
|
||||
0,
|
||||
{"Uses heuristics to detect motion in the input",
|
||||
"frames and reconstruct image detail where",
|
||||
"possible. Use this for high quality output",
|
||||
"even on monitors set to an arbitrary refresh",
|
||||
"rate.",
|
||||
"",
|
||||
"Simple detection uses linear interpolation",
|
||||
"where motion is detected, using a two-field",
|
||||
"buffer. This is the Greedy: Low Motion",
|
||||
"deinterlacer from DScaler."}
|
||||
};
|
||||
|
||||
deinterlace_method_t *
|
||||
dscaler_greedyl_get_method (void)
|
||||
{
|
||||
return &greedyl_method;
|
||||
}
|
307
gst/deinterlace2/tvtime/greedyh.asm
Normal file
307
gst/deinterlace2/tvtime/greedyh.asm
Normal file
|
@ -0,0 +1,307 @@
|
|||
/*
|
||||
*
|
||||
* GStreamer
|
||||
* Copyright (c) 2001 Tom Barry. All rights reserved.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Library General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Library General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Library General Public
|
||||
* License along with this library; if not, write to the
|
||||
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
* Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
|
||||
|
||||
/*
|
||||
* Relicensed for GStreamer from GPL to LGPL with permit from Tom Barry.
|
||||
* See: http://bugzilla.gnome.org/show_bug.cgi?id=163578
|
||||
*/
|
||||
|
||||
|
||||
#include "x86-64_macros.inc"
|
||||
|
||||
void FUNCT_NAME( GstDeinterlace2 *object)
|
||||
{
|
||||
int64_t i;
|
||||
int InfoIsOdd = 0;
|
||||
|
||||
// in tight loop some vars are accessed faster in local storage
|
||||
int64_t YMask = 0x00ff00ff00ff00ffull; // to keep only luma
|
||||
int64_t UVMask = 0xff00ff00ff00ff00ull; // to keep only chroma
|
||||
int64_t ShiftMask = 0xfefffefffefffeffull; // to avoid shifting chroma to luma
|
||||
int64_t QW256 = 0x0100010001000100ull; // 4 256's
|
||||
|
||||
// Set up our two parms that are actually evaluated for each pixel
|
||||
i=GreedyMaxComb;
|
||||
int64_t MaxComb = i << 56 | i << 48 | i << 40 | i << 32 | i << 24 | i << 16 | i << 8 | i;
|
||||
|
||||
i = GreedyMotionThreshold; // scale to range of 0-257
|
||||
int64_t MotionThreshold = i << 48 | i << 32 | i << 16 | i | UVMask;
|
||||
|
||||
i = GreedyMotionSense; // scale to range of 0-257
|
||||
int64_t MotionSense = i << 48 | i << 32 | i << 16 | i;
|
||||
|
||||
int Line;
|
||||
long LoopCtr;
|
||||
unsigned int Pitch = object->field_stride;
|
||||
|
||||
unsigned char* L1; // ptr to Line1, of 3
|
||||
unsigned char* L2; // ptr to Line2, the weave line
|
||||
unsigned char* L3; // ptr to Line3
|
||||
|
||||
unsigned char* L2P; // ptr to prev Line2
|
||||
unsigned char* Dest = GST_BUFFER_DATA(object->out_buf);
|
||||
|
||||
int64_t QW256B;
|
||||
int64_t LastAvg=0; //interp value from left qword
|
||||
|
||||
i = 0xffffffff - 256;
|
||||
QW256B = i << 48 | i << 32 | i << 16 | i; // save a couple instr on PMINSW instruct.
|
||||
|
||||
|
||||
// copy first even line no matter what, and the first odd line if we're
|
||||
// processing an EVEN field. (note diff from other deint rtns.)
|
||||
|
||||
if (object->field_history[object->history_count-1].flags == PICTURE_INTERLACED_BOTTOM) {
|
||||
InfoIsOdd = 1;
|
||||
|
||||
L1 = GST_BUFFER_DATA(object->field_history[object->history_count-2].buf);
|
||||
L2 = GST_BUFFER_DATA(object->field_history[object->history_count-1].buf);
|
||||
L3 = L1 + Pitch;
|
||||
L2P = GST_BUFFER_DATA(object->field_history[object->history_count-3].buf);
|
||||
|
||||
// copy first even line
|
||||
object->pMemcpy(Dest, L1, object->line_length);
|
||||
Dest += object->output_stride;
|
||||
}
|
||||
else {
|
||||
InfoIsOdd = 0;
|
||||
L1 = GST_BUFFER_DATA(object->field_history[object->history_count-2].buf);
|
||||
L2 = GST_BUFFER_DATA(object->field_history[object->history_count-1].buf) + Pitch;
|
||||
L3 = L1 + Pitch;
|
||||
L2P = GST_BUFFER_DATA(object->field_history[object->history_count-3].buf) + Pitch;
|
||||
|
||||
// copy first even line
|
||||
object->pMemcpy(Dest, GST_BUFFER_DATA(object->field_history[0].buf), object->line_length);
|
||||
Dest += object->output_stride;
|
||||
// then first odd line
|
||||
object->pMemcpy(Dest, L1, object->line_length);
|
||||
Dest += object->output_stride;
|
||||
}
|
||||
|
||||
|
||||
long oldbx;
|
||||
|
||||
for (Line = 0; Line < (object->field_height - 1); ++Line) {
|
||||
LoopCtr = object->line_length / 8 - 1; // there are LineLength / 8 qwords per line but do 1 less, adj at end of loop
|
||||
|
||||
// For ease of reading, the comments below assume that we're operating on an odd
|
||||
// field (i.e., that InfoIsOdd is true). Assume the obvious for even lines..
|
||||
__asm__ __volatile__
|
||||
(
|
||||
// save ebx (-fPIC)
|
||||
MOVX" %%"XBX", %[oldbx]\n\t"
|
||||
|
||||
MOVX" %[L1], %%"XAX"\n\t"
|
||||
LEAX" 8(%%"XAX"), %%"XBX"\n\t" // next qword needed by DJR
|
||||
MOVX" %[L3], %%"XCX"\n\t"
|
||||
SUBX" %%"XAX", %%"XCX"\n\t" // carry L3 addr as an offset
|
||||
MOVX" %[L2P], %%"XDX"\n\t"
|
||||
MOVX" %[L2], %%"XSI"\n\t"
|
||||
MOVX" %[Dest], %%"XDI"\n\t" // DL1 if Odd or DL2 if Even
|
||||
|
||||
".align 8\n\t"
|
||||
"1:\n\t"
|
||||
|
||||
"movq (%%"XSI"), %%mm0\n\t" // L2 - the newest weave pixel value
|
||||
"movq (%%"XAX"), %%mm1\n\t" // L1 - the top pixel
|
||||
"movq (%%"XDX"), %%mm2\n\t" // L2P - the prev weave pixel
|
||||
"movq (%%"XAX", %%"XCX"), %%mm3\n\t" // L3, next odd row
|
||||
"movq %%mm1, %%mm6\n\t" // L1 - get simple single pixel interp
|
||||
// pavgb mm6, mm3 // use macro below
|
||||
V_PAVGB ("%%mm6", "%%mm3", "%%mm4", "%[ShiftMask]")
|
||||
|
||||
// DJR - Diagonal Jaggie Reduction
|
||||
// In the event that we are going to use an average (Bob) pixel we do not want a jagged
|
||||
// stair step effect. To combat this we avg in the 2 horizontally adjacen pixels into the
|
||||
// interpolated Bob mix. This will do horizontal smoothing for only the Bob'd pixels.
|
||||
|
||||
"movq %[LastAvg], %%mm4\n\t" // the bob value from prev qword in row
|
||||
"movq %%mm6, %[LastAvg]\n\t" // save for next pass
|
||||
"psrlq $48, %%mm4\n\t" // right justify 1 pixel
|
||||
"movq %%mm6, %%mm7\n\t" // copy of simple bob pixel
|
||||
"psllq $16, %%mm7\n\t" // left justify 3 pixels
|
||||
"por %%mm7, %%mm4\n\t" // and combine
|
||||
|
||||
"movq (%%"XBX"), %%mm5\n\t" // next horiz qword from L1
|
||||
// pavgb mm5, qword ptr[ebx+ecx] // next horiz qword from L3, use macro below
|
||||
V_PAVGB ("%%mm5", "(%%"XBX",%%"XCX")", "%%mm7", "%[ShiftMask]")
|
||||
"psllq $48, %%mm5\n\t" // left just 1 pixel
|
||||
"movq %%mm6, %%mm7\n\t" // another copy of simple bob pixel
|
||||
"psrlq $16, %%mm7\n\t" // right just 3 pixels
|
||||
"por %%mm7, %%mm5\n\t" // combine
|
||||
// pavgb mm4, mm5 // avg of forward and prev by 1 pixel, use macro
|
||||
V_PAVGB ("%%mm4", "%%mm5", "%%mm5", "%[ShiftMask]") // mm5 gets modified if MMX
|
||||
// pavgb mm6, mm4 // avg of center and surround interp vals, use macro
|
||||
V_PAVGB ("%%mm6", "%%mm4", "%%mm7", "%[ShiftMask]")
|
||||
|
||||
// Don't do any more averaging than needed for mmx. It hurts performance and causes rounding errors.
|
||||
#ifndef IS_MMX
|
||||
// pavgb mm4, mm6 // 1/4 center, 3/4 adjacent
|
||||
V_PAVGB ("%%mm4", "%%mm6", "%%mm7", "%[ShiftMask]")
|
||||
// pavgb mm6, mm4 // 3/8 center, 5/8 adjacent
|
||||
V_PAVGB ("%%mm6", "%%mm4", "%%mm7", "%[ShiftMask]")
|
||||
#endif
|
||||
|
||||
// get abs value of possible L2 comb
|
||||
"movq %%mm6, %%mm4\n\t" // work copy of interp val
|
||||
"movq %%mm2, %%mm7\n\t" // L2
|
||||
"psubusb %%mm4, %%mm7\n\t" // L2 - avg
|
||||
"movq %%mm4, %%mm5\n\t" // avg
|
||||
"psubusb %%mm2, %%mm5\n\t" // avg - L2
|
||||
"por %%mm7, %%mm5\n\t" // abs(avg-L2)
|
||||
|
||||
// get abs value of possible L2P comb
|
||||
"movq %%mm0, %%mm7\n\t" // L2P
|
||||
"psubusb %%mm4, %%mm7\n\t" // L2P - avg
|
||||
"psubusb %%mm0, %%mm4\n\t" // avg - L2P
|
||||
"por %%mm7, %%mm4\n\t" // abs(avg-L2P)
|
||||
|
||||
// use L2 or L2P depending upon which makes smaller comb
|
||||
"psubusb %%mm5, %%mm4\n\t" // see if it goes to zero
|
||||
"psubusb %%mm5, %%mm5\n\t" // 0
|
||||
"pcmpeqb %%mm5, %%mm4\n\t" // if (mm4=0) then FF else 0
|
||||
"pcmpeqb %%mm4, %%mm5\n\t" // opposite of mm4
|
||||
|
||||
// if Comb(L2P) <= Comb(L2) then mm4=ff, mm5=0 else mm4=0, mm5 = 55
|
||||
"pand %%mm2, %%mm5\n\t" // use L2 if mm5 == ff, else 0
|
||||
"pand %%mm0, %%mm4\n\t" // use L2P if mm4 = ff, else 0
|
||||
"por %%mm5, %%mm4\n\t" // may the best win
|
||||
|
||||
// Inventory: at this point we have the following values:
|
||||
// mm0 = L2P (or L2)
|
||||
// mm1 = L1
|
||||
// mm2 = L2 (or L2P)
|
||||
// mm3 = L3
|
||||
// mm4 = the best of L2,L2P weave pixel, base upon comb
|
||||
// mm6 = the avg interpolated value, if we need to use it
|
||||
|
||||
// Let's measure movement, as how much the weave pixel has changed
|
||||
"movq %%mm2, %%mm7\n\t"
|
||||
"psubusb %%mm0, %%mm2\n\t"
|
||||
"psubusb %%mm7, %%mm0\n\t"
|
||||
"por %%mm2, %%mm0\n\t" // abs value of change, used later
|
||||
|
||||
// Now lets clip our chosen value to be not outside of the range
|
||||
// of the high/low range L1-L3 by more than MaxComb.
|
||||
// This allows some comb but limits the damages and also allows more
|
||||
// detail than a boring oversmoothed clip.
|
||||
"movq %%mm1, %%mm2\n\t" // copy L1
|
||||
// pmaxub mm2, mm3 // use macro
|
||||
V_PMAXUB ("%%mm2", "%%mm3") // now = Max(L1,L3)
|
||||
"movq %%mm1, %%mm5\n\t" // copy L1
|
||||
// pminub mm5, mm3 // now = Min(L1,L3), use macro
|
||||
V_PMINUB ("%%mm5", "%%mm3", "%%mm7")
|
||||
// allow the value to be above the high or below the low by amt of MaxComb
|
||||
"psubusb %[MaxComb], %%mm5\n\t" // lower min by diff
|
||||
"paddusb %[MaxComb], %%mm2\n\t" // increase max by diff
|
||||
// pmaxub mm4, mm5 // now = Max(best,Min(L1,L3) use macro
|
||||
V_PMAXUB ("%%mm4", "%%mm5")
|
||||
// pminub mm4, mm2 // now = Min( Max(best, Min(L1,L3), L2 )=L2 clipped
|
||||
V_PMINUB ("%%mm4", "%%mm2", "%%mm7")
|
||||
|
||||
// Blend weave pixel with bob pixel, depending on motion val in mm0
|
||||
"psubusb %[MotionThreshold], %%mm0\n\t"// test Threshold, clear chroma change >>>??
|
||||
"pmullw %[MotionSense], %%mm0\n\t" // mul by user factor, keep low 16 bits
|
||||
"movq %[QW256], %%mm7\n\t"
|
||||
#ifdef HAVE_SSE
|
||||
"pminsw %%mm7, %%mm0\n\t" // max = 256
|
||||
#else
|
||||
"paddusw %[QW256B], %%mm0\n\t" // add, may sat at fff..
|
||||
"psubusw %[QW256B], %%mm0\n\t" // now = Min(L1,256)
|
||||
#endif
|
||||
"psubusw %%mm0, %%mm7\n\t" // so the 2 sum to 256, weighted avg
|
||||
"movq %%mm4, %%mm2\n\t" // save weave chroma info before trashing
|
||||
"pand %[YMask], %%mm4\n\t" // keep only luma from calc'd value
|
||||
"pmullw %%mm7, %%mm4\n\t" // use more weave for less motion
|
||||
"pand %[YMask], %%mm6\n\t" // keep only luma from calc'd value
|
||||
"pmullw %%mm0, %%mm6\n\t" // use more bob for large motion
|
||||
"paddusw %%mm6, %%mm4\n\t" // combine
|
||||
"psrlw $8, %%mm4\n\t" // div by 256 to get weighted avg
|
||||
|
||||
// chroma comes from weave pixel
|
||||
"pand %[UVMask], %%mm2\n\t" // keep chroma
|
||||
"por %%mm4, %%mm2\n\t" // and combine
|
||||
|
||||
V_MOVNTQ ("(%%"XDI")", "%%mm2") // move in our clipped best, use macro
|
||||
|
||||
// bump ptrs and loop
|
||||
LEAX" 8(%%"XAX"), %%"XAX"\n\t"
|
||||
LEAX" 8(%%"XBX"), %%"XBX"\n\t"
|
||||
LEAX" 8(%%"XDX"), %%"XDX"\n\t"
|
||||
LEAX" 8(%%"XDI"), %%"XDI"\n\t"
|
||||
LEAX" 8(%%"XSI"), %%"XSI"\n\t"
|
||||
DECX" %[LoopCtr]\n\t"
|
||||
"jg 1b\n\t" // loop if not to last line
|
||||
// note P-III default assumes backward branches taken
|
||||
"jl 1f\n\t" // done
|
||||
MOVX" %%"XAX", %%"XBX"\n\t" // sharpness lookahead 1 byte only, be wrong on 1
|
||||
"jmp 1b\n\t"
|
||||
|
||||
"1:\n\t"
|
||||
MOVX" %[oldbx], %%"XBX"\n\t"
|
||||
|
||||
: /* no outputs */
|
||||
|
||||
: [LastAvg] "m"(LastAvg),
|
||||
[L1] "m"(L1),
|
||||
[L3] "m"(L3),
|
||||
[L2P] "m"(L2P),
|
||||
[L2] "m"(L2),
|
||||
[Dest] "m"(Dest),
|
||||
[ShiftMask] "m"(ShiftMask),
|
||||
[MaxComb] "m"(MaxComb),
|
||||
[MotionThreshold] "m"(MotionThreshold),
|
||||
[MotionSense] "m"(MotionSense),
|
||||
[QW256B] "m"(QW256B),
|
||||
[YMask] "m"(YMask),
|
||||
[UVMask] "m"(UVMask),
|
||||
[LoopCtr] "m"(LoopCtr),
|
||||
[QW256] "m"(QW256),
|
||||
[oldbx] "m"(oldbx)
|
||||
|
||||
: XAX, XCX, XDX, XSI, XDI,
|
||||
#ifdef HAVE_CPU_I386
|
||||
"st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)",
|
||||
#endif
|
||||
"mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7",
|
||||
"memory", "cc"
|
||||
);
|
||||
|
||||
Dest += object->output_stride;
|
||||
object->pMemcpy(Dest, L3, object->line_length);
|
||||
Dest += object->output_stride;
|
||||
|
||||
L1 += Pitch;
|
||||
L2 += Pitch;
|
||||
L3 += Pitch;
|
||||
L2P += Pitch;
|
||||
}
|
||||
|
||||
if (InfoIsOdd) {
|
||||
object->pMemcpy(Dest, L2, object->line_length);
|
||||
}
|
||||
|
||||
// clear out the MMX registers ready for doing floating point again
|
||||
#ifdef HAVE_CPU_I386
|
||||
__asm__ __volatile__ ("emms\n\t");
|
||||
#endif
|
||||
}
|
148
gst/deinterlace2/tvtime/greedyh.c
Normal file
148
gst/deinterlace2/tvtime/greedyh.c
Normal file
|
@ -0,0 +1,148 @@
|
|||
/*
|
||||
*
|
||||
* GStreamer
|
||||
* Copyright (C) 2004 Billy Biggs <vektor@dumbterm.net>
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Library General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Library General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Library General Public
|
||||
* License along with this library; if not, write to the
|
||||
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
* Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Relicensed for GStreamer from GPL to LGPL with permit from Billy Biggs.
|
||||
* See: http://bugzilla.gnome.org/show_bug.cgi?id=163578
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
# include "config.h"
|
||||
#endif
|
||||
|
||||
#include "greedyh.h"
|
||||
#include "greedyhmacros.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "gst/gst.h"
|
||||
#include "plugins.h"
|
||||
#include "gstdeinterlace2.h"
|
||||
#include "speedy.h"
|
||||
|
||||
|
||||
/* Default values that greedyh_init() stores into the three tuning globals below. */
#define MAXCOMB_DEFAULT 5
|
||||
#define MOTIONTHRESHOLD_DEFAULT 25
|
||||
#define MOTIONSENSE_DEFAULT 30
|
||||
|
||||
/* Tuning parameters with external linkage; set by greedyh_init(). */
/* NOTE(review): presumably read by the code generated from greedyh.asm - confirm there. */
unsigned int GreedyMaxComb;
|
||||
|
||||
unsigned int GreedyMotionThreshold;
|
||||
|
||||
unsigned int GreedyMotionSense;
|
||||
|
||||
|
||||
/*
 * greedyh.asm is included three times as a template. Each inclusion sets
 * FUNCT_NAME (the name of the generated routine), SSE_TYPE (selects the
 * V_* instruction macros from greedyhmacros.h) and one IS_* flag, then
 * undefines all three again so the next inclusion starts clean.
 * The resulting routines are greedyDScaler_SSE, greedyDScaler_3DNOW and
 * greedyDScaler_MMX, which the greedyh_filter_* wrappers call.
 */
/* SSE variant */
#define IS_SSE
|
||||
#define SSE_TYPE SSE
|
||||
#define FUNCT_NAME greedyDScaler_SSE
|
||||
#include "greedyh.asm"
|
||||
#undef SSE_TYPE
|
||||
#undef IS_SSE
|
||||
#undef FUNCT_NAME
|
||||
|
||||
/* 3DNow! variant */
#define IS_3DNOW
|
||||
#define FUNCT_NAME greedyDScaler_3DNOW
|
||||
#define SSE_TYPE 3DNOW
|
||||
#include "greedyh.asm"
|
||||
#undef SSE_TYPE
|
||||
#undef IS_3DNOW
|
||||
#undef FUNCT_NAME
|
||||
|
||||
/* Plain MMX variant; the template tests IS_MMX to skip extra averaging passes */
#define IS_MMX
|
||||
#define SSE_TYPE MMX
|
||||
#define FUNCT_NAME greedyDScaler_MMX
|
||||
#include "greedyh.asm"
|
||||
#undef SSE_TYPE
|
||||
#undef IS_MMX
|
||||
#undef FUNCT_NAME
|
||||
|
||||
void
|
||||
deinterlace_frame_di_greedyh (GstDeinterlace2 * object)
|
||||
{
|
||||
if (object->cpu_feature_flags & OIL_IMPL_FLAG_SSE) {
|
||||
greedyh_filter_sse (object);
|
||||
} else if (object->cpu_feature_flags & OIL_IMPL_FLAG_3DNOW) {
|
||||
greedyh_filter_3dnow (object);
|
||||
} else {
|
||||
greedyh_filter_mmx (object);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Method descriptor returned by dscaler_greedyh_get_method().
 * The initializer is positional; the field names live in the declaration of
 * deinterlace_method_t, which is not part of this file.
 */
static deinterlace_method_t greedyh_method = {
|
||||
0, //DEINTERLACE_PLUGIN_API_VERSION,
|
||||
"Motion Adaptive: Advanced Detection",
|
||||
"AdaptiveAdvanced",
|
||||
/* NOTE(review): presumably the number of fields the method needs (the description mentions a four-field buffer) - confirm against the struct declaration */
4,
|
||||
/* NOTE(review): presumably the minimum CPU feature required - confirm against the struct declaration */
OIL_IMPL_FLAG_MMX,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
/* frame callback: dispatches to the SSE / 3DNow! / MMX implementation */
deinterlace_frame_di_greedyh,
|
||||
/* human-readable description, one string per line */
{"Uses heuristics to detect motion in the input",
|
||||
"frames and reconstruct image detail where",
|
||||
"possible. Use this for high quality output",
|
||||
"even on monitors set to an arbitrary refresh",
|
||||
"rate.",
|
||||
"",
|
||||
"Advanced detection uses linear interpolation",
|
||||
"where motion is detected, using a four-field",
|
||||
"buffer. This is the Greedy: High Motion",
|
||||
"deinterlacer from DScaler."}
|
||||
};
|
||||
|
||||
/*
 * Returns a pointer to the static greedyh_method descriptor.
 * Every call also runs greedyh_init(), which resets the three Greedy*
 * tuning globals to their defaults.
 */
deinterlace_method_t *
|
||||
dscaler_greedyh_get_method (void)
|
||||
{
|
||||
greedyh_init ();
|
||||
return &greedyh_method;
|
||||
}
|
||||
|
||||
/* Sets GreedyMaxComb, GreedyMotionThreshold and GreedyMotionSense to their *_DEFAULT values. */
void
|
||||
greedyh_init (void)
|
||||
{
|
||||
GreedyMaxComb = MAXCOMB_DEFAULT;
|
||||
GreedyMotionThreshold = MOTIONTHRESHOLD_DEFAULT;
|
||||
GreedyMotionSense = MOTIONSENSE_DEFAULT;
|
||||
}
|
||||
|
||||
/* Public wrapper around greedyDScaler_MMX, the routine generated from greedyh.asm with SSE_TYPE set to MMX. */
void
|
||||
greedyh_filter_mmx (GstDeinterlace2 * object)
|
||||
{
|
||||
greedyDScaler_MMX (object);
|
||||
}
|
||||
|
||||
/* Public wrapper around greedyDScaler_3DNOW, the routine generated from greedyh.asm with SSE_TYPE set to 3DNOW. */
void
|
||||
greedyh_filter_3dnow (GstDeinterlace2 * object)
|
||||
{
|
||||
greedyDScaler_3DNOW (object);
|
||||
}
|
||||
|
||||
/* Public wrapper around greedyDScaler_SSE, the routine generated from greedyh.asm with SSE_TYPE set to SSE. */
void
|
||||
greedyh_filter_sse (GstDeinterlace2 * object)
|
||||
{
|
||||
greedyDScaler_SSE (object);
|
||||
}
|
45
gst/deinterlace2/tvtime/greedyh.h
Normal file
45
gst/deinterlace2/tvtime/greedyh.h
Normal file
|
@ -0,0 +1,45 @@
|
|||
/*
|
||||
*
|
||||
* GStreamer
|
||||
* Copyright (C) 2004 Billy Biggs <vektor@dumbterm.net>
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Library General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Library General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Library General Public
|
||||
* License along with this library; if not, write to the
|
||||
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
* Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Relicensed for GStreamer from GPL to LGPL with permit from Billy Biggs.
|
||||
* See: http://bugzilla.gnome.org/show_bug.cgi?id=163578
|
||||
*/
|
||||
|
||||
#ifndef GREEDYH_H_INCLUDED
#define GREEDYH_H_INCLUDED

#include "gstdeinterlace2.h"

#ifdef __cplusplus
extern "C" {
#endif

/* Reset the Greedy (High Motion) tuning globals to their default values. */
void greedyh_init (void);

/*
 * Run the Greedy (High Motion) deinterlacer on @object using one specific
 * instruction-set variant. The caller is responsible for picking a variant
 * that the CPU actually supports.
 */
void greedyh_filter_mmx (GstDeinterlace2 *object);
void greedyh_filter_3dnow (GstDeinterlace2 *object);
void greedyh_filter_sse (GstDeinterlace2 *object);

#ifdef __cplusplus
}                               /* no ';' here: it would be an empty declaration */
#endif

#endif /* GREEDYH_H_INCLUDED */
|
74
gst/deinterlace2/tvtime/greedyhmacros.h
Normal file
74
gst/deinterlace2/tvtime/greedyhmacros.h
Normal file
|
@ -0,0 +1,74 @@
|
|||
/////////////////////////////////////////////////////////////////////////////
|
||||
// Copyright (c) 2001 Tom Barry. All rights reserved.
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// This file is subject to the terms of the GNU General Public License as
|
||||
// published by the Free Software Foundation. A copy of this license is
|
||||
// included with this software distribution in the file COPYING. If you
|
||||
// do not have a copy, you may obtain a copy by writing to the Free
|
||||
// Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
//
|
||||
// This software is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details
|
||||
//
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Define a few macros for CPU dependent instructions.
|
||||
// I suspect I don't really understand how the C macro preprocessor works but
|
||||
// this seems to get the job done. // TRB 7/01
|
||||
|
||||
// BEFORE USING THESE YOU MUST SET:
|
||||
|
||||
// #define SSE_TYPE SSE (or MMX or 3DNOW)
|
||||
|
||||
// some macros for pavgb instruction
|
||||
// V_PAVGB(mmr1, mmr2, mmr work register, smask) mmr2 may = mmrw if you can trash it
|
||||
|
||||
// V_PAVGB(mmr1, mmr2, mmrw, smask): mmr1 = byte-wise average of mmr1 and mmr2.
// Each macro expands to a string of AT&T-syntax asm text ending in "\n\t".
// mmrw is a scratch register and smask a mask operand; both are used only by
// the plain MMX emulation (assumes smask clears the low bit of every byte so
// that the word shift cannot leak between bytes - TODO confirm at the caller).
// The MMX emulation halves both inputs before adding, so it rounds down.
//
// Every string literal is separated from the following macro argument by
// whitespace: without it C++11 and later parse "..."arg as a literal suffix.
#define V_PAVGB_MMX(mmr1, mmr2, mmrw, smask) \
    "movq " mmr2 ", " mmrw "\n\t" \
    "pand " smask ", " mmrw "\n\t" \
    "psrlw $1, " mmrw "\n\t" \
    "pand " smask ", " mmr1 "\n\t" \
    "psrlw $1, " mmr1 "\n\t" \
    "paddusb " mmrw ", " mmr1 "\n\t"
#define V_PAVGB_SSE(mmr1, mmr2, mmrw, smask) "pavgb " mmr2 ", " mmr1 "\n\t"
#define V_PAVGB_3DNOW(mmr1, mmr2, mmrw, smask) "pavgusb " mmr2 ", " mmr1 "\n\t"
// Dispatch on SSE_TYPE. The extra V_PAVGB2 level forces SSE_TYPE to be
// macro-expanded before V_PAVGB3 pastes it onto the V_PAVGB_ prefix.
#define V_PAVGB(mmr1, mmr2, mmrw, smask) V_PAVGB2(mmr1, mmr2, mmrw, smask, SSE_TYPE)
#define V_PAVGB2(mmr1, mmr2, mmrw, smask, ssetyp) V_PAVGB3(mmr1, mmr2, mmrw, smask, ssetyp)
#define V_PAVGB3(mmr1, mmr2, mmrw, smask, ssetyp) V_PAVGB_##ssetyp(mmr1, mmr2, mmrw, smask)
|
||||
|
||||
// some macros for pmaxub instruction
|
||||
// V_PMAXUB(mmr1, mmr2): mmr1 = unsigned byte-wise max(mmr1, mmr2).
// The MMX emulation computes sat(mmr1 - mmr2) + mmr2, which equals the max.
// Whitespace between literals and arguments keeps the macros valid in C++11.
#define V_PMAXUB_MMX(mmr1, mmr2) \
    "psubusb " mmr2 ", " mmr1 "\n\t" \
    "paddusb " mmr2 ", " mmr1 "\n\t"
#define V_PMAXUB_SSE(mmr1, mmr2) "pmaxub " mmr2 ", " mmr1 "\n\t"
#define V_PMAXUB_3DNOW(mmr1, mmr2) V_PMAXUB_MMX(mmr1, mmr2) // use MMX version
// Dispatch on SSE_TYPE (two levels so SSE_TYPE is expanded before pasting).
#define V_PMAXUB(mmr1, mmr2) V_PMAXUB2(mmr1, mmr2, SSE_TYPE)
#define V_PMAXUB2(mmr1, mmr2, ssetyp) V_PMAXUB3(mmr1, mmr2, ssetyp)
#define V_PMAXUB3(mmr1, mmr2, ssetyp) V_PMAXUB_##ssetyp(mmr1, mmr2)
|
||||
|
||||
// some macros for pminub instruction
|
||||
// V_PMINUB(mmr1, mmr2, mmr work register) mmr2 may NOT = mmrw
|
||||
// V_PMINUB(mmr1, mmr2, mmrw): mmr1 = unsigned byte-wise min(mmr1, mmr2).
// The MMX emulation sets mmrw to 255 - mmr2, adds it to mmr1 with saturation
// and subtracts it again, which clamps mmr1 to at most mmr2. mmrw is
// overwritten first, so it must be a different register from mmr2.
// Whitespace between literals and arguments keeps the macros valid in C++11.
#define V_PMINUB_MMX(mmr1, mmr2, mmrw) \
    "pcmpeqb " mmrw ", " mmrw "\n\t" \
    "psubusb " mmr2 ", " mmrw "\n\t" \
    "paddusb " mmrw ", " mmr1 "\n\t" \
    "psubusb " mmrw ", " mmr1 "\n\t"
#define V_PMINUB_SSE(mmr1, mmr2, mmrw) "pminub " mmr2 ", " mmr1 "\n\t"
#define V_PMINUB_3DNOW(mmr1, mmr2, mmrw) V_PMINUB_MMX(mmr1, mmr2, mmrw) // use MMX version
// Dispatch on SSE_TYPE (two levels so SSE_TYPE is expanded before pasting).
#define V_PMINUB(mmr1, mmr2, mmrw) V_PMINUB2(mmr1, mmr2, mmrw, SSE_TYPE)
#define V_PMINUB2(mmr1, mmr2, mmrw, ssetyp) V_PMINUB3(mmr1, mmr2, mmrw, ssetyp)
#define V_PMINUB3(mmr1, mmr2, mmrw, ssetyp) V_PMINUB_##ssetyp(mmr1, mmr2, mmrw)
|
||||
|
||||
// some macros for movntq instruction
|
||||
// V_MOVNTQ(mmr1, mmr2)
|
||||
// V_MOVNTQ(mmr1, mmr2): store register mmr2 to destination mmr1.
// Note the argument order: the destination comes first, although the emitted
// AT&T text is "<op> mmr2, mmr1". Only the SSE variant emits movntq; the MMX
// and 3DNow! variants emit a plain movq.
// Whitespace between literals and arguments keeps the macros valid in C++11.
#define V_MOVNTQ_MMX(mmr1, mmr2) "movq " mmr2 ", " mmr1 "\n\t"
#define V_MOVNTQ_3DNOW(mmr1, mmr2) "movq " mmr2 ", " mmr1 "\n\t"
#define V_MOVNTQ_SSE(mmr1, mmr2) "movntq " mmr2 ", " mmr1 "\n\t"
// Dispatch on SSE_TYPE (two levels so SSE_TYPE is expanded before pasting).
#define V_MOVNTQ(mmr1, mmr2) V_MOVNTQ2(mmr1, mmr2, SSE_TYPE)
#define V_MOVNTQ2(mmr1, mmr2, ssetyp) V_MOVNTQ3(mmr1, mmr2, ssetyp)
#define V_MOVNTQ3(mmr1, mmr2, ssetyp) V_MOVNTQ_##ssetyp(mmr1, mmr2)
|
||||
|
||||
// end of macros
|
723
gst/deinterlace2/tvtime/mmx.h
Normal file
723
gst/deinterlace2/tvtime/mmx.h
Normal file
|
@ -0,0 +1,723 @@
|
|||
/* mmx.h
|
||||
|
||||
MultiMedia eXtensions GCC interface library for IA32.
|
||||
|
||||
To use this library, simply include this header file
|
||||
and compile with GCC. You MUST have inlining enabled
|
||||
in order for mmx_ok() to work; this can be done by
|
||||
simply using -O on the GCC command line.
|
||||
|
||||
Compiling with -DMMX_TRACE will cause detailed trace
|
||||
output to be sent to stderr for each mmx operation.
|
||||
This adds lots of code, and obviously slows execution to
|
||||
a crawl, but can be very useful for debugging.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY
|
||||
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, WITHOUT
|
||||
LIMITATION, THE IMPLIED WARRANTIES OF MERCHANTABILITY
|
||||
AND FITNESS FOR ANY PARTICULAR PURPOSE.
|
||||
|
||||
1997-98 by H. Dietz and R. Fisher
|
||||
|
||||
History:
|
||||
97-98* R.Fisher Early versions
|
||||
980501 R.Fisher Original Release
|
||||
980611* H.Dietz Rewrite, correctly implementing inlines, and
|
||||
R.Fisher including direct register accesses.
|
||||
980616 R.Fisher Release of 980611 as 980616.
|
||||
980714 R.Fisher Minor corrections to Makefile, etc.
|
||||
980715 R.Fisher mmx_ok() now prevents optimizer from using
|
||||
clobbered values.
|
||||
mmx_ok() now checks if cpuid instruction is
|
||||
available before trying to use it.
|
||||
980726* R.Fisher mm_support() searches for AMD 3DNow, Cyrix
|
||||
Extended MMX, and standard MMX. It returns a
|
||||
value which is positive if any of these are
|
||||
supported, and can be masked with constants to
|
||||
see which. mmx_ok() is now a call to this
|
||||
980726* R.Fisher Added i2r support for shift functions
|
||||
980919 R.Fisher Fixed AMD extended feature recognition bug.
|
||||
980921 R.Fisher Added definition/check for _MMX_H.
|
||||
Added "float s[2]" to mmx_t for use with
|
||||
3DNow and EMMX. So same mmx_t can be used.
|
||||
981013 R.Fisher Fixed cpuid function 1 bug (looked at wrong reg)
|
||||
Fixed psllq_i2r error in mmxtest.c
|
||||
|
||||
* Unreleased (internal or interim) versions
|
||||
|
||||
Notes:
|
||||
It appears that the latest gas has the pand problem fixed, therefore
|
||||
I'll undefine BROKEN_PAND by default.
|
||||
String compares may be quicker than the multiple test/jumps in vendor
|
||||
test sequence in mmx_ok(), but I'm not concerned with that right now.
|
||||
|
||||
Acknowledgments:
|
||||
Jussi Laako for pointing out the errors ultimately found to be
|
||||
connected to the failure to notify the optimizer of clobbered values.
|
||||
Roger Hardiman for reminding us that CPUID isn't everywhere, and that
|
||||
someone may actually try to use this on a machine without CPUID.
|
||||
Also for suggesting code for checking this.
|
||||
Robert Dale for pointing out the AMD recognition bug.
|
||||
Jimmy Mayfield and Carl Witty for pointing out the Intel recognition
|
||||
bug.
|
||||
Carl Witty for pointing out the psllq_i2r test bug.
|
||||
*/
|
||||
|
||||
#ifndef _MMX_H
|
||||
#define _MMX_H
|
||||
|
||||
/*#define MMX_TRACE */
|
||||
|
||||
/* Warning: at this writing, the version of GAS packaged
|
||||
with most Linux distributions does not handle the
|
||||
parallel AND operation mnemonic correctly. If the
|
||||
symbol BROKEN_PAND is defined, a slower alternative
|
||||
coding will be used. If execution of mmxtest results
|
||||
in an illegal instruction fault, define this symbol.
|
||||
*/
|
||||
#undef BROKEN_PAND
|
||||
|
||||
|
||||
/* The type of a value that fits in an MMX register
|
||||
(note that long long constant values MUST be suffixed
|
||||
by LL and unsigned long long values by ULL, lest
|
||||
they be truncated by the compiler)
|
||||
*/
|
||||
/* Overlapping views of one 64-bit quantity; every member is 8 bytes wide
   on the targets this header is written for. */
typedef union {
|
||||
long long q; /* Quadword (64-bit) value */
|
||||
unsigned long long uq; /* Unsigned Quadword */
|
||||
int d[2]; /* 2 Doubleword (32-bit) values */
|
||||
unsigned int ud[2]; /* 2 Unsigned Doubleword */
|
||||
short w[4]; /* 4 Word (16-bit) values */
|
||||
unsigned short uw[4]; /* 4 Unsigned Word */
|
||||
char b[8]; /* 8 Byte (8-bit) values */
|
||||
unsigned char ub[8]; /* 8 Unsigned Byte */
|
||||
float s[2]; /* 2 Single-precision (32-bit) values */
|
||||
} mmx_t;
|
||||
|
||||
|
||||
/* Function to test if multimedia instructions are supported...
|
||||
*/
|
||||
/* Returns non-zero when the CPUID instruction may be executed.
   On 32-bit x86 this toggles the ID bit (bit 21) of EFLAGS and checks
   whether the change sticks; 64-bit x86 always has CPUID. */
static inline int
mm_support_has_cpuid (void)
{
#if defined(__x86_64__)
	return 1;
#elif defined(__i386__)
	unsigned int toggled, original;

	__asm__ __volatile__ (
		/* Get copies of EFLAGS into both operands */
		"pushfl\n\t"
		"popl %0\n\t"
		"movl %0, %1\n\t"
		/* Toggle the ID bit in one copy and store it to EFLAGS */
		"xorl $0x200000, %0\n\t"
		"pushl %0\n\t"
		"popfl\n\t"
		/* Get the (hopefully modified) EFLAGS */
		"pushfl\n\t"
		"popl %0\n\t"
		: "=r" (toggled), "=r" (original)
		: /* no input */
		: "cc"
	);

	return ((toggled ^ original) & 0x200000u) != 0;
#else
	return 0;
#endif
}

/* Executes CPUID for `leaf` (sub-leaf 0) and stores the four result
   registers. On targets other than x86 all four results are set to 0. */
static inline void
mm_support_cpuid (unsigned int leaf, unsigned int *eax, unsigned int *ebx,
		  unsigned int *ecx, unsigned int *edx)
{
	unsigned int a = 0, b = 0, c = 0, d = 0;

#if defined(__x86_64__)
	__asm__ __volatile__ ("cpuid"
			      : "=a" (a), "=b" (b), "=c" (c), "=d" (d)
			      : "0" (leaf), "2" (0u));
#elif defined(__i386__)
	/* %ebx may be reserved as the PIC register, so it is saved in %esi
	   around the instruction instead of being named as an operand. */
	__asm__ __volatile__ ("movl %%ebx, %%esi\n\t"
			      "cpuid\n\t"
			      "xchgl %%ebx, %%esi"
			      : "=a" (a), "=S" (b), "=c" (c), "=d" (d)
			      : "0" (leaf), "2" (0u));
#else
	(void) leaf;
#endif

	*eax = a;
	*ebx = b;
	*ecx = c;
	*edx = d;
}

/* Function to test if multimedia instructions are supported...

   Returns 1 if MMX instructions are supported,
	   3 if Cyrix MMX and Extended MMX instructions are supported
	   5 if AMD MMX and 3DNow! instructions are supported
	   0 if hardware does not support any of these
	     (also when the vendor is not Intel, AMD or Cyrix, or when the
	     target is not x86)

   Differences from the former single-asm-statement implementation:
   - the result is no longer both an output operand and a clobber;
   - no global asm labels, so the function can be inlined more than once;
   - the "is extended CPUID available" test is an unsigned comparison
     (the old signed `jl` against 0x80000000 could never be taken);
   - it also builds for x86-64 and returns 0 on other architectures.
*/
static inline int
mm_support (void)
{
#if defined(__i386__) || defined(__x86_64__)
	unsigned int eax, ebx, ecx, edx;
	int is_intel, is_amd, is_cyrix;

	if (!mm_support_has_cpuid ())
		return 0;			/* Nothing supported */

	/* Leaf 0 returns the vendor string in ebx, edx, ecx */
	mm_support_cpuid (0, &eax, &ebx, &ecx, &edx);
	is_intel = (ebx == 0x756e6547u &&	/* "Genu" */
		    edx == 0x49656e69u &&	/* "ineI" */
		    ecx == 0x6c65746eu);	/* "ntel" */
	is_amd = (ebx == 0x68747541u &&		/* "Auth" */
		  edx == 0x69746e65u &&		/* "enti" */
		  ecx == 0x444d4163u);		/* "cAMD" */
	is_cyrix = (ebx == 0x69727943u &&	/* "Cyri" */
		    edx == 0x736e4978u &&	/* "xIns" */
		    ecx == 0x64616574u);	/* "tead" */

	if (!is_intel && !is_amd && !is_cyrix)
		return 0;

	if (is_amd || is_cyrix) {
		/* See if the extended feature leaf is available */
		mm_support_cpuid (0x80000000u, &eax, &ebx, &ecx, &edx);
		if (eax >= 0x80000001u) {
			unsigned int features;

			mm_support_cpuid (0x80000001u, &eax, &ebx, &ecx, &edx);
			/* NOTE(review): the Cyrix path has always tested EAX
			   here while the AMD path tests EDX; kept as it was,
			   confirm against Cyrix documentation. */
			features = is_cyrix ? eax : edx;

			if (!(features & 0x00800000u))
				return 0;	/* MMX not supported */
			if (is_cyrix)		/* Ext'd MMX? */
				return (features & 0x01000000u) ? 3 : 1;
			/* AMD: 3DNow!? */
			return (features & 0x80000000u) ? 5 : 1;
		}
		/* No extended leaf: try standard CPUID instead */
	}

	/* Standard feature flags: leaf 1, EDX bit 23 is MMX */
	mm_support_cpuid (1, &eax, &ebx, &ecx, &edx);
	return (edx & 0x00800000u) ? 1 : 0;
#else
	return 0;
#endif
}

/* Function to test if mmx instructions are supported...
*/
static inline int
mmx_ok (void)
{
	/* Returns 1 if MMX instructions are supported, 0 otherwise */
	return ( mm_support() & 0x1 );
}
|
||||
|
||||
|
||||
/* Helper functions for the instruction macros that follow...
|
||||
(note that memory-to-register, m2r, instructions are nearly
|
||||
as efficient as register-to-register, r2r, instructions;
|
||||
however, memory-to-memory instructions are really simulated
|
||||
as a convenience, and are only 1/3 as efficient)
|
||||
*/
|
||||
#ifdef MMX_TRACE

/* Include the stuff for printing a trace to stderr...
*/

#include <stdio.h>

/* Tracing variants: each prints the operands before and the destination
   after the operation. Naming: i2r = immediate to register, m2r = memory to
   register, r2m = register to memory, r2r = register to register,
   m2m = memory to memory (emulated through mm0).
   mmx_m2r, mmx_r2m and mmx_m2m expect their memory operands to be mmx_t. */

#define mmx_i2r(op, imm, reg) \
	{ \
		mmx_t mmx_trace; \
		/* imm is an integer constant, so it goes into a member */ \
		mmx_trace.q = (imm); \
		fprintf(stderr, #op "_i2r(" #imm "=0x%016llx, ", mmx_trace.q); \
		__asm__ __volatile__ ("movq %%" #reg ", %0" \
				      : "=X" (mmx_trace) \
				      : /* nothing */ ); \
		fprintf(stderr, #reg "=0x%016llx) => ", mmx_trace.q); \
		__asm__ __volatile__ (#op " %0, %%" #reg \
				      : /* nothing */ \
				      : "X" (imm)); \
		__asm__ __volatile__ ("movq %%" #reg ", %0" \
				      : "=X" (mmx_trace) \
				      : /* nothing */ ); \
		fprintf(stderr, #reg "=0x%016llx\n", mmx_trace.q); \
	}

#define mmx_m2r(op, mem, reg) \
	{ \
		mmx_t mmx_trace; \
		mmx_trace = (mem); \
		fprintf(stderr, #op "_m2r(" #mem "=0x%016llx, ", mmx_trace.q); \
		__asm__ __volatile__ ("movq %%" #reg ", %0" \
				      : "=X" (mmx_trace) \
				      : /* nothing */ ); \
		fprintf(stderr, #reg "=0x%016llx) => ", mmx_trace.q); \
		__asm__ __volatile__ (#op " %0, %%" #reg \
				      : /* nothing */ \
				      : "X" (mem)); \
		__asm__ __volatile__ ("movq %%" #reg ", %0" \
				      : "=X" (mmx_trace) \
				      : /* nothing */ ); \
		fprintf(stderr, #reg "=0x%016llx\n", mmx_trace.q); \
	}

#define mmx_r2m(op, reg, mem) \
	{ \
		mmx_t mmx_trace; \
		__asm__ __volatile__ ("movq %%" #reg ", %0" \
				      : "=X" (mmx_trace) \
				      : /* nothing */ ); \
		fprintf(stderr, #op "_r2m(" #reg "=0x%016llx, ", mmx_trace.q); \
		mmx_trace = (mem); \
		fprintf(stderr, #mem "=0x%016llx) => ", mmx_trace.q); \
		__asm__ __volatile__ (#op " %%" #reg ", %0" \
				      : "=X" (mem) \
				      : /* nothing */ ); \
		mmx_trace = (mem); \
		fprintf(stderr, #mem "=0x%016llx\n", mmx_trace.q); \
	}

#define mmx_r2r(op, regs, regd) \
	{ \
		mmx_t mmx_trace; \
		__asm__ __volatile__ ("movq %%" #regs ", %0" \
				      : "=X" (mmx_trace) \
				      : /* nothing */ ); \
		fprintf(stderr, #op "_r2r(" #regs "=0x%016llx, ", mmx_trace.q); \
		__asm__ __volatile__ ("movq %%" #regd ", %0" \
				      : "=X" (mmx_trace) \
				      : /* nothing */ ); \
		fprintf(stderr, #regd "=0x%016llx) => ", mmx_trace.q); \
		__asm__ __volatile__ (#op " %" #regs ", %" #regd); \
		__asm__ __volatile__ ("movq %%" #regd ", %0" \
				      : "=X" (mmx_trace) \
				      : /* nothing */ ); \
		fprintf(stderr, #regd "=0x%016llx\n", mmx_trace.q); \
	}

/* memd is read before it is written, so it is listed as an input as well
   as an output; both are memory operands so %0 and %2 name the same
   location. mm0 is used as scratch. */
#define mmx_m2m(op, mems, memd) \
	{ \
		mmx_t mmx_trace; \
		mmx_trace = (mems); \
		fprintf(stderr, #op "_m2m(" #mems "=0x%016llx, ", mmx_trace.q); \
		mmx_trace = (memd); \
		fprintf(stderr, #memd "=0x%016llx) => ", mmx_trace.q); \
		__asm__ __volatile__ ("movq %0, %%mm0\n\t" \
				      #op " %1, %%mm0\n\t" \
				      "movq %%mm0, %0" \
				      : "=m" (memd) \
				      : "m" (mems), "m" (memd)); \
		mmx_trace = (memd); \
		fprintf(stderr, #memd "=0x%016llx\n", mmx_trace.q); \
	}

#else

/* These macros are a lot simpler without the tracing...
*/

/* Immediate to register. `imm` is pasted into the instruction text, so it
   must be a literal constant.
   NOTE(review): unlike the other helpers this one ends in ';' - kept so
   that existing callers keep compiling, but it makes the macro unsafe as
   the sole statement of an if/else. */
#define mmx_i2r(op, imm, reg) \
	__asm__ __volatile__ (#op " $" #imm ", %%" #reg \
			      : /* nothing */ \
			      : /* nothing */);

/* Memory operand into MMX register `reg`. */
#define mmx_m2r(op, mem, reg) \
	__asm__ __volatile__ (#op " %0, %%" #reg \
			      : /* nothing */ \
			      : "m" (mem))

/* MMX register `reg` into memory operand `mem`. */
#define mmx_r2m(op, reg, mem) \
	__asm__ __volatile__ (#op " %%" #reg ", %0" \
			      : "=m" (mem) \
			      : /* nothing */ )

/* Register to register. This is a basic asm statement (no operand lists),
   hence the single '%' in front of the register names. */
#define mmx_r2r(op, regs, regd) \
	__asm__ __volatile__ (#op " %" #regs ", %" #regd)

/* Memory to memory, emulated as: load memd into mm0, apply `op` with mems,
   store mm0 back to memd. memd is read before it is written, so it is
   listed as an input as well as an output; otherwise the compiler may
   treat its previous value as dead. mm0 is overwritten. */
#define mmx_m2m(op, mems, memd) \
	__asm__ __volatile__ ("movq %0, %%mm0\n\t" \
			      #op " %1, %%mm0\n\t" \
			      "movq %%mm0, %0" \
			      : "=m" (memd) \
			      : "m" (mems), "m" (memd))

#endif
|
||||
|
||||
|
||||
/* 1x64 MOVe Quadword
|
||||
(this is both a load and a store...
|
||||
in fact, it is the only way to store)
|
||||
*/
|
||||
/* movq_m2r loads a register, movq_r2m stores one, movq_r2r copies between registers. */
#define movq_m2r(var, reg) mmx_m2r(movq, var, reg)
|
||||
#define movq_r2m(reg, var) mmx_r2m(movq, reg, var)
|
||||
#define movq_r2r(regs, regd) mmx_r2r(movq, regs, regd)
|
||||
/* Variable-to-variable copy routed through mm0, which is overwritten and not listed as a clobber. */
/* NOTE(review): the "X" constraints accept any operand kind, not only memory - confirm this is intended. */
#define movq(vars, vard) \
|
||||
__asm__ __volatile__ ("movq %1, %%mm0\n\t" \
|
||||
"movq %%mm0, %0" \
|
||||
: "=X" (vard) \
|
||||
: "X" (vars))
|
||||
|
||||
|
||||
/* 1x32 MOVe Doubleword
|
||||
(like movq, this is both load and store...
|
||||
but is most useful for moving things between
|
||||
mmx registers and ordinary registers)
|
||||
*/
|
||||
/* movd_m2r loads a register, movd_r2m stores one, movd_r2r copies between registers. */
#define movd_m2r(var, reg) mmx_m2r(movd, var, reg)
|
||||
#define movd_r2m(reg, var) mmx_r2m(movd, reg, var)
|
||||
#define movd_r2r(regs, regd) mmx_r2r(movd, regs, regd)
|
||||
/* Variable-to-variable copy routed through mm0, which is overwritten and not listed as a clobber. */
/* NOTE(review): the "X" constraints accept any operand kind, not only memory - confirm this is intended. */
#define movd(vars, vard) \
|
||||
__asm__ __volatile__ ("movd %1, %%mm0\n\t" \
|
||||
"movd %%mm0, %0" \
|
||||
: "=X" (vard) \
|
||||
: "X" (vars))
|
||||
|
||||
|
||||
/* 2x32, 4x16, and 8x8 Parallel ADDs
|
||||
*/
|
||||
/* For each of paddd / paddw / paddb: _m2r takes a memory source, _r2r a register source, and the plain form is memory-to-memory via mmx_m2m (overwrites mm0). */
#define paddd_m2r(var, reg) mmx_m2r(paddd, var, reg)
|
||||
#define paddd_r2r(regs, regd) mmx_r2r(paddd, regs, regd)
|
||||
#define paddd(vars, vard) mmx_m2m(paddd, vars, vard)
|
||||
|
||||
#define paddw_m2r(var, reg) mmx_m2r(paddw, var, reg)
|
||||
#define paddw_r2r(regs, regd) mmx_r2r(paddw, regs, regd)
|
||||
#define paddw(vars, vard) mmx_m2m(paddw, vars, vard)
|
||||
|
||||
#define paddb_m2r(var, reg) mmx_m2r(paddb, var, reg)
|
||||
#define paddb_r2r(regs, regd) mmx_r2r(paddb, regs, regd)
|
||||
#define paddb(vars, vard) mmx_m2m(paddb, vars, vard)
|
||||
|
||||
|
||||
/* 4x16 and 8x8 Parallel ADDs using Saturation arithmetic
|
||||
*/
|
||||
/* paddsw / paddsb wrappers: _m2r memory source, _r2r register source, plain form memory-to-memory via mmx_m2m (overwrites mm0). */
#define paddsw_m2r(var, reg) mmx_m2r(paddsw, var, reg)
|
||||
#define paddsw_r2r(regs, regd) mmx_r2r(paddsw, regs, regd)
|
||||
#define paddsw(vars, vard) mmx_m2m(paddsw, vars, vard)
|
||||
|
||||
#define paddsb_m2r(var, reg) mmx_m2r(paddsb, var, reg)
|
||||
#define paddsb_r2r(regs, regd) mmx_r2r(paddsb, regs, regd)
|
||||
#define paddsb(vars, vard) mmx_m2m(paddsb, vars, vard)
|
||||
|
||||
|
||||
/* 4x16 and 8x8 Parallel ADDs using Unsigned Saturation arithmetic
|
||||
*/
|
||||
/* paddusw / paddusb wrappers: _m2r memory source, _r2r register source, plain form memory-to-memory via mmx_m2m (overwrites mm0). */
#define paddusw_m2r(var, reg) mmx_m2r(paddusw, var, reg)
|
||||
#define paddusw_r2r(regs, regd) mmx_r2r(paddusw, regs, regd)
|
||||
#define paddusw(vars, vard) mmx_m2m(paddusw, vars, vard)
|
||||
|
||||
#define paddusb_m2r(var, reg) mmx_m2r(paddusb, var, reg)
|
||||
#define paddusb_r2r(regs, regd) mmx_r2r(paddusb, regs, regd)
|
||||
#define paddusb(vars, vard) mmx_m2m(paddusb, vars, vard)
|
||||
|
||||
|
||||
/* 2x32, 4x16, and 8x8 Parallel SUBs
|
||||
*/
|
||||
/* For each of psubd / psubw / psubb: _m2r takes a memory source, _r2r a register source, and the plain form is memory-to-memory via mmx_m2m (overwrites mm0). */
#define psubd_m2r(var, reg) mmx_m2r(psubd, var, reg)
|
||||
#define psubd_r2r(regs, regd) mmx_r2r(psubd, regs, regd)
|
||||
#define psubd(vars, vard) mmx_m2m(psubd, vars, vard)
|
||||
|
||||
#define psubw_m2r(var, reg) mmx_m2r(psubw, var, reg)
|
||||
#define psubw_r2r(regs, regd) mmx_r2r(psubw, regs, regd)
|
||||
#define psubw(vars, vard) mmx_m2m(psubw, vars, vard)
|
||||
|
||||
#define psubb_m2r(var, reg) mmx_m2r(psubb, var, reg)
|
||||
#define psubb_r2r(regs, regd) mmx_r2r(psubb, regs, regd)
|
||||
#define psubb(vars, vard) mmx_m2m(psubb, vars, vard)
|
||||
|
||||
|
||||
/* 4x16 and 8x8 Parallel SUBs using Saturation arithmetic
|
||||
*/
|
||||
/* psubsw / psubsb wrappers: _m2r memory source, _r2r register source, plain form memory-to-memory via mmx_m2m (overwrites mm0). */
#define psubsw_m2r(var, reg) mmx_m2r(psubsw, var, reg)
|
||||
#define psubsw_r2r(regs, regd) mmx_r2r(psubsw, regs, regd)
|
||||
#define psubsw(vars, vard) mmx_m2m(psubsw, vars, vard)
|
||||
|
||||
#define psubsb_m2r(var, reg) mmx_m2r(psubsb, var, reg)
|
||||
#define psubsb_r2r(regs, regd) mmx_r2r(psubsb, regs, regd)
|
||||
#define psubsb(vars, vard) mmx_m2m(psubsb, vars, vard)
|
||||
|
||||
|
||||
/* 4x16 and 8x8 Parallel SUBs using Unsigned Saturation arithmetic
|
||||
*/
|
||||
#define psubusw_m2r(var, reg) mmx_m2r(psubusw, var, reg)
|
||||
#define psubusw_r2r(regs, regd) mmx_r2r(psubusw, regs, regd)
|
||||
#define psubusw(vars, vard) mmx_m2m(psubusw, vars, vard)
|
||||
|
||||
#define psubusb_m2r(var, reg) mmx_m2r(psubusb, var, reg)
|
||||
#define psubusb_r2r(regs, regd) mmx_r2r(psubusb, regs, regd)
|
||||
#define psubusb(vars, vard) mmx_m2m(psubusb, vars, vard)
|
||||
|
||||
|
||||
/* 4x16 Parallel MULs giving Low 4x16 portions of results
|
||||
*/
|
||||
#define pmullw_m2r(var, reg) mmx_m2r(pmullw, var, reg)
|
||||
#define pmullw_r2r(regs, regd) mmx_r2r(pmullw, regs, regd)
|
||||
#define pmullw(vars, vard) mmx_m2m(pmullw, vars, vard)
|
||||
|
||||
|
||||
/* 4x16 Parallel MULs giving High 4x16 portions of results
|
||||
*/
|
||||
#define pmulhw_m2r(var, reg) mmx_m2r(pmulhw, var, reg)
|
||||
#define pmulhw_r2r(regs, regd) mmx_r2r(pmulhw, regs, regd)
|
||||
#define pmulhw(vars, vard) mmx_m2m(pmulhw, vars, vard)
|
||||
|
||||
|
||||
/* 4x16->2x32 Parallel Mul-ADD
|
||||
(muls like pmullw, then adds adjacent 16-bit fields
|
||||
in the multiply result to make the final 2x32 result)
|
||||
*/
|
||||
#define pmaddwd_m2r(var, reg) mmx_m2r(pmaddwd, var, reg)
|
||||
#define pmaddwd_r2r(regs, regd) mmx_r2r(pmaddwd, regs, regd)
|
||||
#define pmaddwd(vars, vard) mmx_m2m(pmaddwd, vars, vard)
|
||||
|
||||
|
||||
/* 1x64 bitwise AND
|
||||
*/
|
||||
#ifdef BROKEN_PAND
|
||||
#define pand_m2r(var, reg) \
|
||||
{ \
|
||||
mmx_m2r(pandn, (mmx_t) -1LL, reg); \
|
||||
mmx_m2r(pandn, var, reg); \
|
||||
}
|
||||
#define pand_r2r(regs, regd) \
|
||||
{ \
|
||||
mmx_m2r(pandn, (mmx_t) -1LL, regd); \
|
||||
mmx_r2r(pandn, regs, regd); \
|
||||
}
|
||||
#define pand(vars, vard) \
|
||||
{ \
|
||||
movq_m2r(vard, mm0); \
|
||||
mmx_m2r(pandn, (mmx_t) -1LL, mm0); \
|
||||
mmx_m2r(pandn, vars, mm0); \
|
||||
movq_r2m(mm0, vard); \
|
||||
}
|
||||
#else
|
||||
#define pand_m2r(var, reg) mmx_m2r(pand, var, reg)
|
||||
#define pand_r2r(regs, regd) mmx_r2r(pand, regs, regd)
|
||||
#define pand(vars, vard) mmx_m2m(pand, vars, vard)
|
||||
#endif
|
||||
|
||||
|
||||
/* 1x64 bitwise AND with Not the destination
|
||||
*/
|
||||
#define pandn_m2r(var, reg) mmx_m2r(pandn, var, reg)
|
||||
#define pandn_r2r(regs, regd) mmx_r2r(pandn, regs, regd)
|
||||
#define pandn(vars, vard) mmx_m2m(pandn, vars, vard)
|
||||
|
||||
|
||||
/* 1x64 bitwise OR
|
||||
*/
|
||||
#define por_m2r(var, reg) mmx_m2r(por, var, reg)
|
||||
#define por_r2r(regs, regd) mmx_r2r(por, regs, regd)
|
||||
#define por(vars, vard) mmx_m2m(por, vars, vard)
|
||||
|
||||
|
||||
/* 1x64 bitwise eXclusive OR
|
||||
*/
|
||||
#define pxor_m2r(var, reg) mmx_m2r(pxor, var, reg)
|
||||
#define pxor_r2r(regs, regd) mmx_r2r(pxor, regs, regd)
|
||||
#define pxor(vars, vard) mmx_m2m(pxor, vars, vard)
|
||||
|
||||
|
||||
/* 2x32, 4x16, and 8x8 Parallel CoMPare for EQuality
|
||||
(resulting fields are either 0 or -1)
|
||||
*/
|
||||
#define pcmpeqd_m2r(var, reg) mmx_m2r(pcmpeqd, var, reg)
|
||||
#define pcmpeqd_r2r(regs, regd) mmx_r2r(pcmpeqd, regs, regd)
|
||||
#define pcmpeqd(vars, vard) mmx_m2m(pcmpeqd, vars, vard)
|
||||
|
||||
#define pcmpeqw_m2r(var, reg) mmx_m2r(pcmpeqw, var, reg)
|
||||
#define pcmpeqw_r2r(regs, regd) mmx_r2r(pcmpeqw, regs, regd)
|
||||
#define pcmpeqw(vars, vard) mmx_m2m(pcmpeqw, vars, vard)
|
||||
|
||||
#define pcmpeqb_m2r(var, reg) mmx_m2r(pcmpeqb, var, reg)
|
||||
#define pcmpeqb_r2r(regs, regd) mmx_r2r(pcmpeqb, regs, regd)
|
||||
#define pcmpeqb(vars, vard) mmx_m2m(pcmpeqb, vars, vard)
|
||||
|
||||
|
||||
/* 2x32, 4x16, and 8x8 Parallel CoMPare for Greater Than
|
||||
(resulting fields are either 0 or -1)
|
||||
*/
|
||||
#define pcmpgtd_m2r(var, reg) mmx_m2r(pcmpgtd, var, reg)
|
||||
#define pcmpgtd_r2r(regs, regd) mmx_r2r(pcmpgtd, regs, regd)
|
||||
#define pcmpgtd(vars, vard) mmx_m2m(pcmpgtd, vars, vard)
|
||||
|
||||
#define pcmpgtw_m2r(var, reg) mmx_m2r(pcmpgtw, var, reg)
|
||||
#define pcmpgtw_r2r(regs, regd) mmx_r2r(pcmpgtw, regs, regd)
|
||||
#define pcmpgtw(vars, vard) mmx_m2m(pcmpgtw, vars, vard)
|
||||
|
||||
#define pcmpgtb_m2r(var, reg) mmx_m2r(pcmpgtb, var, reg)
|
||||
#define pcmpgtb_r2r(regs, regd) mmx_r2r(pcmpgtb, regs, regd)
|
||||
#define pcmpgtb(vars, vard) mmx_m2m(pcmpgtb, vars, vard)
|
||||
|
||||
|
||||
/* 1x64, 2x32, and 4x16 Parallel Shift Left Logical
|
||||
*/
|
||||
#define psllq_i2r(imm, reg) mmx_i2r(psllq, imm, reg)
|
||||
#define psllq_m2r(var, reg) mmx_m2r(psllq, var, reg)
|
||||
#define psllq_r2r(regs, regd) mmx_r2r(psllq, regs, regd)
|
||||
#define psllq(vars, vard) mmx_m2m(psllq, vars, vard)
|
||||
|
||||
#define pslld_i2r(imm, reg) mmx_i2r(pslld, imm, reg)
|
||||
#define pslld_m2r(var, reg) mmx_m2r(pslld, var, reg)
|
||||
#define pslld_r2r(regs, regd) mmx_r2r(pslld, regs, regd)
|
||||
#define pslld(vars, vard) mmx_m2m(pslld, vars, vard)
|
||||
|
||||
#define psllw_i2r(imm, reg) mmx_i2r(psllw, imm, reg)
|
||||
#define psllw_m2r(var, reg) mmx_m2r(psllw, var, reg)
|
||||
#define psllw_r2r(regs, regd) mmx_r2r(psllw, regs, regd)
|
||||
#define psllw(vars, vard) mmx_m2m(psllw, vars, vard)
|
||||
|
||||
|
||||
/* 1x64, 2x32, and 4x16 Parallel Shift Right Logical
|
||||
*/
|
||||
#define psrlq_i2r(imm, reg) mmx_i2r(psrlq, imm, reg)
|
||||
#define psrlq_m2r(var, reg) mmx_m2r(psrlq, var, reg)
|
||||
#define psrlq_r2r(regs, regd) mmx_r2r(psrlq, regs, regd)
|
||||
#define psrlq(vars, vard) mmx_m2m(psrlq, vars, vard)
|
||||
|
||||
#define psrld_i2r(imm, reg) mmx_i2r(psrld, imm, reg)
|
||||
#define psrld_m2r(var, reg) mmx_m2r(psrld, var, reg)
|
||||
#define psrld_r2r(regs, regd) mmx_r2r(psrld, regs, regd)
|
||||
#define psrld(vars, vard) mmx_m2m(psrld, vars, vard)
|
||||
|
||||
#define psrlw_i2r(imm, reg) mmx_i2r(psrlw, imm, reg)
|
||||
#define psrlw_m2r(var, reg) mmx_m2r(psrlw, var, reg)
|
||||
#define psrlw_r2r(regs, regd) mmx_r2r(psrlw, regs, regd)
|
||||
#define psrlw(vars, vard) mmx_m2m(psrlw, vars, vard)
|
||||
|
||||
|
||||
/* 2x32 and 4x16 Parallel Shift Right Arithmetic
|
||||
*/
|
||||
#define psrad_i2r(imm, reg) mmx_i2r(psrad, imm, reg)
|
||||
#define psrad_m2r(var, reg) mmx_m2r(psrad, var, reg)
|
||||
#define psrad_r2r(regs, regd) mmx_r2r(psrad, regs, regd)
|
||||
#define psrad(vars, vard) mmx_m2m(psrad, vars, vard)
|
||||
|
||||
#define psraw_i2r(imm, reg) mmx_i2r(psraw, imm, reg)
|
||||
#define psraw_m2r(var, reg) mmx_m2r(psraw, var, reg)
|
||||
#define psraw_r2r(regs, regd) mmx_r2r(psraw, regs, regd)
|
||||
#define psraw(vars, vard) mmx_m2m(psraw, vars, vard)
|
||||
|
||||
|
||||
/* 2x32->4x16 and 4x16->8x8 PACK and Signed Saturate
|
||||
(packs source and dest fields into dest in that order)
|
||||
*/
|
||||
#define packssdw_m2r(var, reg) mmx_m2r(packssdw, var, reg)
|
||||
#define packssdw_r2r(regs, regd) mmx_r2r(packssdw, regs, regd)
|
||||
#define packssdw(vars, vard) mmx_m2m(packssdw, vars, vard)
|
||||
|
||||
#define packsswb_m2r(var, reg) mmx_m2r(packsswb, var, reg)
|
||||
#define packsswb_r2r(regs, regd) mmx_r2r(packsswb, regs, regd)
|
||||
#define packsswb(vars, vard) mmx_m2m(packsswb, vars, vard)
|
||||
|
||||
|
||||
/* 4x16->8x8 PACK and Unsigned Saturate
|
||||
(packs source and dest fields into dest in that order)
|
||||
*/
|
||||
#define packuswb_m2r(var, reg) mmx_m2r(packuswb, var, reg)
|
||||
#define packuswb_r2r(regs, regd) mmx_r2r(packuswb, regs, regd)
|
||||
#define packuswb(vars, vard) mmx_m2m(packuswb, vars, vard)
|
||||
|
||||
|
||||
/* 2x32->1x64, 4x16->2x32, and 8x8->4x16 UNPaCK Low
|
||||
(interleaves low half of dest with low half of source
|
||||
as padding in each result field)
|
||||
*/
|
||||
#define punpckldq_m2r(var, reg) mmx_m2r(punpckldq, var, reg)
|
||||
#define punpckldq_r2r(regs, regd) mmx_r2r(punpckldq, regs, regd)
|
||||
#define punpckldq(vars, vard) mmx_m2m(punpckldq, vars, vard)
|
||||
|
||||
#define punpcklwd_m2r(var, reg) mmx_m2r(punpcklwd, var, reg)
|
||||
#define punpcklwd_r2r(regs, regd) mmx_r2r(punpcklwd, regs, regd)
|
||||
#define punpcklwd(vars, vard) mmx_m2m(punpcklwd, vars, vard)
|
||||
|
||||
#define punpcklbw_m2r(var, reg) mmx_m2r(punpcklbw, var, reg)
|
||||
#define punpcklbw_r2r(regs, regd) mmx_r2r(punpcklbw, regs, regd)
|
||||
#define punpcklbw(vars, vard) mmx_m2m(punpcklbw, vars, vard)
|
||||
|
||||
|
||||
/* 2x32->1x64, 4x16->2x32, and 8x8->4x16 UNPaCK High
|
||||
(interleaves high half of dest with high half of source
|
||||
as padding in each result field)
|
||||
*/
|
||||
#define punpckhdq_m2r(var, reg) mmx_m2r(punpckhdq, var, reg)
|
||||
#define punpckhdq_r2r(regs, regd) mmx_r2r(punpckhdq, regs, regd)
|
||||
#define punpckhdq(vars, vard) mmx_m2m(punpckhdq, vars, vard)
|
||||
|
||||
#define punpckhwd_m2r(var, reg) mmx_m2r(punpckhwd, var, reg)
|
||||
#define punpckhwd_r2r(regs, regd) mmx_r2r(punpckhwd, regs, regd)
|
||||
#define punpckhwd(vars, vard) mmx_m2m(punpckhwd, vars, vard)
|
||||
|
||||
#define punpckhbw_m2r(var, reg) mmx_m2r(punpckhbw, var, reg)
|
||||
#define punpckhbw_r2r(regs, regd) mmx_r2r(punpckhbw, regs, regd)
|
||||
#define punpckhbw(vars, vard) mmx_m2m(punpckhbw, vars, vard)
|
||||
|
||||
|
||||
/* Empty MMx State
|
||||
(used to clean-up when going from mmx to float use
|
||||
of the registers that are shared by both; note that
|
||||
there is no float-to-mmx operation needed, because
|
||||
only the float tag word info is corruptible)
|
||||
*/
|
||||
#ifdef MMX_TRACE
|
||||
|
||||
#define emms() \
|
||||
{ \
|
||||
fprintf(stderr, "emms()\n"); \
|
||||
__asm__ __volatile__ ("emms"); \
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
#define emms() __asm__ __volatile__ ("emms")
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
42
gst/deinterlace2/tvtime/plugins.h
Normal file
42
gst/deinterlace2/tvtime/plugins.h
Normal file
|
@ -0,0 +1,42 @@
|
|||
/*
|
||||
*
|
||||
* GStreamer
|
||||
* Copyright (C) 2004 Billy Biggs <vektor@dumbterm.net>
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Library General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Library General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Library General Public
|
||||
* License along with this library; if not, write to the
|
||||
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
* Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Relicensed for GStreamer from GPL to LGPL with permit from Billy Biggs.
|
||||
* See: http://bugzilla.gnome.org/show_bug.cgi?id=163578
|
||||
*/
|
||||
|
||||
#ifndef TVTIME_PLUGINS_H_INCLUDED
|
||||
#define TVTIME_PLUGINS_H_INCLUDED
|
||||
|
||||
deinterlace_method_t* dscaler_tomsmocomp_get_method( void );
|
||||
deinterlace_method_t* dscaler_greedyh_get_method( void );
|
||||
deinterlace_method_t* dscaler_greedyl_get_method( void );
|
||||
deinterlace_method_t* dscaler_vfir_get_method( void );
|
||||
|
||||
//void linear_plugin_init( void );
|
||||
//void scalerbob_plugin_init( void );
|
||||
//void linearblend_plugin_init( void );
|
||||
//void weave_plugin_init( void );
|
||||
//void weavetff_plugin_init( void );
|
||||
//void weavebff_plugin_init( void );
|
||||
|
||||
#endif /* TVTIME_PLUGINS_H_INCLUDED */
|
54
gst/deinterlace2/tvtime/speedtools.h
Normal file
54
gst/deinterlace2/tvtime/speedtools.h
Normal file
|
@ -0,0 +1,54 @@
|
|||
/*
|
||||
*
|
||||
* GStreamer
|
||||
* Copyright (C) 2004 Billy Biggs <vektor@dumbterm.net>
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Library General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Library General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Library General Public
|
||||
* License along with this library; if not, write to the
|
||||
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
* Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Relicensed for GStreamer from GPL to LGPL with permit from Billy Biggs.
|
||||
* See: http://bugzilla.gnome.org/show_bug.cgi?id=163578
|
||||
*/
|
||||
|
||||
#ifndef SPEEDTOOLS_H_INCLUDED
|
||||
#define SPEEDTOOLS_H_INCLUDED
|
||||
|
||||
/*
 * Issues eight prefetchnta hints: at x and at 64, 128 and 192 ints past it,
 * then the same four offsets again after advancing the pointer by 256 ints.
 * NOTE(review): with a 4-byte int this spans the 2048 bytes the name
 * refers to -- confirm for the targets this is built on.
 *
 * prefetchnta is not defined in this header; the including file must
 * provide it before using this macro.
 *
 * The argument is parenthesized before the cast so that an expression such
 * as (base + offset) is converted as a whole instead of only its first
 * operand.
 */
#define PREFETCH_2048(x) \
    { int *pfetcha = (int *) (x); \
      prefetchnta( pfetcha ); \
      prefetchnta( pfetcha + 64 ); \
      prefetchnta( pfetcha + 128 ); \
      prefetchnta( pfetcha + 192 ); \
      pfetcha += 256; \
      prefetchnta( pfetcha ); \
      prefetchnta( pfetcha + 64 ); \
      prefetchnta( pfetcha + 128 ); \
      prefetchnta( pfetcha + 192 ); }
|
||||
|
||||
/*
 * Pulls memory into the cache by really loading from it: one int is read
 * at every 16th index over two consecutive runs of 256 ints starting at x.
 * NOTE(review): with a 4-byte int that is one load per 64 bytes across
 * 2048 bytes -- confirm for the targets this is built on.
 *
 * The loaded values are summed only to give the loads a consumer; the sum
 * itself is discarded.  Two details keep that well-defined and effective:
 *   - the pointer is volatile-qualified, so the compiler has to perform
 *     every load even though the result is unused (a plain pointer lets an
 *     optimizing compiler delete them all);
 *   - the data is read and accumulated as unsigned int, so adding up
 *     arbitrary buffer contents wraps instead of being a signed overflow.
 *
 * The argument is parenthesized before the cast so that an expression such
 * as (base + offset) is converted as a whole.
 */
#define READ_PREFETCH_2048(x) \
    { const volatile unsigned int *pfetcha = (const volatile unsigned int *) (x); \
      unsigned int pfetchtmp; \
      pfetchtmp = pfetcha[ 0 ] + pfetcha[ 16 ] + pfetcha[ 32 ] + pfetcha[ 48 ] + \
          pfetcha[ 64 ] + pfetcha[ 80 ] + pfetcha[ 96 ] + pfetcha[ 112 ] + \
          pfetcha[ 128 ] + pfetcha[ 144 ] + pfetcha[ 160 ] + pfetcha[ 176 ] + \
          pfetcha[ 192 ] + pfetcha[ 208 ] + pfetcha[ 224 ] + pfetcha[ 240 ]; \
      pfetcha += 256; \
      pfetchtmp = pfetcha[ 0 ] + pfetcha[ 16 ] + pfetcha[ 32 ] + pfetcha[ 48 ] + \
          pfetcha[ 64 ] + pfetcha[ 80 ] + pfetcha[ 96 ] + pfetcha[ 112 ] + \
          pfetcha[ 128 ] + pfetcha[ 144 ] + pfetcha[ 160 ] + pfetcha[ 176 ] + \
          pfetcha[ 192 ] + pfetcha[ 208 ] + pfetcha[ 224 ] + pfetcha[ 240 ]; \
      (void) pfetchtmp; }
|
||||
|
||||
#endif /* SPEEDTOOLS_H_INCLUDED */
|
2791
gst/deinterlace2/tvtime/speedy.c
Normal file
2791
gst/deinterlace2/tvtime/speedy.c
Normal file
File diff suppressed because it is too large
Load diff
308
gst/deinterlace2/tvtime/speedy.h
Normal file
308
gst/deinterlace2/tvtime/speedy.h
Normal file
|
@ -0,0 +1,308 @@
|
|||
/*
|
||||
*
|
||||
* GStreamer
|
||||
* Copyright (C) 2004 Billy Biggs <vektor@dumbterm.net>
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Library General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Library General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Library General Public
|
||||
* License along with this library; if not, write to the
|
||||
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
* Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Relicensed for GStreamer from GPL to LGPL with permit from Billy Biggs.
|
||||
* See: http://bugzilla.gnome.org/show_bug.cgi?id=163578
|
||||
*/
|
||||
|
||||
#ifndef SPEEDY_H_INCLUDED
|
||||
#define SPEEDY_H_INCLUDED
|
||||
|
||||
/* size_t is used by the speedy_memcpy declaration in this header. */
#include <stddef.h>

/* Fixed-width integer types (uint8_t, uint32_t). */
#if defined (__SVR4) && defined (__sun)
# include <sys/int_types.h>
#else
# include <stdint.h>
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Speedy is a collection of optimized functions plus their C fallbacks.
|
||||
* This includes a simple system to select which functions to use
|
||||
* at runtime.
|
||||
*
|
||||
* The optimizations are done with the help of the mmx.h system, from
|
||||
* libmpeg2 by Michel Lespinasse and Aaron Holtzman.
|
||||
*
|
||||
* The library is a collection of function pointers which must be first
|
||||
* initialized by setup_speedy_calls() to point at the fastest available
|
||||
* implementation of each function.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Struct for pulldown detection metrics.
|
||||
*/
|
||||
typedef struct pulldown_metrics_s {
|
||||
/* difference: total, even lines, odd lines */
|
||||
int d, e, o;
|
||||
/* noise: temporal, spatial (current), spatial (past) */
|
||||
int t, s, p;
|
||||
} pulldown_metrics_t;
|
||||
|
||||
/**
|
||||
* Interpolates a packed 4:2:2 scanline using linear interpolation.
|
||||
*/
|
||||
extern void (*interpolate_packed422_scanline)( uint8_t *output, uint8_t *top,
|
||||
uint8_t *bot, int width );
|
||||
|
||||
/**
|
||||
* Blits a colour to a packed 4:2:2 scanline.
|
||||
*/
|
||||
extern void (*blit_colour_packed422_scanline)( uint8_t *output,
|
||||
int width, int y, int cb, int cr );
|
||||
|
||||
/**
|
||||
* Blits a colour to a packed 4:4:4:4 scanline. I use luma/cb/cr instead of
|
||||
* RGB but this will of course work for either.
|
||||
*/
|
||||
extern void (*blit_colour_packed4444_scanline)( uint8_t *output,
|
||||
int width, int alpha, int luma,
|
||||
int cb, int cr );
|
||||
|
||||
/**
|
||||
* Blit from and to packed 4:2:2 scanline.
|
||||
*/
|
||||
extern void (*blit_packed422_scanline)( uint8_t *dest, const uint8_t *src, int width );
|
||||
|
||||
/**
|
||||
* Composites a premultiplied 4:4:4:4 pixel onto a packed 4:2:2 scanline.
|
||||
*/
|
||||
extern void (*composite_colour4444_alpha_to_packed422_scanline)( uint8_t *output, uint8_t *input,
|
||||
int af, int y, int cb, int cr,
|
||||
int width, int alpha );
|
||||
|
||||
/**
|
||||
* Composites a packed 4:4:4:4 scanline onto a packed 4:2:2 scanline.
|
||||
* Chroma is downsampled by dropping samples (nearest neighbour).
|
||||
*/
|
||||
extern void (*composite_packed4444_to_packed422_scanline)( uint8_t *output,
|
||||
uint8_t *input,
|
||||
uint8_t *foreground,
|
||||
int width );
|
||||
|
||||
/**
|
||||
* Composites a packed 4:4:4:4 scanline onto a packed 4:2:2 scanline.
|
||||
* Chroma is downsampled by dropping samples (nearest neighbour). The
|
||||
* alpha value provided is in the range 0-256 and is first applied to
|
||||
* the input (for fadeouts).
|
||||
*/
|
||||
extern void (*composite_packed4444_alpha_to_packed422_scanline)( uint8_t *output,
|
||||
uint8_t *input,
|
||||
uint8_t *foreground,
|
||||
int width, int alpha );
|
||||
|
||||
/**
|
||||
* Takes an alphamask and the given colour (in Y'CbCr) and composites it
|
||||
* onto a packed 4:4:4:4 scanline.
|
||||
*/
|
||||
extern void (*composite_alphamask_to_packed4444_scanline)( uint8_t *output,
|
||||
uint8_t *input,
|
||||
uint8_t *mask, int width,
|
||||
int textluma, int textcb,
|
||||
int textcr );
|
||||
|
||||
/**
|
||||
* Takes an alphamask and the given colour (in Y'CbCr) and composites it
|
||||
* onto a packed 4:4:4:4 scanline. The alpha value provided is in the
|
||||
* range 0-256 and is first applied to the input (for fadeouts).
|
||||
*/
|
||||
extern void (*composite_alphamask_alpha_to_packed4444_scanline)( uint8_t *output,
|
||||
uint8_t *input,
|
||||
uint8_t *mask, int width,
|
||||
int textluma, int textcb,
|
||||
int textcr, int alpha );
|
||||
|
||||
/**
|
||||
* Premultiplies the colour by the alpha channel in a packed 4:4:4:4
|
||||
* scanline.
|
||||
*/
|
||||
extern void (*premultiply_packed4444_scanline)( uint8_t *output, uint8_t *input, int width );
|
||||
|
||||
/**
|
||||
* Blend between two packed 4:2:2 scanlines. Pos is the fade value in
|
||||
* the range 0-256. A value of 0 gives 100% src1, and a value of 256
|
||||
* gives 100% src2. Anything in between gives the appropriate faded
|
||||
* version.
|
||||
*/
|
||||
extern void (*blend_packed422_scanline)( uint8_t *output, uint8_t *src1,
|
||||
uint8_t *src2, int width, int pos );
|
||||
|
||||
/**
|
||||
* Calculates the 'difference factor' for two scanlines. This is a
|
||||
* metric where higher values indicate that the two scanlines are more
|
||||
* different.
|
||||
*/
|
||||
extern unsigned int (*diff_factor_packed422_scanline)( uint8_t *cur, uint8_t *old, int width );
|
||||
|
||||
/**
|
||||
* Calculates the 'comb factor' for a set of three scanlines. This is a
|
||||
* metric where higher values indicate a more likely chance that the two
|
||||
* fields are at separate points in time.
|
||||
*/
|
||||
extern unsigned int (*comb_factor_packed422_scanline)( uint8_t *top, uint8_t *mid,
|
||||
uint8_t *bot, int width );
|
||||
|
||||
/**
|
||||
* Vertical [1 2 1] chroma filter.
|
||||
*/
|
||||
extern void (*vfilter_chroma_121_packed422_scanline)( uint8_t *output, int width,
|
||||
uint8_t *m, uint8_t *t, uint8_t *b );
|
||||
|
||||
/**
|
||||
* Vertical [3 3 2] chroma filter.
|
||||
*/
|
||||
extern void (*vfilter_chroma_332_packed422_scanline)( uint8_t *output, int width,
|
||||
uint8_t *m, uint8_t *t, uint8_t *b );
|
||||
|
||||
/**
|
||||
* Sets the chroma of the scanline to neutral (128) in-place.
|
||||
*/
|
||||
extern void (*kill_chroma_packed422_inplace_scanline)( uint8_t *data, int width );
|
||||
|
||||
/**
|
||||
* Mirrors the scanline in-place.
|
||||
*/
|
||||
extern void (*mirror_packed422_inplace_scanline)( uint8_t *data, int width );
|
||||
|
||||
/**
|
||||
* Inverts the colours on a scanline in-place.
|
||||
*/
|
||||
extern void (*invert_colour_packed422_inplace_scanline)( uint8_t *data, int width );
|
||||
|
||||
/**
|
||||
* Fast memcpy function, used by all of the blit functions. Won't blit
|
||||
* anything if dest == src.
|
||||
*/
|
||||
extern void (*speedy_memcpy)( void *output, const void *input, size_t size );
|
||||
|
||||
/**
|
||||
* Calculates the block difference metrics for dalias' pulldown
|
||||
* detection algorithm.
|
||||
*/
|
||||
extern void (*diff_packed422_block8x8)( pulldown_metrics_t *m, uint8_t *old,
|
||||
uint8_t *new, int os, int ns );
|
||||
|
||||
/**
|
||||
* Takes an alpha mask and subpixelly blits it using linear
|
||||
* interpolation.
|
||||
*/
|
||||
extern void (*a8_subpix_blit_scanline)( uint8_t *output, uint8_t *input,
|
||||
int lasta, int startpos, int width );
|
||||
|
||||
/**
|
||||
* 1/4 vertical subpixel blit for packed 4:2:2 scanlines using linear
|
||||
* interpolation.
|
||||
*/
|
||||
extern void (*quarter_blit_vertical_packed422_scanline)( uint8_t *output, uint8_t *one,
|
||||
uint8_t *three, int width );
|
||||
|
||||
/**
|
||||
* Vertical subpixel blit for packed 4:2:2 scanlines using linear
|
||||
* interpolation.
|
||||
*/
|
||||
extern void (*subpix_blit_vertical_packed422_scanline)( uint8_t *output, uint8_t *top,
|
||||
uint8_t *bot, int subpixpos, int width );
|
||||
|
||||
/**
|
||||
* Simple function to convert a 4:4:4 scanline to a 4:4:4:4 scanline by
|
||||
* adding an alpha channel. Result is non-premultiplied.
|
||||
*/
|
||||
extern void (*packed444_to_nonpremultiplied_packed4444_scanline)( uint8_t *output,
|
||||
uint8_t *input,
|
||||
int width, int alpha );
|
||||
|
||||
/**
|
||||
* I think this function needs to be rethought and renamed, but here
|
||||
* it is for now. This function horizontally resamples a scanline
|
||||
* using linear interpolation to compensate for a change in pixel
|
||||
* aspect ratio.
|
||||
*/
|
||||
extern void (*aspect_adjust_packed4444_scanline)( uint8_t *output,
|
||||
uint8_t *input,
|
||||
int width,
|
||||
double pixel_aspect );
|
||||
|
||||
/**
|
||||
* Convert a packed 4:4:4 surface to a packed 4:2:2 surface using
|
||||
* nearest neighbour chroma downsampling.
|
||||
*/
|
||||
extern void (*packed444_to_packed422_scanline)( uint8_t *output,
|
||||
uint8_t *input,
|
||||
int width );
|
||||
|
||||
/**
|
||||
* Converts packed 4:2:2 to packed 4:4:4 scanlines using nearest
|
||||
* neighbour chroma upsampling.
|
||||
*/
|
||||
extern void (*packed422_to_packed444_scanline)( uint8_t *output,
|
||||
uint8_t *input,
|
||||
int width );
|
||||
|
||||
/**
|
||||
* This filter actually does not meet the spec so calling it rec601
|
||||
* is a bit of a lie. I got the filter from Poynton's site. This
|
||||
* converts a scanline from packed 4:2:2 to packed 4:4:4. But this
|
||||
* function should point at some high quality to-the-spec resampler.
|
||||
*/
|
||||
extern void (*packed422_to_packed444_rec601_scanline)( uint8_t *dest,
|
||||
uint8_t *src,
|
||||
int width );
|
||||
|
||||
/**
|
||||
* Conversions between Y'CbCr and R'G'B'. We use Rec.601 numbers
|
||||
* since our source is broadcast video, but I think there is an
|
||||
* argument to be made for switching to Rec.709.
|
||||
*/
|
||||
extern void (*packed444_to_rgb24_rec601_scanline)( uint8_t *output,
|
||||
uint8_t *input,
|
||||
int width );
|
||||
extern void (*rgb24_to_packed444_rec601_scanline)( uint8_t *output,
|
||||
uint8_t *input,
|
||||
int width );
|
||||
extern void (*rgba32_to_packed4444_rec601_scanline)( uint8_t *output,
|
||||
uint8_t *input,
|
||||
int width );
|
||||
|
||||
/**
|
||||
* Convert from 4:2:2 with UYVY ordering to 4:2:2 with YUYV ordering.
|
||||
*/
|
||||
extern void (*convert_uyvy_to_yuyv_scanline)( uint8_t *uyvy_buf,
|
||||
uint8_t *yuyv_buf, int width );
|
||||
|
||||
/**
|
||||
* Sets up the function pointers to point at the fastest function
|
||||
* available. Requires acceleration settings (see mm_accel.h).
|
||||
*/
|
||||
void setup_speedy_calls( uint32_t accel, int verbose );
|
||||
|
||||
/**
|
||||
* Returns a bitfield of what accelerations were used when speedy was
|
||||
* initialized. See mm_accel.h.
|
||||
*/
|
||||
uint32_t speedy_get_accel( void );
|
||||
|
||||
/* Closes the extern "C" block opened near the top of this header.  A
 * linkage-specification ends with a bare brace; a trailing semicolon would
 * be a separate empty declaration that pedantic C++ builds warn about. */
#ifdef __cplusplus
}
#endif
|
||||
#endif /* SPEEDY_H_INCLUDED */
|
992
gst/deinterlace2/tvtime/sse.h
Normal file
992
gst/deinterlace2/tvtime/sse.h
Normal file
|
@ -0,0 +1,992 @@
|
|||
/* sse.h
|
||||
|
||||
Streaming SIMD Extensions (a.k.a. Katmai New Instructions)
|
||||
GCC interface library for IA32.
|
||||
|
||||
To use this library, simply include this header file
|
||||
and compile with GCC. You MUST have inlining enabled
|
||||
in order for sse_ok() to work; this can be done by
|
||||
simply using -O on the GCC command line.
|
||||
|
||||
Compiling with -DSSE_TRACE will cause detailed trace
|
||||
output to be sent to stderr for each sse operation.
|
||||
This adds lots of code, and obviously slows execution to
|
||||
a crawl, but can be very useful for debugging.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY
|
||||
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, WITHOUT
|
||||
LIMITATION, THE IMPLIED WARRANTIES OF MERCHANTABILITY
|
||||
AND FITNESS FOR ANY PARTICULAR PURPOSE.
|
||||
|
||||
1999 by R. Fisher
|
||||
Based on libmmx by H. Dietz and R. Fisher
|
||||
|
||||
Notes:
|
||||
This is still extremely alpha.
|
||||
Because this library depends on an assembler which understands the
|
||||
SSE opcodes, you probably won't be able to use this yet.
|
||||
For now, do not use TRACE versions. These both make use
|
||||
of the MMX registers, not the SSE registers. This will be resolved
|
||||
at a later date.
|
||||
ToDo:
|
||||
Rewrite TRACE macros
|
||||
Major Debugging Work
|
||||
*/
|
||||
|
||||
#ifndef _SSE_H
|
||||
#define _SSE_H
|
||||
|
||||
|
||||
|
||||
/* The type of a value that fits in an SSE register
|
||||
(note that long long constant values MUST be suffixed
|
||||
by LL and unsigned long long values by ULL, lest
|
||||
they be truncated by the compiler)
|
||||
*/
|
||||
typedef union {
|
||||
float sf[4]; /* Single-precision (32-bit) value */
|
||||
} __attribute__ ((aligned (16))) sse_t; /* On a 16 byte (128-bit) boundary */
|
||||
|
||||
|
||||
#if 0
|
||||
/* Function to test if multimedia instructions are supported...
|
||||
*/
|
||||
inline extern int
|
||||
mm_support(void)
|
||||
{
|
||||
/* Returns 1 if MMX instructions are supported,
|
||||
3 if Cyrix MMX and Extended MMX instructions are supported
|
||||
5 if AMD MMX and 3DNow! instructions are supported
|
||||
9 if MMX and SSE instructions are supported
|
||||
0 if hardware does not support any of these
|
||||
*/
|
||||
register int rval = 0;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
/* See if CPUID instruction is supported ... */
|
||||
/* ... Get copies of EFLAGS into eax and ecx */
|
||||
"pushf\n\t"
|
||||
"popl %%eax\n\t"
|
||||
"movl %%eax, %%ecx\n\t"
|
||||
|
||||
/* ... Toggle the ID bit in one copy and store */
|
||||
/* to the EFLAGS reg */
|
||||
"xorl $0x200000, %%eax\n\t"
|
||||
"push %%eax\n\t"
|
||||
"popf\n\t"
|
||||
|
||||
/* ... Get the (hopefully modified) EFLAGS */
|
||||
"pushf\n\t"
|
||||
"popl %%eax\n\t"
|
||||
|
||||
/* ... Compare and test result */
|
||||
"xorl %%eax, %%ecx\n\t"
|
||||
"testl $0x200000, %%ecx\n\t"
|
||||
"jz NotSupported1\n\t" /* CPUID not supported */
|
||||
|
||||
|
||||
/* Get standard CPUID information, and
|
||||
go to a specific vendor section */
|
||||
"movl $0, %%eax\n\t"
|
||||
"cpuid\n\t"
|
||||
|
||||
/* Check for Intel */
|
||||
"cmpl $0x756e6547, %%ebx\n\t"
|
||||
"jne TryAMD\n\t"
|
||||
"cmpl $0x49656e69, %%edx\n\t"
|
||||
"jne TryAMD\n\t"
|
||||
"cmpl $0x6c65746e, %%ecx\n"
|
||||
"jne TryAMD\n\t"
|
||||
"jmp Intel\n\t"
|
||||
|
||||
/* Check for AMD */
|
||||
"\nTryAMD:\n\t"
|
||||
"cmpl $0x68747541, %%ebx\n\t"
|
||||
"jne TryCyrix\n\t"
|
||||
"cmpl $0x69746e65, %%edx\n\t"
|
||||
"jne TryCyrix\n\t"
|
||||
"cmpl $0x444d4163, %%ecx\n"
|
||||
"jne TryCyrix\n\t"
|
||||
"jmp AMD\n\t"
|
||||
|
||||
/* Check for Cyrix */
|
||||
"\nTryCyrix:\n\t"
|
||||
"cmpl $0x69727943, %%ebx\n\t"
|
||||
"jne NotSupported2\n\t"
|
||||
"cmpl $0x736e4978, %%edx\n\t"
|
||||
"jne NotSupported3\n\t"
|
||||
"cmpl $0x64616574, %%ecx\n\t"
|
||||
"jne NotSupported4\n\t"
|
||||
/* Drop through to Cyrix... */
|
||||
|
||||
|
||||
/* Cyrix Section */
|
||||
/* See if extended CPUID level 80000001 is supported */
|
||||
/* The value of CPUID/80000001 for the 6x86MX is undefined
|
||||
according to the Cyrix CPU Detection Guide (Preliminary
|
||||
Rev. 1.01 table 1), so we'll check the value of eax for
|
||||
CPUID/0 to see if standard CPUID level 2 is supported.
|
||||
According to the table, the only CPU which supports level
|
||||
2 is also the only one which supports extended CPUID levels.
|
||||
*/
|
||||
"cmpl $0x2, %%eax\n\t"
|
||||
"jne MMXtest\n\t" /* Use standard CPUID instead */
|
||||
|
||||
/* Extended CPUID supported (in theory), so get extended
|
||||
features */
|
||||
"movl $0x80000001, %%eax\n\t"
|
||||
"cpuid\n\t"
|
||||
"testl $0x00800000, %%eax\n\t" /* Test for MMX */
|
||||
"jz NotSupported5\n\t" /* MMX not supported */
|
||||
"testl $0x01000000, %%eax\n\t" /* Test for Ext'd MMX */
|
||||
"jnz EMMXSupported\n\t"
|
||||
"movl $1, %0:\n\n\t" /* MMX Supported */
|
||||
"jmp Return\n\n"
|
||||
"EMMXSupported:\n\t"
|
||||
"movl $3, %0:\n\n\t" /* EMMX and MMX Supported */
|
||||
"jmp Return\n\t"
|
||||
|
||||
|
||||
/* AMD Section */
|
||||
"AMD:\n\t"
|
||||
|
||||
/* See if extended CPUID is supported */
|
||||
"movl $0x80000000, %%eax\n\t"
|
||||
"cpuid\n\t"
|
||||
"cmpl $0x80000000, %%eax\n\t"
|
||||
"jl MMXtest\n\t" /* Use standard CPUID instead */
|
||||
|
||||
/* Extended CPUID supported, so get extended features */
|
||||
"movl $0x80000001, %%eax\n\t"
|
||||
"cpuid\n\t"
|
||||
"testl $0x00800000, %%edx\n\t" /* Test for MMX */
|
||||
"jz NotSupported6\n\t" /* MMX not supported */
|
||||
"testl $0x80000000, %%edx\n\t" /* Test for 3DNow! */
|
||||
"jnz ThreeDNowSupported\n\t"
|
||||
"movl $1, %0:\n\n\t" /* MMX Supported */
|
||||
"jmp Return\n\n"
|
||||
"ThreeDNowSupported:\n\t"
|
||||
"movl $5, %0:\n\n\t" /* 3DNow! and MMX Supported */
|
||||
"jmp Return\n\t"
|
||||
|
||||
|
||||
/* Intel Section */
|
||||
"Intel:\n\t"
|
||||
|
||||
/* Check for SSE */
|
||||
"SSEtest:\n\t"
|
||||
"movl $1, %%eax\n\t"
|
||||
"cpuid\n\t"
|
||||
"testl $0x02000000, %%edx\n\t" /* Test for SSE */
|
||||
"jz MMXtest\n\t" /* SSE Not supported */
|
||||
"movl $9, %0:\n\n\t" /* SSE Supported */
|
||||
"jmp Return\n\t"
|
||||
|
||||
/* Check for MMX */
|
||||
"MMXtest:\n\t"
|
||||
"movl $1, %%eax\n\t"
|
||||
"cpuid\n\t"
|
||||
"testl $0x00800000, %%edx\n\t" /* Test for MMX */
|
||||
"jz NotSupported7\n\t" /* MMX Not supported */
|
||||
"movl $1, %0:\n\n\t" /* MMX Supported */
|
||||
"jmp Return\n\t"
|
||||
|
||||
/* Nothing supported */
|
||||
"\nNotSupported1:\n\t"
|
||||
"#movl $101, %0:\n\n\t"
|
||||
"\nNotSupported2:\n\t"
|
||||
"#movl $102, %0:\n\n\t"
|
||||
"\nNotSupported3:\n\t"
|
||||
"#movl $103, %0:\n\n\t"
|
||||
"\nNotSupported4:\n\t"
|
||||
"#movl $104, %0:\n\n\t"
|
||||
"\nNotSupported5:\n\t"
|
||||
"#movl $105, %0:\n\n\t"
|
||||
"\nNotSupported6:\n\t"
|
||||
"#movl $106, %0:\n\n\t"
|
||||
"\nNotSupported7:\n\t"
|
||||
"#movl $107, %0:\n\n\t"
|
||||
"movl $0, %0:\n\n\t"
|
||||
|
||||
"Return:\n\t"
|
||||
: "=a" (rval)
|
||||
: /* no input */
|
||||
: "eax", "ebx", "ecx", "edx"
|
||||
);
|
||||
|
||||
/* Return */
|
||||
return(rval);
|
||||
}
|
||||
|
||||
/* Function to test if sse instructions are supported...
|
||||
*/
|
||||
inline extern int
|
||||
sse_ok(void)
|
||||
{
|
||||
/* Returns 1 if SSE instructions are supported, 0 otherwise */
|
||||
return ( (mm_support() & 0x8) >> 3 );
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
/* Helper functions for the instruction macros that follow...
|
||||
(note that memory-to-register, m2r, instructions are nearly
|
||||
as efficient as register-to-register, r2r, instructions;
|
||||
however, memory-to-memory instructions are really simulated
|
||||
as a convenience, and are only 1/3 as efficient)
|
||||
*/
|
||||
#ifdef SSE_TRACE
|
||||
|
||||
/* Include the stuff for printing a trace to stderr...
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#define sse_i2r(op, imm, reg) \
|
||||
{ \
|
||||
sse_t sse_trace; \
|
||||
sse_trace.uq = (imm); \
|
||||
fprintf(stderr, #op "_i2r(" #imm "=0x%08x%08x, ", \
|
||||
sse_trace.d[1], sse_trace.d[0]); \
|
||||
__asm__ __volatile__ ("movq %%" #reg ", %0" \
|
||||
: "=X" (sse_trace) \
|
||||
: /* nothing */ ); \
|
||||
fprintf(stderr, #reg "=0x%08x%08x) => ", \
|
||||
sse_trace.d[1], sse_trace.d[0]); \
|
||||
__asm__ __volatile__ (#op " %0, %%" #reg \
|
||||
: /* nothing */ \
|
||||
: "X" (imm)); \
|
||||
__asm__ __volatile__ ("movq %%" #reg ", %0" \
|
||||
: "=X" (sse_trace) \
|
||||
: /* nothing */ ); \
|
||||
fprintf(stderr, #reg "=0x%08x%08x\n", \
|
||||
sse_trace.d[1], sse_trace.d[0]); \
|
||||
}
|
||||
|
||||
#define sse_m2r(op, mem, reg) \
|
||||
{ \
|
||||
sse_t sse_trace; \
|
||||
sse_trace = (mem); \
|
||||
fprintf(stderr, #op "_m2r(" #mem "=0x%08x%08x, ", \
|
||||
sse_trace.d[1], sse_trace.d[0]); \
|
||||
__asm__ __volatile__ ("movq %%" #reg ", %0" \
|
||||
: "=X" (sse_trace) \
|
||||
: /* nothing */ ); \
|
||||
fprintf(stderr, #reg "=0x%08x%08x) => ", \
|
||||
sse_trace.d[1], sse_trace.d[0]); \
|
||||
__asm__ __volatile__ (#op " %0, %%" #reg \
|
||||
: /* nothing */ \
|
||||
: "X" (mem)); \
|
||||
__asm__ __volatile__ ("movq %%" #reg ", %0" \
|
||||
: "=X" (sse_trace) \
|
||||
: /* nothing */ ); \
|
||||
fprintf(stderr, #reg "=0x%08x%08x\n", \
|
||||
sse_trace.d[1], sse_trace.d[0]); \
|
||||
}
|
||||
|
||||
#define sse_r2m(op, reg, mem) \
|
||||
{ \
|
||||
sse_t sse_trace; \
|
||||
__asm__ __volatile__ ("movq %%" #reg ", %0" \
|
||||
: "=X" (sse_trace) \
|
||||
: /* nothing */ ); \
|
||||
fprintf(stderr, #op "_r2m(" #reg "=0x%08x%08x, ", \
|
||||
sse_trace.d[1], sse_trace.d[0]); \
|
||||
sse_trace = (mem); \
|
||||
fprintf(stderr, #mem "=0x%08x%08x) => ", \
|
||||
sse_trace.d[1], sse_trace.d[0]); \
|
||||
__asm__ __volatile__ (#op " %%" #reg ", %0" \
|
||||
: "=X" (mem) \
|
||||
: /* nothing */ ); \
|
||||
sse_trace = (mem); \
|
||||
fprintf(stderr, #mem "=0x%08x%08x\n", \
|
||||
sse_trace.d[1], sse_trace.d[0]); \
|
||||
}
|
||||
|
||||
#define sse_r2r(op, regs, regd) \
|
||||
{ \
|
||||
sse_t sse_trace; \
|
||||
__asm__ __volatile__ ("movq %%" #regs ", %0" \
|
||||
: "=X" (sse_trace) \
|
||||
: /* nothing */ ); \
|
||||
fprintf(stderr, #op "_r2r(" #regs "=0x%08x%08x, ", \
|
||||
sse_trace.d[1], sse_trace.d[0]); \
|
||||
__asm__ __volatile__ ("movq %%" #regd ", %0" \
|
||||
: "=X" (sse_trace) \
|
||||
: /* nothing */ ); \
|
||||
fprintf(stderr, #regd "=0x%08x%08x) => ", \
|
||||
sse_trace.d[1], sse_trace.d[0]); \
|
||||
__asm__ __volatile__ (#op " %" #regs ", %" #regd); \
|
||||
__asm__ __volatile__ ("movq %%" #regd ", %0" \
|
||||
: "=X" (sse_trace) \
|
||||
: /* nothing */ ); \
|
||||
fprintf(stderr, #regd "=0x%08x%08x\n", \
|
||||
sse_trace.d[1], sse_trace.d[0]); \
|
||||
}
|
||||
|
||||
#define sse_m2m(op, mems, memd) \
|
||||
{ \
|
||||
sse_t sse_trace; \
|
||||
sse_trace = (mems); \
|
||||
fprintf(stderr, #op "_m2m(" #mems "=0x%08x%08x, ", \
|
||||
sse_trace.d[1], sse_trace.d[0]); \
|
||||
sse_trace = (memd); \
|
||||
fprintf(stderr, #memd "=0x%08x%08x) => ", \
|
||||
sse_trace.d[1], sse_trace.d[0]); \
|
||||
__asm__ __volatile__ ("movq %0, %%mm0\n\t" \
|
||||
#op " %1, %%mm0\n\t" \
|
||||
"movq %%mm0, %0" \
|
||||
: "=X" (memd) \
|
||||
: "X" (mems)); \
|
||||
sse_trace = (memd); \
|
||||
fprintf(stderr, #memd "=0x%08x%08x\n", \
|
||||
sse_trace.d[1], sse_trace.d[0]); \
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/* These macros are a lot simpler without the tracing...
|
||||
*/
|
||||
|
||||
#define sse_i2r(op, imm, reg) \
|
||||
__asm__ __volatile__ (#op " %0, %%" #reg \
|
||||
: /* nothing */ \
|
||||
: "X" (imm) )
|
||||
|
||||
#define sse_m2r(op, mem, reg) \
|
||||
__asm__ __volatile__ (#op " %0, %%" #reg \
|
||||
: /* nothing */ \
|
||||
: "X" (mem))
|
||||
|
||||
#define sse_r2m(op, reg, mem) \
|
||||
__asm__ __volatile__ (#op " %%" #reg ", %0" \
|
||||
: "=X" (mem) \
|
||||
: /* nothing */ )
|
||||
|
||||
#define sse_r2r(op, regs, regd) \
|
||||
__asm__ __volatile__ (#op " %" #regs ", %" #regd)
|
||||
|
||||
#define sse_r2ri(op, regs, regd, imm) \
|
||||
__asm__ __volatile__ (#op " %0, %%" #regs ", %%" #regd \
|
||||
: /* nothing */ \
|
||||
: "X" (imm) )
|
||||
|
||||
/*	Memory-to-memory helper: load memd into xmm0, apply op with mems as
	the source operand, and store xmm0 back to memd
	(i.e. memd = memd <op> mems).
	The xmmreg argument is accepted for interface compatibility only; the
	scratch register is always xmm0, which is declared as clobbered.
	memd is a read-write memory operand and mems a memory input.
*/
#define	sse_m2m(op, mems, memd, xmmreg) \
	__asm__ __volatile__ ("movups %0, %%xmm0\n\t" \
			      #op " %1, %%xmm0\n\t" \
			      "movups %%xmm0, %0" \
			      : "+m" (memd) \
			      : "m" (mems) \
			      : "xmm0")
|
||||
|
||||
/*	Memory-to-register operation that takes an 8-bit immediate selector.
	Emits "op $subop, mem, %reg": in AT&T syntax the immediate comes first
	and the destination register last.  subop is pasted into the
	instruction text, so it must be a literal constant.
*/
#define	sse_m2ri(op, mem, reg, subop) \
	__asm__ __volatile__ (#op " $" #subop ", %0, %%" #reg \
			      : /* nothing */ \
			      : "m" (mem))
|
||||
|
||||
/*	Memory-to-memory operation with an immediate selector:
	load memd into xmm0, emit "op $subop, mems, %xmm0", and store xmm0 back
	to memd.  The xmmreg argument is accepted for interface compatibility
	only; xmm0 is always the scratch register and is declared as clobbered.
	subop is pasted into the instruction text, so it must be a literal
	constant.
*/
#define	sse_m2mi(op, mems, memd, xmmreg, subop) \
	__asm__ __volatile__ ("movups %0, %%xmm0\n\t" \
			      #op " $" #subop ", %1, %%xmm0\n\t" \
			      "movups %%xmm0, %0" \
			      : "+m" (memd) \
			      : "m" (mems) \
			      : "xmm0")
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
||||
/* 1x128 MOVe Aligned four Packed Single-fp
|
||||
*/
|
||||
#define movaps_m2r(var, reg) sse_m2r(movaps, var, reg)
|
||||
#define movaps_r2m(reg, var) sse_r2m(movaps, reg, var)
|
||||
#define movaps_r2r(regs, regd) sse_r2r(movaps, regs, regd)
|
||||
/*	Memory-to-memory form: copy 16 bytes from vars to vard, staged through
	xmm0.  movaps requires both operands to be 16-byte aligned.
*/
#define	movaps(vars, vard) \
	__asm__ __volatile__ ("movaps %1, %%xmm0\n\t" \
			      "movaps %%xmm0, %0" \
			      : "=m" (vard) \
			      : "m" (vars) \
			      : "xmm0")
|
||||
|
||||
|
||||
/* 1x128 MOVe aligned Non-Temporal four Packed Single-fp
|
||||
*/
|
||||
#define movntps_r2m(xmmreg, var) sse_r2m(movntps, xmmreg, var)
|
||||
|
||||
|
||||
/* 1x64 MOVe Non-Temporal Quadword
|
||||
*/
|
||||
#define movntq_r2m(mmreg, var) sse_r2m(movntq, mmreg, var)
|
||||
|
||||
|
||||
/* 1x128 MOVe Unaligned four Packed Single-fp
|
||||
*/
|
||||
#define movups_m2r(var, reg) sse_m2r(movups, var, reg)
|
||||
#define movups_r2m(reg, var) sse_r2m(movups, reg, var)
|
||||
#define movups_r2r(regs, regd) sse_r2r(movups, regs, regd)
|
||||
/*	Memory-to-memory form: copy 16 bytes from vars to vard, staged through
	xmm0.  No alignment is required.
*/
#define	movups(vars, vard) \
	__asm__ __volatile__ ("movups %1, %%xmm0\n\t" \
			      "movups %%xmm0, %0" \
			      : "=m" (vard) \
			      : "m" (vars) \
			      : "xmm0")
|
||||
|
||||
|
||||
/* MOVe High to Low Packed Single-fp
|
||||
high half of 4x32f (x) -> low half of 4x32f (y)
|
||||
*/
|
||||
#define movhlps_r2r(regs, regd) sse_r2r(movhlps, regs, regd)
|
||||
|
||||
|
||||
/* MOVe Low to High Packed Single-fp
|
||||
low half of 4x32f (x) -> high half of 4x32f (y)
|
||||
*/
|
||||
#define movlhps_r2r(regs, regd) sse_r2r(movlhps, regs, regd)
|
||||
|
||||
|
||||
/* MOVe High Packed Single-fp
|
||||
2x32f -> high half of 4x32f
|
||||
*/
|
||||
#define movhps_m2r(var, reg) sse_m2r(movhps, var, reg)
|
||||
#define movhps_r2m(reg, var) sse_r2m(movhps, reg, var)
|
||||
/*	Memory-to-memory form: copy the 64-bit (2x32f) value at vars to vard,
	staged through the high half of xmm0.
*/
#define	movhps(vars, vard) \
	__asm__ __volatile__ ("movhps %1, %%xmm0\n\t" \
			      "movhps %%xmm0, %0" \
			      : "=m" (vard) \
			      : "m" (vars) \
			      : "xmm0")
|
||||
|
||||
|
||||
/* MOVe Low Packed Single-fp
|
||||
2x32f -> low half of 4x32f
|
||||
*/
|
||||
#define movlps_m2r(var, reg) sse_m2r(movlps, var, reg)
|
||||
#define movlps_r2m(reg, var) sse_r2m(movlps, reg, var)
|
||||
/*	Memory-to-memory form: copy the 64-bit (2x32f) value at vars to vard,
	staged through the low half of xmm0.
*/
#define	movlps(vars, vard) \
	__asm__ __volatile__ ("movlps %1, %%xmm0\n\t" \
			      "movlps %%xmm0, %0" \
			      : "=m" (vard) \
			      : "m" (vars) \
			      : "xmm0")
|
||||
|
||||
|
||||
/* MOVe Scalar Single-fp
|
||||
lowest field of 4x32f (x) -> lowest field of 4x32f (y)
|
||||
*/
|
||||
#define movss_m2r(var, reg) sse_m2r(movss, var, reg)
|
||||
#define movss_r2m(reg, var) sse_r2m(movss, reg, var)
|
||||
#define movss_r2r(regs, regd) sse_r2r(movss, regs, regd)
|
||||
/*	Memory-to-memory form: copy one 32-bit float from vars to vard, staged
	through the lowest field of xmm0.
*/
#define	movss(vars, vard) \
	__asm__ __volatile__ ("movss %1, %%xmm0\n\t" \
			      "movss %%xmm0, %0" \
			      : "=m" (vard) \
			      : "m" (vars) \
			      : "xmm0")
|
||||
|
||||
|
||||
/* 4x16 Packed SHUFfle Word
|
||||
*/
|
||||
#define pshufw_m2r(var, reg, index) sse_m2ri(pshufw, var, reg, index)
|
||||
#define pshufw_r2r(regs, regd, index) sse_r2ri(pshufw, regs, regd, index)
|
||||
|
||||
|
||||
/* 1x128 SHUFfle Packed Single-fp
|
||||
*/
|
||||
#define shufps_m2r(var, reg, index) sse_m2ri(shufps, var, reg, index)
|
||||
#define shufps_r2r(regs, regd, index) sse_r2ri(shufps, regs, regd, index)
|
||||
|
||||
|
||||
/* ConVerT Packed signed Int32 to(2) Packed Single-fp
|
||||
*/
|
||||
#define cvtpi2ps_m2r(var, xmmreg) sse_m2r(cvtpi2ps, var, xmmreg)
|
||||
#define cvtpi2ps_r2r(mmreg, xmmreg) sse_r2r(cvtpi2ps, mmreg, xmmreg)
|
||||
|
||||
|
||||
/* ConVerT Packed Single-fp to(2) Packed signed Int32
|
||||
*/
|
||||
#define cvtps2pi_m2r(var, mmreg) sse_m2r(cvtps2pi, var, mmreg)
|
||||
/* Register form: the XMM register is the source, the MMX register the destination. */
#define cvtps2pi_r2r(xmmreg, mmreg) sse_r2r(cvtps2pi, xmmreg, mmreg)
|
||||
|
||||
|
||||
/* ConVerT with Truncate Packed Single-fp to(2) Packed Int32
|
||||
*/
|
||||
#define cvttps2pi_m2r(var, mmreg) sse_m2r(cvttps2pi, var, mmreg)
|
||||
/* Register form: the XMM register is the source, the MMX register the destination. */
#define cvttps2pi_r2r(xmmreg, mmreg) sse_r2r(cvttps2pi, xmmreg, mmreg)
|
||||
|
||||
|
||||
/* ConVerT Signed Int32 to(2) Single-fp (Scalar)
|
||||
*/
|
||||
#define cvtsi2ss_m2r(var, xmmreg) sse_m2r(cvtsi2ss, var, xmmreg)
|
||||
#define cvtsi2ss_r2r(reg, xmmreg) sse_r2r(cvtsi2ss, reg, xmmreg)
|
||||
|
||||
|
||||
/* ConVerT Scalar Single-fp to(2) Signed Int32
|
||||
*/
|
||||
#define cvtss2si_m2r(var, reg) sse_m2r(cvtss2si, var, reg)
|
||||
#define cvtss2si_r2r(xmmreg, reg) sse_r2r(cvtss2si, xmmreg, reg)
|
||||
|
||||
|
||||
/* ConVerT with Truncate Scalar Single-fp to(2) Signed Int32
|
||||
*/
|
||||
/* Memory form of the truncating conversion (cvttss2si, not the rounding cvtss2si). */
#define cvttss2si_m2r(var, reg) sse_m2r(cvttss2si, var, reg)
|
||||
/* Register form of the truncating conversion (cvttss2si, not the rounding cvtss2si). */
#define cvttss2si_r2r(xmmreg, reg) sse_r2r(cvttss2si, xmmreg, reg)
|
||||
|
||||
|
||||
/* Parallel EXTRact Word from 4x16
|
||||
*/
|
||||
#define pextrw_r2r(mmreg, reg, field) sse_r2ri(pextrw, mmreg, reg, field)
|
||||
|
||||
|
||||
/* Parallel INSeRt Word into 4x16
|
||||
*/
|
||||
#define pinsrw_r2r(reg, mmreg, field) sse_r2ri(pinsrw, reg, mmreg, field)
|
||||
|
||||
|
||||
|
||||
/* MOVe MaSK from Packed Single-fp
|
||||
*/
|
||||
#ifdef SSE_TRACE
|
||||
#define movmskps(xmmreg, reg) \
|
||||
{ \
|
||||
fprintf(stderr, "movmskps()\n"); \
|
||||
__asm__ __volatile__ ("movmskps %" #xmmreg ", %" #reg) \
|
||||
}
|
||||
#else
|
||||
#define movmskps(xmmreg, reg) \
|
||||
__asm__ __volatile__ ("movmskps %" #xmmreg ", %" #reg)
|
||||
#endif
|
||||
|
||||
|
||||
/*	Parallel MOVe MaSK of Bytes from mmx reg to 32-bit reg
	(collects the most significant bit of each of the 8 bytes in mmreg
	into the low bits of reg).
	Both arguments are bare register names; the instruction is emitted
	without operand constraints, so the compiler is not told that reg
	changes.
*/
#ifdef	SSE_TRACE
#define	pmovmskb(mmreg, reg) \
	{ \
		fprintf(stderr, "pmovmskb()\n"); \
		__asm__ __volatile__ ("pmovmskb %" #mmreg ", %" #reg); \
	}
#else
#define	pmovmskb(mmreg, reg) \
	__asm__ __volatile__ ("pmovmskb %" #mmreg ", %" #reg)
#endif
|
||||
|
||||
|
||||
/* MASKed MOVe from 8x8 to memory pointed to by (e)di register
|
||||
*/
|
||||
/* maskmovq takes two MMX registers and no immediate, so it expands through the two-register helper. */
#define maskmovq(mmregs, fieldreg) sse_r2r(maskmovq, mmregs, fieldreg)
|
||||
|
||||
|
||||
|
||||
|
||||
/* 4x32f Parallel ADDs
|
||||
*/
|
||||
#define addps_m2r(var, reg) sse_m2r(addps, var, reg)
|
||||
#define addps_r2r(regs, regd) sse_r2r(addps, regs, regd)
|
||||
#define addps(vars, vard, xmmreg) sse_m2m(addps, vars, vard, xmmreg)
|
||||
|
||||
|
||||
/* Lowest Field of 4x32f Parallel ADDs
|
||||
*/
|
||||
#define addss_m2r(var, reg) sse_m2r(addss, var, reg)
|
||||
#define addss_r2r(regs, regd) sse_r2r(addss, regs, regd)
|
||||
#define addss(vars, vard, xmmreg) sse_m2m(addss, vars, vard, xmmreg)
|
||||
|
||||
|
||||
/* 4x32f Parallel SUBs
|
||||
*/
|
||||
#define subps_m2r(var, reg) sse_m2r(subps, var, reg)
|
||||
#define subps_r2r(regs, regd) sse_r2r(subps, regs, regd)
|
||||
#define subps(vars, vard, xmmreg) sse_m2m(subps, vars, vard, xmmreg)
|
||||
|
||||
|
||||
/* Lowest Field of 4x32f Parallel SUBs
|
||||
*/
|
||||
#define subss_m2r(var, reg) sse_m2r(subss, var, reg)
|
||||
#define subss_r2r(regs, regd) sse_r2r(subss, regs, regd)
|
||||
#define subss(vars, vard, xmmreg) sse_m2m(subss, vars, vard, xmmreg)
|
||||
|
||||
|
||||
/* 8x8u -> 4x16u Packed Sum of Absolute Differences
|
||||
*/
|
||||
#define psadbw_m2r(var, reg) sse_m2r(psadbw, var, reg)
|
||||
#define psadbw_r2r(regs, regd) sse_r2r(psadbw, regs, regd)
|
||||
#define psadbw(vars, vard, mmreg) sse_m2m(psadbw, vars, vard, mmreg)
|
||||
|
||||
|
||||
/* 4x16u Parallel MUL High Unsigned
|
||||
*/
|
||||
#define pmulhuw_m2r(var, reg) sse_m2r(pmulhuw, var, reg)
|
||||
#define pmulhuw_r2r(regs, regd) sse_r2r(pmulhuw, regs, regd)
|
||||
#define pmulhuw(vars, vard, mmreg) sse_m2m(pmulhuw, vars, vard, mmreg)
|
||||
|
||||
|
||||
/* 4x32f Parallel MULs
|
||||
*/
|
||||
#define mulps_m2r(var, reg) sse_m2r(mulps, var, reg)
|
||||
#define mulps_r2r(regs, regd) sse_r2r(mulps, regs, regd)
|
||||
#define mulps(vars, vard, xmmreg) sse_m2m(mulps, vars, vard, xmmreg)
|
||||
|
||||
|
||||
/* Lowest Field of 4x32f Parallel MULs
|
||||
*/
|
||||
#define mulss_m2r(var, reg) sse_m2r(mulss, var, reg)
|
||||
#define mulss_r2r(regs, regd) sse_r2r(mulss, regs, regd)
|
||||
#define mulss(vars, vard, xmmreg) sse_m2m(mulss, vars, vard, xmmreg)
|
||||
|
||||
|
||||
/* 4x32f Parallel DIVs
|
||||
*/
|
||||
#define divps_m2r(var, reg) sse_m2r(divps, var, reg)
|
||||
#define divps_r2r(regs, regd) sse_r2r(divps, regs, regd)
|
||||
#define divps(vars, vard, xmmreg) sse_m2m(divps, vars, vard, xmmreg)
|
||||
|
||||
|
||||
/* Lowest Field of 4x32f Parallel DIVs
|
||||
*/
|
||||
#define divss_m2r(var, reg) sse_m2r(divss, var, reg)
|
||||
#define divss_r2r(regs, regd) sse_r2r(divss, regs, regd)
|
||||
#define divss(vars, vard, xmmreg) sse_m2m(divss, vars, vard, xmmreg)
|
||||
|
||||
|
||||
/* 4x32f Parallel Reciprocals
|
||||
*/
|
||||
#define rcpps_m2r(var, reg) sse_m2r(rcpps, var, reg)
|
||||
#define rcpps_r2r(regs, regd) sse_r2r(rcpps, regs, regd)
|
||||
#define rcpps(vars, vard, xmmreg) sse_m2m(rcpps, vars, vard, xmmreg)
|
||||
|
||||
|
||||
/* Lowest Field of 4x32f Parallel Reciprocals
|
||||
*/
|
||||
#define rcpss_m2r(var, reg) sse_m2r(rcpss, var, reg)
|
||||
#define rcpss_r2r(regs, regd) sse_r2r(rcpss, regs, regd)
|
||||
#define rcpss(vars, vard, xmmreg) sse_m2m(rcpss, vars, vard, xmmreg)
|
||||
|
||||
|
||||
/* 4x32f Parallel Reciprocals of Square Roots
|
||||
*/
|
||||
#define rsqrtps_m2r(var, reg) sse_m2r(rsqrtps, var, reg)
|
||||
#define rsqrtps_r2r(regs, regd) sse_r2r(rsqrtps, regs, regd)
|
||||
#define rsqrtps(vars, vard, xmmreg) sse_m2m(rsqrtps, vars, vard, xmmreg)
|
||||
|
||||
|
||||
/* Lowest Field of 4x32f Parallel Reciprocal of Square Root
|
||||
*/
|
||||
#define rsqrtss_m2r(var, reg) sse_m2r(rsqrtss, var, reg)
|
||||
#define rsqrtss_r2r(regs, regd) sse_r2r(rsqrtss, regs, regd)
|
||||
#define rsqrtss(vars, vard, xmmreg) sse_m2m(rsqrtss, vars, vard, xmmreg)
|
||||
|
||||
|
||||
/* 4x32f Parallel Square Roots
|
||||
*/
|
||||
#define sqrtps_m2r(var, reg) sse_m2r(sqrtps, var, reg)
|
||||
#define sqrtps_r2r(regs, regd) sse_r2r(sqrtps, regs, regd)
|
||||
#define sqrtps(vars, vard, xmmreg) sse_m2m(sqrtps, vars, vard, xmmreg)
|
||||
|
||||
|
||||
/* Lowest Field of 4x32f Parallel Square Roots
|
||||
*/
|
||||
#define sqrtss_m2r(var, reg) sse_m2r(sqrtss, var, reg)
|
||||
#define sqrtss_r2r(regs, regd) sse_r2r(sqrtss, regs, regd)
|
||||
#define sqrtss(vars, vard, xmmreg) sse_m2m(sqrtss, vars, vard, xmmreg)
|
||||
|
||||
|
||||
/* 8x8u and 4x16u Parallel AVeraGe
|
||||
*/
|
||||
#define pavgb_m2r(var, reg) sse_m2r(pavgb, var, reg)
|
||||
#define pavgb_r2r(regs, regd) sse_r2r(pavgb, regs, regd)
|
||||
#define pavgb(vars, vard, mmreg) sse_m2m(pavgb, vars, vard, mmreg)
|
||||
|
||||
#define pavgw_m2r(var, reg) sse_m2r(pavgw, var, reg)
|
||||
#define pavgw_r2r(regs, regd) sse_r2r(pavgw, regs, regd)
|
||||
#define pavgw(vars, vard, mmreg) sse_m2m(pavgw, vars, vard, mmreg)
|
||||
|
||||
|
||||
/* 1x128 bitwise AND
|
||||
*/
|
||||
#define andps_m2r(var, reg) sse_m2r(andps, var, reg)
|
||||
#define andps_r2r(regs, regd) sse_r2r(andps, regs, regd)
|
||||
#define andps(vars, vard, xmmreg) sse_m2m(andps, vars, vard, xmmreg)
|
||||
|
||||
|
||||
/* 1x128 bitwise AND with Not the destination
|
||||
*/
|
||||
#define andnps_m2r(var, reg) sse_m2r(andnps, var, reg)
|
||||
#define andnps_r2r(regs, regd) sse_r2r(andnps, regs, regd)
|
||||
#define andnps(vars, vard, xmmreg) sse_m2m(andnps, vars, vard, xmmreg)
|
||||
|
||||
|
||||
/* 1x128 bitwise OR
|
||||
*/
|
||||
#define orps_m2r(var, reg) sse_m2r(orps, var, reg)
|
||||
#define orps_r2r(regs, regd) sse_r2r(orps, regs, regd)
|
||||
#define orps(vars, vard, xmmreg) sse_m2m(orps, vars, vard, xmmreg)
|
||||
|
||||
|
||||
/* 1x128 bitwise eXclusive OR
|
||||
*/
|
||||
#define xorps_m2r(var, reg) sse_m2r(xorps, var, reg)
|
||||
#define xorps_r2r(regs, regd) sse_r2r(xorps, regs, regd)
|
||||
#define xorps(vars, vard, xmmreg) sse_m2m(xorps, vars, vard, xmmreg)
|
||||
|
||||
|
||||
/* 8x8u, 4x16, and 4x32f Parallel Maximum
|
||||
*/
|
||||
#define pmaxub_m2r(var, reg) sse_m2r(pmaxub, var, reg)
|
||||
#define pmaxub_r2r(regs, regd) sse_r2r(pmaxub, regs, regd)
|
||||
#define pmaxub(vars, vard, mmreg) sse_m2m(pmaxub, vars, vard, mmreg)
|
||||
|
||||
#define pmaxsw_m2r(var, reg) sse_m2r(pmaxsw, var, reg)
|
||||
#define pmaxsw_r2r(regs, regd) sse_r2r(pmaxsw, regs, regd)
|
||||
#define pmaxsw(vars, vard, mmreg) sse_m2m(pmaxsw, vars, vard, mmreg)
|
||||
|
||||
#define maxps_m2r(var, reg) sse_m2r(maxps, var, reg)
|
||||
#define maxps_r2r(regs, regd) sse_r2r(maxps, regs, regd)
|
||||
#define maxps(vars, vard, xmmreg) sse_m2m(maxps, vars, vard, xmmreg)
|
||||
|
||||
|
||||
/* Lowest Field of 4x32f Parallel Maximum
|
||||
*/
|
||||
#define maxss_m2r(var, reg) sse_m2r(maxss, var, reg)
|
||||
#define maxss_r2r(regs, regd) sse_r2r(maxss, regs, regd)
|
||||
#define maxss(vars, vard, xmmreg) sse_m2m(maxss, vars, vard, xmmreg)
|
||||
|
||||
|
||||
/* 8x8u, 4x16, and 4x32f Parallel Minimum
|
||||
*/
|
||||
#define pminub_m2r(var, reg) sse_m2r(pminub, var, reg)
|
||||
#define pminub_r2r(regs, regd) sse_r2r(pminub, regs, regd)
|
||||
#define pminub(vars, vard, mmreg) sse_m2m(pminub, vars, vard, mmreg)
|
||||
|
||||
#define pminsw_m2r(var, reg) sse_m2r(pminsw, var, reg)
|
||||
#define pminsw_r2r(regs, regd) sse_r2r(pminsw, regs, regd)
|
||||
#define pminsw(vars, vard, mmreg) sse_m2m(pminsw, vars, vard, mmreg)
|
||||
|
||||
#define minps_m2r(var, reg) sse_m2r(minps, var, reg)
|
||||
#define minps_r2r(regs, regd) sse_r2r(minps, regs, regd)
|
||||
#define minps(vars, vard, xmmreg) sse_m2m(minps, vars, vard, xmmreg)
|
||||
|
||||
|
||||
/* Lowest Field of 4x32f Parallel Minimum
|
||||
*/
|
||||
#define minss_m2r(var, reg) sse_m2r(minss, var, reg)
|
||||
#define minss_r2r(regs, regd) sse_r2r(minss, regs, regd)
|
||||
#define minss(vars, vard, xmmreg) sse_m2m(minss, vars, vard, xmmreg)
|
||||
|
||||
|
||||
/* 4x32f Parallel CoMPares
|
||||
(resulting fields are either 0 or -1)
|
||||
*/
|
||||
#define cmpps_m2r(var, reg, op) sse_m2ri(cmpps, var, reg, op)
|
||||
#define cmpps_r2r(regs, regd, op) sse_r2ri(cmpps, regs, regd, op)
|
||||
#define cmpps(vars, vard, op, xmmreg) sse_m2mi(cmpps, vars, vard, xmmreg, op)
|
||||
|
||||
#define cmpeqps_m2r(var, reg) sse_m2ri(cmpps, var, reg, 0)
|
||||
#define cmpeqps_r2r(regs, regd) sse_r2ri(cmpps, regs, regd, 0)
|
||||
#define cmpeqps(vars, vard, xmmreg) sse_m2mi(cmpps, vars, vard, xmmreg, 0)
|
||||
|
||||
#define cmpltps_m2r(var, reg) sse_m2ri(cmpps, var, reg, 1)
|
||||
#define cmpltps_r2r(regs, regd) sse_r2ri(cmpps, regs, regd, 1)
|
||||
#define cmpltps(vars, vard, xmmreg) sse_m2mi(cmpps, vars, vard, xmmreg, 1)
|
||||
|
||||
#define cmpleps_m2r(var, reg) sse_m2ri(cmpps, var, reg, 2)
|
||||
#define cmpleps_r2r(regs, regd) sse_r2ri(cmpps, regs, regd, 2)
|
||||
#define cmpleps(vars, vard, xmmreg) sse_m2mi(cmpps, vars, vard, xmmreg, 2)
|
||||
|
||||
#define cmpunordps_m2r(var, reg) sse_m2ri(cmpps, var, reg, 3)
|
||||
#define cmpunordps_r2r(regs, regd) sse_r2ri(cmpps, regs, regd, 3)
|
||||
#define cmpunordps(vars, vard, xmmreg) sse_m2mi(cmpps, vars, vard, xmmreg, 3)
|
||||
|
||||
#define cmpneqps_m2r(var, reg) sse_m2ri(cmpps, var, reg, 4)
|
||||
#define cmpneqps_r2r(regs, regd) sse_r2ri(cmpps, regs, regd, 4)
|
||||
#define cmpneqps(vars, vard, xmmreg) sse_m2mi(cmpps, vars, vard, xmmreg, 4)
|
||||
|
||||
#define cmpnltps_m2r(var, reg) sse_m2ri(cmpps, var, reg, 5)
|
||||
#define cmpnltps_r2r(regs, regd) sse_r2ri(cmpps, regs, regd, 5)
|
||||
#define cmpnltps(vars, vard, xmmreg) sse_m2mi(cmpps, vars, vard, xmmreg, 5)
|
||||
|
||||
#define cmpnleps_m2r(var, reg) sse_m2ri(cmpps, var, reg, 6)
|
||||
#define cmpnleps_r2r(regs, regd) sse_r2ri(cmpps, regs, regd, 6)
|
||||
#define cmpnleps(vars, vard, xmmreg) sse_m2mi(cmpps, vars, vard, xmmreg, 6)
|
||||
|
||||
#define cmpordps_m2r(var, reg) sse_m2ri(cmpps, var, reg, 7)
|
||||
#define cmpordps_r2r(regs, regd) sse_r2ri(cmpps, regs, regd, 7)
|
||||
#define cmpordps(vars, vard, xmmreg) sse_m2mi(cmpps, vars, vard, xmmreg, 7)
|
||||
|
||||
|
||||
/* Lowest Field of 4x32f Parallel CoMPares
|
||||
(resulting fields are either 0 or -1)
|
||||
*/
|
||||
#define cmpss_m2r(var, reg, op) sse_m2ri(cmpss, var, reg, op)
|
||||
#define cmpss_r2r(regs, regd, op) sse_r2ri(cmpss, regs, regd, op)
|
||||
#define cmpss(vars, vard, op, xmmreg) sse_m2mi(cmpss, vars, vard, xmmreg, op)
|
||||
|
||||
#define cmpeqss_m2r(var, reg) sse_m2ri(cmpss, var, reg, 0)
|
||||
#define cmpeqss_r2r(regs, regd) sse_r2ri(cmpss, regs, regd, 0)
|
||||
#define cmpeqss(vars, vard, xmmreg) sse_m2mi(cmpss, vars, vard, xmmreg, 0)
|
||||
|
||||
#define cmpltss_m2r(var, reg) sse_m2ri(cmpss, var, reg, 1)
|
||||
#define cmpltss_r2r(regs, regd) sse_r2ri(cmpss, regs, regd, 1)
|
||||
#define cmpltss(vars, vard, xmmreg) sse_m2mi(cmpss, vars, vard, xmmreg, 1)
|
||||
|
||||
#define cmpless_m2r(var, reg) sse_m2ri(cmpss, var, reg, 2)
|
||||
#define cmpless_r2r(regs, regd) sse_r2ri(cmpss, regs, regd, 2)
|
||||
#define cmpless(vars, vard, xmmreg) sse_m2mi(cmpss, vars, vard, xmmreg, 2)
|
||||
|
||||
#define cmpunordss_m2r(var, reg) sse_m2ri(cmpss, var, reg, 3)
|
||||
#define cmpunordss_r2r(regs, regd) sse_r2ri(cmpss, regs, regd, 3)
|
||||
#define cmpunordss(vars, vard, xmmreg) sse_m2mi(cmpss, vars, vard, xmmreg, 3)
|
||||
|
||||
#define cmpneqss_m2r(var, reg) sse_m2ri(cmpss, var, reg, 4)
|
||||
#define cmpneqss_r2r(regs, regd) sse_r2ri(cmpss, regs, regd, 4)
|
||||
#define cmpneqss(vars, vard, xmmreg) sse_m2mi(cmpss, vars, vard, xmmreg, 4)
|
||||
|
||||
#define cmpnltss_m2r(var, reg) sse_m2ri(cmpss, var, reg, 5)
|
||||
#define cmpnltss_r2r(regs, regd) sse_r2ri(cmpss, regs, regd, 5)
|
||||
#define cmpnltss(vars, vard, xmmreg) sse_m2mi(cmpss, vars, vard, xmmreg, 5)
|
||||
|
||||
#define cmpnless_m2r(var, reg) sse_m2ri(cmpss, var, reg, 6)
|
||||
#define cmpnless_r2r(regs, regd) sse_r2ri(cmpss, regs, regd, 6)
|
||||
#define cmpnless(vars, vard, xmmreg) sse_m2mi(cmpss, vars, vard, xmmreg, 6)
|
||||
|
||||
#define cmpordss_m2r(var, reg) sse_m2ri(cmpss, var, reg, 7)
|
||||
#define cmpordss_r2r(regs, regd) sse_r2ri(cmpss, regs, regd, 7)
|
||||
#define cmpordss(vars, vard, xmmreg) sse_m2mi(cmpss, vars, vard, xmmreg, 7)
|
||||
|
||||
|
||||
/* Lowest Field of 4x32f Parallel CoMPares to set EFLAGS
|
||||
(the comparison result is reported in the ZF, PF, and CF flags; no result fields are written)
|
||||
*/
|
||||
#define comiss_m2r(var, reg) sse_m2r(comiss, var, reg)
|
||||
#define comiss_r2r(regs, regd) sse_r2r(comiss, regs, regd)
|
||||
#define comiss(vars, vard, xmmreg) sse_m2m(comiss, vars, vard, xmmreg)
|
||||
|
||||
|
||||
/* Lowest Field of 4x32f Unordered Parallel CoMPares to set EFLAGS
|
||||
(the comparison result is reported in the ZF, PF, and CF flags; no result fields are written)
|
||||
*/
|
||||
#define ucomiss_m2r(var, reg) sse_m2r(ucomiss, var, reg)
|
||||
#define ucomiss_r2r(regs, regd) sse_r2r(ucomiss, regs, regd)
|
||||
#define ucomiss(vars, vard, xmmreg) sse_m2m(ucomiss, vars, vard, xmmreg)
|
||||
|
||||
|
||||
/* 2-(4x32f) -> 4x32f UNPaCK Low Packed Single-fp
|
||||
(interleaves low half of dest with low half of source
|
||||
as padding in each result field)
|
||||
*/
|
||||
#define unpcklps_m2r(var, reg) sse_m2r(unpcklps, var, reg)
|
||||
#define unpcklps_r2r(regs, regd) sse_r2r(unpcklps, regs, regd)
|
||||
|
||||
|
||||
/* 2-(4x32f) -> 4x32f UNPaCK High Packed Single-fp
|
||||
(interleaves high half of dest with high half of source
|
||||
as padding in each result field)
|
||||
*/
|
||||
#define unpckhps_m2r(var, reg) sse_m2r(unpckhps, var, reg)
|
||||
#define unpckhps_r2r(regs, regd) sse_r2r(unpckhps, regs, regd)
|
||||
|
||||
|
||||
|
||||
/* Fp and mmX ReSTORe state
|
||||
*/
|
||||
#ifdef SSE_TRACE
|
||||
#define fxrstor(mem) \
|
||||
{ \
|
||||
fprintf(stderr, "fxrstor()\n"); \
|
||||
__asm__ __volatile__ ("fxrstor %0" \
|
||||
: /* nothing */ \
|
||||
: "X" (mem)) \
|
||||
}
|
||||
#else
|
||||
#define fxrstor(mem) \
|
||||
__asm__ __volatile__ ("fxrstor %0" \
|
||||
: /* nothing */ \
|
||||
: "X" (mem))
|
||||
#endif
|
||||
|
||||
|
||||
/*	Fp and mmX SAVE state
	Writes the FPU/MMX/SSE state image to mem, so mem is an output memory
	operand.  mem must be a 512-byte, 16-byte-aligned object.
*/
#ifdef	SSE_TRACE
#define	fxsave(mem) \
	{ \
		fprintf(stderr, "fxsave()\n"); \
		__asm__ __volatile__ ("fxsave %0" \
				      : "=m" (mem) \
				      : /* nothing */ ); \
	}
#else
#define	fxsave(mem) \
	__asm__ __volatile__ ("fxsave %0" \
			      : "=m" (mem) \
			      : /* nothing */ )
#endif
|
||||
|
||||
|
||||
/*	STore streaMing simd eXtensions Control/Status Register
	Writes the 32-bit MXCSR value to mem, so mem is an output memory
	operand.
*/
#ifdef	SSE_TRACE
#define	stmxcsr(mem) \
	{ \
		fprintf(stderr, "stmxcsr()\n"); \
		__asm__ __volatile__ ("stmxcsr %0" \
				      : "=m" (mem) \
				      : /* nothing */ ); \
	}
#else
#define	stmxcsr(mem) \
	__asm__ __volatile__ ("stmxcsr %0" \
			      : "=m" (mem) \
			      : /* nothing */ )
#endif
|
||||
|
||||
|
||||
/* LoaD streaMing simd eXtensions Control/Status Register
|
||||
*/
|
||||
#ifdef SSE_TRACE
|
||||
#define ldmxcsr(mem) \
|
||||
{ \
|
||||
fprintf(stderr, "ldmxcsr()\n"); \
|
||||
__asm__ __volatile__ ("ldmxcsr %0" \
|
||||
: /* nothing */ \
|
||||
: "X" (mem)) \
|
||||
}
|
||||
#else
|
||||
#define ldmxcsr(mem) \
|
||||
__asm__ __volatile__ ("ldmxcsr %0" \
|
||||
: /* nothing */ \
|
||||
: "X" (mem))
|
||||
#endif
|
||||
|
||||
|
||||
/* Store FENCE - enforce ordering of stores before fence vs. stores
|
||||
occurring after fence in source code.
|
||||
*/
|
||||
#ifdef SSE_TRACE
|
||||
#define sfence() \
|
||||
{ \
|
||||
fprintf(stderr, "sfence()\n"); \
|
||||
__asm__ __volatile__ ("sfence\n\t") \
|
||||
}
|
||||
#else
|
||||
#define sfence() \
|
||||
__asm__ __volatile__ ("sfence\n\t")
|
||||
#endif
|
||||
|
||||
|
||||
/* PREFETCH data using T0, T1, T2, or NTA hint
|
||||
T0 = Prefetch into all cache levels
|
||||
T1 = Prefetch into all cache levels except 0th level
|
||||
T2 = Prefetch into all cache levels except 0th and 1st levels
|
||||
NTA = Prefetch data into non-temporal cache structure
|
||||
*/
|
||||
#ifdef SSE_TRACE
|
||||
#else
|
||||
#define prefetch(mem, hint) \
|
||||
__asm__ __volatile__ ("prefetch" #hint " %0" \
|
||||
: /* nothing */ \
|
||||
: "X" (mem))
|
||||
|
||||
#define prefetcht0(mem) prefetch(mem, t0)
|
||||
#define prefetcht1(mem) prefetch(mem, t1)
|
||||
#define prefetcht2(mem) prefetch(mem, t2)
|
||||
#define prefetchnta(mem) prefetch(mem, nta)
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
#endif
|
187
gst/deinterlace2/tvtime/tomsmocomp.c
Normal file
187
gst/deinterlace2/tvtime/tomsmocomp.c
Normal file
|
@ -0,0 +1,187 @@
|
|||
/**
|
||||
* Copyright (C) 2004 Billy Biggs <vektor@dumbterm.net>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software Foundation,
|
||||
* Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
# include "config.h"
|
||||
#endif
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "gst/gst.h"
|
||||
#include "gstdeinterlace2.h"
|
||||
#include "plugins.h"
|
||||
#include "speedy.h"
|
||||
|
||||
#include "tomsmocomp.h"
|
||||
#include "tomsmocomp/tomsmocompmacros.h"
|
||||
#include "x86-64_macros.inc"
|
||||
|
||||
|
||||
#define SearchEffortDefault 5
|
||||
#define UseStrangeBobDefault 0
|
||||
|
||||
long SearchEffort;
|
||||
|
||||
int UseStrangeBob;
|
||||
|
||||
MEMCPY_FUNC *pMyMemcpy;
|
||||
|
||||
int IsOdd;
|
||||
|
||||
const unsigned char *pWeaveSrc;
|
||||
|
||||
const unsigned char *pWeaveSrcP;
|
||||
|
||||
unsigned char *pWeaveDest;
|
||||
|
||||
const unsigned char *pCopySrc;
|
||||
|
||||
const unsigned char *pCopySrcP;
|
||||
|
||||
unsigned char *pCopyDest;
|
||||
|
||||
int src_pitch;
|
||||
|
||||
int dst_pitch;
|
||||
|
||||
int rowsize;
|
||||
|
||||
int height;
|
||||
|
||||
int FldHeight;
|
||||
|
||||
/* Copy `rows` lines of `count` bytes each from src to dest using the
 * pMyMemcpy routine.  After every line the source pointer advances by
 * src_pitch bytes and the destination pointer by dst_pitch bytes.
 * Always returns 0. */
int
Fieldcopy (void *dest, const void *src, size_t count,
    int rows, int dst_pitch, int src_pitch)
{
  unsigned char *out_line = (unsigned char *) dest;
  unsigned char *in_line = (unsigned char *) src;
  int remaining = rows;

  while (remaining > 0) {
    pMyMemcpy (out_line, in_line, count);
    in_line += src_pitch;
    out_line += dst_pitch;
    remaining--;
  }

  return 0;
}
|
||||
|
||||
|
||||
#define IS_MMX
|
||||
#define SSE_TYPE MMX
|
||||
#define FUNCT_NAME tomsmocompDScaler_MMX
|
||||
#include "tomsmocomp/TomsMoCompAll.inc"
|
||||
#undef IS_MMX
|
||||
#undef SSE_TYPE
|
||||
#undef FUNCT_NAME
|
||||
|
||||
#define IS_3DNOW
|
||||
#define SSE_TYPE 3DNOW
|
||||
#define FUNCT_NAME tomsmocompDScaler_3DNOW
|
||||
#include "tomsmocomp/TomsMoCompAll.inc"
|
||||
#undef IS_3DNOW
|
||||
#undef SSE_TYPE
|
||||
#undef FUNCT_NAME
|
||||
|
||||
#define IS_SSE
|
||||
#define SSE_TYPE SSE
|
||||
#define FUNCT_NAME tomsmocompDScaler_SSE
|
||||
#include "tomsmocomp/TomsMoCompAll.inc"
|
||||
#undef IS_SSE
|
||||
#undef SSE_TYPE
|
||||
#undef FUNCT_NAME
|
||||
|
||||
|
||||
|
||||
void
|
||||
deinterlace_frame_di_tomsmocomp (GstDeinterlace2 * object)
|
||||
{
|
||||
if (object->cpu_feature_flags & OIL_IMPL_FLAG_SSE) {
|
||||
tomsmocomp_filter_sse (object);
|
||||
} else if (object->cpu_feature_flags & OIL_IMPL_FLAG_3DNOW) {
|
||||
tomsmocomp_filter_3dnow (object);
|
||||
} else {
|
||||
tomsmocomp_filter_mmx (object);
|
||||
}
|
||||
}
|
||||
|
||||
static deinterlace_method_t tomsmocompmethod = {
|
||||
0, //DEINTERLACE_PLUGIN_API_VERSION,
|
||||
"Motion Adaptive: Motion Search",
|
||||
"AdaptiveSearch",
|
||||
4,
|
||||
OIL_IMPL_FLAG_MMX,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
deinterlace_frame_di_tomsmocomp,
|
||||
{"Uses heuristics to detect motion in the input",
|
||||
"frames and reconstruct image detail where",
|
||||
"possible. Use this for high quality output",
|
||||
"even on monitors set to an arbitrary refresh",
|
||||
"rate.",
|
||||
"",
|
||||
"Motion search mode finds and follows motion",
|
||||
"vectors for accurate interpolation. This is",
|
||||
"the TomsMoComp deinterlacer from DScaler.",
|
||||
""}
|
||||
};
|
||||
|
||||
|
||||
|
||||
deinterlace_method_t *
|
||||
dscaler_tomsmocomp_get_method (void)
|
||||
{
|
||||
tomsmocomp_init ();
|
||||
return &tomsmocompmethod;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void
|
||||
tomsmocomp_init (void)
|
||||
{
|
||||
SearchEffort = SearchEffortDefault;
|
||||
UseStrangeBob = UseStrangeBobDefault;
|
||||
}
|
||||
|
||||
void
|
||||
tomsmocomp_filter_mmx (GstDeinterlace2 * object)
|
||||
{
|
||||
tomsmocompDScaler_MMX (object);
|
||||
}
|
||||
|
||||
void
|
||||
tomsmocomp_filter_3dnow (GstDeinterlace2 * object)
|
||||
{
|
||||
tomsmocompDScaler_3DNOW (object);
|
||||
}
|
||||
|
||||
void
|
||||
tomsmocomp_filter_sse (GstDeinterlace2 * object)
|
||||
{
|
||||
tomsmocompDScaler_SSE (object);
|
||||
}
|
61
gst/deinterlace2/tvtime/tomsmocomp.h
Normal file
61
gst/deinterlace2/tvtime/tomsmocomp.h
Normal file
|
@ -0,0 +1,61 @@
|
|||
/**
|
||||
* Copyright (C) 2004 Billy Biggs <vektor@dumbterm.net>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software Foundation,
|
||||
* Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
|
||||
#ifndef TOMSMOCOMP_H_INCLUDED
|
||||
#define TOMSMOCOMP_H_INCLUDED
|
||||
|
||||
#include "gstdeinterlace2.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
int Search_Effort_0();
|
||||
int Search_Effort_1();
|
||||
int Search_Effort_3();
|
||||
int Search_Effort_5();
|
||||
int Search_Effort_9();
|
||||
int Search_Effort_11();
|
||||
int Search_Effort_13();
|
||||
int Search_Effort_15();
|
||||
int Search_Effort_19();
|
||||
int Search_Effort_21();
|
||||
int Search_Effort_Max();
|
||||
|
||||
int Search_Effort_0_SB();
|
||||
int Search_Effort_1_SB();
|
||||
int Search_Effort_3_SB();
|
||||
int Search_Effort_5_SB();
|
||||
int Search_Effort_9_SB();
|
||||
int Search_Effort_11_SB();
|
||||
int Search_Effort_13_SB();
|
||||
int Search_Effort_15_SB();
|
||||
int Search_Effort_19_SB();
|
||||
int Search_Effort_21_SB();
|
||||
int Search_Effort_Max_SB();
|
||||
|
||||
void tomsmocomp_init( void );
|
||||
void tomsmocomp_filter_mmx( GstDeinterlace2 *object );
|
||||
void tomsmocomp_filter_3dnow( GstDeinterlace2 *object );
|
||||
void tomsmocomp_filter_sse( GstDeinterlace2 *object );
|
||||
|
||||
#ifdef __cplusplus
|
||||
};
|
||||
#endif
|
||||
|
||||
#endif /* TOMSMOCOMP_H_INCLUDED */
|
184
gst/deinterlace2/tvtime/vfir.c
Normal file
184
gst/deinterlace2/tvtime/vfir.c
Normal file
|
@ -0,0 +1,184 @@
|
|||
/*
|
||||
*
|
||||
* GStreamer
|
||||
* Copyright (C) 2004 Billy Biggs <vektor@dumbterm.net>
|
||||
* Copyright (c) 2001, 2002, 2003 Fabrice Bellard.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Library General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Library General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Library General Public
|
||||
* License along with this library; if not, write to the
|
||||
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
* Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file contains code from ffmpeg, see http://ffmpeg.org/ (LGPL)
|
||||
* and modifications by Billy Biggs.
|
||||
*
|
||||
* Relicensed for GStreamer from GPL to LGPL with permit from Billy Biggs.
|
||||
* See: http://bugzilla.gnome.org/show_bug.cgi?id=163578
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#if defined (__SVR4) && defined (__sun)
|
||||
# include <sys/int_types.h>
|
||||
#else
|
||||
# include <stdint.h>
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
# include "config.h"
|
||||
#endif
|
||||
|
||||
#include "mmx.h"
|
||||
#include "speedy.h"
|
||||
#include "gstdeinterlace2.h"
|
||||
|
||||
/**
|
||||
* The MPEG2 spec uses a slightly harsher filter, they specify
|
||||
* [-1 8 2 8 -1]. ffmpeg uses a similar filter but with more of
|
||||
 * a tendency to blur than to use the local information. The
|
||||
* filter taps here are: [-1 4 2 4 -1].
|
||||
*/
|
||||
|
||||
/*
 * Apply the vertical [-1 4 2 4 -1] / 8 filter to one line.
 *
 * dst     output line, size bytes
 * lum_m4  input line weighted -1
 * lum_m3  input line weighted  4
 * lum_m2  input line weighted  2
 * lum_m1  input line weighted  4
 * lum     input line weighted -1
 * size    number of bytes to filter
 *
 * Each output byte is (4*m3 + 2*m2 + 4*m1 + 4 - m4 - lum) / 8, clipped to
 * 0..255.  The MMX code gets the clipping from its saturating instructions
 * (psubusw clamps at 0, packuswb at 255); the scalar code clips explicitly
 * so that both paths produce the same values.
 */
static void
deinterlace_line (uint8_t * dst, uint8_t * lum_m4,
    uint8_t * lum_m3, uint8_t * lum_m2,
    uint8_t * lum_m1, uint8_t * lum, int size)
{
#ifdef HAVE_CPU_I386
  mmx_t rounder;

  rounder.uw[0] = 4;
  rounder.uw[1] = 4;
  rounder.uw[2] = 4;
  rounder.uw[3] = 4;
  pxor_r2r (mm7, mm7);          /* mm7 = 0, used to widen bytes to words */
  movq_m2r (rounder, mm6);      /* mm6 = rounding term, 4 in every word */

  /* Four bytes per iteration. */
  for (; size > 3; size -= 4) {
    movd_m2r (lum_m4[0], mm0);
    movd_m2r (lum_m3[0], mm1);
    movd_m2r (lum_m2[0], mm2);
    movd_m2r (lum_m1[0], mm3);
    movd_m2r (lum[0], mm4);
    punpcklbw_r2r (mm7, mm0);
    punpcklbw_r2r (mm7, mm1);
    punpcklbw_r2r (mm7, mm2);
    punpcklbw_r2r (mm7, mm3);
    punpcklbw_r2r (mm7, mm4);
    paddw_r2r (mm3, mm1);       /* mm1 = m3 + m1 */
    psllw_i2r (1, mm2);         /* mm2 = 2 * m2 */
    paddw_r2r (mm4, mm0);       /* mm0 = m4 + lum */
    psllw_i2r (2, mm1);         /* mm1 = 4 * (m3 + m1) */
    paddw_r2r (mm6, mm2);       /* add the rounding term */
    paddw_r2r (mm2, mm1);
    psubusw_r2r (mm0, mm1);     /* subtract the outer taps, clamping at 0 */
    psrlw_i2r (3, mm1);         /* divide by 8 */
    packuswb_r2r (mm7, mm1);    /* back to bytes, clamping at 255 */
    movd_r2m (mm1, dst[0]);
    lum_m4 += 4;
    lum_m3 += 4;
    lum_m2 += 4;
    lum_m1 += 4;
    lum += 4;
    dst += 4;
  }
  emms ();
#endif

  /*
   * Scalar code: the whole line when MMX is not compiled in, otherwise the
   * 0..3 bytes the four-wide loop above could not cover.
   */
  for (; size > 0; size--) {
    int sum = 4 * (lum_m3[0] + lum_m1[0]) + 2 * lum_m2[0] + 4;

    sum -= lum_m4[0] + lum[0];
    /* Clamp before shifting so a negative value is never right-shifted. */
    if (sum < 0)
      sum = 0;
    sum >>= 3;
    dst[0] = (uint8_t) (sum > 255 ? 255 : sum);

    lum_m4++;
    lum_m3++;
    lum_m2++;
    lum_m1++;
    lum++;
    dst++;
  }
}
|
||||
|
||||
|
||||
/**
|
||||
* The commented-out method below that uses the bottom_field member is more
|
||||
* like the filter as specified in the MPEG2 spec, but it doesn't seem to
|
||||
* have the desired effect.
|
||||
*/
|
||||
|
||||
static void
|
||||
deinterlace_scanline_vfir (GstDeinterlace2 * object,
|
||||
deinterlace_scanline_data_t * data, uint8_t * output)
|
||||
{
|
||||
deinterlace_line (output, data->tt1, data->t0, data->m1, data->b0, data->bb1,
|
||||
object->frame_width * 2);
|
||||
// blit_packed422_scanline( output, data->m1, width );
|
||||
}
|
||||
|
||||
static void
|
||||
copy_scanline (GstDeinterlace2 * object,
|
||||
deinterlace_scanline_data_t * data, uint8_t * output)
|
||||
{
|
||||
blit_packed422_scanline (output, data->m0, object->frame_width);
|
||||
/*
|
||||
if( data->bottom_field ) {
|
||||
deinterlace_line( output, data->tt2, data->t1, data->m2, data->b1, data->bb2, width*2 );
|
||||
} else {
|
||||
deinterlace_line( output, data->tt0, data->t1, data->m0, data->b1, data->bb0, width*2 );
|
||||
}
|
||||
*/
|
||||
}
|
||||
|
||||
|
||||
static deinterlace_method_t vfirmethod = {
|
||||
0, //DEINTERLACE_PLUGIN_API_VERSION,
|
||||
"Blur: Vertical",
|
||||
"BlurVertical",
|
||||
2,
|
||||
#ifdef HAVE_CPU_I386
|
||||
OIL_IMPL_FLAG_MMXEXT,
|
||||
#else
|
||||
0,
|
||||
#endif
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
1,
|
||||
deinterlace_scanline_vfir,
|
||||
copy_scanline,
|
||||
0,
|
||||
{"Avoids flicker by blurring consecutive frames",
|
||||
"of input. Use this if you want to run your",
|
||||
"monitor at an arbitrary refresh rate and not",
|
||||
"use much CPU, and are willing to sacrifice",
|
||||
"detail.",
|
||||
"",
|
||||
"Vertical mode blurs favouring the most recent",
|
||||
"field for less visible trails. From the",
|
||||
"deinterlacer filter in ffmpeg.",
|
||||
""}
|
||||
};
|
||||
|
||||
deinterlace_method_t *
|
||||
dscaler_vfir_get_method (void)
|
||||
{
|
||||
return &vfirmethod;
|
||||
}
|
82
gst/deinterlace2/tvtime/x86-64_macros.inc
Normal file
82
gst/deinterlace2/tvtime/x86-64_macros.inc
Normal file
|
@ -0,0 +1,82 @@
|
|||
/*
|
||||
*
|
||||
* GStreamer
|
||||
* Copyright (C) 2004 Dirk Ziegelmeier <dziegel@gmx.de>
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Library General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Library General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Library General Public
|
||||
* License along with this library; if not, write to the
|
||||
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
||||
* Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
|
||||
/*
|
||||
*
|
||||
* See: http://bugzilla.gnome.org/show_bug.cgi?id=163578
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file is copied from TVTIME's sources.
|
||||
* Original author: Achim Schneider <batchall@mordor.ch>
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#ifndef XAX
|
||||
|
||||
#if defined (HAVE_CPU_I386) && !defined(HAVE_CPU_X86_64)
|
||||
|
||||
#define XAX "eax"
|
||||
#define XBX "ebx"
|
||||
#define XCX "ecx"
|
||||
#define XDX "edx"
|
||||
#define XSI "esi"
|
||||
#define XDI "edi"
|
||||
#define XSP "esp"
|
||||
#define MOVX "movl"
|
||||
#define LEAX "leal"
|
||||
#define DECX "decl"
|
||||
#define PUSHX "pushl"
|
||||
#define POPX "popl"
|
||||
#define CMPX "cmpl"
|
||||
#define ADDX "addl"
|
||||
#define SHLX "shll"
|
||||
#define SHRX "shrl"
|
||||
#define SUBX "subl"
|
||||
|
||||
#elif defined (HAVE_CPU_X86_64)
|
||||
|
||||
#define XAX "rax"
|
||||
#define XBX "rbx"
|
||||
#define XCX "rcx"
|
||||
#define XDX "rdx"
|
||||
#define XSI "rsi"
|
||||
#define XDI "rdi"
|
||||
#define XSP "rsp"
|
||||
#define MOVX "movq"
|
||||
#define LEAX "leaq"
|
||||
#define DECX "decq"
|
||||
#define PUSHX "pushq"
|
||||
#define POPX "popq"
|
||||
#define CMPX "cmpq"
|
||||
#define ADDX "addq"
|
||||
#define SHLX "shlq"
|
||||
#define SHRX "shrq"
|
||||
#define SUBX "subq"
|
||||
|
||||
#else
|
||||
#error Undefined architecture. Define either ARCH_X86 or ARCH_X86_64.
|
||||
#endif
|
||||
|
||||
#endif
|
Loading…
Reference in a new issue