gst/deinterlace2/: First part of the C implementation of the tomsmocomp deinterlacing algorithm. This only supports s...

Original commit message from CVS:
* gst/deinterlace2/gstdeinterlace2.c:
(gst_deinterlace_method_class_init):
* gst/deinterlace2/gstdeinterlace2.h:
* gst/deinterlace2/tvtime/tomsmocomp.c:
(gst_deinterlace_method_tomsmocomp_class_init):
* gst/deinterlace2/tvtime/tomsmocomp/SearchLoopBottom.inc:
* gst/deinterlace2/tvtime/tomsmocomp/SearchLoopTop.inc:
* gst/deinterlace2/tvtime/tomsmocomp/StrangeBob.inc:
* gst/deinterlace2/tvtime/tomsmocomp/TomsMoCompAll.inc:
* gst/deinterlace2/tvtime/tomsmocomp/TomsMoCompAll2.inc:
* gst/deinterlace2/tvtime/tomsmocomp/WierdBob.inc:
* gst/deinterlace2/tvtime/tomsmocomp/tomsmocompmacros.h:
First part of the C implementation of the tomsmocomp deinterlacing
algorithm. This only supports search-effort=0 currently, is painfully
slow and needs some cleanup later when all search-effort settings
are implemented in C.
This commit is contained in:
Sebastian Dröge 2008-08-25 14:37:45 +00:00
parent c980279fa4
commit 3e4982542b
11 changed files with 374 additions and 27 deletions

View file

@ -1,3 +1,22 @@
2008-08-25 Sebastian Dröge <sebastian.droege@collabora.co.uk>
* gst/deinterlace2/gstdeinterlace2.c:
(gst_deinterlace_method_class_init):
* gst/deinterlace2/gstdeinterlace2.h:
* gst/deinterlace2/tvtime/tomsmocomp.c:
(gst_deinterlace_method_tomsmocomp_class_init):
* gst/deinterlace2/tvtime/tomsmocomp/SearchLoopBottom.inc:
* gst/deinterlace2/tvtime/tomsmocomp/SearchLoopTop.inc:
* gst/deinterlace2/tvtime/tomsmocomp/StrangeBob.inc:
* gst/deinterlace2/tvtime/tomsmocomp/TomsMoCompAll.inc:
* gst/deinterlace2/tvtime/tomsmocomp/TomsMoCompAll2.inc:
* gst/deinterlace2/tvtime/tomsmocomp/WierdBob.inc:
* gst/deinterlace2/tvtime/tomsmocomp/tomsmocompmacros.h:
First part of the C implementation of the tomsmocomp deinterlacing
algorithm. This only supports search-effort=0 currently, is painfully
slow and needs some cleanup later when all search-effort settings
are implemented in C.
2008-08-24 Ole André Vadla Ravnås <ole.andre.ravnas@tandberg.com>
* configure.ac:

View file

@ -55,7 +55,7 @@ G_DEFINE_TYPE (GstDeinterlaceMethod, gst_deinterlace_method, GST_TYPE_OBJECT);
/* Class initializer for the GstDeinterlaceMethod type (the type is registered
 * with G_DEFINE_TYPE, which expects a function of this name).
 *
 * klass: the class structure being initialized.
 *
 * The only thing done here is to set the class's `available` flag to TRUE. */
static void
gst_deinterlace_method_class_init (GstDeinterlaceMethodClass * klass)
{
klass->available = TRUE;
}
static void

View file

@ -74,8 +74,6 @@ struct _GstDeinterlaceMethodClass {
guint fields_required;
guint latency;
gboolean available;
void (*deinterlace_frame) (GstDeinterlaceMethod *self, GstDeinterlace2 * parent);
const gchar *name;

View file

@ -29,9 +29,6 @@
#include "gstdeinterlace2.h"
#include "plugins.h"
#include "tomsmocomp/tomsmocompmacros.h"
#include "x86-64_macros.inc"
#define GST_TYPE_DEINTERLACE_METHOD_TOMSMOCOMP (gst_deinterlace_method_tomsmocomp_get_type ())
#define GST_IS_DEINTERLACE_METHOD_TOMSMOCOMP(obj) (G_TYPE_CHECK_INSTANCE_TYPE ((obj), GST_TYPE_DEINTERLACE_METHOD_TOMSMOCOMP))
#define GST_IS_DEINTERLACE_METHOD_TOMSMOCOMP_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE ((klass), GST_TYPE_DEINTERLACE_METHOD_TOMSMOCOMP))
@ -72,6 +69,20 @@ Fieldcopy (void *dest, const void *src, size_t count,
return 0;
}
#define USE_FOR_DSCALER
#define IS_C
#define SIMD_TYPE C
#define FUNCT_NAME tomsmocompDScaler_C
#include "tomsmocomp/TomsMoCompAll.inc"
#undef IS_C
#undef SIMD_TYPE
#undef FUNCT_NAME
#ifdef BUILD_X86_ASM
#include "tomsmocomp/tomsmocompmacros.h"
#include "x86-64_macros.inc"
#define IS_MMX
#define SIMD_TYPE MMX
@ -97,6 +108,8 @@ Fieldcopy (void *dest, const void *src, size_t count,
#undef SIMD_TYPE
#undef FUNCT_NAME
#endif
G_DEFINE_TYPE (GstDeinterlaceMethodTomsMoComp,
gst_deinterlace_method_tomsmocomp, GST_TYPE_DEINTERLACE_METHOD);
@ -173,6 +186,7 @@ static void
dim_class->nick = "tomsmocomp";
dim_class->latency = 1;
#ifdef BUILD_X86_ASM
if (cpu_flags & OIL_IMPL_FLAG_MMXEXT) {
dim_class->deinterlace_frame = tomsmocompDScaler_MMXEXT;
} else if (cpu_flags & OIL_IMPL_FLAG_3DNOW) {
@ -180,8 +194,11 @@ static void
} else if (cpu_flags & OIL_IMPL_FLAG_MMX) {
dim_class->deinterlace_frame = tomsmocompDScaler_MMX;
} else {
dim_class->available = FALSE;
dim_class->deinterlace_frame = tomsmocompDScaler_C;
}
#else
dim_class->deinterlace_frame = tomsmocompDScaler_C;
#endif
}
static void

View file

@ -1,11 +1,9 @@
// -*- c++ -*-
#ifdef IS_SSE2
//sse2 code deleted for now
#else
// Version for non-SSE2
#ifndef IS_C
#ifdef SKIP_SEARCH
"movq %%mm6, %%mm0\n\t" // just use the results of our wierd bob
#else
@ -59,6 +57,7 @@
V_MOVNTQ ("(%"XAX", %%"XDX")", "%%mm0")
// pavgb mm1, qword ptr["XBX"+"XCX"]
V_PAVGB ("%%mm1", "(%%"XBX", %%"XCX")", "%%mm2", _ShiftMask)
//FIXME: XDX or XAX!!
"addq "_dst_pitchw", %%"XBX
// movntq qword ptr["XAX"+"XDX"], mm1
V_MOVNTQ ("(%%"XAX", %%"XDX")", "%%mm1")
@ -71,7 +70,6 @@
LEAX" 8(%%"XDX"), %%"XDX"\n\t" // bump offset pointer
CMPX" "_Last8", %%"XDX"\n\t" // done with line?
"jb 1b\n\t" // y
#endif
MOVX" "_oldbx", %%"XBX"\n\t"
@ -114,3 +112,51 @@
}
return 0;
#else
// C tail of the search loop. This fragment finishes the per-pixel loop and
// the per-line loop that the C branch of SearchLoopTop.inc opens, stores the
// chosen value for the current pixel and returns 0 once all lines are done.
#ifdef SKIP_SEARCH
      out = best;               // just use the results of our wierd bob
#else
      // NOTE(review): weave, MinVals and MaxVals are not among the C locals
      // declared in SearchLoopTop.inc, and the C search-effort functions
      // visible in TomsMoCompAll2.inc all define SKIP_SEARCH, so this branch
      // is presumably never compiled yet - confirm before enabling it.
      diff = diff - MIN (diff, 10) - 4;
      if (diff < 0)
        out = weave;
      else
        out = best;

      out = CLAMP (out, MinVals, MaxVals);
#endif

#ifdef USE_VERTICAL_FILTER
      // dst_pitch is used directly here: SearchLoopTop.inc declares the
      // dst_pitchw copy (same value) only inside #ifndef IS_C, so the old
      // spelling cannot be relied on in the C build.
      pDest[x] = (out + pBob[0]) / 2;
      pDest[x + dst_pitch] = (pBob[src_pitch2] + out) / 2;
#else
      pDest[x] = out;
#endif

      // pDest is indexed by x; the source pointers advance one byte per pixel.
      pBob += 1;
      pBobP += 1;
      pSrc += 1;
      pSrcP += 1;
    }

    // adjust for next line: every pointer is recomputed from its base
    // instead of being incremented, because the inner loop moved them.
    pSrc = src_pitch2 * (y + 1) + pWeaveSrc;
    pSrcP = src_pitch2 * (y + 1) + pWeaveSrcP;
    pDest = dst_pitch2 * (y + 1) + pWeaveDest + dst_pitch2;

    if (TopFirst)
    {
      pBob = pCopySrc + src_pitch2;
      pBobP = pCopySrcP + src_pitch2;
    }
    else
    {
      pBob = pCopySrc;
      pBobP = pCopySrcP;
    }

    pBob += src_pitch2 * (y + 1);
    pBobP += src_pitch2 * (y + 1);
  }

  return 0;
#endif

View file

@ -6,6 +6,8 @@ const unsigned char* pSrc;
const unsigned char* pBob;
const unsigned char* pBobP;
#ifndef IS_C
int64_t Max_Mov = 0x0404040404040404ull;
int64_t DiffThres = 0x0f0f0f0f0f0f0f0full;
int64_t YMask = 0x00ff00ff00ff00ffull; // keeps only luma
@ -19,6 +21,14 @@ int64_t ShiftMask = 0xfefffefffefffeffull;
long oldbx;
#else
#ifdef USE_STRANGE_BOB
int64_t DiffThres = 0x0f;
#endif
#endif
// long is int32 on ARCH_386, int64 on ARCH_AMD64. Declaring it this way
// saves a lot of xor's to delete 64bit garbage.
@ -30,15 +40,23 @@ long src_pitch2 = 2 * src_pitch; // even & odd lines are interleaved in Avi
long dst_pitch2 = 2 * dst_pitch;
#ifdef IS_C
long x,best,diff,avg,diff2,out;
#endif
long y;
#ifdef IS_SSE2
#if defined(IS_SSE2)
long Last8 = (rowsize-16); // ofs to last 16 bytes in row for SSE2
#elif defined(IS_C)
long Last8 = (rowsize-4); // ofs to last two pixel in row
#else
long Last8 = (rowsize-8); // ofs to last 8 bytes in row
#endif
long dst_pitchw = dst_pitch; // local stor so asm can ref
#ifndef IS_C
long dst_pitchw = dst_pitch; // local stor so asm can ref
#endif
pSrc = pWeaveSrc; // points 1 weave line above
pSrcP = pWeaveSrcP; // "
@ -71,6 +89,8 @@ long dst_pitchw = dst_pitch; // local stor so asm can ref
pBobP = pCopySrcP;
}
#ifndef IS_C
#ifndef _pBob
#define _pBob "%0"
#define _src_pitch2 "%1"
@ -110,11 +130,6 @@ long dst_pitchw = dst_pitch; // local stor so asm can ref
// Save "XBX" (-fPIC)
MOVX" %%"XBX", "_oldbx"\n\t"
#ifdef IS_SSE2
// sse2 code deleted for now
#else
// simple bob first 8 bytes
MOVX" "_pBob", %%"XBX"\n\t"
MOVX" "_src_pitch2", %%"XCX"\n\t"
@ -190,4 +205,60 @@ long dst_pitchw = dst_pitch; // local stor so asm can ref
// our best and only rating so far
"pcmpeqb %%mm7, %%mm7\n\t" // ffff, say we didn't find anything good yet
#else
// C head of the search loop: iterate over the lines 1 .. FldHeight-2 and, for
// each, bob the row edges and then run the per-pixel candidate selection.
// The two loops opened here are closed by the C branch of
// SearchLoopBottom.inc, which must be included after this file.
for (y = 1; y < FldHeight - 1; y++)
{
  // Simple bob for the first 4 and the last 4 bytes of the row (offsets
  // 0..3 and Last8..Last8+3); the per-pixel loop below only covers
  // 4 <= x < Last8.
  //
  // Destination offsets use dst_pitch: the dst_pitchw copy (same value) is
  // declared only inside #ifndef IS_C, and the source pitch (src_pitch2) must
  // never be used to step through pDest.
  for (x = 0; x < 4; x++) {
#ifdef USE_VERTICAL_FILTER
    pDest[x] = (3 * pBob[x] + pBob[src_pitch2 + x]) / 4;
    pDest[dst_pitch + x] = (pBob[x] + 3 * pBob[src_pitch2 + x]) / 4;

    pDest[Last8 + x] =
        (3 * pBob[Last8 + x] + pBob[Last8 + src_pitch2 + x]) / 4;
    pDest[Last8 + dst_pitch + x] =
        (pBob[Last8 + x] + 3 * pBob[Last8 + src_pitch2 + x]) / 4;
#else
    // Each output byte is the average of the bytes directly above and below
    // it, i.e. the same column in both source lines.
    pDest[x] = (pBob[x] + pBob[src_pitch2 + x]) / 2;
    pDest[Last8 + x] = (pBob[Last8 + x] + pBob[Last8 + src_pitch2 + x]) / 2;
#endif
  }

  // Skip the 4 edge bytes that were just handled.
  pBob += 4;
  pBobP += 4;
  pSrc += 4;
  pSrcP += 4;

  for (x = 4; x < Last8; x += 1) {

#ifdef USE_STRANGE_BOB
#include "StrangeBob.inc"
#else
#include "WierdBob.inc"
#endif

    // We will keep a slight bias to using the weave pixels
    // from the current location, by rating them by the min distance
    // from the Bob value instead of the avg distance from that value.
    // our best and only rating so far
    diff = 255;
#endif

View file

@ -4,7 +4,7 @@
// Assume our pixels are laid out as follows with x the calc'd bob value
// and the other pixels are from the current field
//
// j a b c k current field
// j a b c k current field
// x calculated line
// m d e f n current field
//
@ -26,6 +26,8 @@
// end if
// pickup any thing not yet set with avg(b,e)
#ifndef IS_C
// j, n
"pxor %%mm5, %%mm5\n\t"
"pxor %%mm6, %%mm6\n\t"
@ -48,7 +50,7 @@
"movq -4(%%"XBX"), %%mm0\n\t" // value j
"movq 4(%%"XBX", %%"XCX"), %%mm1\n\t" // value n
"movq %%mm0, %%mm2\n\t"
"pavgb %%mm1, %%mm2\n\t" // avg(j,n)
V_PAVGB ("%%mm2", "%%mm1", "%%mm3", _ShiftMask) // avg(j,n)
"movq %%mm0, %%mm3\n\t"
"psubusb %%mm1, %%mm0\n\t"
"psubusb %%mm3, %%mm1\n\t"
@ -60,7 +62,6 @@
"pcmpeqb %%mm3, %%mm1\n\t" // now ff where abs(j,n) < Thres, else 00
"pand %%mm4, %%mm1\n\t"
"pand %%mm1, %%mm2\n\t"
"pand %%mm1, %%mm0\n\t"
@ -320,3 +321,68 @@
"por %%mm2, %%mm6\n\t" // our x2 value
"por %%mm1, %%mm7\n\t" // our x2 diffs
"movq %%mm7, %%mm4\n\t" // save as bob uncertainty indicator
#else
// C version of the "strange bob". It leaves two results for the code that
// includes this file:
//   best - the interpolated value chosen for the current pixel
//   diff - the absolute difference of the pair that produced best
// Candidates are tried in the order below and every later match overwrites
// an earlier one; diff == -1 means that no candidate has matched so far.
//
// Offsets, assuming the same labelling as the C code in WierdBob.inc
// (TODO confirm):
//   j = pBob[-4]  a = pBob[-2]  b = pBob[0]  c = pBob[2]  k = pBob[4]
//   m = pBob[src_pitch2 - 4]  d = pBob[src_pitch2 - 2]  e = pBob[src_pitch2]
//   f = pBob[src_pitch2 + 2]  n = pBob[src_pitch2 + 4]
diff = -1;
best = 0;
// j, n
// NOTE(review): with the labelling above this block averages a and m
// (pBob[-2], pBob[src_pitch2 - 4]) and requires the j/n pair to differ by
// MORE than DiffThres, while the three blocks that follow average the pair
// named in their own comment. Looks inconsistent - verify against the MMX
// code before relying on it.
if (ABS (pBob[-2] - pBob[src_pitch2 - 4]) < DiffThres &&
ABS (pBob[-4] - pBob[src_pitch2 + 4]) > DiffThres) {
best = (pBob[-2] + pBob[src_pitch2 - 4]) / 2;
diff = ABS (pBob[-2] - pBob[src_pitch2 - 4]);
}
// k & m
if (ABS (pBob[2] - pBob[src_pitch2 + 4]) < DiffThres &&
ABS (pBob[4] - pBob[src_pitch2 - 4]) > DiffThres) {
best = (pBob[4] + pBob[src_pitch2 - 4]) / 2;
diff = ABS (pBob[4] - pBob[src_pitch2 - 4]);
}
// c & d
if (ABS (pBob[0] - pBob[src_pitch2 + 2]) < DiffThres &&
ABS (pBob[2] - pBob[src_pitch2 - 2]) > DiffThres) {
best = (pBob[2] + pBob[src_pitch2 - 2]) / 2;
diff = ABS (pBob[2] - pBob[src_pitch2 - 2]);
}
// a & f
if (ABS (pBob[0] - pBob[src_pitch2 - 2]) < DiffThres &&
ABS (pBob[-2] - pBob[src_pitch2 + 2]) > DiffThres) {
best = (pBob[-2] + pBob[src_pitch2 + 2]) / 2;
diff = ABS (pBob[-2] - pBob[src_pitch2 + 2]);
}
// b,e
// Plain vertical pair; when it matches it overrides any diagonal candidate.
if (ABS (pBob[0] - pBob[src_pitch2]) < DiffThres) {
best = (pBob[0] + pBob[src_pitch2]) / 2;
diff = ABS (pBob[0] - pBob[src_pitch2]);
}
// We will also calc here the max/min values to later limit comb
// so the max excursion will not exceed the Max_Comb constant
#ifdef SKIP_SEARCH
// Keep best between the pixel directly above and the pixel directly below.
best = CLAMP (best, MIN (pBob[src_pitch2], pBob[0]), MAX (pBob[src_pitch2], pBob[0]));
#else
// NOTE(review): mov, MinVals and MaxVals are not among the C locals declared
// in SearchLoopTop.inc; this branch is presumably not compiled while the C
// search-effort functions all define SKIP_SEARCH - confirm.
mov = MAX (ABS (pBob[0] - pBobP[0]), ABS (pBob[src_pitch2] - pBobP[src_pitch2]));
MinVals = 0;
MaxVals = 255;
if (mov > DiffThres) {
MinVals = MAX (MIN (pBob[0], pBob[src_pitch2]), best);
MaxVals = MIN (MAX (pBob[0], pBob[src_pitch2]), best);
}
best = CLAMP (best, MIN (pBob[src_pitch2], pBob[0]), MAX (pBob[src_pitch2], pBob[0]));
#endif
// Fall back to the plain vertical average when nothing matched above
// (diff == -1) or when the vertical pair differs less than the chosen pair.
avg = (pBob[src_pitch2] + pBob[0]) / 2;
diff2 = ABS (pBob[src_pitch2] - pBob[0]);
if (diff == -1 || diff2 < diff) {
best = avg;
diff = diff2;
}
#endif

View file

@ -36,8 +36,10 @@
#define SEFUNC(x) Search_Effort_MMXEXT_##x(int src_pitch, int dst_pitch, int rowsize, const unsigned char *pWeaveSrc, const unsigned char *pWeaveSrcP, unsigned char *pWeaveDest, int IsOdd, const unsigned char *pCopySrc, const unsigned char *pCopySrcP, int FldHeight)
#elif defined(IS_3DNOW)
#define SEFUNC(x) Search_Effort_3DNOW_##x(int src_pitch, int dst_pitch, int rowsize, const unsigned char *pWeaveSrc, const unsigned char *pWeaveSrcP, unsigned char *pWeaveDest, int IsOdd, const unsigned char *pCopySrc, const unsigned char *pCopySrcP, int FldHeight)
#else
#elif defined(IS_MMX)
#define SEFUNC(x) Search_Effort_MMX_##x(int src_pitch, int dst_pitch, int rowsize, const unsigned char *pWeaveSrc, const unsigned char *pWeaveSrcP, unsigned char *pWeaveDest, int IsOdd, const unsigned char *pCopySrc, const unsigned char *pCopySrcP, int FldHeight)
#else
#define SEFUNC(x) Search_Effort_C_##x(int src_pitch, int dst_pitch, int rowsize, const unsigned char *pWeaveSrc, const unsigned char *pWeaveSrcP, unsigned char *pWeaveDest, int IsOdd, const unsigned char *pCopySrc, const unsigned char *pCopySrcP, int FldHeight)
#endif
#include "TomsMoCompAll2.inc"
@ -53,8 +55,10 @@
#define SEFUNC(x) Search_Effort_MMXEXT_##x(src_pitch, dst_pitch, rowsize, pWeaveSrc, pWeaveSrcP, pWeaveDest, IsOdd, pCopySrc, pCopySrcP, FldHeight)
#elif defined(IS_3DNOW)
#define SEFUNC(x) Search_Effort_3DNOW_##x(src_pitch, dst_pitch, rowsize, pWeaveSrc, pWeaveSrcP, pWeaveDest, IsOdd, pCopySrc, pCopySrcP, FldHeight)
#else
#elif defined(IS_MMX)
#define SEFUNC(x) Search_Effort_MMX_##x(src_pitch, dst_pitch, rowsize, pWeaveSrc, pWeaveSrcP, pWeaveDest, IsOdd, pCopySrc, pCopySrcP, FldHeight)
#else
#define SEFUNC(x) Search_Effort_C_##x(src_pitch, dst_pitch, rowsize, pWeaveSrc, pWeaveSrcP, pWeaveDest, IsOdd, pCopySrc, pCopySrcP, FldHeight)
#endif
void FUNCT_NAME(GstDeinterlaceMethod *d_method, GstDeinterlace2* object)
@ -231,7 +235,7 @@ void FUNCT_NAME(GstDeinterlaceMethod *d_method, GstDeinterlace2* object)
}
}
#ifdef HAVE_CPU_I386
#if defined(BUILD_X86_ASM) && !defined(IS_C)
__asm__ __volatile__("emms");
#endif
}

View file

@ -21,25 +21,45 @@ static inline int SEARCH_EFFORT_FUNC(0) // we don't try at all ;-)
// Search effort 1. The whole body is assembled from include fragments.
//  - IS_C build: SKIP_SEARCH is defined around SearchLoopTop.inc and
//    SearchLoopBottom.inc and no search fragment is included, so the C
//    version only produces the bob result.
//  - otherwise: SearchLoopTop.inc, RESET_CHROMA, SearchLoop0A.inc and
//    SearchLoopBottom.inc, in that order.
// The return statement comes from SearchLoopBottom.inc.
static inline int SEARCH_EFFORT_FUNC(1)
{
#ifdef IS_C
#define SKIP_SEARCH
#include "SearchLoopTop.inc"
#include "SearchLoopBottom.inc"
#undef SKIP_SEARCH
#else
//see Search_Effort_Max() for comments
#include "SearchLoopTop.inc"
RESET_CHROMA // pretend chroma diffs was 255 each
#include "SearchLoop0A.inc"
#include "SearchLoopBottom.inc"
#endif
}
// Search effort 3. The whole body is assembled from include fragments.
//  - IS_C build: identical to the C body of search effort 1 - SKIP_SEARCH is
//    defined around SearchLoopTop.inc and SearchLoopBottom.inc and no search
//    fragment is included.
//  - otherwise: SearchLoopTop.inc, SearchLoopOddA2.inc, RESET_CHROMA,
//    SearchLoop0A.inc and SearchLoopBottom.inc, in that order.
// The return statement comes from SearchLoopBottom.inc.
static inline int SEARCH_EFFORT_FUNC(3)
{
#ifdef IS_C
#define SKIP_SEARCH
#include "SearchLoopTop.inc"
#include "SearchLoopBottom.inc"
#undef SKIP_SEARCH
#else
//see Search_Effort_Max() for comments
#include "SearchLoopTop.inc"
#include "SearchLoopOddA2.inc"
RESET_CHROMA // pretend chroma diffs was 255 each
#include "SearchLoop0A.inc"
#include "SearchLoopBottom.inc"
#endif
}
static inline int SEARCH_EFFORT_FUNC(5)
{
#ifdef IS_C
#define SKIP_SEARCH
#include "SearchLoopTop.inc"
#include "SearchLoopBottom.inc"
#undef SKIP_SEARCH
#else
//see Search_Effort_Max() for comments
#include "SearchLoopTop.inc"
#include "SearchLoopOddA2.inc"
@ -47,11 +67,18 @@ static inline int SEARCH_EFFORT_FUNC(5)
RESET_CHROMA // pretend chroma diffs was 255 each
#include "SearchLoop0A.inc"
#include "SearchLoopBottom.inc"
#endif
}
// 3x3 search
static inline int SEARCH_EFFORT_FUNC(9)
{
#ifdef IS_C
#define SKIP_SEARCH
#include "SearchLoopTop.inc"
#include "SearchLoopBottom.inc"
#undef SKIP_SEARCH
#else
//see SearchEffortMax() for comments
#include "SearchLoopTop.inc"
#include "SearchLoopOddA.inc"
@ -59,11 +86,18 @@ static inline int SEARCH_EFFORT_FUNC(9)
#include "SearchLoopVA.inc"
#include "SearchLoop0A.inc"
#include "SearchLoopBottom.inc"
#endif
}
// Search 9 with 2 H-half pels added
static inline int SEARCH_EFFORT_FUNC(11)
{
#ifdef IS_C
#define SKIP_SEARCH
#include "SearchLoopTop.inc"
#include "SearchLoopBottom.inc"
#undef SKIP_SEARCH
#else
//see SearchEffortMax() for comments
#include "SearchLoopTop.inc"
#include "SearchLoopOddA.inc"
@ -72,11 +106,18 @@ static inline int SEARCH_EFFORT_FUNC(11)
#include "SearchLoopVA.inc"
#include "SearchLoop0A.inc"
#include "SearchLoopBottom.inc"
#endif
}
// Search 11 with 2 V-half pels added
static inline int SEARCH_EFFORT_FUNC(13)
{
#ifdef IS_C
#define SKIP_SEARCH
#include "SearchLoopTop.inc"
#include "SearchLoopBottom.inc"
#undef SKIP_SEARCH
#else
//see SearchEffortMax() for comments
#include "SearchLoopTop.inc"
#include "SearchLoopOddA.inc"
@ -86,11 +127,18 @@ static inline int SEARCH_EFFORT_FUNC(13)
#include "SearchLoopVA.inc"
#include "SearchLoop0A.inc"
#include "SearchLoopBottom.inc"
#endif
}
// 5x3
static inline int SEARCH_EFFORT_FUNC(15)
{
#ifdef IS_C
#define SKIP_SEARCH
#include "SearchLoopTop.inc"
#include "SearchLoopBottom.inc"
#undef SKIP_SEARCH
#else
//see SearchEffortMax() for comments
#include "SearchLoopTop.inc"
#include "SearchLoopOddA.inc"
@ -99,11 +147,18 @@ static inline int SEARCH_EFFORT_FUNC(15)
#include "SearchLoopVA.inc"
#include "SearchLoop0A.inc"
#include "SearchLoopBottom.inc"
#endif
}
// 5x3 + 4 half pels
static inline int SEARCH_EFFORT_FUNC(19)
{
#ifdef IS_C
#define SKIP_SEARCH
#include "SearchLoopTop.inc"
#include "SearchLoopBottom.inc"
#undef SKIP_SEARCH
#else
//see SearchEffortMax() for comments
#include "SearchLoopTop.inc"
#include "SearchLoopOddA.inc"
@ -114,6 +169,7 @@ static inline int SEARCH_EFFORT_FUNC(19)
#include "SearchLoopVA.inc"
#include "SearchLoop0A.inc"
#include "SearchLoopBottom.inc"
#endif
}
// Handle one 4x1 block of pixels
@ -121,6 +177,12 @@ static inline int SEARCH_EFFORT_FUNC(19)
static inline int SEARCH_EFFORT_FUNC(21)
{
#ifdef IS_C
#define SKIP_SEARCH
#include "SearchLoopTop.inc"
#include "SearchLoopBottom.inc"
#undef SKIP_SEARCH
#else
//see SearchLoopTop.inc for comments
#include "SearchLoopTop.inc"
@ -140,12 +202,19 @@ static inline int SEARCH_EFFORT_FUNC(21)
// blend our results and loop
#include "SearchLoop0A.inc"
#include "SearchLoopBottom.inc"
#endif
}
// Handle one 4x1 block of pixels
// Search a 9x3 area, no half pels
static inline int SEARCH_EFFORT_FUNC(Max)
{
#ifdef IS_C
#define SKIP_SEARCH
#include "SearchLoopTop.inc"
#include "SearchLoopBottom.inc"
#undef SKIP_SEARCH
#else
//see SearchLoopTop.inc for comments
#include "SearchLoopTop.inc"
@ -167,6 +236,7 @@ static inline int SEARCH_EFFORT_FUNC(Max)
// blend our results and loop
#include "SearchLoop0A.inc"
#include "SearchLoopBottom.inc"
#endif
}
#undef SEARCH_EFFORT_FUNC

View file

@ -13,6 +13,7 @@
// selected for the smallest of abs(a,f), abs(c,d), or abs(b,e), etc.
#ifndef IS_C
// a,f
"movq -2(%%"XBX"), %%mm0\n\t" // value a from top left
"movq 2(%%"XBX", %%"XCX"), %%mm1\n\t" // value f from bottom right
@ -175,6 +176,7 @@
"pxor %%mm4, %%mm4\n\t"
"psubusb %%mm7, %%mm3\n\t" // nonzero where new weights bigger, else 0
"pcmpeqb %%mm4, %%mm3\n\t" // now ff where new better, else 00
"pcmpeqb %%mm3, %%mm4\n\t" // here ff where old better, else 00
"pand %%mm3, %%mm1\n\t"
@ -187,3 +189,59 @@
"por %%mm1, %%mm7\n\t" // our x2 diffs
"movq %%mm7, %%mm4\n\t" // save as bob uncertainty indicator
#else
// a,f
best = (pBob[-2] + pBob[src_pitch2 + 2]) / 2;
diff = ABS (pBob[-2] - pBob[src_pitch2 + 2]);
// c,d
if (ABS (pBob[2] - pBob[src_pitch2 - 2]) < diff) {
best = (pBob[2] + pBob[src_pitch2 - 2]) / 2;
diff = ABS (pBob[2] - pBob[src_pitch2 - 2]);
}
// j,n
if (ABS (pBob[-4] - pBob[src_pitch2 + 4]) < diff) {
best = (pBob[-4] + pBob[src_pitch2 + 4]) / 2;
diff = ABS (pBob[-4] - pBob[src_pitch2 + 4]);
}
// k,m
if (ABS (pBob[4] - pBob[src_pitch2 - 4]) < diff) {
best = (pBob[4] + pBob[src_pitch2 - 4]) / 2;
diff = ABS (pBob[-4] - pBob[src_pitch2 - 4]);
}
// k,m
if (ABS (pBob[4] - pBob[src_pitch2 - 4]) < diff) {
best = (pBob[4] + pBob[src_pitch2 - 4]) / 2;
diff = ABS (pBob[-4] - pBob[src_pitch2 - 4]);
}
// We will also calc here the max/min values to later limit comb
// so the max excursion will not exceed the Max_Comb constant
#ifdef SKIP_SEARCH
best = CLAMP (best, MIN (pBob[src_pitch2], pBob[0]), MAX (pBob[src_pitch2], pBob[0]));
#else
mov = MAX (ABS (pBob[0] - pBobP[0]), ABS (pBob[src_pitch2] - pBobP[src_pitch2]));
MinVals = 0;
MaxVals = 255;
if (mov > Max_Mov) {
MinVals = MAX (MIN (pBob[0], pBob[src_pitch2]), best);
MaxVals = MIN (MAX (pBob[0], pBob[src_pitch2]), best);
}
best = CLAMP (best, MIN (pBob[src_pitch2], pBob[0]), MAX (pBob[src_pitch2], pBob[0]));
#endif
avg = (pBob[src_pitch2] + pBob[0]) / 2;
diff2 = ABS (pBob[src_pitch2] - pBob[0]);
if (diff2 < diff) {
best = avg;
diff = diff2;
}
#endif

View file

@ -1,8 +1,6 @@
#include <string.h>
#include <math.h>
#define USE_FOR_DSCALER
// Define a few macros for CPU dependent instructions.
// I suspect I don't really understand how the C macro preprocessor works but
// this seems to get the job done. // TRB 7/01