diff --git a/ChangeLog b/ChangeLog index 6cd32dc6e6..3a83ba4665 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,22 @@ +2008-08-25 Sebastian Dröge + + * gst/deinterlace2/gstdeinterlace2.c: + (gst_deinterlace_method_class_init): + * gst/deinterlace2/gstdeinterlace2.h: + * gst/deinterlace2/tvtime/tomsmocomp.c: + (gst_deinterlace_method_tomsmocomp_class_init): + * gst/deinterlace2/tvtime/tomsmocomp/SearchLoopBottom.inc: + * gst/deinterlace2/tvtime/tomsmocomp/SearchLoopTop.inc: + * gst/deinterlace2/tvtime/tomsmocomp/StrangeBob.inc: + * gst/deinterlace2/tvtime/tomsmocomp/TomsMoCompAll.inc: + * gst/deinterlace2/tvtime/tomsmocomp/TomsMoCompAll2.inc: + * gst/deinterlace2/tvtime/tomsmocomp/WierdBob.inc: + * gst/deinterlace2/tvtime/tomsmocomp/tomsmocompmacros.h: + First part of the C implementation of the tomsmocomp deinterlacing + algorithm. This only supports search-effort=0 currently, is painfully + slow and needs some cleanup later when all search-effort settings + are implemented in C. + 2008-08-24 Ole André Vadla Ravnås * configure.ac: diff --git a/gst/deinterlace2/gstdeinterlace2.c b/gst/deinterlace2/gstdeinterlace2.c index fd9ed99e7c..0ef53813d3 100644 --- a/gst/deinterlace2/gstdeinterlace2.c +++ b/gst/deinterlace2/gstdeinterlace2.c @@ -55,7 +55,7 @@ G_DEFINE_TYPE (GstDeinterlaceMethod, gst_deinterlace_method, GST_TYPE_OBJECT); static void gst_deinterlace_method_class_init (GstDeinterlaceMethodClass * klass) { - klass->available = TRUE; + } static void diff --git a/gst/deinterlace2/gstdeinterlace2.h b/gst/deinterlace2/gstdeinterlace2.h index 01f1831b1a..408061886f 100644 --- a/gst/deinterlace2/gstdeinterlace2.h +++ b/gst/deinterlace2/gstdeinterlace2.h @@ -74,8 +74,6 @@ struct _GstDeinterlaceMethodClass { guint fields_required; guint latency; - gboolean available; - void (*deinterlace_frame) (GstDeinterlaceMethod *self, GstDeinterlace2 * parent); const gchar *name; diff --git a/gst/deinterlace2/tvtime/tomsmocomp.c b/gst/deinterlace2/tvtime/tomsmocomp.c index 
c27d4874ce..0565b6c707 100644 --- a/gst/deinterlace2/tvtime/tomsmocomp.c +++ b/gst/deinterlace2/tvtime/tomsmocomp.c @@ -29,9 +29,6 @@ #include "gstdeinterlace2.h" #include "plugins.h" -#include "tomsmocomp/tomsmocompmacros.h" -#include "x86-64_macros.inc" - #define GST_TYPE_DEINTERLACE_METHOD_TOMSMOCOMP (gst_deinterlace_method_tomsmocomp_get_type ()) #define GST_IS_DEINTERLACE_METHOD_TOMSMOCOMP(obj) (G_TYPE_CHECK_INSTANCE_TYPE ((obj), GST_TYPE_DEINTERLACE_METHOD_TOMSMOCOMP)) #define GST_IS_DEINTERLACE_METHOD_TOMSMOCOMP_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE ((klass), GST_TYPE_DEINTERLACE_METHOD_TOMSMOCOMP)) @@ -72,6 +69,20 @@ Fieldcopy (void *dest, const void *src, size_t count, return 0; } +#define USE_FOR_DSCALER + +#define IS_C +#define SIMD_TYPE C +#define FUNCT_NAME tomsmocompDScaler_C +#include "tomsmocomp/TomsMoCompAll.inc" +#undef IS_C +#undef SIMD_TYPE +#undef FUNCT_NAME + +#ifdef BUILD_X86_ASM + +#include "tomsmocomp/tomsmocompmacros.h" +#include "x86-64_macros.inc" #define IS_MMX #define SIMD_TYPE MMX @@ -97,6 +108,8 @@ Fieldcopy (void *dest, const void *src, size_t count, #undef SIMD_TYPE #undef FUNCT_NAME +#endif + G_DEFINE_TYPE (GstDeinterlaceMethodTomsMoComp, gst_deinterlace_method_tomsmocomp, GST_TYPE_DEINTERLACE_METHOD); @@ -173,6 +186,7 @@ static void dim_class->nick = "tomsmocomp"; dim_class->latency = 1; +#ifdef BUILD_X86_ASM if (cpu_flags & OIL_IMPL_FLAG_MMXEXT) { dim_class->deinterlace_frame = tomsmocompDScaler_MMXEXT; } else if (cpu_flags & OIL_IMPL_FLAG_3DNOW) { @@ -180,8 +194,11 @@ static void } else if (cpu_flags & OIL_IMPL_FLAG_MMX) { dim_class->deinterlace_frame = tomsmocompDScaler_MMX; } else { - dim_class->available = FALSE; + dim_class->deinterlace_frame = tomsmocompDScaler_C; } +#else + dim_class->deinterlace_frame = tomsmocompDScaler_C; +#endif } static void diff --git a/gst/deinterlace2/tvtime/tomsmocomp/SearchLoopBottom.inc b/gst/deinterlace2/tvtime/tomsmocomp/SearchLoopBottom.inc index 6b6ee4ca24..ce6d25341a 100644 --- 
a/gst/deinterlace2/tvtime/tomsmocomp/SearchLoopBottom.inc +++ b/gst/deinterlace2/tvtime/tomsmocomp/SearchLoopBottom.inc @@ -1,11 +1,9 @@ // -*- c++ -*- -#ifdef IS_SSE2 -//sse2 code deleted for now -#else - // Version for non-SSE2 +#ifndef IS_C + #ifdef SKIP_SEARCH "movq %%mm6, %%mm0\n\t" // just use the results of our wierd bob #else @@ -59,6 +57,7 @@ V_MOVNTQ ("(%"XAX", %%"XDX")", "%%mm0") // pavgb mm1, qword ptr["XBX"+"XCX"] V_PAVGB ("%%mm1", "(%%"XBX", %%"XCX")", "%%mm2", _ShiftMask) + //FIXME: XDX or XAX!! "addq "_dst_pitchw", %%"XBX // movntq qword ptr["XAX"+"XDX"], mm1 V_MOVNTQ ("(%%"XAX", %%"XDX")", "%%mm1") @@ -71,7 +70,6 @@ LEAX" 8(%%"XDX"), %%"XDX"\n\t" // bump offset pointer CMPX" "_Last8", %%"XDX"\n\t" // done with line? "jb 1b\n\t" // y -#endif MOVX" "_oldbx", %%"XBX"\n\t" @@ -114,3 +112,51 @@ } return 0; +#else +#ifdef SKIP_SEARCH + out = best; // just use the results of our wierd bob +#else + diff = diff - MIN (diff, 10) - 4; + if (diff < 0) + out = weave; + else + out = best; + + out = CLAMP (out, MinVals, MaxVals); +#endif + +#ifdef USE_VERTICAL_FILTER + pDest[x] = (out + pBob[0]) / 2; + pDest[x + dst_pitchw] = (pBob[src_pitch2] + out) / 2; +#else + pDest[x] = out; +#endif + pBob += 1; + pBobP += 1; + pSrc += 1; + pSrcP += 1; + } + // adjust for next line + pSrc = src_pitch2 * (y+1) + pWeaveSrc; + pSrcP = src_pitch2 * (y+1) + pWeaveSrcP; + pDest = dst_pitch2 * (y+1) + pWeaveDest + dst_pitch2; + + + if (TopFirst) + { + pBob = pCopySrc + src_pitch2; + pBobP = pCopySrcP + src_pitch2; + } + else + { + pBob = pCopySrc; + pBobP = pCopySrcP; + } + + pBob += src_pitch2 * (y+1); + pBobP += src_pitch2 * (y+1); + } + + return 0; + +#endif diff --git a/gst/deinterlace2/tvtime/tomsmocomp/SearchLoopTop.inc b/gst/deinterlace2/tvtime/tomsmocomp/SearchLoopTop.inc index 64e562b841..9f42650b81 100644 --- a/gst/deinterlace2/tvtime/tomsmocomp/SearchLoopTop.inc +++ b/gst/deinterlace2/tvtime/tomsmocomp/SearchLoopTop.inc @@ -6,6 +6,8 @@ const unsigned char* pSrc; const 
unsigned char* pBob; const unsigned char* pBobP; +#ifndef IS_C + int64_t Max_Mov = 0x0404040404040404ull; int64_t DiffThres = 0x0f0f0f0f0f0f0f0full; int64_t YMask = 0x00ff00ff00ff00ffull; // keeps only luma @@ -19,6 +21,14 @@ int64_t ShiftMask = 0xfefffefffefffeffull; long oldbx; +#else + +#ifdef USE_STRANGE_BOB +int64_t DiffThres = 0x0f; +#endif + +#endif + // long is int32 on ARCH_368, int64 on ARCH_AMD64. Declaring it this way // saves a lot of xor's to delete 64bit garbage. @@ -30,15 +40,23 @@ long src_pitch2 = 2 * src_pitch; // even & odd lines are interleaved in Avi long dst_pitch2 = 2 * dst_pitch; +#ifdef IS_C + +long x,best,diff,avg,diff2,out; +#endif long y; -#ifdef IS_SSE2 +#if defined(IS_SSE2) long Last8 = (rowsize-16); // ofs to last 16 bytes in row for SSE2 +#elif defined(IS_C) +long Last8 = (rowsize-4); // ofs to last two pixel in row #else long Last8 = (rowsize-8); // ofs to last 8 bytes in row #endif -long dst_pitchw = dst_pitch; // local stor so asm can ref +#ifndef IS_C +long dst_pitchw = dst_pitch; // local stor so asm can ref +#endif pSrc = pWeaveSrc; // points 1 weave line above pSrcP = pWeaveSrcP; // " @@ -71,6 +89,8 @@ long dst_pitchw = dst_pitch; // local stor so asm can ref pBobP = pCopySrcP; } +#ifndef IS_C + #ifndef _pBob #define _pBob "%0" #define _src_pitch2 "%1" @@ -110,11 +130,6 @@ long dst_pitchw = dst_pitch; // local stor so asm can ref // Save "XBX" (-fPIC) MOVX" %%"XBX", "_oldbx"\n\t" -#ifdef IS_SSE2 - - // sse2 code deleted for now - -#else // simple bob first 8 bytes MOVX" "_pBob", %%"XBX"\n\t" MOVX" "_src_pitch2", %%"XCX"\n\t" @@ -190,4 +205,60 @@ long dst_pitchw = dst_pitch; // local stor so asm can ref // our best and only rating so far "pcmpeqb %%mm7, %%mm7\n\t" // ffff, say we didn't find anything good yet +#else + + for (y=1; y < FldHeight-1; y++) + { +#ifdef USE_VERTICAL_FILTER + pDest[0] = (3 * pBob[0] + pBob[src_pitch2]) / 4; + pDest[1] = (3 * pBob[1] + pBob[src_pitch2 + 1]) / 4; + pDest[2] = (3 * pBob[2] + 
pBob[src_pitch2 + 2]) / 4; + pDest[3] = (3 * pBob[3] + pBob[src_pitch2 + 3]) / 4; + pDest[dst_pitchw] = (pBob[0] + 3 * pBob[src_pitch2]) / 4; + pDest[dst_pitchw + 1] = (pBob[1] + 3 * pBob[src_pitch2 + 1]) / 4; + pDest[dst_pitchw + 2] = (pBob[2] + 3 * pBob[src_pitch2 + 2]) / 4; + pDest[dst_pitchw + 3] = (pBob[3] + 3 * pBob[src_pitch2 + 3]) / 4; + + // simple bob last byte + pDest[Last8] = (3 * pBob[Last8] + pBob[Last8 + src_pitch2]) / 4; + pDest[Last8 + 1] = (3 * pBob[Last8 + 1] + pBob[Last8 + src_pitch2 + 1]) / 4; + pDest[Last8 + 2] = (3 * pBob[Last8 + 2] + pBob[Last8 + src_pitch2 + 2]) / 4; + pDest[Last8 + 3] = (3 * pBob[Last8 + 3] + pBob[Last8 + src_pitch2 + 3]) / 4; + pDest[Last8 + dst_pitchw] = (pBob[Last8] + 3 * pBob[Last8 + src_pitch2]) / 4; /* second output row uses the destination pitch, same as the first 4 bytes above */ + pDest[Last8 + dst_pitchw + 1] = (pBob[Last8 + 1] + 3 * pBob[Last8 + src_pitch2 + 1]) / 4; + pDest[Last8 + dst_pitchw + 2] = (pBob[Last8 + 2] + 3 * pBob[Last8 + src_pitch2 + 2]) / 4; + pDest[Last8 + dst_pitchw + 3] = (pBob[Last8 + 3] + 3 * pBob[Last8 + src_pitch2 + 3]) / 4; +#else + pDest[0] = (pBob[0] + pBob[src_pitch2]) / 2; /* average each byte with the byte at the same column one src_pitch2 further on */ + pDest[1] = (pBob[1] + pBob[src_pitch2 + 1]) / 2; + pDest[2] = (pBob[2] + pBob[src_pitch2 + 2]) / 2; + pDest[3] = (pBob[3] + pBob[src_pitch2 + 3]) / 2; + + // simple bob last byte + pDest[Last8] = (pBob[Last8] + pBob[Last8 + src_pitch2]) / 2; + pDest[Last8 + 1] = (pBob[Last8 + 1] + pBob[Last8 + src_pitch2 + 1]) / 2; + pDest[Last8 + 2] = (pBob[Last8 + 2] + pBob[Last8 + src_pitch2 + 2]) / 2; + pDest[Last8 + 3] = (pBob[Last8 + 3] + pBob[Last8 + src_pitch2 + 3]) / 2; +#endif + + pBob += 4; + pBobP += 4; + pSrc += 4; + pSrcP += 4; + + for (x=4; x < Last8; x += 1) { + +#ifdef USE_STRANGE_BOB +#include "StrangeBob.inc" +#else +#include "WierdBob.inc" +#endif + + // We will keep a slight bias to using the weave pixels + // from the current location, by rating them by the min distance + // from the Bob value instead of the avg distance from that value.
+ // our best and only rating so far + diff = 255; + + #endif diff --git a/gst/deinterlace2/tvtime/tomsmocomp/StrangeBob.inc b/gst/deinterlace2/tvtime/tomsmocomp/StrangeBob.inc index c1d2b5b715..73ce706a70 100644 --- a/gst/deinterlace2/tvtime/tomsmocomp/StrangeBob.inc +++ b/gst/deinterlace2/tvtime/tomsmocomp/StrangeBob.inc @@ -4,7 +4,7 @@ // Assume our pixels are layed out as follows with x the calc'd bob value // and the other pixels are from the current field // - // j a b c k current field + // j a b c k current field // x calculated line // m d e f n current field // @@ -26,6 +26,8 @@ // end if // pickup any thing not yet set with avg(b,e) +#ifndef IS_C + // j, n "pxor %%mm5, %%mm5\n\t" "pxor %%mm6, %%mm6\n\t" @@ -48,7 +50,7 @@ "movq -4(%%"XBX"), %%mm0\n\t" // value j "movq 4(%%"XBX", %%"XCX"), %%mm1\n\t" // value n "movq %%mm0, %%mm2\n\t" - "pavgb %%mm1, %%mm2\n\t" // avg(j,n) + V_PAVGB ("%%mm2", "%%mm1", "%%mm3", _ShiftMask) // avg(j,n) "movq %%mm0, %%mm3\n\t" "psubusb %%mm1, %%mm0\n\t" "psubusb %%mm3, %%mm1\n\t" @@ -60,7 +62,6 @@ "pcmpeqb %%mm3, %%mm1\n\t" // now ff where abs(j,n) < Thres, else 00 "pand %%mm4, %%mm1\n\t" - "pand %%mm1, %%mm2\n\t" "pand %%mm1, %%mm0\n\t" @@ -320,3 +321,68 @@ "por %%mm2, %%mm6\n\t" // our x2 value "por %%mm1, %%mm7\n\t" // our x2 diffs "movq %%mm7, %%mm4\n\t" // save as bob uncertainty indicator + +#else + + diff = -1; + best = 0; + // j, n + if (ABS (pBob[-2] - pBob[src_pitch2 - 4]) < DiffThres && + ABS (pBob[-4] - pBob[src_pitch2 + 4]) > DiffThres) { + best = (pBob[-2] + pBob[src_pitch2 - 4]) / 2; + diff = ABS (pBob[-2] - pBob[src_pitch2 - 4]); + } + + // k & m + if (ABS (pBob[2] - pBob[src_pitch2 + 4]) < DiffThres && + ABS (pBob[4] - pBob[src_pitch2 - 4]) > DiffThres) { + best = (pBob[4] + pBob[src_pitch2 - 4]) / 2; + diff = ABS (pBob[4] - pBob[src_pitch2 - 4]); + } + + // c & d + if (ABS (pBob[0] - pBob[src_pitch2 + 2]) < DiffThres && + ABS (pBob[2] - pBob[src_pitch2 - 2]) > DiffThres) { + best = (pBob[2] + pBob[src_pitch2 
- 2]) / 2; + diff = ABS (pBob[2] - pBob[src_pitch2 - 2]); + } + + // a & f + if (ABS (pBob[0] - pBob[src_pitch2 - 2]) < DiffThres && + ABS (pBob[-2] - pBob[src_pitch2 + 2]) > DiffThres) { + best = (pBob[-2] + pBob[src_pitch2 + 2]) / 2; + diff = ABS (pBob[-2] - pBob[src_pitch2 + 2]); + } + + // b,e + if (ABS (pBob[0] - pBob[src_pitch2]) < DiffThres) { + best = (pBob[0] + pBob[src_pitch2]) / 2; + diff = ABS (pBob[0] - pBob[src_pitch2]); + } + +// We will also calc here the max/min values to later limit comb +// so the max excursion will not exceed the Max_Comb constant + +#ifdef SKIP_SEARCH + best = CLAMP (best, MIN (pBob[src_pitch2], pBob[0]), MAX (pBob[src_pitch2], pBob[0])); +#else + mov = MAX (ABS (pBob[0] - pBobP[0]), ABS (pBob[src_pitch2] - pBobP[src_pitch2])); + + MinVals = 0; + MaxVals = 255; + if (mov > DiffThres) { + MinVals = MAX (MIN (pBob[0], pBob[src_pitch2]), best); + MaxVals = MIN (MAX (pBob[0], pBob[src_pitch2]), best); + } + + best = CLAMP (best, MIN (pBob[src_pitch2], pBob[0]), MAX (pBob[src_pitch2], pBob[0])); +#endif + + avg = (pBob[src_pitch2] + pBob[0]) / 2; + diff2 = ABS (pBob[src_pitch2] - pBob[0]); + + if (diff == -1 || diff2 < diff) { + best = avg; + diff = diff2; + } +#endif diff --git a/gst/deinterlace2/tvtime/tomsmocomp/TomsMoCompAll.inc b/gst/deinterlace2/tvtime/tomsmocomp/TomsMoCompAll.inc index daa3809940..835098a07b 100644 --- a/gst/deinterlace2/tvtime/tomsmocomp/TomsMoCompAll.inc +++ b/gst/deinterlace2/tvtime/tomsmocomp/TomsMoCompAll.inc @@ -36,8 +36,10 @@ #define SEFUNC(x) Search_Effort_MMXEXT_##x(int src_pitch, int dst_pitch, int rowsize, const unsigned char *pWeaveSrc, const unsigned char *pWeaveSrcP, unsigned char *pWeaveDest, int IsOdd, const unsigned char *pCopySrc, const unsigned char *pCopySrcP, int FldHeight) #elif defined(IS_3DNOW) #define SEFUNC(x) Search_Effort_3DNOW_##x(int src_pitch, int dst_pitch, int rowsize, const unsigned char *pWeaveSrc, const unsigned char *pWeaveSrcP, unsigned char *pWeaveDest, int IsOdd, const 
unsigned char *pCopySrc, const unsigned char *pCopySrcP, int FldHeight) -#else +#elif defined(IS_MMX) #define SEFUNC(x) Search_Effort_MMX_##x(int src_pitch, int dst_pitch, int rowsize, const unsigned char *pWeaveSrc, const unsigned char *pWeaveSrcP, unsigned char *pWeaveDest, int IsOdd, const unsigned char *pCopySrc, const unsigned char *pCopySrcP, int FldHeight) +#else +#define SEFUNC(x) Search_Effort_C_##x(int src_pitch, int dst_pitch, int rowsize, const unsigned char *pWeaveSrc, const unsigned char *pWeaveSrcP, unsigned char *pWeaveDest, int IsOdd, const unsigned char *pCopySrc, const unsigned char *pCopySrcP, int FldHeight) #endif #include "TomsMoCompAll2.inc" @@ -53,8 +55,10 @@ #define SEFUNC(x) Search_Effort_MMXEXT_##x(src_pitch, dst_pitch, rowsize, pWeaveSrc, pWeaveSrcP, pWeaveDest, IsOdd, pCopySrc, pCopySrcP, FldHeight) #elif defined(IS_3DNOW) #define SEFUNC(x) Search_Effort_3DNOW_##x(src_pitch, dst_pitch, rowsize, pWeaveSrc, pWeaveSrcP, pWeaveDest, IsOdd, pCopySrc, pCopySrcP, FldHeight) -#else +#elif defined(IS_MMX) #define SEFUNC(x) Search_Effort_MMX_##x(src_pitch, dst_pitch, rowsize, pWeaveSrc, pWeaveSrcP, pWeaveDest, IsOdd, pCopySrc, pCopySrcP, FldHeight) +#else +#define SEFUNC(x) Search_Effort_C_##x(src_pitch, dst_pitch, rowsize, pWeaveSrc, pWeaveSrcP, pWeaveDest, IsOdd, pCopySrc, pCopySrcP, FldHeight) #endif void FUNCT_NAME(GstDeinterlaceMethod *d_method, GstDeinterlace2* object) @@ -231,7 +235,7 @@ void FUNCT_NAME(GstDeinterlaceMethod *d_method, GstDeinterlace2* object) } } -#ifdef HAVE_CPU_I386 +#if defined(BUILD_X86_ASM) && !defined(IS_C) __asm__ __volatile__("emms"); #endif } diff --git a/gst/deinterlace2/tvtime/tomsmocomp/TomsMoCompAll2.inc b/gst/deinterlace2/tvtime/tomsmocomp/TomsMoCompAll2.inc index 6d3447e546..6a68f08bbb 100644 --- a/gst/deinterlace2/tvtime/tomsmocomp/TomsMoCompAll2.inc +++ b/gst/deinterlace2/tvtime/tomsmocomp/TomsMoCompAll2.inc @@ -21,25 +21,45 @@ static inline int SEARCH_EFFORT_FUNC(0) // we don't try at all ;-) static 
inline int SEARCH_EFFORT_FUNC(1) { +#ifdef IS_C +#define SKIP_SEARCH +#include "SearchLoopTop.inc" +#include "SearchLoopBottom.inc" +#undef SKIP_SEARCH +#else //see Search_Effort_Max() for comments #include "SearchLoopTop.inc" RESET_CHROMA // pretend chroma diffs was 255 each #include "SearchLoop0A.inc" #include "SearchLoopBottom.inc" +#endif } static inline int SEARCH_EFFORT_FUNC(3) { +#ifdef IS_C +#define SKIP_SEARCH +#include "SearchLoopTop.inc" +#include "SearchLoopBottom.inc" +#undef SKIP_SEARCH +#else //see Search_Effort_Max() for comments #include "SearchLoopTop.inc" #include "SearchLoopOddA2.inc" RESET_CHROMA // pretend chroma diffs was 255 each #include "SearchLoop0A.inc" #include "SearchLoopBottom.inc" +#endif } static inline int SEARCH_EFFORT_FUNC(5) { +#ifdef IS_C +#define SKIP_SEARCH +#include "SearchLoopTop.inc" +#include "SearchLoopBottom.inc" +#undef SKIP_SEARCH +#else //see Search_Effort_Max() for comments #include "SearchLoopTop.inc" #include "SearchLoopOddA2.inc" @@ -47,11 +67,18 @@ static inline int SEARCH_EFFORT_FUNC(5) RESET_CHROMA // pretend chroma diffs was 255 each #include "SearchLoop0A.inc" #include "SearchLoopBottom.inc" +#endif } // 3x3 search static inline int SEARCH_EFFORT_FUNC(9) { +#ifdef IS_C +#define SKIP_SEARCH +#include "SearchLoopTop.inc" +#include "SearchLoopBottom.inc" +#undef SKIP_SEARCH +#else //see SearchEffortMax() for comments #include "SearchLoopTop.inc" #include "SearchLoopOddA.inc" @@ -59,11 +86,18 @@ static inline int SEARCH_EFFORT_FUNC(9) #include "SearchLoopVA.inc" #include "SearchLoop0A.inc" #include "SearchLoopBottom.inc" +#endif } // Search 9 with 2 H-half pels added static inline int SEARCH_EFFORT_FUNC(11) { +#ifdef IS_C +#define SKIP_SEARCH +#include "SearchLoopTop.inc" +#include "SearchLoopBottom.inc" +#undef SKIP_SEARCH +#else //see SearchEffortMax() for comments #include "SearchLoopTop.inc" #include "SearchLoopOddA.inc" @@ -72,11 +106,18 @@ static inline int SEARCH_EFFORT_FUNC(11) #include 
"SearchLoopVA.inc" #include "SearchLoop0A.inc" #include "SearchLoopBottom.inc" +#endif } // Search 11 with 2 V-half pels added static inline int SEARCH_EFFORT_FUNC(13) { +#ifdef IS_C +#define SKIP_SEARCH +#include "SearchLoopTop.inc" +#include "SearchLoopBottom.inc" +#undef SKIP_SEARCH +#else //see SearchEffortMax() for comments #include "SearchLoopTop.inc" #include "SearchLoopOddA.inc" @@ -86,11 +127,18 @@ static inline int SEARCH_EFFORT_FUNC(13) #include "SearchLoopVA.inc" #include "SearchLoop0A.inc" #include "SearchLoopBottom.inc" +#endif } // 5x3 static inline int SEARCH_EFFORT_FUNC(15) { +#ifdef IS_C +#define SKIP_SEARCH +#include "SearchLoopTop.inc" +#include "SearchLoopBottom.inc" +#undef SKIP_SEARCH +#else //see SearchEffortMax() for comments #include "SearchLoopTop.inc" #include "SearchLoopOddA.inc" @@ -99,11 +147,18 @@ static inline int SEARCH_EFFORT_FUNC(15) #include "SearchLoopVA.inc" #include "SearchLoop0A.inc" #include "SearchLoopBottom.inc" +#endif } // 5x3 + 4 half pels static inline int SEARCH_EFFORT_FUNC(19) { +#ifdef IS_C +#define SKIP_SEARCH +#include "SearchLoopTop.inc" +#include "SearchLoopBottom.inc" +#undef SKIP_SEARCH +#else //see SearchEffortMax() for comments #include "SearchLoopTop.inc" #include "SearchLoopOddA.inc" @@ -114,6 +169,7 @@ static inline int SEARCH_EFFORT_FUNC(19) #include "SearchLoopVA.inc" #include "SearchLoop0A.inc" #include "SearchLoopBottom.inc" +#endif } // Handle one 4x1 block of pixels @@ -121,6 +177,12 @@ static inline int SEARCH_EFFORT_FUNC(19) static inline int SEARCH_EFFORT_FUNC(21) { +#ifdef IS_C +#define SKIP_SEARCH +#include "SearchLoopTop.inc" +#include "SearchLoopBottom.inc" +#undef SKIP_SEARCH +#else //see SearchLoopTop.inc for comments #include "SearchLoopTop.inc" @@ -140,12 +202,19 @@ static inline int SEARCH_EFFORT_FUNC(21) // blend our results and loop #include "SearchLoop0A.inc" #include "SearchLoopBottom.inc" +#endif } // Handle one 4x1 block of pixels // Search a 9x3 area, no half pels static inline 
int SEARCH_EFFORT_FUNC(Max) { +#ifdef IS_C +#define SKIP_SEARCH +#include "SearchLoopTop.inc" +#include "SearchLoopBottom.inc" +#undef SKIP_SEARCH +#else //see SearchLoopTop.inc for comments #include "SearchLoopTop.inc" @@ -167,6 +236,7 @@ static inline int SEARCH_EFFORT_FUNC(Max) // blend our results and loop #include "SearchLoop0A.inc" #include "SearchLoopBottom.inc" +#endif } #undef SEARCH_EFFORT_FUNC diff --git a/gst/deinterlace2/tvtime/tomsmocomp/WierdBob.inc b/gst/deinterlace2/tvtime/tomsmocomp/WierdBob.inc index 36fd9d2289..6cbd1b8dcb 100644 --- a/gst/deinterlace2/tvtime/tomsmocomp/WierdBob.inc +++ b/gst/deinterlace2/tvtime/tomsmocomp/WierdBob.inc @@ -13,6 +13,7 @@ // selected for the smallest of abs(a,f), abs(c,d), or abs(b,e), etc. +#ifndef IS_C // a,f "movq -2(%%"XBX"), %%mm0\n\t" // value a from top left "movq 2(%%"XBX", %%"XCX"), %%mm1\n\t" // value f from bottom right @@ -175,6 +176,7 @@ "pxor %%mm4, %%mm4\n\t" "psubusb %%mm7, %%mm3\n\t" // nonzero where new weights bigger, else 0 "pcmpeqb %%mm4, %%mm3\n\t" // now ff where new better, else 00 + "pcmpeqb %%mm3, %%mm4\n\t" // here ff where old better, else 00 "pand %%mm3, %%mm1\n\t" @@ -187,3 +189,59 @@ "por %%mm1, %%mm7\n\t" // our x2 diffs "movq %%mm7, %%mm4\n\t" // save as bob uncertainty indicator +#else + + // a,f + best = (pBob[-2] + pBob[src_pitch2 + 2]) / 2; + diff = ABS (pBob[-2] - pBob[src_pitch2 + 2]); + + // c,d + if (ABS (pBob[2] - pBob[src_pitch2 - 2]) < diff) { + best = (pBob[2] + pBob[src_pitch2 - 2]) / 2; + diff = ABS (pBob[2] - pBob[src_pitch2 - 2]); + } + + // j,n + if (ABS (pBob[-4] - pBob[src_pitch2 + 4]) < diff) { + best = (pBob[-4] + pBob[src_pitch2 + 4]) / 2; + diff = ABS (pBob[-4] - pBob[src_pitch2 + 4]); + } + + // k,m + if (ABS (pBob[4] - pBob[src_pitch2 - 4]) < diff) { + best = (pBob[4] + pBob[src_pitch2 - 4]) / 2; + diff = ABS (pBob[4] - pBob[src_pitch2 - 4]); /* rate the same k,m pair that was tested and averaged */ + } + + // k,m + if (ABS (pBob[4] - pBob[src_pitch2 - 4]) < diff) { + best = (pBob[4] + pBob[src_pitch2 - 4]) / 2; +
diff = ABS (pBob[4] - pBob[src_pitch2 - 4]); /* rate the same k,m pair that was tested and averaged */ + } + +// We will also calc here the max/min values to later limit comb +// so the max excursion will not exceed the Max_Comb constant + +#ifdef SKIP_SEARCH + best = CLAMP (best, MIN (pBob[src_pitch2], pBob[0]), MAX (pBob[src_pitch2], pBob[0])); +#else + mov = MAX (ABS (pBob[0] - pBobP[0]), ABS (pBob[src_pitch2] - pBobP[src_pitch2])); + + MinVals = 0; + MaxVals = 255; + if (mov > Max_Mov) { + MinVals = MAX (MIN (pBob[0], pBob[src_pitch2]), best); + MaxVals = MIN (MAX (pBob[0], pBob[src_pitch2]), best); + } + + best = CLAMP (best, MIN (pBob[src_pitch2], pBob[0]), MAX (pBob[src_pitch2], pBob[0])); +#endif + + avg = (pBob[src_pitch2] + pBob[0]) / 2; + diff2 = ABS (pBob[src_pitch2] - pBob[0]); + + if (diff2 < diff) { + best = avg; + diff = diff2; + } +#endif diff --git a/gst/deinterlace2/tvtime/tomsmocomp/tomsmocompmacros.h b/gst/deinterlace2/tvtime/tomsmocomp/tomsmocompmacros.h index 156be89299..7e8147ec96 100644 --- a/gst/deinterlace2/tvtime/tomsmocomp/tomsmocompmacros.h +++ b/gst/deinterlace2/tvtime/tomsmocomp/tomsmocompmacros.h @@ -1,8 +1,6 @@ #include #include -#define USE_FOR_DSCALER - // Define a few macros for CPU dependent instructions. // I suspect I don't really understand how the C macro preprocessor works but // this seems to get the job done. // TRB 7/01