mirror of
https://gitlab.freedesktop.org/gstreamer/gstreamer.git
synced 2025-01-25 16:48:11 +00:00
[MOVED FROM BAD 16/56] gst/deinterlace2/tvtime/: Add a C implementation for the greedyh deinterlacing method, clean up the code a bit and ma...
Original commit message from CVS: * gst/deinterlace2/tvtime/greedyh.asm: * gst/deinterlace2/tvtime/greedyh.c: (greedyDScaler_C), (deinterlace_frame_di_greedyh), (dscaler_greedyh_get_method): * gst/deinterlace2/tvtime/greedyhmacros.h: Add a C implementation for the greedyh deinterlacing method, clean up the code a bit and mark the SSE version as MMXEXT as it doesn't require any SSE instructions.
This commit is contained in:
parent
07f408a3ea
commit
57dd0e85d1
3 changed files with 421 additions and 320 deletions
|
@ -28,88 +28,47 @@
|
|||
|
||||
#include "x86-64_macros.inc"
|
||||
|
||||
void FUNCT_NAME( GstDeinterlace2 *object)
|
||||
void
|
||||
FUNCT_NAME (uint8_t * L1, uint8_t * L2, uint8_t * L3, uint8_t * L2P,
|
||||
uint8_t * Dest, int size)
|
||||
{
|
||||
int64_t i;
|
||||
int InfoIsOdd = 0;
|
||||
|
||||
// in tight loop some vars are accessed faster in local storage
|
||||
int64_t YMask = 0x00ff00ff00ff00ffull; // to keep only luma
|
||||
int64_t UVMask = 0xff00ff00ff00ff00ull; // to keep only chroma
|
||||
int64_t ShiftMask = 0xfefffefffefffeffull; // to avoid shifting chroma to luma
|
||||
int64_t ShiftMask = 0xfefefefefefefefeull; // to avoid shifting chroma to luma
|
||||
int64_t QW256 = 0x0100010001000100ull; // 4 256's
|
||||
|
||||
// Set up our two parms that are actually evaluated for each pixel
|
||||
i=GreedyMaxComb;
|
||||
int64_t MaxComb = i << 56 | i << 48 | i << 40 | i << 32 | i << 24 | i << 16 | i << 8 | i;
|
||||
|
||||
i = GreedyMotionThreshold; // scale to range of 0-257
|
||||
int64_t MotionThreshold = i << 48 | i << 32 | i << 16 | i | UVMask;
|
||||
|
||||
i = GreedyMotionSense; // scale to range of 0-257
|
||||
int64_t MotionSense = i << 48 | i << 32 | i << 16 | i;
|
||||
|
||||
int Line;
|
||||
int64_t MaxComb;
|
||||
int64_t MotionThreshold;
|
||||
int64_t MotionSense;
|
||||
int64_t i;
|
||||
long LoopCtr;
|
||||
unsigned int Pitch = object->field_stride;
|
||||
|
||||
unsigned char* L1; // ptr to Line1, of 3
|
||||
unsigned char* L2; // ptr to Line2, the weave line
|
||||
unsigned char* L3; // ptr to Line3
|
||||
|
||||
unsigned char* L2P; // ptr to prev Line2
|
||||
unsigned char* Dest = GST_BUFFER_DATA(object->out_buf);
|
||||
long oldbx;
|
||||
|
||||
int64_t QW256B;
|
||||
int64_t LastAvg = 0; //interp value from left qword
|
||||
|
||||
// Set up our two parms that are actually evaluated for each pixel
|
||||
i = GreedyMaxComb;
|
||||
MaxComb =
|
||||
i << 56 | i << 48 | i << 40 | i << 32 | i << 24 | i << 16 | i << 8 | i;
|
||||
|
||||
i = GreedyMotionThreshold; // scale to range of 0-257
|
||||
MotionThreshold = i << 48 | i << 32 | i << 16 | i | UVMask;
|
||||
|
||||
i = GreedyMotionSense; // scale to range of 0-257
|
||||
MotionSense = i << 48 | i << 32 | i << 16 | i;
|
||||
|
||||
i = 0xffffffff - 256;
|
||||
QW256B = i << 48 | i << 32 | i << 16 | i; // save a couple instr on PMINSW instruct.
|
||||
|
||||
|
||||
// copy first even line no matter what, and the first odd line if we're
|
||||
// processing an EVEN field. (note diff from other deint rtns.)
|
||||
|
||||
if (object->field_history[object->history_count-1].flags == PICTURE_INTERLACED_BOTTOM) {
|
||||
InfoIsOdd = 1;
|
||||
|
||||
L1 = GST_BUFFER_DATA(object->field_history[object->history_count-2].buf);
|
||||
L2 = GST_BUFFER_DATA(object->field_history[object->history_count-1].buf);
|
||||
L3 = L1 + Pitch;
|
||||
L2P = GST_BUFFER_DATA(object->field_history[object->history_count-3].buf);
|
||||
|
||||
// copy first even line
|
||||
object->pMemcpy(Dest, L1, object->line_length);
|
||||
Dest += object->output_stride;
|
||||
}
|
||||
else {
|
||||
InfoIsOdd = 0;
|
||||
L1 = GST_BUFFER_DATA(object->field_history[object->history_count-2].buf);
|
||||
L2 = GST_BUFFER_DATA(object->field_history[object->history_count-1].buf) + Pitch;
|
||||
L3 = L1 + Pitch;
|
||||
L2P = GST_BUFFER_DATA(object->field_history[object->history_count-3].buf) + Pitch;
|
||||
|
||||
// copy first even line
|
||||
object->pMemcpy(Dest, GST_BUFFER_DATA(object->field_history[0].buf), object->line_length);
|
||||
Dest += object->output_stride;
|
||||
// then first odd line
|
||||
object->pMemcpy(Dest, L1, object->line_length);
|
||||
Dest += object->output_stride;
|
||||
}
|
||||
|
||||
|
||||
long oldbx;
|
||||
|
||||
for (Line = 0; Line < (object->field_height - 1); ++Line) {
|
||||
LoopCtr = object->line_length / 8 - 1; // there are LineLength / 8 qwords per line but do 1 less, adj at end of loop
|
||||
LoopCtr = size / 8 - 1; // there are LineLength / 8 qwords per line but do 1 less, adj at end of loop
|
||||
|
||||
// For ease of reading, the comments below assume that we're operating on an odd
|
||||
// field (i.e., that InfoIsOdd is true). Assume the obvious for even lines..
|
||||
__asm__ __volatile__
|
||||
(
|
||||
__asm__ __volatile__ (
|
||||
// save ebx (-fPIC)
|
||||
MOVX " %%" XBX ", %[oldbx]\n\t"
|
||||
|
||||
MOVX " %[L1], %%" XAX "\n\t"
|
||||
LEAX " 8(%%" XAX "), %%" XBX "\n\t" // next qword needed by DJR
|
||||
MOVX " %[L3], %%" XCX "\n\t"
|
||||
|
@ -120,12 +79,12 @@ void FUNCT_NAME( GstDeinterlace2 *object)
|
|||
|
||||
".align 8\n\t"
|
||||
"1:\n\t"
|
||||
|
||||
"movq (%%" XSI "), %%mm0\n\t" // L2 - the newest weave pixel value
|
||||
"movq (%%" XAX "), %%mm1\n\t" // L1 - the top pixel
|
||||
"movq (%%" XDX "), %%mm2\n\t" // L2P - the prev weave pixel
|
||||
"movq (%%" XAX ", %%" XCX "), %%mm3\n\t" // L3, next odd row
|
||||
"movq %%mm1, %%mm6\n\t" // L1 - get simple single pixel interp
|
||||
|
||||
// pavgb mm6, mm3 // use macro below
|
||||
V_PAVGB ("%%mm6", "%%mm3", "%%mm4", "%[ShiftMask]")
|
||||
|
||||
|
@ -140,9 +99,9 @@ void FUNCT_NAME( GstDeinterlace2 *object)
|
|||
"movq %%mm6, %%mm7\n\t" // copy of simple bob pixel
|
||||
"psllq $16, %%mm7\n\t" // left justify 3 pixels
|
||||
"por %%mm7, %%mm4\n\t" // and combine
|
||||
|
||||
"movq (%%" XBX "), %%mm5\n\t" // next horiz qword from L1
|
||||
// pavgb mm5, qword ptr[ebx+ecx] // next horiz qword from L3, use macro below
|
||||
|
||||
V_PAVGB ("%%mm5", "(%%" XBX ",%%" XCX ")", "%%mm7", "%[ShiftMask]")
|
||||
"psllq $48, %%mm5\n\t" // left just 1 pixel
|
||||
"movq %%mm6, %%mm7\n\t" // another copy of simple bob pixel
|
||||
|
@ -193,8 +152,8 @@ void FUNCT_NAME( GstDeinterlace2 *object)
|
|||
// mm3 = L3
|
||||
// mm4 = the best of L2,L2P weave pixel, base upon comb
|
||||
// mm6 = the avg interpolated value, if we need to use it
|
||||
|
||||
// Let's measure movement, as how much the weave pixel has changed
|
||||
|
||||
"movq %%mm2, %%mm7\n\t"
|
||||
"psubusb %%mm0, %%mm2\n\t"
|
||||
"psubusb %%mm7, %%mm0\n\t"
|
||||
|
@ -204,12 +163,14 @@ void FUNCT_NAME( GstDeinterlace2 *object)
|
|||
// of the high/low range L1-L3 by more than MaxComb.
|
||||
// This allows some comb but limits the damages and also allows more
|
||||
// detail than a boring oversmoothed clip.
|
||||
|
||||
"movq %%mm1, %%mm2\n\t" // copy L1
|
||||
// pmaxub mm2, mm3 // use macro
|
||||
V_PMAXUB ("%%mm2", "%%mm3") // now = Max(L1,L3)
|
||||
"movq %%mm1, %%mm5\n\t" // copy L1
|
||||
// pminub mm5, mm3 // now = Min(L1,L3), use macro
|
||||
V_PMINUB ("%%mm5", "%%mm3", "%%mm7")
|
||||
|
||||
// allow the value to be above the high or below the low by amt of MaxComb
|
||||
"psubusb %[MaxComb], %%mm5\n\t" // lower min by diff
|
||||
"paddusb %[MaxComb], %%mm2\n\t" // increase max by diff
|
||||
|
@ -222,7 +183,7 @@ void FUNCT_NAME( GstDeinterlace2 *object)
|
|||
"psubusb %[MotionThreshold], %%mm0\n\t" // test Threshold, clear chroma change >>>??
|
||||
"pmullw %[MotionSense], %%mm0\n\t" // mul by user factor, keep low 16 bits
|
||||
"movq %[QW256], %%mm7\n\t"
|
||||
#ifdef HAVE_SSE
|
||||
#if SIMD_TYPE == MMXEXT
|
||||
"pminsw %%mm7, %%mm0\n\t" // max = 256
|
||||
#else
|
||||
"paddusw %[QW256B], %%mm0\n\t" // add, may sat at fff..
|
||||
|
@ -236,13 +197,10 @@ void FUNCT_NAME( GstDeinterlace2 *object)
|
|||
"pmullw %%mm0, %%mm6\n\t" // use more bob for large motion
|
||||
"paddusw %%mm6, %%mm4\n\t" // combine
|
||||
"psrlw $8, %%mm4\n\t" // div by 256 to get weighted avg
|
||||
|
||||
// chroma comes from weave pixel
|
||||
"pand %[UVMask], %%mm2\n\t" // keep chroma
|
||||
"por %%mm4, %%mm2\n\t" // and combine
|
||||
|
||||
V_MOVNTQ ("(%%" XDI ")", "%%mm2") // move in our clipped best, use macro
|
||||
|
||||
// bump ptrs and loop
|
||||
LEAX " 8(%%" XAX "), %%" XAX "\n\t"
|
||||
LEAX " 8(%%" XBX "), %%" XBX "\n\t"
|
||||
|
@ -250,6 +208,7 @@ void FUNCT_NAME( GstDeinterlace2 *object)
|
|||
LEAX " 8(%%" XDI "), %%" XDI "\n\t"
|
||||
LEAX " 8(%%" XSI "), %%" XSI "\n\t"
|
||||
DECX " %[LoopCtr]\n\t"
|
||||
|
||||
"jg 1b\n\t" // loop if not to last line
|
||||
// note P-III default assumes backward branches taken
|
||||
"jl 1f\n\t" // done
|
||||
|
@ -258,8 +217,7 @@ void FUNCT_NAME( GstDeinterlace2 *object)
|
|||
|
||||
"1:\n\t"
|
||||
MOVX " %[oldbx], %%" XBX "\n\t"
|
||||
|
||||
: /* no outputs */
|
||||
"emms\n\t": /* no outputs */
|
||||
|
||||
:[LastAvg] "m" (LastAvg),
|
||||
[L1] "m" (L1),
|
||||
|
@ -277,32 +235,9 @@ void FUNCT_NAME( GstDeinterlace2 *object)
|
|||
[LoopCtr] "m" (LoopCtr),
|
||||
[QW256] "m" (QW256),
|
||||
[oldbx] "m" (oldbx)
|
||||
|
||||
: XAX, XCX, XDX, XSI, XDI,
|
||||
#ifdef HAVE_CPU_I386
|
||||
"st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)",
|
||||
#endif
|
||||
/* FIXME: breaks unless compiling with -mmmx
|
||||
"mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7", */
|
||||
"memory", "cc"
|
||||
);
|
||||
|
||||
Dest += object->output_stride;
|
||||
object->pMemcpy(Dest, L3, object->line_length);
|
||||
Dest += object->output_stride;
|
||||
|
||||
L1 += Pitch;
|
||||
L2 += Pitch;
|
||||
L3 += Pitch;
|
||||
L2P += Pitch;
|
||||
}
|
||||
|
||||
if (InfoIsOdd) {
|
||||
object->pMemcpy(Dest, L2, object->line_length);
|
||||
}
|
||||
|
||||
// clear out the MMX registers ready for doing floating point again
|
||||
#ifdef HAVE_CPU_I386
|
||||
__asm__ __volatile__ ("emms\n\t");
|
||||
#endif
|
||||
"memory", "cc");
|
||||
}
|
||||
|
|
|
@ -41,51 +41,244 @@
|
|||
#include "gstdeinterlace2.h"
|
||||
#include "speedy.h"
|
||||
|
||||
/* Tuning parameters of the greedyh method. */
static const unsigned int GreedyMaxComb = 5;            /* tolerated overshoot beyond the L1/L3 range */
static const unsigned int GreedyMotionThreshold = 25;   /* weave change that is ignored as noise */
static const unsigned int GreedyMotionSense = 30;       /* gain applied to the remaining weave change */

/* Absolute difference of two sample values. */
static inline int
greedyh_abs_diff (int a, int b)
{
  return (a > b) ? (a - b) : (b - a);
}

/* Limit value to the inclusive range [low, high]. */
static inline int
greedyh_clamp (int value, int low, int high)
{
  if (value > high)
    return high;
  if (value < low)
    return low;
  return value;
}

/* Compute the range spanned by a and b, widened by GreedyMaxComb on both
 * sides and saturated to 0..255. */
static inline void
greedyh_comb_range (int a, int b, int *low, int *high)
{
  const int max_comb = (int) GreedyMaxComb;
  const int lo = (a < b) ? a : b;
  const int hi = (a < b) ? b : a;

  *high = (hi < 256 - max_comb) ? hi + max_comb : 255;
  *low = (lo > max_comb) ? lo - max_comb : 0;
}

/*
 * greedyDScaler_C:
 * @L1: line above the line being produced
 * @L2: weave line taken from the newest field
 * @L3: line below the line being produced
 * @L2P: weave line taken from the previous field of the same parity
 * @Dest: output line
 * @size: length of every line in bytes
 *
 * Plain C version of the greedyh scanline filter.  Lines are processed as
 * byte pairs: byte 0 of each pair is treated as luma, byte 1 as chroma.
 * For every pair the function
 *   1. averages L1 and L3 vertically and smooths that average horizontally
 *      with its left and right neighbours,
 *   2. picks whichever of L2/L2P is closer to the smoothed average,
 *   3. clips that choice to the L1/L3 range widened by GreedyMaxComb,
 *   4. for luma only, blends the clipped value with the smoothed average in
 *      proportion to how much the weave pixel changed between L2P and L2.
 *
 * Only complete pairs are processed; a trailing odd byte is neither read
 * nor written.  The last pair of a line has no right-hand neighbour, so its
 * own vertical average is used instead of reading past the end of the line.
 */
void
greedyDScaler_C (uint8_t * L1, uint8_t * L2, uint8_t * L3, uint8_t * L2P,
    uint8_t * Dest, int size)
{
  int Pos;
  /* Vertical average of the pair to the left; starts at 0 for the first pair. */
  int left_avg_l = 0, left_avg_c = 0;

  for (Pos = 0; size - Pos >= 2; Pos += 2) {
    /* A right-hand neighbour exists only if two more bytes follow this pair. */
    const int has_next = (size - Pos >= 4);
    int avg_l, avg_c;           /* vertical average of this pair */
    int next_avg_l, next_avg_c; /* vertical average of the next pair */
    int side_avg_l, side_avg_c; /* average of left and right neighbours */
    int smooth_l, smooth_c;     /* horizontally smoothed average */
    int l2_l, l2_c, lp2_l, lp2_c;
    int best_l, best_c;
    int min_l, max_l, min_c, max_c;
    int out_l, out_c;
    int mov_l;

    avg_l = (L1[0] + L3[0]) / 2;
    avg_c = (L1[1] + L3[1]) / 2;

    if (has_next) {
      next_avg_l = (L1[2] + L3[2]) / 2;
      next_avg_c = (L1[3] + L3[3]) / 2;
    } else {
      /* Right edge: replicate the current pair. */
      next_avg_l = avg_l;
      next_avg_c = avg_c;
    }

    side_avg_l = (left_avg_l + next_avg_l) / 2;
    side_avg_c = (left_avg_c + next_avg_c) / 2;

    smooth_l = (avg_l + side_avg_l) / 2;
    smooth_c = (avg_c + side_avg_c) / 2;

    left_avg_l = avg_l;
    left_avg_c = avg_c;

    /* Choose the weave candidate closest to the smoothed average; ties go
     * to the newest field (L2). */
    l2_l = L2[0];
    l2_c = L2[1];
    lp2_l = L2P[0];
    lp2_c = L2P[1];

    best_l = (greedyh_abs_diff (l2_l, smooth_l) >
        greedyh_abs_diff (lp2_l, smooth_l)) ? lp2_l : l2_l;
    best_c = (greedyh_abs_diff (l2_c, smooth_c) >
        greedyh_abs_diff (lp2_c, smooth_c)) ? lp2_c : l2_c;

    /* Clip the candidate to the L1/L3 range, allowing GreedyMaxComb slack. */
    greedyh_comb_range (L1[0], L3[0], &min_l, &max_l);
    greedyh_comb_range (L1[1], L3[1], &min_c, &max_c);

    out_l = greedyh_clamp (best_l, min_l, max_l);
    out_c = greedyh_clamp (best_c, min_c, max_c);

    /* Motion measure (luma only): change of the weave pixel between the two
     * fields, minus the threshold, scaled and limited to 0..256. */
    mov_l = greedyh_abs_diff (l2_l, lp2_l);
    if (mov_l > (int) GreedyMotionThreshold)
      mov_l -= (int) GreedyMotionThreshold;
    else
      mov_l = 0;

    mov_l *= (int) GreedyMotionSense;
    if (mov_l > 256)
      mov_l = 256;

    /* Weighted mix: more of the smoothed average the larger the motion. */
    out_l = (out_l * (256 - mov_l) + smooth_l * mov_l) / 256;

    Dest[0] = (uint8_t) out_l;
    Dest[1] = (uint8_t) out_c;

    Dest += 2;
    L1 += 2;
    L2 += 2;
    L3 += 2;
    L2P += 2;
  }
}
|
||||
|
||||
#define IS_MMXEXT
|
||||
#define SIMD_TYPE MMXEXT
|
||||
#define FUNCT_NAME greedyDScaler_MMXEXT
|
||||
#include "greedyh.asm"
|
||||
#undef SSE_TYPE
|
||||
#undef IS_SSE
|
||||
#undef SIMD_TYPE
|
||||
#undef IS_MMXEXT
|
||||
#undef FUNCT_NAME
|
||||
|
||||
#define IS_3DNOW
|
||||
#define IS_TDNOW
|
||||
#define SIMD_TYPE TDNOW
|
||||
#define FUNCT_NAME greedyDScaler_3DNOW
|
||||
#define SSE_TYPE 3DNOW
|
||||
#include "greedyh.asm"
|
||||
#undef SSE_TYPE
|
||||
#undef IS_3DNOW
|
||||
#undef SIMD_TYPE
|
||||
#undef IS_TDNOW
|
||||
#undef FUNCT_NAME
|
||||
|
||||
#define IS_MMX
|
||||
#define SSE_TYPE MMX
|
||||
#define SIMD_TYPE MMX
|
||||
#define FUNCT_NAME greedyDScaler_MMX
|
||||
#include "greedyh.asm"
|
||||
#undef SSE_TYPE
|
||||
#undef SIMD_TYPE
|
||||
#undef IS_MMX
|
||||
#undef FUNCT_NAME
|
||||
|
||||
void
|
||||
static void
|
||||
deinterlace_frame_di_greedyh (GstDeinterlace2 * object)
|
||||
{
|
||||
if (object->cpu_feature_flags & OIL_IMPL_FLAG_SSE) {
|
||||
greedyh_filter_sse (object);
|
||||
void (*func) (uint8_t * L1, uint8_t * L2, uint8_t * L3, uint8_t * L2P,
|
||||
uint8_t * Dest, int size);
|
||||
|
||||
int InfoIsOdd = 0;
|
||||
int Line;
|
||||
unsigned int Pitch = object->field_stride;
|
||||
|
||||
unsigned char *L1; // ptr to Line1, of 3
|
||||
unsigned char *L2; // ptr to Line2, the weave line
|
||||
unsigned char *L3; // ptr to Line3
|
||||
|
||||
unsigned char *L2P; // ptr to prev Line2
|
||||
unsigned char *Dest = GST_BUFFER_DATA (object->out_buf);
|
||||
|
||||
if (object->cpu_feature_flags & OIL_IMPL_FLAG_MMXEXT) {
|
||||
func = greedyDScaler_MMXEXT;
|
||||
} else if (object->cpu_feature_flags & OIL_IMPL_FLAG_3DNOW) {
|
||||
greedyh_filter_3dnow (object);
|
||||
func = greedyDScaler_3DNOW;
|
||||
} else if (object->cpu_feature_flags & OIL_IMPL_FLAG_MMX) {
|
||||
func = greedyDScaler_MMX;
|
||||
} else {
|
||||
greedyh_filter_mmx (object);
|
||||
func = greedyDScaler_C;
|
||||
}
|
||||
|
||||
// copy first even line no matter what, and the first odd line if we're
|
||||
// processing an EVEN field. (note diff from other deint rtns.)
|
||||
|
||||
if (object->field_history[object->history_count - 1].flags ==
|
||||
PICTURE_INTERLACED_BOTTOM) {
|
||||
InfoIsOdd = 1;
|
||||
|
||||
L1 = GST_BUFFER_DATA (object->field_history[object->history_count - 2].buf);
|
||||
L2 = GST_BUFFER_DATA (object->field_history[object->history_count - 1].buf);
|
||||
L3 = L1 + Pitch;
|
||||
L2P =
|
||||
GST_BUFFER_DATA (object->field_history[object->history_count - 3].buf);
|
||||
|
||||
// copy first even line
|
||||
object->pMemcpy (Dest, L1, object->line_length);
|
||||
Dest += object->output_stride;
|
||||
} else {
|
||||
InfoIsOdd = 0;
|
||||
L1 = GST_BUFFER_DATA (object->field_history[object->history_count - 2].buf);
|
||||
L2 = GST_BUFFER_DATA (object->field_history[object->history_count -
|
||||
1].buf) + Pitch;
|
||||
L3 = L1 + Pitch;
|
||||
L2P =
|
||||
GST_BUFFER_DATA (object->field_history[object->history_count - 3].buf) +
|
||||
Pitch;
|
||||
|
||||
// copy first even line
|
||||
object->pMemcpy (Dest, GST_BUFFER_DATA (object->field_history[0].buf),
|
||||
object->line_length);
|
||||
Dest += object->output_stride;
|
||||
// then first odd line
|
||||
object->pMemcpy (Dest, L1, object->line_length);
|
||||
Dest += object->output_stride;
|
||||
}
|
||||
|
||||
for (Line = 0; Line < (object->field_height - 1); ++Line) {
|
||||
func (L1, L2, L3, L2P, Dest, object->line_length);
|
||||
Dest += object->output_stride;
|
||||
object->pMemcpy (Dest, L3, object->line_length);
|
||||
Dest += object->output_stride;
|
||||
|
||||
L1 += Pitch;
|
||||
L2 += Pitch;
|
||||
L3 += Pitch;
|
||||
L2P += Pitch;
|
||||
}
|
||||
|
||||
if (InfoIsOdd) {
|
||||
object->pMemcpy (Dest, L2, object->line_length);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -94,7 +287,7 @@ static deinterlace_method_t greedyh_method = {
|
|||
"Motion Adaptive: Advanced Detection",
|
||||
"AdaptiveAdvanced",
|
||||
4,
|
||||
OIL_IMPL_FLAG_MMX,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
|
@ -117,32 +310,5 @@ static deinterlace_method_t greedyh_method = {
|
|||
deinterlace_method_t *
|
||||
dscaler_greedyh_get_method (void)
|
||||
{
|
||||
greedyh_init ();
|
||||
return &greedyh_method;
|
||||
}
|
||||
|
||||
void
|
||||
greedyh_init (void)
|
||||
{
|
||||
GreedyMaxComb = MAXCOMB_DEFAULT;
|
||||
GreedyMotionThreshold = MOTIONTHRESHOLD_DEFAULT;
|
||||
GreedyMotionSense = MOTIONSENSE_DEFAULT;
|
||||
}
|
||||
|
||||
void
|
||||
greedyh_filter_mmx (GstDeinterlace2 * object)
|
||||
{
|
||||
greedyDScaler_MMX (object);
|
||||
}
|
||||
|
||||
void
|
||||
greedyh_filter_3dnow (GstDeinterlace2 * object)
|
||||
{
|
||||
greedyDScaler_3DNOW (object);
|
||||
}
|
||||
|
||||
void
|
||||
greedyh_filter_sse (GstDeinterlace2 * object)
|
||||
{
|
||||
greedyDScaler_SSE (object);
|
||||
}
|
||||
|
|
|
@ -21,7 +21,7 @@
|
|||
|
||||
// BEFORE USING THESE YOU MUST SET:
|
||||
|
||||
// #define SSE_TYPE SSE (or MMX or 3DNOW)
|
||||
// #define SIMD_TYPE MMXEXT (or MMX or TDNOW)
|
||||
|
||||
// some macros for pavgb instruction
|
||||
// V_PAVGB(mmr1, mmr2, mmr work register, smask) mmr2 may = mmrw if you can trash it
|
||||
|
@ -33,21 +33,21 @@
|
|||
"pand "smask", "mmr1"\n\t" \
|
||||
"psrlw $1, "mmr1"\n\t" \
|
||||
"paddusb "mmrw", "mmr1"\n\t"
|
||||
#define V_PAVGB_SSE(mmr1, mmr2, mmrw, smask) "pavgb "mmr2", "mmr1"\n\t"
|
||||
#define V_PAVGB_3DNOW(mmr1, mmr2, mmrw, smask) "pavgusb "mmr2", "mmr1"\n\t"
|
||||
#define V_PAVGB(mmr1, mmr2, mmrw, smask) V_PAVGB2(mmr1, mmr2, mmrw, smask, SSE_TYPE)
|
||||
#define V_PAVGB2(mmr1, mmr2, mmrw, smask, ssetyp) V_PAVGB3(mmr1, mmr2, mmrw, smask, ssetyp)
|
||||
#define V_PAVGB3(mmr1, mmr2, mmrw, smask, ssetyp) V_PAVGB_##ssetyp(mmr1, mmr2, mmrw, smask)
|
||||
#define V_PAVGB_MMXEXT(mmr1, mmr2, mmrw, smask) "pavgb "mmr2", "mmr1"\n\t"
|
||||
#define V_PAVGB_TDNOW(mmr1, mmr2, mmrw, smask) "pavgusb "mmr2", "mmr1"\n\t"
|
||||
#define V_PAVGB(mmr1, mmr2, mmrw, smask) V_PAVGB2(mmr1, mmr2, mmrw, smask, SIMD_TYPE)
|
||||
#define V_PAVGB2(mmr1, mmr2, mmrw, smask, simdtype) V_PAVGB3(mmr1, mmr2, mmrw, smask, simdtype)
|
||||
#define V_PAVGB3(mmr1, mmr2, mmrw, smask, simdtype) V_PAVGB_##simdtype(mmr1, mmr2, mmrw, smask)
|
||||
|
||||
// some macros for pmaxub instruction
|
||||
#define V_PMAXUB_MMX(mmr1, mmr2) \
|
||||
"psubusb "mmr2", "mmr1"\n\t" \
|
||||
"paddusb "mmr2", "mmr1"\n\t"
|
||||
#define V_PMAXUB_SSE(mmr1, mmr2) "pmaxub "mmr2", "mmr1"\n\t"
|
||||
#define V_PMAXUB_3DNOW(mmr1, mmr2) V_PMAXUB_MMX(mmr1, mmr2) // use MMX version
|
||||
#define V_PMAXUB(mmr1, mmr2) V_PMAXUB2(mmr1, mmr2, SSE_TYPE)
|
||||
#define V_PMAXUB2(mmr1, mmr2, ssetyp) V_PMAXUB3(mmr1, mmr2, ssetyp)
|
||||
#define V_PMAXUB3(mmr1, mmr2, ssetyp) V_PMAXUB_##ssetyp(mmr1, mmr2)
|
||||
#define V_PMAXUB_MMXEXT(mmr1, mmr2) "pmaxub "mmr2", "mmr1"\n\t"
|
||||
#define V_PMAXUB_TDNOW(mmr1, mmr2) V_PMAXUB_MMX(mmr1, mmr2) // use MMX version
|
||||
#define V_PMAXUB(mmr1, mmr2) V_PMAXUB2(mmr1, mmr2, SIMD_TYPE)
|
||||
#define V_PMAXUB2(mmr1, mmr2, simdtype) V_PMAXUB3(mmr1, mmr2, simdtype)
|
||||
#define V_PMAXUB3(mmr1, mmr2, simdtype) V_PMAXUB_##simdtype(mmr1, mmr2)
|
||||
|
||||
// some macros for pminub instruction
|
||||
// V_PMINUB(mmr1, mmr2, mmr work register) mmr2 may NOT = mmrw
|
||||
|
@ -56,19 +56,19 @@
|
|||
"psubusb "mmr2", "mmrw"\n\t" \
|
||||
"paddusb "mmrw", "mmr1"\n\t" \
|
||||
"psubusb "mmrw", "mmr1"\n\t"
|
||||
#define V_PMINUB_SSE(mmr1, mmr2, mmrw) "pminub "mmr2", "mmr1"\n\t"
|
||||
#define V_PMINUB_3DNOW(mmr1, mmr2, mmrw) V_PMINUB_MMX(mmr1, mmr2, mmrw) // use MMX version
|
||||
#define V_PMINUB(mmr1, mmr2, mmrw) V_PMINUB2(mmr1, mmr2, mmrw, SSE_TYPE)
|
||||
#define V_PMINUB2(mmr1, mmr2, mmrw, ssetyp) V_PMINUB3(mmr1, mmr2, mmrw, ssetyp)
|
||||
#define V_PMINUB3(mmr1, mmr2, mmrw, ssetyp) V_PMINUB_##ssetyp(mmr1, mmr2, mmrw)
|
||||
#define V_PMINUB_MMXEXT(mmr1, mmr2, mmrw) "pminub "mmr2", "mmr1"\n\t"
|
||||
#define V_PMINUB_TDNOW(mmr1, mmr2, mmrw) V_PMINUB_MMX(mmr1, mmr2, mmrw) // use MMX version
|
||||
#define V_PMINUB(mmr1, mmr2, mmrw) V_PMINUB2(mmr1, mmr2, mmrw, SIMD_TYPE)
|
||||
#define V_PMINUB2(mmr1, mmr2, mmrw, simdtype) V_PMINUB3(mmr1, mmr2, mmrw, simdtype)
|
||||
#define V_PMINUB3(mmr1, mmr2, mmrw, simdtype) V_PMINUB_##simdtype(mmr1, mmr2, mmrw)
|
||||
|
||||
// some macros for movntq instruction
|
||||
// V_MOVNTQ(mmr1, mmr2)
|
||||
#define V_MOVNTQ_MMX(mmr1, mmr2) "movq "mmr2", "mmr1"\n\t"
|
||||
#define V_MOVNTQ_3DNOW(mmr1, mmr2) "movq "mmr2", "mmr1"\n\t"
|
||||
#define V_MOVNTQ_SSE(mmr1, mmr2) "movntq "mmr2", "mmr1"\n\t"
|
||||
#define V_MOVNTQ(mmr1, mmr2) V_MOVNTQ2(mmr1, mmr2, SSE_TYPE)
|
||||
#define V_MOVNTQ2(mmr1, mmr2, ssetyp) V_MOVNTQ3(mmr1, mmr2, ssetyp)
|
||||
#define V_MOVNTQ3(mmr1, mmr2, ssetyp) V_MOVNTQ_##ssetyp(mmr1, mmr2)
|
||||
#define V_MOVNTQ_TDNOW(mmr1, mmr2) "movq "mmr2", "mmr1"\n\t"
|
||||
#define V_MOVNTQ_MMXEXT(mmr1, mmr2) "movntq "mmr2", "mmr1"\n\t"
|
||||
#define V_MOVNTQ(mmr1, mmr2) V_MOVNTQ2(mmr1, mmr2, SIMD_TYPE)
|
||||
#define V_MOVNTQ2(mmr1, mmr2, simdtype) V_MOVNTQ3(mmr1, mmr2, simdtype)
|
||||
#define V_MOVNTQ3(mmr1, mmr2, simdtype) V_MOVNTQ_##simdtype(mmr1, mmr2)
|
||||
|
||||
// end of macros
|
||||
|
|
Loading…
Reference in a new issue