mirror of
https://gitlab.freedesktop.org/gstreamer/gstreamer.git
synced 2024-11-18 15:51:11 +00:00
gst/deinterlace2/tvtime/greedy.c: Add plain MMX implementation for the greedyl method.
Original commit message from CVS: * gst/deinterlace2/tvtime/greedy.c: (deinterlace_greedy_packed422_scanline_mmx), (deinterlace_greedy_packed422_scanline): Add plain MMX implementation for the greedyl method.
This commit is contained in:
parent
e02d4969cb
commit
501437acf9
2 changed files with 111 additions and 0 deletions
|
@ -1,3 +1,10 @@
|
|||
2008-06-24 Sebastian Dröge <sebastian.droege@collabora.co.uk>
|
||||
|
||||
* gst/deinterlace2/tvtime/greedy.c:
|
||||
(deinterlace_greedy_packed422_scanline_mmx),
|
||||
(deinterlace_greedy_packed422_scanline):
|
||||
Add plain MMX implementation for the greedyl method.
|
||||
|
||||
2008-06-24 Sebastian Dröge <sebastian.droege@collabora.co.uk>
|
||||
|
||||
* gst/deinterlace2/Makefile.am:
|
||||
|
|
|
@ -111,6 +111,107 @@ deinterlace_greedy_packed422_scanline_c (GstDeinterlace2 * object,
|
|||
|
||||
#ifdef HAVE_CPU_I386
|
||||
#include "mmx.h"
|
||||
static void
|
||||
deinterlace_greedy_packed422_scanline_mmx (GstDeinterlace2 * object,
|
||||
uint8_t * m0, uint8_t * t1, uint8_t * b1, uint8_t * m2, uint8_t * output,
|
||||
int width)
|
||||
{
|
||||
mmx_t MaxComb;
|
||||
|
||||
// How badly do we let it weave? 0-255
|
||||
MaxComb.ub[0] = GreedyMaxComb;
|
||||
MaxComb.ub[1] = GreedyMaxComb;
|
||||
MaxComb.ub[2] = GreedyMaxComb;
|
||||
MaxComb.ub[3] = GreedyMaxComb;
|
||||
MaxComb.ub[4] = GreedyMaxComb;
|
||||
MaxComb.ub[5] = GreedyMaxComb;
|
||||
MaxComb.ub[6] = GreedyMaxComb;
|
||||
MaxComb.ub[7] = GreedyMaxComb;
|
||||
|
||||
// L2 == m0
|
||||
// L1 == t1
|
||||
// L3 == b1
|
||||
// LP2 == m2
|
||||
|
||||
for (; width > 7; width -= 8) {
|
||||
movq_m2r (*t1, mm1); // L1
|
||||
movq_m2r (*m0, mm2); // L2
|
||||
movq_m2r (*b1, mm3); // L3
|
||||
movq_m2r (*m2, mm0); // LP2
|
||||
|
||||
// average L1 and L3 leave result in mm4
|
||||
movq_r2r (mm1, mm4); // L1
|
||||
movq_r2r (mm3, mm5); // L3
|
||||
psrlw_i2r (1, mm4); // L1/2
|
||||
psrlw_i2r (1, mm5); // L3/2
|
||||
paddusb_r2r (mm5, mm4); // (L1 + L3) / 2
|
||||
|
||||
// get abs value of possible L2 comb
|
||||
movq_r2r (mm2, mm7); // L2
|
||||
psubusb_r2r (mm4, mm7); // L2 - avg
|
||||
movq_r2r (mm4, mm5); // avg
|
||||
psubusb_r2r (mm2, mm5); // avg - L2
|
||||
por_r2r (mm7, mm5); // abs(avg-L2)
|
||||
|
||||
|
||||
// get abs value of possible LP2 comb
|
||||
movq_r2r (mm0, mm7); // LP2
|
||||
psubusb_r2r (mm4, mm7); // LP2 - avg
|
||||
psubusb_r2r (mm0, mm4); // avg - LP2
|
||||
por_r2r (mm7, mm4); // abs(avg-LP2)
|
||||
|
||||
// use L2 or LP2 depending upon which makes smaller comb
|
||||
psubusb_r2r (mm5, mm4); // see if it goes to zero
|
||||
psubusb_r2r (mm5, mm5); // 0
|
||||
pcmpeqb_r2r (mm5, mm4); // if (mm4=0) then FF else 0
|
||||
pcmpeqb_r2r (mm4, mm5); // opposite of mm4
|
||||
|
||||
// if Comb(LP2) <= Comb(L2) then mm4=ff, mm5=0 else mm4=0, mm5 = 55
|
||||
pand_r2r (mm2, mm5); // use L2 if mm5 == ff, else 0
|
||||
pand_r2r (mm0, mm4); // use LP2 if mm4 = ff, else 0
|
||||
por_r2r (mm5, mm4); // may the best win
|
||||
|
||||
// Now lets clip our chosen value to be not outside of the range
|
||||
// of the high/low range L1-L3 by more than abs(L1-L3)
|
||||
// This allows some comb but limits the damages and also allows more
|
||||
// detail than a boring oversmoothed clip.
|
||||
|
||||
movq_r2r (mm1, mm2); // copy L1
|
||||
psubusb_r2r (mm3, mm2); // - L3, with saturation
|
||||
paddusb_r2r (mm3, mm2); // now = Max(L1,L3)
|
||||
|
||||
pcmpeqb_r2r (mm7, mm7); // all ffffffff
|
||||
psubusb_r2r (mm1, mm7); // - L1
|
||||
paddusb_r2r (mm7, mm3); // add, may sat at fff..
|
||||
psubusb_r2r (mm7, mm3); // now = Min(L1,L3)
|
||||
|
||||
// allow the value to be above the high or below the low by amt of MaxComb
|
||||
paddusb_m2r (MaxComb, mm2); // increase max by diff
|
||||
psubusb_m2r (MaxComb, mm3); // lower min by diff
|
||||
|
||||
psubusb_r2r (mm3, mm4); // best - Min
|
||||
paddusb_r2r (mm3, mm4); // now = Max(best,Min(L1,L3)
|
||||
|
||||
pcmpeqb_r2r (mm7, mm7); // all ffffffff
|
||||
psubusb_r2r (mm4, mm7); // - Max(best,Min(best,L3)
|
||||
paddusb_r2r (mm7, mm2); // add may sat at FFF..
|
||||
psubusb_r2r (mm7, mm2); // now = Min( Max(best, Min(L1,L3), L2 )=L2 clipped
|
||||
|
||||
movq_r2m (mm2, *output); // move in our clipped best
|
||||
|
||||
// Advance to the next set of pixels.
|
||||
output += 8;
|
||||
m0 += 8;
|
||||
t1 += 8;
|
||||
b1 += 8;
|
||||
m2 += 8;
|
||||
}
|
||||
emms ();
|
||||
if (width > 0)
|
||||
deinterlace_greedy_packed422_scanline_c (object, m0, t1, b1, m2, output,
|
||||
width);
|
||||
}
|
||||
|
||||
#include "sse.h"
|
||||
|
||||
static void
|
||||
|
@ -214,6 +315,9 @@ deinterlace_greedy_packed422_scanline (GstDeinterlace2 * object,
|
|||
if (object->cpu_feature_flags & OIL_IMPL_FLAG_MMXEXT) {
|
||||
deinterlace_greedy_packed422_scanline_mmxext (object, data->m0, data->t1,
|
||||
data->b1, data->m2, output, 2 * object->frame_width);
|
||||
} else if (object->cpu_feature_flags & OIL_IMPL_FLAG_MMX) {
|
||||
deinterlace_greedy_packed422_scanline_mmx (object, data->m0, data->t1,
|
||||
data->b1, data->m2, output, 2 * object->frame_width);
|
||||
} else {
|
||||
deinterlace_greedy_packed422_scanline_c (object, data->m0, data->t1,
|
||||
data->b1, data->m2, output, 2 * object->frame_width);
|
||||
|
|
Loading…
Reference in a new issue