mirror of
https://gitlab.freedesktop.org/gstreamer/gstreamer.git
synced 2024-11-18 07:47:17 +00:00
[MOVED FROM BAD 13/56] gst/deinterlace2/tvtime/greedy.c: Optimize MMX/MMXEXT implementations a bit by requiring two fewer memory accesses and...
Original commit message from CVS: * gst/deinterlace2/tvtime/greedy.c: (deinterlace_greedy_packed422_scanline_mmx), (deinterlace_greedy_packed422_scanline_mmxext): Optimize the MMX/MMXEXT implementations a bit by requiring two fewer memory accesses, and fix the workaround for the missing right shift on bytes so that it clears the highest bit of every byte.
This commit is contained in:
parent
72f7b15739
commit
229f6a379b
1 changed files with 22 additions and 6 deletions
|
@ -118,6 +118,8 @@ deinterlace_greedy_packed422_scanline_mmx (GstDeinterlace2 * object,
|
|||
{
|
||||
mmx_t MaxComb;
|
||||
|
||||
mmx_t ShiftMask;
|
||||
|
||||
// How badly do we let it weave? 0-255
|
||||
MaxComb.ub[0] = GreedyMaxComb;
|
||||
MaxComb.ub[1] = GreedyMaxComb;
|
||||
|
@ -128,10 +130,21 @@ deinterlace_greedy_packed422_scanline_mmx (GstDeinterlace2 * object,
|
|||
MaxComb.ub[6] = GreedyMaxComb;
|
||||
MaxComb.ub[7] = GreedyMaxComb;
|
||||
|
||||
ShiftMask.ub[0] = 0x7f;
|
||||
ShiftMask.ub[1] = 0x7f;
|
||||
ShiftMask.ub[2] = 0x7f;
|
||||
ShiftMask.ub[3] = 0x7f;
|
||||
ShiftMask.ub[4] = 0x7f;
|
||||
ShiftMask.ub[5] = 0x7f;
|
||||
ShiftMask.ub[6] = 0x7f;
|
||||
ShiftMask.ub[7] = 0x7f;
|
||||
|
||||
// L2 == m0
|
||||
// L1 == t1
|
||||
// L3 == b1
|
||||
// LP2 == m2
|
||||
// LP2 == m2
|
||||
|
||||
movq_m2r (MaxComb, mm6);
|
||||
|
||||
for (; width > 7; width -= 8) {
|
||||
movq_m2r (*t1, mm1); // L1
|
||||
|
@ -143,7 +156,9 @@ deinterlace_greedy_packed422_scanline_mmx (GstDeinterlace2 * object,
|
|||
movq_r2r (mm1, mm4); // L1
|
||||
movq_r2r (mm3, mm5); // L3
|
||||
psrlw_i2r (1, mm4); // L1/2
|
||||
pand_m2r (ShiftMask, mm4);
|
||||
psrlw_i2r (1, mm5); // L3/2
|
||||
pand_m2r (ShiftMask, mm5);
|
||||
paddusb_r2r (mm5, mm4); // (L1 + L3) / 2
|
||||
|
||||
// get abs value of possible L2 comb
|
||||
|
@ -153,7 +168,6 @@ deinterlace_greedy_packed422_scanline_mmx (GstDeinterlace2 * object,
|
|||
psubusb_r2r (mm2, mm5); // avg - L2
|
||||
por_r2r (mm7, mm5); // abs(avg-L2)
|
||||
|
||||
|
||||
// get abs value of possible LP2 comb
|
||||
movq_r2r (mm0, mm7); // LP2
|
||||
psubusb_r2r (mm4, mm7); // LP2 - avg
|
||||
|
@ -186,8 +200,8 @@ deinterlace_greedy_packed422_scanline_mmx (GstDeinterlace2 * object,
|
|||
psubusb_r2r (mm7, mm3); // now = Min(L1,L3)
|
||||
|
||||
// allow the value to be above the high or below the low by amt of MaxComb
|
||||
paddusb_m2r (MaxComb, mm2); // increase max by diff
|
||||
psubusb_m2r (MaxComb, mm3); // lower min by diff
|
||||
paddusb_r2r (mm6, mm2); // increase max by diff
|
||||
psubusb_r2r (mm6, mm3); // lower min by diff
|
||||
|
||||
psubusb_r2r (mm3, mm4); // best - Min
|
||||
paddusb_r2r (mm3, mm4); // now = Max(best,Min(L1,L3)
|
||||
|
@ -236,6 +250,8 @@ deinterlace_greedy_packed422_scanline_mmxext (GstDeinterlace2 * object,
|
|||
// L3 == b1
|
||||
// LP2 == m2
|
||||
|
||||
movq_m2r (MaxComb, mm6);
|
||||
|
||||
for (; width > 7; width -= 8) {
|
||||
movq_m2r (*t1, mm1); // L1
|
||||
movq_m2r (*m0, mm2); // L2
|
||||
|
@ -281,8 +297,8 @@ deinterlace_greedy_packed422_scanline_mmxext (GstDeinterlace2 * object,
|
|||
pminub_r2r (mm1, mm3); // now = Min(L1,L3)
|
||||
|
||||
// allow the value to be above the high or below the low by amt of MaxComb
|
||||
paddusb_m2r (MaxComb, mm2); // increase max by diff
|
||||
psubusb_m2r (MaxComb, mm3); // lower min by diff
|
||||
paddusb_r2r (mm6, mm2); // increase max by diff
|
||||
psubusb_r2r (mm6, mm3); // lower min by diff
|
||||
|
||||
|
||||
pmaxub_r2r (mm3, mm4); // now = Max(best,Min(L1,L3)
|
||||
|
|
Loading…
Reference in a new issue