[MOVED FROM BAD 13/56] gst/deinterlace2/tvtime/greedy.c: Optimize MMX/MMXEXT implementations a bit by requiring two fewer memory accesses and...

Original commit message from CVS:
* gst/deinterlace2/tvtime/greedy.c:
(deinterlace_greedy_packed422_scanline_mmx),
(deinterlace_greedy_packed422_scanline_mmxext):
Optimize MMX/MMXEXT implementations a bit by requiring two fewer
memory accesses, and fix the workaround for the missing right shift
on bytes so that it unsets the highest bit of every byte.
This commit is contained in:
Sebastian Dröge 2008-06-24 12:08:47 +00:00 committed by Sebastian Dröge
parent 72f7b15739
commit 229f6a379b

View file

@ -118,6 +118,8 @@ deinterlace_greedy_packed422_scanline_mmx (GstDeinterlace2 * object,
{
mmx_t MaxComb;
mmx_t ShiftMask;
// How badly do we let it weave? 0-255
MaxComb.ub[0] = GreedyMaxComb;
MaxComb.ub[1] = GreedyMaxComb;
@ -128,10 +130,21 @@ deinterlace_greedy_packed422_scanline_mmx (GstDeinterlace2 * object,
MaxComb.ub[6] = GreedyMaxComb;
MaxComb.ub[7] = GreedyMaxComb;
ShiftMask.ub[0] = 0x7f;
ShiftMask.ub[1] = 0x7f;
ShiftMask.ub[2] = 0x7f;
ShiftMask.ub[3] = 0x7f;
ShiftMask.ub[4] = 0x7f;
ShiftMask.ub[5] = 0x7f;
ShiftMask.ub[6] = 0x7f;
ShiftMask.ub[7] = 0x7f;
// L2 == m0
// L1 == t1
// L3 == b1
// LP2 == m2
// LP2 == m2
movq_m2r (MaxComb, mm6);
for (; width > 7; width -= 8) {
movq_m2r (*t1, mm1); // L1
@ -143,7 +156,9 @@ deinterlace_greedy_packed422_scanline_mmx (GstDeinterlace2 * object,
movq_r2r (mm1, mm4); // L1
movq_r2r (mm3, mm5); // L3
psrlw_i2r (1, mm4); // L1/2
pand_m2r (ShiftMask, mm4);
psrlw_i2r (1, mm5); // L3/2
pand_m2r (ShiftMask, mm5);
paddusb_r2r (mm5, mm4); // (L1 + L3) / 2
// get abs value of possible L2 comb
@ -153,7 +168,6 @@ deinterlace_greedy_packed422_scanline_mmx (GstDeinterlace2 * object,
psubusb_r2r (mm2, mm5); // avg - L2
por_r2r (mm7, mm5); // abs(avg-L2)
// get abs value of possible LP2 comb
movq_r2r (mm0, mm7); // LP2
psubusb_r2r (mm4, mm7); // LP2 - avg
@ -186,8 +200,8 @@ deinterlace_greedy_packed422_scanline_mmx (GstDeinterlace2 * object,
psubusb_r2r (mm7, mm3); // now = Min(L1,L3)
// allow the value to be above the high or below the low by amt of MaxComb
paddusb_m2r (MaxComb, mm2); // increase max by diff
psubusb_m2r (MaxComb, mm3); // lower min by diff
paddusb_r2r (mm6, mm2); // increase max by diff
psubusb_r2r (mm6, mm3); // lower min by diff
psubusb_r2r (mm3, mm4); // best - Min
paddusb_r2r (mm3, mm4); // now = Max(best,Min(L1,L3)
@ -236,6 +250,8 @@ deinterlace_greedy_packed422_scanline_mmxext (GstDeinterlace2 * object,
// L3 == b1
// LP2 == m2
movq_m2r (MaxComb, mm6);
for (; width > 7; width -= 8) {
movq_m2r (*t1, mm1); // L1
movq_m2r (*m0, mm2); // L2
@ -281,8 +297,8 @@ deinterlace_greedy_packed422_scanline_mmxext (GstDeinterlace2 * object,
pminub_r2r (mm1, mm3); // now = Min(L1,L3)
// allow the value to be above the high or below the low by amt of MaxComb
paddusb_m2r (MaxComb, mm2); // increase max by diff
psubusb_m2r (MaxComb, mm3); // lower min by diff
paddusb_r2r (mm6, mm2); // increase max by diff
psubusb_r2r (mm6, mm3); // lower min by diff
pmaxub_r2r (mm3, mm4); // now = Max(best,Min(L1,L3)