mirror of
https://gitlab.freedesktop.org/gstreamer/gstreamer.git
synced 2025-01-19 05:45:58 +00:00
gst/deinterlace2/tvtime/greedy.c: Optimize MMX/MMXEXT implementations a bit by requiring two fewer memory accesses and...
Original commit message from CVS: * gst/deinterlace2/tvtime/greedy.c: (deinterlace_greedy_packed422_scanline_mmx), (deinterlace_greedy_packed422_scanline_mmxext): Optimize MMX/MMXEXT implementations a bit by requiring two fewer memory accesses, and fix the workaround for the missing right shift on bytes so that it unsets the highest bit of every byte.
This commit is contained in:
parent
b64c5620fc
commit
00e4195fec
2 changed files with 31 additions and 6 deletions
|
@ -1,3 +1,12 @@
|
|||
2008-06-24 Sebastian Dröge <sebastian.droege@collabora.co.uk>
|
||||
|
||||
* gst/deinterlace2/tvtime/greedy.c:
|
||||
(deinterlace_greedy_packed422_scanline_mmx),
|
||||
(deinterlace_greedy_packed422_scanline_mmxext):
|
||||
Optimize MMX/MMXEXT implementations a bit by requiring two fewer
|
||||
memory accesses and fix the workaround for the missing right shift
|
||||
on bytes so that it unsets the highest bit of every byte.
|
||||
|
||||
2008-06-24 Sebastian Dröge <sebastian.droege@collabora.co.uk>
|
||||
|
||||
* gst/deinterlace2/tvtime/greedy.c:
|
||||
|
|
|
@ -118,6 +118,8 @@ deinterlace_greedy_packed422_scanline_mmx (GstDeinterlace2 * object,
|
|||
{
|
||||
mmx_t MaxComb;
|
||||
|
||||
mmx_t ShiftMask;
|
||||
|
||||
// How badly do we let it weave? 0-255
|
||||
MaxComb.ub[0] = GreedyMaxComb;
|
||||
MaxComb.ub[1] = GreedyMaxComb;
|
||||
|
@ -128,10 +130,21 @@ deinterlace_greedy_packed422_scanline_mmx (GstDeinterlace2 * object,
|
|||
MaxComb.ub[6] = GreedyMaxComb;
|
||||
MaxComb.ub[7] = GreedyMaxComb;
|
||||
|
||||
ShiftMask.ub[0] = 0x7f;
|
||||
ShiftMask.ub[1] = 0x7f;
|
||||
ShiftMask.ub[2] = 0x7f;
|
||||
ShiftMask.ub[3] = 0x7f;
|
||||
ShiftMask.ub[4] = 0x7f;
|
||||
ShiftMask.ub[5] = 0x7f;
|
||||
ShiftMask.ub[6] = 0x7f;
|
||||
ShiftMask.ub[7] = 0x7f;
|
||||
|
||||
// L2 == m0
|
||||
// L1 == t1
|
||||
// L3 == b1
|
||||
// LP2 == m2
|
||||
// LP2 == m2
|
||||
|
||||
movq_m2r (MaxComb, mm6);
|
||||
|
||||
for (; width > 7; width -= 8) {
|
||||
movq_m2r (*t1, mm1); // L1
|
||||
|
@ -143,7 +156,9 @@ deinterlace_greedy_packed422_scanline_mmx (GstDeinterlace2 * object,
|
|||
movq_r2r (mm1, mm4); // L1
|
||||
movq_r2r (mm3, mm5); // L3
|
||||
psrlw_i2r (1, mm4); // L1/2
|
||||
pand_m2r (ShiftMask, mm4);
|
||||
psrlw_i2r (1, mm5); // L3/2
|
||||
pand_m2r (ShiftMask, mm5);
|
||||
paddusb_r2r (mm5, mm4); // (L1 + L3) / 2
|
||||
|
||||
// get abs value of possible L2 comb
|
||||
|
@ -153,7 +168,6 @@ deinterlace_greedy_packed422_scanline_mmx (GstDeinterlace2 * object,
|
|||
psubusb_r2r (mm2, mm5); // avg - L2
|
||||
por_r2r (mm7, mm5); // abs(avg-L2)
|
||||
|
||||
|
||||
// get abs value of possible LP2 comb
|
||||
movq_r2r (mm0, mm7); // LP2
|
||||
psubusb_r2r (mm4, mm7); // LP2 - avg
|
||||
|
@ -186,8 +200,8 @@ deinterlace_greedy_packed422_scanline_mmx (GstDeinterlace2 * object,
|
|||
psubusb_r2r (mm7, mm3); // now = Min(L1,L3)
|
||||
|
||||
// allow the value to be above the high or below the low by amt of MaxComb
|
||||
paddusb_m2r (MaxComb, mm2); // increase max by diff
|
||||
psubusb_m2r (MaxComb, mm3); // lower min by diff
|
||||
paddusb_r2r (mm6, mm2); // increase max by diff
|
||||
psubusb_r2r (mm6, mm3); // lower min by diff
|
||||
|
||||
psubusb_r2r (mm3, mm4); // best - Min
|
||||
paddusb_r2r (mm3, mm4); // now = Max(best,Min(L1,L3)
|
||||
|
@ -236,6 +250,8 @@ deinterlace_greedy_packed422_scanline_mmxext (GstDeinterlace2 * object,
|
|||
// L3 == b1
|
||||
// LP2 == m2
|
||||
|
||||
movq_m2r (MaxComb, mm6);
|
||||
|
||||
for (; width > 7; width -= 8) {
|
||||
movq_m2r (*t1, mm1); // L1
|
||||
movq_m2r (*m0, mm2); // L2
|
||||
|
@ -281,8 +297,8 @@ deinterlace_greedy_packed422_scanline_mmxext (GstDeinterlace2 * object,
|
|||
pminub_r2r (mm1, mm3); // now = Min(L1,L3)
|
||||
|
||||
// allow the value to be above the high or below the low by amt of MaxComb
|
||||
paddusb_m2r (MaxComb, mm2); // increase max by diff
|
||||
psubusb_m2r (MaxComb, mm3); // lower min by diff
|
||||
paddusb_r2r (mm6, mm2); // increase max by diff
|
||||
psubusb_r2r (mm6, mm3); // lower min by diff
|
||||
|
||||
|
||||
pmaxub_r2r (mm3, mm4); // now = Max(best,Min(L1,L3)
|
||||
|
|
Loading…
Reference in a new issue