mirror of
https://gitlab.freedesktop.org/gstreamer/gstreamer.git
synced 2025-01-26 09:08:14 +00:00
deinterlace: Fix greedyl Orc implementation
To agree with the previous C/asm code.
This commit is contained in:
parent
de8fda9cc2
commit
6143a60bdb
3 changed files with 152 additions and 458 deletions
|
@ -688,10 +688,6 @@ deinterlace_line_greedy (orc_uint8 * d1, const orc_uint8 * s1,
|
|||
const orc_int8 *ORC_RESTRICT ptr5;
|
||||
const orc_int8 *ORC_RESTRICT ptr6;
|
||||
const orc_int8 *ORC_RESTRICT ptr7;
|
||||
orc_int8 var40;
|
||||
orc_int8 var41;
|
||||
orc_int8 var42;
|
||||
orc_int8 var43;
|
||||
orc_int8 var44;
|
||||
orc_int8 var45;
|
||||
orc_int8 var46;
|
||||
|
@ -719,10 +715,6 @@ deinterlace_line_greedy (orc_uint8 * d1, const orc_uint8 * s1,
|
|||
orc_int8 var68;
|
||||
orc_int8 var69;
|
||||
orc_int8 var70;
|
||||
orc_int8 var71;
|
||||
orc_int8 var72;
|
||||
orc_int8 var73;
|
||||
orc_int8 var74;
|
||||
|
||||
ptr0 = (orc_int8 *) d1;
|
||||
ptr4 = (orc_int8 *) s1;
|
||||
|
@ -730,80 +722,64 @@ deinterlace_line_greedy (orc_uint8 * d1, const orc_uint8 * s1,
|
|||
ptr6 = (orc_int8 *) s3;
|
||||
ptr7 = (orc_int8 *) s4;
|
||||
|
||||
/* 11: loadpb */
|
||||
var44 = 0x00000080; /* 128 or 6.32404e-322f */
|
||||
/* 13: loadpb */
|
||||
var46 = 0x00000080; /* 128 or 6.32404e-322f */
|
||||
/* 15: loadpb */
|
||||
var47 = 0x00000080; /* 128 or 6.32404e-322f */
|
||||
/* 29: loadpb */
|
||||
var54 = p1;
|
||||
/* 31: loadpb */
|
||||
var55 = p1;
|
||||
var45 = 0x00000080; /* 128 or 6.32404e-322f */
|
||||
/* 21: loadpb */
|
||||
var46 = p1;
|
||||
/* 23: loadpb */
|
||||
var47 = p1;
|
||||
|
||||
for (i = 0; i < n; i++) {
|
||||
/* 0: loadb */
|
||||
var40 = ptr5[i];
|
||||
var49 = ptr4[i];
|
||||
/* 1: loadb */
|
||||
var41 = ptr6[i];
|
||||
/* 2: avgub */
|
||||
var57 = ((orc_uint8) var40 + (orc_uint8) var41 + 1) >> 1;
|
||||
/* 3: loadb */
|
||||
var42 = ptr4[i];
|
||||
/* 4: maxub */
|
||||
var58 = ORC_MAX ((orc_uint8) var42, (orc_uint8) var57);
|
||||
/* 5: loadb */
|
||||
var43 = ptr4[i];
|
||||
/* 6: minub */
|
||||
var59 = ORC_MIN ((orc_uint8) var43, (orc_uint8) var57);
|
||||
/* 7: subb */
|
||||
var60 = var58 - var59;
|
||||
/* 8: loadb */
|
||||
var44 = ptr7[i];
|
||||
/* 9: maxub */
|
||||
var61 = ORC_MAX ((orc_uint8) var44, (orc_uint8) var57);
|
||||
/* 10: loadb */
|
||||
var45 = ptr7[i];
|
||||
/* 11: minub */
|
||||
var62 = ORC_MIN ((orc_uint8) var45, (orc_uint8) var57);
|
||||
/* 12: subb */
|
||||
var63 = var61 - var62;
|
||||
/* 14: xorb */
|
||||
var64 = var60 ^ var46;
|
||||
/* 16: xorb */
|
||||
var65 = var63 ^ var47;
|
||||
/* 17: cmpgtsb */
|
||||
var66 = (var64 > var65) ? (~0) : 0;
|
||||
/* 18: loadb */
|
||||
var48 = ptr4[i];
|
||||
/* 19: andb */
|
||||
var67 = var48 & var66;
|
||||
/* 20: loadb */
|
||||
var49 = ptr7[i];
|
||||
/* 21: andnb */
|
||||
var68 = (~var49) & var66;
|
||||
/* 22: orb */
|
||||
var69 = var67 | var68;
|
||||
/* 23: loadb */
|
||||
var50 = ptr5[i];
|
||||
/* 24: loadb */
|
||||
var50 = ptr7[i];
|
||||
/* 2: loadb */
|
||||
var51 = ptr6[i];
|
||||
/* 25: maxub */
|
||||
var70 = ORC_MAX ((orc_uint8) var50, (orc_uint8) var51);
|
||||
/* 26: loadb */
|
||||
/* 3: loadb */
|
||||
var52 = ptr5[i];
|
||||
/* 27: loadb */
|
||||
var53 = ptr6[i];
|
||||
/* 28: minub */
|
||||
var71 = ORC_MIN ((orc_uint8) var52, (orc_uint8) var53);
|
||||
/* 30: addusb */
|
||||
var72 = ORC_CLAMP_UB ((orc_uint8) var70 + (orc_uint8) var54);
|
||||
/* 32: subusb */
|
||||
var73 = ORC_CLAMP_UB ((orc_uint8) var71 - (orc_uint8) var55);
|
||||
/* 33: minub */
|
||||
var74 = ORC_MIN ((orc_uint8) var69, (orc_uint8) var72);
|
||||
/* 34: maxub */
|
||||
var56 = ORC_MAX ((orc_uint8) var74, (orc_uint8) var73);
|
||||
/* 35: storeb */
|
||||
ptr0[i] = var56;
|
||||
/* 4: avgub */
|
||||
var53 = ((orc_uint8) var52 + (orc_uint8) var51 + 1) >> 1;
|
||||
/* 5: maxub */
|
||||
var54 = ORC_MAX ((orc_uint8) var49, (orc_uint8) var53);
|
||||
/* 6: minub */
|
||||
var55 = ORC_MIN ((orc_uint8) var49, (orc_uint8) var53);
|
||||
/* 7: subb */
|
||||
var56 = var54 - var55;
|
||||
/* 8: maxub */
|
||||
var57 = ORC_MAX ((orc_uint8) var50, (orc_uint8) var53);
|
||||
/* 9: minub */
|
||||
var58 = ORC_MIN ((orc_uint8) var50, (orc_uint8) var53);
|
||||
/* 10: subb */
|
||||
var59 = var57 - var58;
|
||||
/* 12: xorb */
|
||||
var60 = var56 ^ var44;
|
||||
/* 14: xorb */
|
||||
var61 = var59 ^ var45;
|
||||
/* 15: cmpgtsb */
|
||||
var62 = (var60 > var61) ? (~0) : 0;
|
||||
/* 16: andb */
|
||||
var63 = var50 & var62;
|
||||
/* 17: andnb */
|
||||
var64 = (~var62) & var49;
|
||||
/* 18: orb */
|
||||
var65 = var63 | var64;
|
||||
/* 19: maxub */
|
||||
var66 = ORC_MAX ((orc_uint8) var52, (orc_uint8) var51);
|
||||
/* 20: minub */
|
||||
var67 = ORC_MIN ((orc_uint8) var52, (orc_uint8) var51);
|
||||
/* 22: addusb */
|
||||
var68 = ORC_CLAMP_UB ((orc_uint8) var66 + (orc_uint8) var46);
|
||||
/* 24: subusb */
|
||||
var69 = ORC_CLAMP_UB ((orc_uint8) var67 - (orc_uint8) var47);
|
||||
/* 25: minub */
|
||||
var70 = ORC_MIN ((orc_uint8) var65, (orc_uint8) var68);
|
||||
/* 26: maxub */
|
||||
var48 = ORC_MAX ((orc_uint8) var70, (orc_uint8) var69);
|
||||
/* 27: storeb */
|
||||
ptr0[i] = var48;
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -819,10 +795,6 @@ _backup_deinterlace_line_greedy (OrcExecutor * ORC_RESTRICT ex)
|
|||
const orc_int8 *ORC_RESTRICT ptr5;
|
||||
const orc_int8 *ORC_RESTRICT ptr6;
|
||||
const orc_int8 *ORC_RESTRICT ptr7;
|
||||
orc_int8 var40;
|
||||
orc_int8 var41;
|
||||
orc_int8 var42;
|
||||
orc_int8 var43;
|
||||
orc_int8 var44;
|
||||
orc_int8 var45;
|
||||
orc_int8 var46;
|
||||
|
@ -850,10 +822,6 @@ _backup_deinterlace_line_greedy (OrcExecutor * ORC_RESTRICT ex)
|
|||
orc_int8 var68;
|
||||
orc_int8 var69;
|
||||
orc_int8 var70;
|
||||
orc_int8 var71;
|
||||
orc_int8 var72;
|
||||
orc_int8 var73;
|
||||
orc_int8 var74;
|
||||
|
||||
ptr0 = (orc_int8 *) ex->arrays[0];
|
||||
ptr4 = (orc_int8 *) ex->arrays[4];
|
||||
|
@ -861,80 +829,64 @@ _backup_deinterlace_line_greedy (OrcExecutor * ORC_RESTRICT ex)
|
|||
ptr6 = (orc_int8 *) ex->arrays[6];
|
||||
ptr7 = (orc_int8 *) ex->arrays[7];
|
||||
|
||||
/* 11: loadpb */
|
||||
var44 = 0x00000080; /* 128 or 6.32404e-322f */
|
||||
/* 13: loadpb */
|
||||
var46 = 0x00000080; /* 128 or 6.32404e-322f */
|
||||
/* 15: loadpb */
|
||||
var47 = 0x00000080; /* 128 or 6.32404e-322f */
|
||||
/* 29: loadpb */
|
||||
var54 = ex->params[24];
|
||||
/* 31: loadpb */
|
||||
var55 = ex->params[24];
|
||||
var45 = 0x00000080; /* 128 or 6.32404e-322f */
|
||||
/* 21: loadpb */
|
||||
var46 = ex->params[24];
|
||||
/* 23: loadpb */
|
||||
var47 = ex->params[24];
|
||||
|
||||
for (i = 0; i < n; i++) {
|
||||
/* 0: loadb */
|
||||
var40 = ptr5[i];
|
||||
var49 = ptr4[i];
|
||||
/* 1: loadb */
|
||||
var41 = ptr6[i];
|
||||
/* 2: avgub */
|
||||
var57 = ((orc_uint8) var40 + (orc_uint8) var41 + 1) >> 1;
|
||||
/* 3: loadb */
|
||||
var42 = ptr4[i];
|
||||
/* 4: maxub */
|
||||
var58 = ORC_MAX ((orc_uint8) var42, (orc_uint8) var57);
|
||||
/* 5: loadb */
|
||||
var43 = ptr4[i];
|
||||
/* 6: minub */
|
||||
var59 = ORC_MIN ((orc_uint8) var43, (orc_uint8) var57);
|
||||
/* 7: subb */
|
||||
var60 = var58 - var59;
|
||||
/* 8: loadb */
|
||||
var44 = ptr7[i];
|
||||
/* 9: maxub */
|
||||
var61 = ORC_MAX ((orc_uint8) var44, (orc_uint8) var57);
|
||||
/* 10: loadb */
|
||||
var45 = ptr7[i];
|
||||
/* 11: minub */
|
||||
var62 = ORC_MIN ((orc_uint8) var45, (orc_uint8) var57);
|
||||
/* 12: subb */
|
||||
var63 = var61 - var62;
|
||||
/* 14: xorb */
|
||||
var64 = var60 ^ var46;
|
||||
/* 16: xorb */
|
||||
var65 = var63 ^ var47;
|
||||
/* 17: cmpgtsb */
|
||||
var66 = (var64 > var65) ? (~0) : 0;
|
||||
/* 18: loadb */
|
||||
var48 = ptr4[i];
|
||||
/* 19: andb */
|
||||
var67 = var48 & var66;
|
||||
/* 20: loadb */
|
||||
var49 = ptr7[i];
|
||||
/* 21: andnb */
|
||||
var68 = (~var49) & var66;
|
||||
/* 22: orb */
|
||||
var69 = var67 | var68;
|
||||
/* 23: loadb */
|
||||
var50 = ptr5[i];
|
||||
/* 24: loadb */
|
||||
var50 = ptr7[i];
|
||||
/* 2: loadb */
|
||||
var51 = ptr6[i];
|
||||
/* 25: maxub */
|
||||
var70 = ORC_MAX ((orc_uint8) var50, (orc_uint8) var51);
|
||||
/* 26: loadb */
|
||||
/* 3: loadb */
|
||||
var52 = ptr5[i];
|
||||
/* 27: loadb */
|
||||
var53 = ptr6[i];
|
||||
/* 28: minub */
|
||||
var71 = ORC_MIN ((orc_uint8) var52, (orc_uint8) var53);
|
||||
/* 30: addusb */
|
||||
var72 = ORC_CLAMP_UB ((orc_uint8) var70 + (orc_uint8) var54);
|
||||
/* 32: subusb */
|
||||
var73 = ORC_CLAMP_UB ((orc_uint8) var71 - (orc_uint8) var55);
|
||||
/* 33: minub */
|
||||
var74 = ORC_MIN ((orc_uint8) var69, (orc_uint8) var72);
|
||||
/* 34: maxub */
|
||||
var56 = ORC_MAX ((orc_uint8) var74, (orc_uint8) var73);
|
||||
/* 35: storeb */
|
||||
ptr0[i] = var56;
|
||||
/* 4: avgub */
|
||||
var53 = ((orc_uint8) var52 + (orc_uint8) var51 + 1) >> 1;
|
||||
/* 5: maxub */
|
||||
var54 = ORC_MAX ((orc_uint8) var49, (orc_uint8) var53);
|
||||
/* 6: minub */
|
||||
var55 = ORC_MIN ((orc_uint8) var49, (orc_uint8) var53);
|
||||
/* 7: subb */
|
||||
var56 = var54 - var55;
|
||||
/* 8: maxub */
|
||||
var57 = ORC_MAX ((orc_uint8) var50, (orc_uint8) var53);
|
||||
/* 9: minub */
|
||||
var58 = ORC_MIN ((orc_uint8) var50, (orc_uint8) var53);
|
||||
/* 10: subb */
|
||||
var59 = var57 - var58;
|
||||
/* 12: xorb */
|
||||
var60 = var56 ^ var44;
|
||||
/* 14: xorb */
|
||||
var61 = var59 ^ var45;
|
||||
/* 15: cmpgtsb */
|
||||
var62 = (var60 > var61) ? (~0) : 0;
|
||||
/* 16: andb */
|
||||
var63 = var50 & var62;
|
||||
/* 17: andnb */
|
||||
var64 = (~var62) & var49;
|
||||
/* 18: orb */
|
||||
var65 = var63 | var64;
|
||||
/* 19: maxub */
|
||||
var66 = ORC_MAX ((orc_uint8) var52, (orc_uint8) var51);
|
||||
/* 20: minub */
|
||||
var67 = ORC_MIN ((orc_uint8) var52, (orc_uint8) var51);
|
||||
/* 22: addusb */
|
||||
var68 = ORC_CLAMP_UB ((orc_uint8) var66 + (orc_uint8) var46);
|
||||
/* 24: subusb */
|
||||
var69 = ORC_CLAMP_UB ((orc_uint8) var67 - (orc_uint8) var47);
|
||||
/* 25: minub */
|
||||
var70 = ORC_MIN ((orc_uint8) var65, (orc_uint8) var68);
|
||||
/* 26: maxub */
|
||||
var48 = ORC_MAX ((orc_uint8) var70, (orc_uint8) var69);
|
||||
/* 27: storeb */
|
||||
ptr0[i] = var48;
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -972,44 +924,56 @@ deinterlace_line_greedy (orc_uint8 * d1, const orc_uint8 * s1,
|
|||
orc_program_add_temporary (p, 1, "t6");
|
||||
orc_program_add_temporary (p, 1, "t7");
|
||||
orc_program_add_temporary (p, 1, "t8");
|
||||
orc_program_add_temporary (p, 1, "t9");
|
||||
orc_program_add_temporary (p, 1, "t10");
|
||||
orc_program_add_temporary (p, 1, "t11");
|
||||
orc_program_add_temporary (p, 1, "t12");
|
||||
|
||||
orc_program_append_2 (p, "avgub", 0, ORC_VAR_T1, ORC_VAR_S2, ORC_VAR_S3,
|
||||
orc_program_append_2 (p, "loadb", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1,
|
||||
ORC_VAR_D1);
|
||||
orc_program_append_2 (p, "maxub", 0, ORC_VAR_T4, ORC_VAR_S1, ORC_VAR_T1,
|
||||
orc_program_append_2 (p, "loadb", 0, ORC_VAR_T2, ORC_VAR_S4, ORC_VAR_D1,
|
||||
ORC_VAR_D1);
|
||||
orc_program_append_2 (p, "minub", 0, ORC_VAR_T5, ORC_VAR_S1, ORC_VAR_T1,
|
||||
orc_program_append_2 (p, "loadb", 0, ORC_VAR_T3, ORC_VAR_S3, ORC_VAR_D1,
|
||||
ORC_VAR_D1);
|
||||
orc_program_append_2 (p, "subb", 0, ORC_VAR_T2, ORC_VAR_T4, ORC_VAR_T5,
|
||||
orc_program_append_2 (p, "loadb", 0, ORC_VAR_T4, ORC_VAR_S2, ORC_VAR_D1,
|
||||
ORC_VAR_D1);
|
||||
orc_program_append_2 (p, "maxub", 0, ORC_VAR_T4, ORC_VAR_S4, ORC_VAR_T1,
|
||||
orc_program_append_2 (p, "avgub", 0, ORC_VAR_T5, ORC_VAR_T4, ORC_VAR_T3,
|
||||
ORC_VAR_D1);
|
||||
orc_program_append_2 (p, "minub", 0, ORC_VAR_T5, ORC_VAR_S4, ORC_VAR_T1,
|
||||
orc_program_append_2 (p, "maxub", 0, ORC_VAR_T8, ORC_VAR_T1, ORC_VAR_T5,
|
||||
ORC_VAR_D1);
|
||||
orc_program_append_2 (p, "subb", 0, ORC_VAR_T3, ORC_VAR_T4, ORC_VAR_T5,
|
||||
orc_program_append_2 (p, "minub", 0, ORC_VAR_T9, ORC_VAR_T1, ORC_VAR_T5,
|
||||
ORC_VAR_D1);
|
||||
orc_program_append_2 (p, "xorb", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_C1,
|
||||
orc_program_append_2 (p, "subb", 0, ORC_VAR_T6, ORC_VAR_T8, ORC_VAR_T9,
|
||||
ORC_VAR_D1);
|
||||
orc_program_append_2 (p, "xorb", 0, ORC_VAR_T3, ORC_VAR_T3, ORC_VAR_C1,
|
||||
orc_program_append_2 (p, "maxub", 0, ORC_VAR_T8, ORC_VAR_T2, ORC_VAR_T5,
|
||||
ORC_VAR_D1);
|
||||
orc_program_append_2 (p, "cmpgtsb", 0, ORC_VAR_T5, ORC_VAR_T2, ORC_VAR_T3,
|
||||
orc_program_append_2 (p, "minub", 0, ORC_VAR_T9, ORC_VAR_T2, ORC_VAR_T5,
|
||||
ORC_VAR_D1);
|
||||
orc_program_append_2 (p, "andb", 0, ORC_VAR_T4, ORC_VAR_S1, ORC_VAR_T5,
|
||||
orc_program_append_2 (p, "subb", 0, ORC_VAR_T7, ORC_VAR_T8, ORC_VAR_T9,
|
||||
ORC_VAR_D1);
|
||||
orc_program_append_2 (p, "andnb", 0, ORC_VAR_T5, ORC_VAR_S4, ORC_VAR_T5,
|
||||
orc_program_append_2 (p, "xorb", 0, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_C1,
|
||||
ORC_VAR_D1);
|
||||
orc_program_append_2 (p, "orb", 0, ORC_VAR_T6, ORC_VAR_T4, ORC_VAR_T5,
|
||||
orc_program_append_2 (p, "xorb", 0, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_C1,
|
||||
ORC_VAR_D1);
|
||||
orc_program_append_2 (p, "maxub", 0, ORC_VAR_T8, ORC_VAR_S2, ORC_VAR_S3,
|
||||
orc_program_append_2 (p, "cmpgtsb", 0, ORC_VAR_T9, ORC_VAR_T6, ORC_VAR_T7,
|
||||
ORC_VAR_D1);
|
||||
orc_program_append_2 (p, "minub", 0, ORC_VAR_T7, ORC_VAR_S2, ORC_VAR_S3,
|
||||
orc_program_append_2 (p, "andb", 0, ORC_VAR_T8, ORC_VAR_T2, ORC_VAR_T9,
|
||||
ORC_VAR_D1);
|
||||
orc_program_append_2 (p, "addusb", 0, ORC_VAR_T8, ORC_VAR_T8, ORC_VAR_P1,
|
||||
orc_program_append_2 (p, "andnb", 0, ORC_VAR_T9, ORC_VAR_T9, ORC_VAR_T1,
|
||||
ORC_VAR_D1);
|
||||
orc_program_append_2 (p, "subusb", 0, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_P1,
|
||||
orc_program_append_2 (p, "orb", 0, ORC_VAR_T10, ORC_VAR_T8, ORC_VAR_T9,
|
||||
ORC_VAR_D1);
|
||||
orc_program_append_2 (p, "minub", 0, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_T8,
|
||||
orc_program_append_2 (p, "maxub", 0, ORC_VAR_T12, ORC_VAR_T4, ORC_VAR_T3,
|
||||
ORC_VAR_D1);
|
||||
orc_program_append_2 (p, "maxub", 0, ORC_VAR_D1, ORC_VAR_T6, ORC_VAR_T7,
|
||||
orc_program_append_2 (p, "minub", 0, ORC_VAR_T11, ORC_VAR_T4, ORC_VAR_T3,
|
||||
ORC_VAR_D1);
|
||||
orc_program_append_2 (p, "addusb", 0, ORC_VAR_T12, ORC_VAR_T12,
|
||||
ORC_VAR_P1, ORC_VAR_D1);
|
||||
orc_program_append_2 (p, "subusb", 0, ORC_VAR_T11, ORC_VAR_T11,
|
||||
ORC_VAR_P1, ORC_VAR_D1);
|
||||
orc_program_append_2 (p, "minub", 0, ORC_VAR_T10, ORC_VAR_T10,
|
||||
ORC_VAR_T12, ORC_VAR_D1);
|
||||
orc_program_append_2 (p, "maxub", 0, ORC_VAR_D1, ORC_VAR_T10, ORC_VAR_T11,
|
||||
ORC_VAR_D1);
|
||||
|
||||
result = orc_program_compile (p);
|
||||
|
|
|
@ -61,6 +61,10 @@ convsuswb d1, t1
|
|||
.source 1 b1
|
||||
.source 1 m2
|
||||
.param 1 max_comb
|
||||
.temp 1 tm0
|
||||
.temp 1 tm2
|
||||
.temp 1 tb1
|
||||
.temp 1 tt1
|
||||
.temp 1 avg
|
||||
.temp 1 l2_diff
|
||||
.temp 1 lp2_diff
|
||||
|
@ -71,29 +75,31 @@ convsuswb d1, t1
|
|||
.temp 1 max
|
||||
|
||||
|
||||
avgub avg, t1, b1
|
||||
#absdiffb l2_diff, m0, avg
|
||||
maxub t2, m0, avg
|
||||
minub t3, m0, avg
|
||||
loadb tm0, m0
|
||||
loadb tm2, m2
|
||||
|
||||
loadb tb1, b1
|
||||
loadb tt1, t1
|
||||
avgub avg, tt1, tb1
|
||||
|
||||
maxub t2, tm0, avg
|
||||
minub t3, tm0, avg
|
||||
subb l2_diff, t2, t3
|
||||
|
||||
#absdiffb lp2_diff, m2, avg
|
||||
maxub t2, m2, avg
|
||||
minub t3, m2, avg
|
||||
maxub t2, tm2, avg
|
||||
minub t3, tm2, avg
|
||||
subb lp2_diff, t2, t3
|
||||
|
||||
#cmpgtub t1, l2_diff, lp2_diff
|
||||
xorb l2_diff, l2_diff, 0x80
|
||||
xorb lp2_diff, lp2_diff, 0x80
|
||||
cmpgtsb t3, l2_diff, lp2_diff
|
||||
|
||||
#selectb best, m0, m2, t3
|
||||
andb t2, m0, t3
|
||||
andnb t3, m2, t3
|
||||
andb t2, tm2, t3
|
||||
andnb t3, t3, tm0
|
||||
orb best, t2, t3
|
||||
|
||||
maxub max, t1, b1
|
||||
minub min, t1, b1
|
||||
maxub max, tt1, tb1
|
||||
minub min, tt1, tb1
|
||||
addusb max, max, max_comb
|
||||
subusb min, min, max_comb
|
||||
minub best, best, max
|
||||
|
|
|
@ -34,9 +34,6 @@
|
|||
|
||||
#include "gstdeinterlacemethod.h"
|
||||
#include <string.h>
|
||||
#ifdef HAVE_ORC
|
||||
#include <orc/orc.h>
|
||||
#endif
|
||||
#include "tvtime.h"
|
||||
|
||||
|
||||
|
@ -79,54 +76,6 @@ typedef struct
|
|||
// I'd intended this to be part of a larger more elaborate method added to
|
||||
// Blended Clip but this gives too good results for the CPU to ignore here.
|
||||
|
||||
// Plain C greedy scanline filter.
// For each of `width` pixels: take whichever of *m0 / *m2 is closer to the
// average of *t1 and *b1, clamp it to the [min(t1,b1) - max_comb,
// max(t1,b1) + max_comb] window (saturated to 0..255) and store it in *output.
// max_comb is read from self->max_comb. All five pointers advance one byte
// per pixel. A negative width is not guarded against.
static inline void
|
||||
deinterlace_greedy_scanline_c (GstDeinterlaceMethodGreedyL * self,
|
||||
const guint8 * m0, const guint8 * t1,
|
||||
const guint8 * b1, const guint8 * m2, guint8 * output, gint width)
|
||||
{
|
||||
gint avg, l2_diff, lp2_diff, max, min, best;
|
||||
guint max_comb = self->max_comb;
|
||||
|
||||
// L2 == m0
|
||||
// L1 == t1
|
||||
// L3 == b1
|
||||
// LP2 == m2
|
||||
|
||||
while (width--) {
|
||||
// Integer division: the average is truncated, not rounded.
avg = (*t1 + *b1) / 2;
|
||||
|
||||
// Distance of each candidate pixel from the average.
l2_diff = ABS (*m0 - avg);
|
||||
lp2_diff = ABS (*m2 - avg);
|
||||
|
||||
// Pick the candidate with the smaller distance; a tie keeps *m0.
if (l2_diff > lp2_diff)
|
||||
best = *m2;
|
||||
else
|
||||
best = *m0;
|
||||
|
||||
max = MAX (*t1, *b1);
|
||||
min = MIN (*t1, *b1);
|
||||
|
||||
// Widen the upper bound by max_comb, saturating at 255.
if (max < 256 - max_comb)
|
||||
max += max_comb;
|
||||
else
|
||||
max = 255;
|
||||
|
||||
// Widen the lower bound by max_comb, saturating at 0.
if (min > max_comb)
|
||||
min -= max_comb;
|
||||
else
|
||||
min = 0;
|
||||
|
||||
*output = CLAMP (best, min, max);
|
||||
|
||||
// Advance to the next set of pixels.
|
||||
output += 1;
|
||||
m0 += 1;
|
||||
t1 += 1;
|
||||
b1 += 1;
|
||||
m2 += 1;
|
||||
}
|
||||
}
|
||||
|
||||
static inline void
|
||||
deinterlace_greedy_scanline_orc (GstDeinterlaceMethodGreedyL * self,
|
||||
const guint8 * m0, const guint8 * t1,
|
||||
|
@ -135,216 +84,6 @@ deinterlace_greedy_scanline_orc (GstDeinterlaceMethodGreedyL * self,
|
|||
deinterlace_line_greedy (output, m0, t1, b1, m2, self->max_comb, width);
|
||||
}
|
||||
|
||||
#ifdef BUILD_X86_ASM
|
||||
#include "mmx.h"
|
||||
// MMX version of the greedy scanline filter. Each loop iteration handles
// 8 pixels; any remaining (< 8) pixels are passed on to
// deinterlace_greedy_scanline_c after emms ().
// The per-instruction comments assume the mmx.h macros wrap the like-named
// MMX instructions with (source, destination) operand order - TODO confirm
// against mmx.h.
// NOTE(review): following those comments, a tie between the two comb values
// selects LP2 (m2) here, while deinterlace_greedy_scanline_c keeps m0 on a
// tie; the average here is L1/2 + L3/2 with each half truncated separately.
static void
|
||||
deinterlace_greedy_scanline_mmx (GstDeinterlaceMethodGreedyL * self,
|
||||
const guint8 * m0, const guint8 * t1,
|
||||
const guint8 * b1, const guint8 * m2, guint8 * output, gint width)
|
||||
{
|
||||
mmx_t MaxComb;
|
||||
mmx_t ShiftMask;
|
||||
|
||||
// How badly do we let it weave? 0-255
// (max_comb is replicated into all 8 byte lanes)
|
||||
MaxComb.ub[0] = self->max_comb;
|
||||
MaxComb.ub[1] = self->max_comb;
|
||||
MaxComb.ub[2] = self->max_comb;
|
||||
MaxComb.ub[3] = self->max_comb;
|
||||
MaxComb.ub[4] = self->max_comb;
|
||||
MaxComb.ub[5] = self->max_comb;
|
||||
MaxComb.ub[6] = self->max_comb;
|
||||
MaxComb.ub[7] = self->max_comb;
|
||||
|
||||
// 0x7f in every byte lane: clears the bit shifted in from the neighbouring
// byte when the 16-bit word shift below is used to halve bytes.
ShiftMask.ub[0] = 0x7f;
|
||||
ShiftMask.ub[1] = 0x7f;
|
||||
ShiftMask.ub[2] = 0x7f;
|
||||
ShiftMask.ub[3] = 0x7f;
|
||||
ShiftMask.ub[4] = 0x7f;
|
||||
ShiftMask.ub[5] = 0x7f;
|
||||
ShiftMask.ub[6] = 0x7f;
|
||||
ShiftMask.ub[7] = 0x7f;
|
||||
|
||||
// L2 == m0
|
||||
// L1 == t1
|
||||
// L3 == b1
|
||||
// LP2 == m2
|
||||
|
||||
// mm6 holds MaxComb for the whole loop
movq_m2r (MaxComb, mm6);
|
||||
|
||||
for (; width > 7; width -= 8) {
|
||||
movq_m2r (*t1, mm1); // L1
|
||||
movq_m2r (*m0, mm2); // L2
|
||||
movq_m2r (*b1, mm3); // L3
|
||||
movq_m2r (*m2, mm0); // LP2
|
||||
|
||||
// average L1 and L3 leave result in mm4
|
||||
movq_r2r (mm1, mm4); // L1
|
||||
movq_r2r (mm3, mm5); // L3
|
||||
psrlw_i2r (1, mm4); // L1/2
|
||||
pand_m2r (ShiftMask, mm4);
|
||||
psrlw_i2r (1, mm5); // L3/2
|
||||
pand_m2r (ShiftMask, mm5);
|
||||
paddusb_r2r (mm5, mm4); // L1/2 + L3/2, i.e. roughly (L1 + L3) / 2
|
||||
|
||||
// get abs value of possible L2 comb
|
||||
movq_r2r (mm2, mm7); // L2
|
||||
psubusb_r2r (mm4, mm7); // L2 - avg
|
||||
movq_r2r (mm4, mm5); // avg
|
||||
psubusb_r2r (mm2, mm5); // avg - L2
|
||||
por_r2r (mm7, mm5); // abs(avg-L2)
|
||||
|
||||
// get abs value of possible LP2 comb
|
||||
movq_r2r (mm0, mm7); // LP2
|
||||
psubusb_r2r (mm4, mm7); // LP2 - avg
|
||||
psubusb_r2r (mm0, mm4); // avg - LP2
|
||||
por_r2r (mm7, mm4); // abs(avg-LP2)
|
||||
|
||||
// use L2 or LP2 depending upon which makes smaller comb
|
||||
psubusb_r2r (mm5, mm4); // see if it goes to zero
|
||||
psubusb_r2r (mm5, mm5); // 0
|
||||
pcmpeqb_r2r (mm5, mm4); // if (mm4=0) then FF else 0
|
||||
pcmpeqb_r2r (mm4, mm5); // opposite of mm4
|
||||
|
||||
// if Comb(LP2) <= Comb(L2) then mm4=ff, mm5=0 else mm4=0, mm5=ff
|
||||
pand_r2r (mm2, mm5); // use L2 if mm5 == ff, else 0
|
||||
pand_r2r (mm0, mm4); // use LP2 if mm4 = ff, else 0
|
||||
por_r2r (mm5, mm4); // may the best win
|
||||
|
||||
// Now lets clip our chosen value to be not outside of the range
|
||||
// of the high/low range L1-L3 by more than MaxComb
|
||||
// This allows some comb but limits the damages and also allows more
|
||||
// detail than a boring oversmoothed clip.
|
||||
|
||||
movq_r2r (mm1, mm2); // copy L1
|
||||
psubusb_r2r (mm3, mm2); // - L3, with saturation
|
||||
paddusb_r2r (mm3, mm2); // now = Max(L1,L3)
|
||||
|
||||
pcmpeqb_r2r (mm7, mm7); // all ffffffff
|
||||
psubusb_r2r (mm1, mm7); // - L1
|
||||
paddusb_r2r (mm7, mm3); // add, may sat at fff..
|
||||
psubusb_r2r (mm7, mm3); // now = Min(L1,L3)
|
||||
|
||||
// allow the value to be above the high or below the low by amt of MaxComb
|
||||
paddusb_r2r (mm6, mm2); // increase max by MaxComb
|
||||
psubusb_r2r (mm6, mm3); // lower min by MaxComb
|
||||
|
||||
psubusb_r2r (mm3, mm4); // best - Min
|
||||
paddusb_r2r (mm3, mm4); // now = Max(best, Min(L1,L3) - MaxComb)
|
||||
|
||||
pcmpeqb_r2r (mm7, mm7); // all ffffffff
|
||||
psubusb_r2r (mm4, mm7); // - Max(best, lowered min)
|
||||
paddusb_r2r (mm7, mm2); // add may sat at FFF..
|
||||
psubusb_r2r (mm7, mm2); // now = Min(Max(best, lowered min), raised max) = best clipped
|
||||
|
||||
movq_r2m (mm2, *output); // move in our clipped best
|
||||
|
||||
// Advance to the next set of pixels.
|
||||
output += 8;
|
||||
m0 += 8;
|
||||
t1 += 8;
|
||||
b1 += 8;
|
||||
m2 += 8;
|
||||
}
|
||||
emms ();
|
||||
// Fewer than 8 pixels left: finish them with the C implementation.
if (width > 0)
|
||||
deinterlace_greedy_scanline_c (self, m0, t1, b1, m2, output, width);
|
||||
}
|
||||
|
||||
#include "sse.h"
|
||||
|
||||
// MMXEXT version of the greedy scanline filter. Each loop iteration handles
// 8 pixels; any remaining (< 8) pixels are passed on to
// deinterlace_greedy_scanline_c after emms ().
// The per-instruction comments assume the mmx.h / sse.h macros wrap the
// like-named instructions with (source, destination) operand order - TODO
// confirm against those headers.
// NOTE(review): the x86 PAVGB instruction rounds the average up, while
// deinterlace_greedy_scanline_c truncates it; and following the comments
// below a tie between the two comb values selects LP2 (m2), while the C
// version keeps m0 - confirm whether these differences matter.
static void
|
||||
deinterlace_greedy_scanline_mmxext (GstDeinterlaceMethodGreedyL *
|
||||
self, const guint8 * m0, const guint8 * t1, const guint8 * b1,
|
||||
const guint8 * m2, guint8 * output, gint width)
|
||||
{
|
||||
mmx_t MaxComb;
|
||||
|
||||
// How badly do we let it weave? 0-255
// (max_comb is replicated into all 8 byte lanes)
|
||||
MaxComb.ub[0] = self->max_comb;
|
||||
MaxComb.ub[1] = self->max_comb;
|
||||
MaxComb.ub[2] = self->max_comb;
|
||||
MaxComb.ub[3] = self->max_comb;
|
||||
MaxComb.ub[4] = self->max_comb;
|
||||
MaxComb.ub[5] = self->max_comb;
|
||||
MaxComb.ub[6] = self->max_comb;
|
||||
MaxComb.ub[7] = self->max_comb;
|
||||
|
||||
// L2 == m0
|
||||
// L1 == t1
|
||||
// L3 == b1
|
||||
// LP2 == m2
|
||||
|
||||
// mm6 holds MaxComb for the whole loop
movq_m2r (MaxComb, mm6);
|
||||
|
||||
for (; width > 7; width -= 8) {
|
||||
movq_m2r (*t1, mm1); // L1
|
||||
movq_m2r (*m0, mm2); // L2
|
||||
movq_m2r (*b1, mm3); // L3
|
||||
movq_m2r (*m2, mm0); // LP2
|
||||
|
||||
// average L1 and L3 leave result in mm4
|
||||
movq_r2r (mm1, mm4); // L1
|
||||
pavgb_r2r (mm3, mm4); // (L1 + L3)/2
|
||||
|
||||
// get abs value of possible L2 comb
|
||||
movq_r2r (mm2, mm7); // L2
|
||||
psubusb_r2r (mm4, mm7); // L2 - avg
|
||||
movq_r2r (mm4, mm5); // avg
|
||||
psubusb_r2r (mm2, mm5); // avg - L2
|
||||
por_r2r (mm7, mm5); // abs(avg-L2)
|
||||
|
||||
// get abs value of possible LP2 comb
|
||||
movq_r2r (mm0, mm7); // LP2
|
||||
psubusb_r2r (mm4, mm7); // LP2 - avg
|
||||
psubusb_r2r (mm0, mm4); // avg - LP2
|
||||
por_r2r (mm7, mm4); // abs(avg-LP2)
|
||||
|
||||
// use L2 or LP2 depending upon which makes smaller comb
|
||||
psubusb_r2r (mm5, mm4); // see if it goes to zero
|
||||
pxor_r2r (mm5, mm5); // 0
|
||||
pcmpeqb_r2r (mm5, mm4); // if (mm4=0) then FF else 0
|
||||
pcmpeqb_r2r (mm4, mm5); // opposite of mm4
|
||||
|
||||
// if Comb(LP2) <= Comb(L2) then mm4=ff, mm5=0 else mm4=0, mm5=ff
|
||||
pand_r2r (mm2, mm5); // use L2 if mm5 == ff, else 0
|
||||
pand_r2r (mm0, mm4); // use LP2 if mm4 = ff, else 0
|
||||
por_r2r (mm5, mm4); // may the best win
|
||||
|
||||
// Now lets clip our chosen value to be not outside of the range
|
||||
// of the high/low range L1-L3 by more than MaxComb
|
||||
// This allows some comb but limits the damages and also allows more
|
||||
// detail than a boring oversmoothed clip.
|
||||
|
||||
movq_r2r (mm1, mm2); // copy L1
|
||||
pmaxub_r2r (mm3, mm2); // now = Max(L1,L3)
|
||||
|
||||
pminub_r2r (mm1, mm3); // now = Min(L1,L3)
|
||||
|
||||
// allow the value to be above the high or below the low by amt of MaxComb
|
||||
paddusb_r2r (mm6, mm2); // increase max by MaxComb
|
||||
psubusb_r2r (mm6, mm3); // lower min by MaxComb
|
||||
|
||||
|
||||
pmaxub_r2r (mm3, mm4); // now = Max(best, Min(L1,L3) - MaxComb)
|
||||
pminub_r2r (mm4, mm2); // now = Min(Max(best, lowered min), raised max) = best clipped
|
||||
|
||||
movq_r2m (mm2, *output); // move in our clipped best
|
||||
|
||||
// Advance to the next set of pixels.
|
||||
output += 8;
|
||||
m0 += 8;
|
||||
t1 += 8;
|
||||
b1 += 8;
|
||||
m2 += 8;
|
||||
}
|
||||
emms ();
|
||||
|
||||
// Fewer than 8 pixels left: finish them with the C implementation.
if (width > 0)
|
||||
deinterlace_greedy_scanline_c (self, m0, t1, b1, m2, output, width);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
static void
|
||||
deinterlace_frame_di_greedy_packed (GstDeinterlaceMethod * method,
|
||||
const GstDeinterlaceField * history, guint history_count,
|
||||
|
@ -561,10 +300,6 @@ gst_deinterlace_method_greedy_l_class_init (GstDeinterlaceMethodGreedyLClass *
|
|||
{
|
||||
GstDeinterlaceMethodClass *dim_class = (GstDeinterlaceMethodClass *) klass;
|
||||
GObjectClass *gobject_class = (GObjectClass *) klass;
|
||||
#ifdef BUILD_X86_ASM
|
||||
guint cpu_flags =
|
||||
orc_target_get_default_flags (orc_target_get_by_name ("mmx"));
|
||||
#endif
|
||||
|
||||
gobject_class->set_property = gst_deinterlace_method_greedy_l_set_property;
|
||||
gobject_class->get_property = gst_deinterlace_method_greedy_l_get_property;
|
||||
|
@ -596,18 +331,7 @@ gst_deinterlace_method_greedy_l_class_init (GstDeinterlaceMethodGreedyLClass *
|
|||
dim_class->deinterlace_frame_rgb = deinterlace_frame_di_greedy_packed;
|
||||
dim_class->deinterlace_frame_bgr = deinterlace_frame_di_greedy_packed;
|
||||
|
||||
#ifdef BUILD_X86_ASM
|
||||
if (cpu_flags & ORC_TARGET_MMX_MMXEXT) {
|
||||
klass->scanline = deinterlace_greedy_scanline_mmxext;
|
||||
} else if (cpu_flags & ORC_TARGET_MMX_MMX) {
|
||||
klass->scanline = deinterlace_greedy_scanline_mmx;
|
||||
} else {
|
||||
klass->scanline = deinterlace_greedy_scanline_c;
|
||||
}
|
||||
#else
|
||||
klass->scanline = deinterlace_greedy_scanline_c;
|
||||
klass->scanline = deinterlace_greedy_scanline_orc;
|
||||
#endif
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
Loading…
Reference in a new issue