gstreamer/subprojects/packagefiles/pixman-0.42.2/0001-pixman-arma64-Adjustments-to-build-with-llvm-integra.patch


From 67490a8bc12647a78d92f62f0af50cd7f4146ca1 Mon Sep 17 00:00:00 2001
From: Heiko Lewin <hlewin@worldiety.de>
Date: Tue, 18 Jul 2023 07:19:34 +0200
Subject: [PATCH] pixman-arma64: Adjustments to build with llvm integrated
 assembler

This enables building the aarch64 assembly with clang.

Changes (items 2-4 are illustrated in the sketch below):
1. Use `.func` or `.endfunc` only if available
2. Prefix macro arg names with `\`
3. Use `\()` instead of `&`
4. Always use commas to separate macro arguments
5. Prefix asm symbols with an underscore if necessary
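
To illustrate items 2-4 (this sketch is not part of the change itself),
here is the same kind of macro in both spellings. GNU as historically
accepted bare argument names and `&` concatenation inside macros, which
clang's integrated assembler rejects:

    /* Old spelling, GNU as only (kept in a comment so the sketch assembles):
     *   .macro vzip_old reg1, reg2
     *       zip1 v24.8b, reg1, reg2        // bare argument names
     *       ld1  {&reg1&.2s}, [x0]         // '&' concatenation
     *   .endm
     */
    .macro vzip_new reg1, reg2
        zip1 v24.8b, \reg1, \reg2           // arguments referenced with '\'
        zip2 \reg2, \reg1, \reg2
        mov  \reg1, v24.8b
    .endm

    .macro load_lane reg, lane
        ld1 {\()\reg\().s}[\lane], [x0]     // '\()' separates the arg from '.s'
    .endm

    vzip_new  v0.8b, v1.8b                  // invocations separate args with commas
    load_lane v2, 0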
---
 meson.build                              |  28 +
 pixman/pixman-arm-asm.h                  |  32 +-
 pixman/pixman-arma64-neon-asm-bilinear.S | 421 ++++++-------
 pixman/pixman-arma64-neon-asm.S          | 720 +++++++++++------------
 pixman/pixman-arma64-neon-asm.h          | 684 ++++++++++-----------
 test/utils/utils.h                       |   2 +-
 6 files changed, 968 insertions(+), 919 deletions(-)
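
As a usage sketch (the function name below is hypothetical, chosen only for
illustration), the reworked macros from pixman-arm-asm.h bracket an assembly
function so that `.func`/`.endfunc` and the leading-underscore prefix are
emitted only when the Meson checks in the hunk below detect that the
toolchain wants them:

    #include "pixman-arm-asm.h"

    pixman_asm_function my_fill_asm_neon    /* expands to .global/.type and the
                                               label, plus .func and a '_' prefix
                                               when the config macros are set */
        ret
    pixman_end_asm_function                 /* expands to .endfunc only when
                                               ASM_HAVE_FUNC_DIRECTIVE is set */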
diff --git a/meson.build b/meson.build
index 42dbe93..b5f78f7 100644
--- a/meson.build
+++ b/meson.build
@@ -243,6 +243,34 @@ if not use_vmx.disabled()
endif
endif
+if cc.compiles('''
+ __asm__ (
+ ".func meson_test"
+ ".endfunc"
+ );''',
+ name : 'test for ASM .func directive')
+ config.set('ASM_HAVE_FUNC_DIRECTIVE', 1)
+endif
+
+
+if cc.links('''
+ #include <stdint.h>
+
+ __asm__ (
+ " .global _testlabel\n"
+ "_testlabel:\n"
+ );
+
+ int testlabel();
+ int main(int argc, char* argv[]) {
+ return testlabel();
+ }''',
+ name : 'test for ASM leading underscore')
+ config.set('ASM_LEADING_UNDERSCORE', 1)
+endif
+
+
+
if have_vmx
config.set10('USE_VMX', true)
elif use_vmx.enabled()
diff --git a/pixman/pixman-arm-asm.h b/pixman/pixman-arm-asm.h
index ee78541..8253906 100644
--- a/pixman/pixman-arm-asm.h
+++ b/pixman/pixman-arm-asm.h
@@ -25,13 +25,33 @@
*
*/
+
+#include "config.h"
+
+
/* Supplementary macro for setting function attributes */
-.macro pixman_asm_function fname
- .func fname
- .global fname
+.macro pixman_asm_function_impl fname
+#ifdef ASM_HAVE_FUNC_DIRECTIVE
+ .func \fname
+#endif
+ .global \fname
#ifdef __ELF__
- .hidden fname
- .type fname, %function
+ .hidden \fname
+ .type \fname, %function
+#endif
+\fname:
+.endm
+
+.macro pixman_asm_function fname
+#ifdef ASM_LEADING_UNDERSCORE
+ pixman_asm_function_impl _\fname
+#else
+ pixman_asm_function_impl \fname
+#endif
+.endm
+
+.macro pixman_end_asm_function
+#ifdef ASM_HAVE_FUNC_DIRECTIVE
+ .endfunc
#endif
-fname:
.endm
diff --git a/pixman/pixman-arma64-neon-asm-bilinear.S b/pixman/pixman-arma64-neon-asm-bilinear.S
index 31d103d..7303bdc 100644
--- a/pixman/pixman-arma64-neon-asm-bilinear.S
+++ b/pixman/pixman-arma64-neon-asm-bilinear.S
@@ -77,50 +77,50 @@
asr WTMP1, X, #16
add X, X, UX
add TMP1, TOP, TMP1, lsl #2
- ld1 {&reg1&.2s}, [TMP1], STRIDE
- ld1 {&reg2&.2s}, [TMP1]
+ ld1 {\()\reg1\().2s}, [TMP1], STRIDE
+ ld1 {\()\reg2\().2s}, [TMP1]
.endm
.macro bilinear_load_0565 reg1, reg2, tmp
asr WTMP1, X, #16
add X, X, UX
add TMP1, TOP, TMP1, lsl #1
- ld1 {&reg2&.s}[0], [TMP1], STRIDE
- ld1 {&reg2&.s}[1], [TMP1]
- convert_four_0565_to_x888_packed reg2, reg1, reg2, tmp
+ ld1 {\()\reg2\().s}[0], [TMP1], STRIDE
+ ld1 {\()\reg2\().s}[1], [TMP1]
+ convert_four_0565_to_x888_packed \reg2, \reg1, \reg2, \tmp
.endm
.macro bilinear_load_and_vertical_interpolate_two_8888 \
acc1, acc2, reg1, reg2, reg3, reg4, tmp1, tmp2
- bilinear_load_8888 reg1, reg2, tmp1
- umull &acc1&.8h, &reg1&.8b, v28.8b
- umlal &acc1&.8h, &reg2&.8b, v29.8b
- bilinear_load_8888 reg3, reg4, tmp2
- umull &acc2&.8h, &reg3&.8b, v28.8b
- umlal &acc2&.8h, &reg4&.8b, v29.8b
+ bilinear_load_8888 \reg1, \reg2, \tmp1
+ umull \()\acc1\().8h, \()\reg1\().8b, v28.8b
+ umlal \()\acc1\().8h, \()\reg2\().8b, v29.8b
+ bilinear_load_8888 \reg3, \reg4, \tmp2
+ umull \()\acc2\().8h, \()\reg3\().8b, v28.8b
+ umlal \()\acc2\().8h, \()\reg4\().8b, v29.8b
.endm
.macro bilinear_load_and_vertical_interpolate_four_8888 \
- xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi \
+ xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi, \
yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi
bilinear_load_and_vertical_interpolate_two_8888 \
- xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi
+ \xacc1, \xacc2, \xreg1, \xreg2, \xreg3, \xreg4, \xacc2lo, \xacc2hi
bilinear_load_and_vertical_interpolate_two_8888 \
- yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi
+ \yacc1, \yacc2, \yreg1, \yreg2, \yreg3, \yreg4, \yacc2lo, \yacc2hi
.endm
.macro vzip reg1, reg2
- zip1 v24.8b, reg1, reg2
- zip2 reg2, reg1, reg2
- mov reg1, v24.8b
+ zip1 v24.8b, \reg1, \reg2
+ zip2 \reg2, \reg1, \reg2
+ mov \reg1, v24.8b
.endm
.macro vuzp reg1, reg2
- uzp1 v24.8b, reg1, reg2
- uzp2 reg2, reg1, reg2
- mov reg1, v24.8b
+ uzp1 v24.8b, \reg1, \reg2
+ uzp2 \reg2, \reg1, \reg2
+ mov \reg1, v24.8b
.endm
.macro bilinear_load_and_vertical_interpolate_two_0565 \
@@ -131,23 +131,23 @@
asr WTMP2, X, #16
add X, X, UX
add TMP2, TOP, TMP2, lsl #1
- ld1 {&acc2&.s}[0], [TMP1], STRIDE
- ld1 {&acc2&.s}[2], [TMP2], STRIDE
- ld1 {&acc2&.s}[1], [TMP1]
- ld1 {&acc2&.s}[3], [TMP2]
- convert_0565_to_x888 acc2, reg3, reg2, reg1
- vzip &reg1&.8b, &reg3&.8b
- vzip &reg2&.8b, &reg4&.8b
- vzip &reg3&.8b, &reg4&.8b
- vzip &reg1&.8b, &reg2&.8b
- umull &acc1&.8h, &reg1&.8b, v28.8b
- umlal &acc1&.8h, &reg2&.8b, v29.8b
- umull &acc2&.8h, &reg3&.8b, v28.8b
- umlal &acc2&.8h, &reg4&.8b, v29.8b
+ ld1 {\()\acc2\().s}[0], [TMP1], STRIDE
+ ld1 {\()\acc2\().s}[2], [TMP2], STRIDE
+ ld1 {\()\acc2\().s}[1], [TMP1]
+ ld1 {\()\acc2\().s}[3], [TMP2]
+ convert_0565_to_x888 \acc2, \reg3, \reg2, \reg1
+ vzip \()\reg1\().8b, \()\reg3\().8b
+ vzip \()\reg2\().8b, \()\reg4\().8b
+ vzip \()\reg3\().8b, \()\reg4\().8b
+ vzip \()\reg1\().8b, \()\reg2\().8b
+ umull \()\acc1\().8h, \()\reg1\().8b, v28.8b
+ umlal \()\acc1\().8h, \()\reg2\().8b, v29.8b
+ umull \()\acc2\().8h, \()\reg3\().8b, v28.8b
+ umlal \()\acc2\().8h, \()\reg4\().8b, v29.8b
.endm
.macro bilinear_load_and_vertical_interpolate_four_0565 \
- xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi \
+ xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi, \
yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi
asr WTMP1, X, #16
@@ -156,49 +156,49 @@
asr WTMP2, X, #16
add X, X, UX
add TMP2, TOP, TMP2, lsl #1
- ld1 {&xacc2&.s}[0], [TMP1], STRIDE
- ld1 {&xacc2&.s}[2], [TMP2], STRIDE
- ld1 {&xacc2&.s}[1], [TMP1]
- ld1 {&xacc2&.s}[3], [TMP2]
- convert_0565_to_x888 xacc2, xreg3, xreg2, xreg1
+ ld1 {\()\xacc2\().s}[0], [TMP1], STRIDE
+ ld1 {\()\xacc2\().s}[2], [TMP2], STRIDE
+ ld1 {\()\xacc2\().s}[1], [TMP1]
+ ld1 {\()\xacc2\().s}[3], [TMP2]
+ convert_0565_to_x888 \xacc2, \xreg3, \xreg2, \xreg1
asr WTMP1, X, #16
add X, X, UX
add TMP1, TOP, TMP1, lsl #1
asr WTMP2, X, #16
add X, X, UX
add TMP2, TOP, TMP2, lsl #1
- ld1 {&yacc2&.s}[0], [TMP1], STRIDE
- vzip &xreg1&.8b, &xreg3&.8b
- ld1 {&yacc2&.s}[2], [TMP2], STRIDE
- vzip &xreg2&.8b, &xreg4&.8b
- ld1 {&yacc2&.s}[1], [TMP1]
- vzip &xreg3&.8b, &xreg4&.8b
- ld1 {&yacc2&.s}[3], [TMP2]
- vzip &xreg1&.8b, &xreg2&.8b
- convert_0565_to_x888 yacc2, yreg3, yreg2, yreg1
- umull &xacc1&.8h, &xreg1&.8b, v28.8b
- vzip &yreg1&.8b, &yreg3&.8b
- umlal &xacc1&.8h, &xreg2&.8b, v29.8b
- vzip &yreg2&.8b, &yreg4&.8b
- umull &xacc2&.8h, &xreg3&.8b, v28.8b
- vzip &yreg3&.8b, &yreg4&.8b
- umlal &xacc2&.8h, &xreg4&.8b, v29.8b
- vzip &yreg1&.8b, &yreg2&.8b
- umull &yacc1&.8h, &yreg1&.8b, v28.8b
- umlal &yacc1&.8h, &yreg2&.8b, v29.8b
- umull &yacc2&.8h, &yreg3&.8b, v28.8b
- umlal &yacc2&.8h, &yreg4&.8b, v29.8b
+ ld1 {\()\yacc2\().s}[0], [TMP1], STRIDE
+ vzip \()\xreg1\().8b, \()\xreg3\().8b
+ ld1 {\()\yacc2\().s}[2], [TMP2], STRIDE
+ vzip \()\xreg2\().8b, \()\xreg4\().8b
+ ld1 {\()\yacc2\().s}[1], [TMP1]
+ vzip \()\xreg3\().8b, \()\xreg4\().8b
+ ld1 {\()\yacc2\().s}[3], [TMP2]
+ vzip \()\xreg1\().8b, \()\xreg2\().8b
+ convert_0565_to_x888 \yacc2, \yreg3, \yreg2, \yreg1
+ umull \()\xacc1\().8h, \()\xreg1\().8b, v28.8b
+ vzip \()\yreg1\().8b, \()\yreg3\().8b
+ umlal \()\xacc1\().8h, \()\xreg2\().8b, v29.8b
+ vzip \()\yreg2\().8b, \()\yreg4\().8b
+ umull \()\xacc2\().8h, \()\xreg3\().8b, v28.8b
+ vzip \()\yreg3\().8b, \()\yreg4\().8b
+ umlal \()\xacc2\().8h, \()\xreg4\().8b, v29.8b
+ vzip \()\yreg1\().8b, \()\yreg2\().8b
+ umull \()\yacc1\().8h, \()\yreg1\().8b, v28.8b
+ umlal \()\yacc1\().8h, \()\yreg2\().8b, v29.8b
+ umull \()\yacc2\().8h, \()\yreg3\().8b, v28.8b
+ umlal \()\yacc2\().8h, \()\yreg4\().8b, v29.8b
.endm
.macro bilinear_store_8888 numpix, tmp1, tmp2
-.if numpix == 4
+.if \numpix == 4
st1 {v0.2s, v1.2s}, [OUT], #16
-.elseif numpix == 2
+.elseif \numpix == 2
st1 {v0.2s}, [OUT], #8
-.elseif numpix == 1
+.elseif \numpix == 1
st1 {v0.s}[0], [OUT], #4
.else
- .error bilinear_store_8888 numpix is unsupported
+ .error bilinear_store_8888 \numpix is unsupported
.endif
.endm
@@ -207,15 +207,15 @@
vuzp v2.8b, v3.8b
vuzp v1.8b, v3.8b
vuzp v0.8b, v2.8b
- convert_8888_to_0565 v2, v1, v0, v1, tmp1, tmp2
-.if numpix == 4
+ convert_8888_to_0565 v2, v1, v0, v1, \tmp1, \tmp2
+.if \numpix == 4
st1 {v1.4h}, [OUT], #8
-.elseif numpix == 2
+.elseif \numpix == 2
st1 {v1.s}[0], [OUT], #4
-.elseif numpix == 1
+.elseif \numpix == 1
st1 {v1.h}[0], [OUT], #2
.else
- .error bilinear_store_0565 numpix is unsupported
+ .error bilinear_store_0565 \numpix is unsupported
.endif
.endm
@@ -228,20 +228,20 @@
.endm
.macro bilinear_load_mask_8 numpix, mask
-.if numpix == 4
- ld1 {&mask&.s}[0], [MASK], #4
-.elseif numpix == 2
- ld1 {&mask&.h}[0], [MASK], #2
-.elseif numpix == 1
- ld1 {&mask&.b}[0], [MASK], #1
+.if \numpix == 4
+ ld1 {\()\mask\().s}[0], [MASK], #4
+.elseif \numpix == 2
+ ld1 {\()\mask\().h}[0], [MASK], #2
+.elseif \numpix == 1
+ ld1 {\()\mask\().b}[0], [MASK], #1
.else
- .error bilinear_load_mask_8 numpix is unsupported
+ .error bilinear_load_mask_8 \numpix is unsupported
.endif
- prfm PREFETCH_MODE, [MASK, #prefetch_offset]
+ prfum PREFETCH_MODE, [MASK, #(prefetch_offset)]
.endm
.macro bilinear_load_mask mask_fmt, numpix, mask
- bilinear_load_mask_&mask_fmt numpix, mask
+ bilinear_load_mask_\mask_fmt \numpix, \mask
.endm
@@ -256,30 +256,30 @@
.endm
.macro bilinear_load_dst_8888 numpix, dst0, dst1, dst01
-.if numpix == 4
- ld1 {&dst0&.2s, &dst1&.2s}, [OUT]
-.elseif numpix == 2
- ld1 {&dst0&.2s}, [OUT]
-.elseif numpix == 1
- ld1 {&dst0&.s}[0], [OUT]
+.if \numpix == 4
+ ld1 {\()\dst0\().2s, \()\dst1\().2s}, [OUT]
+.elseif \numpix == 2
+ ld1 {\()\dst0\().2s}, [OUT]
+.elseif \numpix == 1
+ ld1 {\()\dst0\().s}[0], [OUT]
.else
- .error bilinear_load_dst_8888 numpix is unsupported
+ .error bilinear_load_dst_8888 \numpix is unsupported
.endif
- mov &dst01&.d[0], &dst0&.d[0]
- mov &dst01&.d[1], &dst1&.d[0]
+ mov \()\dst01\().d[0], \()\dst0\().d[0]
+ mov \()\dst01\().d[1], \()\dst1\().d[0]
prfm PREFETCH_MODE, [OUT, #(prefetch_offset * 4)]
.endm
.macro bilinear_load_dst_8888_over numpix, dst0, dst1, dst01
- bilinear_load_dst_8888 numpix, dst0, dst1, dst01
+ bilinear_load_dst_8888 \numpix, \dst0, \dst1, \dst01
.endm
.macro bilinear_load_dst_8888_add numpix, dst0, dst1, dst01
- bilinear_load_dst_8888 numpix, dst0, dst1, dst01
+ bilinear_load_dst_8888 \numpix, \dst0, \dst1, \dst01
.endm
.macro bilinear_load_dst dst_fmt, op, numpix, dst0, dst1, dst01
- bilinear_load_dst_&dst_fmt&_&op numpix, dst0, dst1, dst01
+ bilinear_load_dst_\()\dst_fmt\()_\()\op \numpix, \dst0, \dst1, \dst01
.endm
/*
@@ -298,19 +298,19 @@
.endm
.macro bilinear_duplicate_mask_8 numpix, mask
-.if numpix == 4
- dup &mask&.2s, &mask&.s[0]
-.elseif numpix == 2
- dup &mask&.4h, &mask&.h[0]
-.elseif numpix == 1
- dup &mask&.8b, &mask&.b[0]
+.if \numpix == 4
+ dup \()\mask\().2s, \()\mask\().s[0]
+.elseif \numpix == 2
+ dup \()\mask\().4h, \()\mask\().h[0]
+.elseif \numpix == 1
+ dup \()\mask\().8b, \()\mask\().b[0]
.else
- .error bilinear_duplicate_mask_8 is unsupported
+ .error bilinear_duplicate_\mask_8 is unsupported
.endif
.endm
.macro bilinear_duplicate_mask mask_fmt, numpix, mask
- bilinear_duplicate_mask_&mask_fmt numpix, mask
+ bilinear_duplicate_mask_\()\mask_fmt \numpix, \mask
.endm
/*
@@ -318,14 +318,14 @@
* Interleave should be done when mask is enabled or operator is 'over'.
*/
.macro bilinear_interleave src0, src1, src01, dst0, dst1, dst01
- vuzp &src0&.8b, &src1&.8b
- vuzp &dst0&.8b, &dst1&.8b
- vuzp &src0&.8b, &src1&.8b
- vuzp &dst0&.8b, &dst1&.8b
- mov &src01&.d[1], &src1&.d[0]
- mov &src01&.d[0], &src0&.d[0]
- mov &dst01&.d[1], &dst1&.d[0]
- mov &dst01&.d[0], &dst0&.d[0]
+ vuzp \()\src0\().8b, \()\src1\().8b
+ vuzp \()\dst0\().8b, \()\dst1\().8b
+ vuzp \()\src0\().8b, \()\src1\().8b
+ vuzp \()\dst0\().8b, \()\dst1\().8b
+ mov \()\src01\().d[1], \()\src1\().d[0]
+ mov \()\src01\().d[0], \()\src0\().d[0]
+ mov \()\dst01\().d[1], \()\dst1\().d[0]
+ mov \()\dst01\().d[0], \()\dst0\().d[0]
.endm
.macro bilinear_interleave_src_dst_x_src \
@@ -335,37 +335,38 @@
.macro bilinear_interleave_src_dst_x_over \
numpix, src0, src1, src01, dst0, dst1, dst01
- bilinear_interleave src0, src1, src01, dst0, dst1, dst01
+ bilinear_interleave \src0, \src1, \src01, \dst0, \dst1, \dst01
.endm
.macro bilinear_interleave_src_dst_x_add \
numpix, src0, src1, src01, dst0, dst1, dst01
- bilinear_interleave src0, src1, src01, dst0, dst1, dst01
+
+ bilinear_interleave \src0, \src1, \src01, \dst0, \dst1, \dst01
.endm
.macro bilinear_interleave_src_dst_8_src \
numpix, src0, src1, src01, dst0, dst1, dst01
- bilinear_interleave src0, src1, src01, dst0, dst1, dst01
+ bilinear_interleave \src0, \src1, \src01, \dst0, \dst1, \dst01
.endm
.macro bilinear_interleave_src_dst_8_over \
numpix, src0, src1, src01, dst0, dst1, dst01
- bilinear_interleave src0, src1, src01, dst0, dst1, dst01
+ bilinear_interleave \src0, \src1, \src01, \dst0, \dst1, \dst01
.endm
.macro bilinear_interleave_src_dst_8_add \
numpix, src0, src1, src01, dst0, dst1, dst01
- bilinear_interleave src0, src1, src01, dst0, dst1, dst01
+ bilinear_interleave \src0, \src1, \src01, \dst0, \dst1, \dst01
.endm
.macro bilinear_interleave_src_dst \
mask_fmt, op, numpix, src0, src1, src01, dst0, dst1, dst01
- bilinear_interleave_src_dst_&mask_fmt&_&op \
- numpix, src0, src1, src01, dst0, dst1, dst01
+ bilinear_interleave_src_dst_\()\mask_fmt\()_\()\op \
+ \numpix, \src0, \src1, \src01, \dst0, \dst1, \dst01
.endm
@@ -383,25 +384,25 @@
numpix, src0, src1, src01, mask, \
tmp01, tmp23, tmp45, tmp67
- umull &tmp01&.8h, &src0&.8b, &mask&.8b
- umull &tmp23&.8h, &src1&.8b, &mask&.8b
+ umull \()\tmp01\().8h, \()\src0\().8b, \()\mask\().8b
+ umull \()\tmp23\().8h, \()\src1\().8b, \()\mask\().8b
/* bubbles */
- urshr &tmp45&.8h, &tmp01&.8h, #8
- urshr &tmp67&.8h, &tmp23&.8h, #8
+ urshr \()\tmp45\().8h, \()\tmp01\().8h, #8
+ urshr \()\tmp67\().8h, \()\tmp23\().8h, #8
/* bubbles */
- raddhn &src0&.8b, &tmp45&.8h, &tmp01&.8h
- raddhn &src1&.8b, &tmp67&.8h, &tmp23&.8h
- mov &src01&.d[0], &src0&.d[0]
- mov &src01&.d[1], &src1&.d[0]
+ raddhn \()\src0\().8b, \()\tmp45\().8h, \()\tmp01\().8h
+ raddhn \()\src1\().8b, \()\tmp67\().8h, \()\tmp23\().8h
+ mov \()\src01\().d[0], \()\src0\().d[0]
+ mov \()\src01\().d[1], \()\src1\().d[0]
.endm
.macro bilinear_apply_mask_to_src \
mask_fmt, numpix, src0, src1, src01, mask, \
tmp01, tmp23, tmp45, tmp67
- bilinear_apply_mask_to_src_&mask_fmt \
- numpix, src0, src1, src01, mask, \
- tmp01, tmp23, tmp45, tmp67
+ bilinear_apply_mask_to_src_\()\mask_fmt \
+ \numpix, \src0, \src1, \src01, \mask, \
+ \tmp01, \tmp23, \tmp45, \tmp67
.endm
@@ -418,90 +419,90 @@
numpix, src0, src1, src01, dst0, dst1, dst01, \
tmp01, tmp23, tmp45, tmp67, tmp8
- dup &tmp8&.2s, &src1&.s[1]
+ dup \()\tmp8\().2s, \()\src1\().s[1]
/* bubbles */
- mvn &tmp8&.8b, &tmp8&.8b
+ mvn \()\tmp8\().8b, \()\tmp8\().8b
/* bubbles */
- umull &tmp01&.8h, &dst0&.8b, &tmp8&.8b
+ umull \()\tmp01\().8h, \()\dst0\().8b, \()\tmp8\().8b
/* bubbles */
- umull &tmp23&.8h, &dst1&.8b, &tmp8&.8b
+ umull \()\tmp23\().8h, \()\dst1\().8b, \()\tmp8\().8b
/* bubbles */
- urshr &tmp45&.8h, &tmp01&.8h, #8
- urshr &tmp67&.8h, &tmp23&.8h, #8
+ urshr \()\tmp45\().8h, \()\tmp01\().8h, #8
+ urshr \()\tmp67\().8h, \()\tmp23\().8h, #8
/* bubbles */
- raddhn &dst0&.8b, &tmp45&.8h, &tmp01&.8h
- raddhn &dst1&.8b, &tmp67&.8h, &tmp23&.8h
- mov &dst01&.d[0], &dst0&.d[0]
- mov &dst01&.d[1], &dst1&.d[0]
+ raddhn \()\dst0\().8b, \()\tmp45\().8h, \()\tmp01\().8h
+ raddhn \()\dst1\().8b, \()\tmp67\().8h, \()\tmp23\().8h
+ mov \()\dst01\().d[0], \()\dst0\().d[0]
+ mov \()\dst01\().d[1], \()\dst1\().d[0]
/* bubbles */
- uqadd &src0&.8b, &dst0&.8b, &src0&.8b
- uqadd &src1&.8b, &dst1&.8b, &src1&.8b
- mov &src01&.d[0], &src0&.d[0]
- mov &src01&.d[1], &src1&.d[0]
+ uqadd \()\src0\().8b, \()\dst0\().8b, \()\src0\().8b
+ uqadd \()\src1\().8b, \()\dst1\().8b, \()\src1\().8b
+ mov \()\src01\().d[0], \()\src0\().d[0]
+ mov \()\src01\().d[1], \()\src1\().d[0]
.endm
.macro bilinear_combine_add \
numpix, src0, src1, src01, dst0, dst1, dst01, \
tmp01, tmp23, tmp45, tmp67, tmp8
- uqadd &src0&.8b, &dst0&.8b, &src0&.8b
- uqadd &src1&.8b, &dst1&.8b, &src1&.8b
- mov &src01&.d[0], &src0&.d[0]
- mov &src01&.d[1], &src1&.d[0]
+ uqadd \()\src0\().8b, \()\dst0\().8b, \()\src0\().8b
+ uqadd \()\src1\().8b, \()\dst1\().8b, \()\src1\().8b
+ mov \()\src01\().d[0], \()\src0\().d[0]
+ mov \()\src01\().d[1], \()\src1\().d[0]
.endm
.macro bilinear_combine \
op, numpix, src0, src1, src01, dst0, dst1, dst01, \
tmp01, tmp23, tmp45, tmp67, tmp8
- bilinear_combine_&op \
- numpix, src0, src1, src01, dst0, dst1, dst01, \
- tmp01, tmp23, tmp45, tmp67, tmp8
+ bilinear_combine_\()\op \
+ \numpix, \src0, \src1, \src01, \dst0, \dst1, \dst01, \
+ \tmp01, \tmp23, \tmp45, \tmp67, \tmp8
.endm
/*
* Macros for final deinterleaving of destination pixels if needed.
*/
.macro bilinear_deinterleave numpix, dst0, dst1, dst01
- vuzp &dst0&.8b, &dst1&.8b
+ vuzp \()\dst0\().8b, \()\dst1\().8b
/* bubbles */
- vuzp &dst0&.8b, &dst1&.8b
- mov &dst01&.d[0], &dst0&.d[0]
- mov &dst01&.d[1], &dst1&.d[0]
+ vuzp \()\dst0\().8b, \()\dst1\().8b
+ mov \()\dst01\().d[0], \()\dst0\().d[0]
+ mov \()\dst01\().d[1], \()\dst1\().d[0]
.endm
.macro bilinear_deinterleave_dst_x_src numpix, dst0, dst1, dst01
.endm
.macro bilinear_deinterleave_dst_x_over numpix, dst0, dst1, dst01
- bilinear_deinterleave numpix, dst0, dst1, dst01
+ bilinear_deinterleave \numpix, \dst0, \dst1, \dst01
.endm
.macro bilinear_deinterleave_dst_x_add numpix, dst0, dst1, dst01
- bilinear_deinterleave numpix, dst0, dst1, dst01
+ bilinear_deinterleave \numpix, \dst0, \dst1, \dst01
.endm
.macro bilinear_deinterleave_dst_8_src numpix, dst0, dst1, dst01
- bilinear_deinterleave numpix, dst0, dst1, dst01
+ bilinear_deinterleave \numpix, \dst0, \dst1, \dst01
.endm
.macro bilinear_deinterleave_dst_8_over numpix, dst0, dst1, dst01
- bilinear_deinterleave numpix, dst0, dst1, dst01
+ bilinear_deinterleave \numpix, \dst0, \dst1, \dst01
.endm
.macro bilinear_deinterleave_dst_8_add numpix, dst0, dst1, dst01
- bilinear_deinterleave numpix, dst0, dst1, dst01
+ bilinear_deinterleave \numpix, \dst0, \dst1, \dst01
.endm
.macro bilinear_deinterleave_dst mask_fmt, op, numpix, dst0, dst1, dst01
- bilinear_deinterleave_dst_&mask_fmt&_&op numpix, dst0, dst1, dst01
+ bilinear_deinterleave_dst_\()\mask_fmt\()_\()\op \numpix, \dst0, \dst1, \dst01
.endm
.macro bilinear_interpolate_last_pixel src_fmt, mask_fmt, dst_fmt, op
- bilinear_load_&src_fmt v0, v1, v2
- bilinear_load_mask mask_fmt, 1, v4
- bilinear_load_dst dst_fmt, op, 1, v18, v19, v9
+ bilinear_load_\()\src_fmt v0, v1, v2
+ bilinear_load_mask \mask_fmt, 1, v4
+ bilinear_load_dst \dst_fmt, \op, 1, v18, v19, v9
umull v2.8h, v0.8b, v28.8b
umlal v2.8h, v1.8b, v29.8b
/* 5 cycles bubble */
@@ -509,28 +510,28 @@
umlsl v0.4s, v2.4h, v15.h[0]
umlal2 v0.4s, v2.8h, v15.h[0]
/* 5 cycles bubble */
- bilinear_duplicate_mask mask_fmt, 1, v4
+ bilinear_duplicate_mask \mask_fmt, 1, v4
shrn v0.4h, v0.4s, #(2 * BILINEAR_INTERPOLATION_BITS)
/* 3 cycles bubble */
xtn v0.8b, v0.8h
/* 1 cycle bubble */
bilinear_interleave_src_dst \
- mask_fmt, op, 1, v0, v1, v0, v18, v19, v9
+ \mask_fmt, \op, 1, v0, v1, v0, v18, v19, v9
bilinear_apply_mask_to_src \
- mask_fmt, 1, v0, v1, v0, v4, \
+ \mask_fmt, 1, v0, v1, v0, v4, \
v3, v8, v10, v11
bilinear_combine \
- op, 1, v0, v1, v0, v18, v19, v9, \
+ \op, 1, v0, v1, v0, v18, v19, v9, \
v3, v8, v10, v11, v5
- bilinear_deinterleave_dst mask_fmt, op, 1, v0, v1, v0
- bilinear_store_&dst_fmt 1, v17, v18
+ bilinear_deinterleave_dst \mask_fmt, \op, 1, v0, v1, v0
+ bilinear_store_\()\dst_fmt 1, v17, v18
.endm
.macro bilinear_interpolate_two_pixels src_fmt, mask_fmt, dst_fmt, op
- bilinear_load_and_vertical_interpolate_two_&src_fmt \
+ bilinear_load_and_vertical_interpolate_two_\()\src_fmt \
v1, v11, v18, v19, v20, v21, v22, v23
- bilinear_load_mask mask_fmt, 2, v4
- bilinear_load_dst dst_fmt, op, 2, v18, v19, v9
+ bilinear_load_mask \mask_fmt, 2, v4
+ bilinear_load_dst \dst_fmt, \op, 2, v18, v19, v9
ushll v0.4s, v1.4h, #BILINEAR_INTERPOLATION_BITS
umlsl v0.4s, v1.4h, v15.h[0]
umlal2 v0.4s, v1.8h, v15.h[0]
@@ -539,25 +540,25 @@
umlal2 v10.4s, v11.8h, v15.h[4]
shrn v0.4h, v0.4s, #(2 * BILINEAR_INTERPOLATION_BITS)
shrn2 v0.8h, v10.4s, #(2 * BILINEAR_INTERPOLATION_BITS)
- bilinear_duplicate_mask mask_fmt, 2, v4
+ bilinear_duplicate_mask \mask_fmt, 2, v4
ushr v15.8h, v12.8h, #(16 - BILINEAR_INTERPOLATION_BITS)
add v12.8h, v12.8h, v13.8h
xtn v0.8b, v0.8h
bilinear_interleave_src_dst \
- mask_fmt, op, 2, v0, v1, v0, v18, v19, v9
+ \mask_fmt, \op, 2, v0, v1, v0, v18, v19, v9
bilinear_apply_mask_to_src \
- mask_fmt, 2, v0, v1, v0, v4, \
+ \mask_fmt, 2, v0, v1, v0, v4, \
v3, v8, v10, v11
bilinear_combine \
- op, 2, v0, v1, v0, v18, v19, v9, \
+ \op, 2, v0, v1, v0, v18, v19, v9, \
v3, v8, v10, v11, v5
- bilinear_deinterleave_dst mask_fmt, op, 2, v0, v1, v0
- bilinear_store_&dst_fmt 2, v16, v17
+ bilinear_deinterleave_dst \mask_fmt, \op, 2, v0, v1, v0
+ bilinear_store_\()\dst_fmt 2, v16, v17
.endm
.macro bilinear_interpolate_four_pixels src_fmt, mask_fmt, dst_fmt, op
- bilinear_load_and_vertical_interpolate_four_&src_fmt \
- v1, v11, v4, v5, v6, v7, v22, v23 \
+ bilinear_load_and_vertical_interpolate_four_\()\src_fmt \
+ v1, v11, v4, v5, v6, v7, v22, v23, \
v3, v9, v16, v17, v20, v21, v18, v19
prfm PREFETCH_MODE, [TMP1, PF_OFFS]
sub TMP1, TMP1, STRIDE
@@ -580,23 +581,23 @@
shrn2 v0.8h, v10.4s, #(2 * BILINEAR_INTERPOLATION_BITS)
shrn v2.4h, v2.4s, #(2 * BILINEAR_INTERPOLATION_BITS)
shrn2 v2.8h, v8.4s, #(2 * BILINEAR_INTERPOLATION_BITS)
- bilinear_load_mask mask_fmt, 4, v4
- bilinear_duplicate_mask mask_fmt, 4, v4
+ bilinear_load_mask \mask_fmt, 4, v4
+ bilinear_duplicate_mask \mask_fmt, 4, v4
ushr v15.8h, v12.8h, #(16 - BILINEAR_INTERPOLATION_BITS)
xtn v0.8b, v0.8h
xtn v1.8b, v2.8h
add v12.8h, v12.8h, v13.8h
- bilinear_load_dst dst_fmt, op, 4, v2, v3, v21
+ bilinear_load_dst \dst_fmt, \op, 4, v2, v3, v21
bilinear_interleave_src_dst \
- mask_fmt, op, 4, v0, v1, v0, v2, v3, v11
+ \mask_fmt, \op, 4, v0, v1, v0, v2, v3, v11
bilinear_apply_mask_to_src \
- mask_fmt, 4, v0, v1, v0, v4, \
+ \mask_fmt, 4, v0, v1, v0, v4, \
v6, v8, v9, v10
bilinear_combine \
- op, 4, v0, v1, v0, v2, v3, v1, \
+ \op, 4, v0, v1, v0, v2, v3, v1, \
v6, v8, v9, v10, v23
- bilinear_deinterleave_dst mask_fmt, op, 4, v0, v1, v0
- bilinear_store_&dst_fmt 4, v6, v7
+ bilinear_deinterleave_dst \mask_fmt, \op, 4, v0, v1, v0
+ bilinear_store_\()\dst_fmt 4, v6, v7
.endm
.set BILINEAR_FLAG_USE_MASK, 1
@@ -636,14 +637,14 @@
prefetch_distance, \
flags
-pixman_asm_function fname
-.if pixblock_size == 8
-.elseif pixblock_size == 4
+pixman_asm_function \fname
+.if \pixblock_size == 8
+.elseif \pixblock_size == 4
.else
.error unsupported pixblock size
.endif
-.if ((flags) & BILINEAR_FLAG_USE_MASK) == 0
+.if ((\flags) & BILINEAR_FLAG_USE_MASK) == 0
OUT .req x0
TOP .req x1
BOTTOM .req x2
@@ -699,7 +700,7 @@ pixman_asm_function fname
STRIDE .req x15
DUMMY .req x30
- .set prefetch_offset, prefetch_distance
+ .set prefetch_offset, \prefetch_distance
stp x29, x30, [sp, -16]!
mov x29, sp
@@ -714,7 +715,7 @@ pixman_asm_function fname
sub sp, sp, 120
.endif
- mov WTMP1, #prefetch_distance
+ mov WTMP1, #\prefetch_distance
umull PF_OFFS, WTMP1, UX
sub STRIDE, BOTTOM, TOP
@@ -735,11 +736,11 @@ pixman_asm_function fname
/* ensure good destination alignment */
cmp WIDTH, #1
blt 100f
- tst OUT, #(1 << dst_bpp_shift)
+ tst OUT, #(1 << \dst_bpp_shift)
beq 100f
ushr v15.8h, v12.8h, #(16 - BILINEAR_INTERPOLATION_BITS)
add v12.8h, v12.8h, v13.8h
- bilinear_process_last_pixel
+ \bilinear_process_last_pixel
sub WIDTH, WIDTH, #1
100:
add v13.8h, v13.8h, v13.8h
@@ -748,50 +749,50 @@ pixman_asm_function fname
cmp WIDTH, #2
blt 100f
- tst OUT, #(1 << (dst_bpp_shift + 1))
+ tst OUT, #(1 << (\dst_bpp_shift + 1))
beq 100f
- bilinear_process_two_pixels
+ \bilinear_process_two_pixels
sub WIDTH, WIDTH, #2
100:
-.if pixblock_size == 8
+.if \pixblock_size == 8
cmp WIDTH, #4
blt 100f
- tst OUT, #(1 << (dst_bpp_shift + 2))
+ tst OUT, #(1 << (\dst_bpp_shift + 2))
beq 100f
- bilinear_process_four_pixels
+ \bilinear_process_four_pixels
sub WIDTH, WIDTH, #4
100:
.endif
- subs WIDTH, WIDTH, #pixblock_size
+ subs WIDTH, WIDTH, #\pixblock_size
blt 100f
- asr PF_OFFS, PF_OFFS, #(16 - src_bpp_shift)
- bilinear_process_pixblock_head
- subs WIDTH, WIDTH, #pixblock_size
+ asr PF_OFFS, PF_OFFS, #(16 - \src_bpp_shift)
+ \bilinear_process_pixblock_head
+ subs WIDTH, WIDTH, #\pixblock_size
blt 500f
0:
- bilinear_process_pixblock_tail_head
- subs WIDTH, WIDTH, #pixblock_size
+ \bilinear_process_pixblock_tail_head
+ subs WIDTH, WIDTH, #\pixblock_size
bge 0b
500:
- bilinear_process_pixblock_tail
+ \bilinear_process_pixblock_tail
100:
-.if pixblock_size == 8
+.if \pixblock_size == 8
tst WIDTH, #4
beq 200f
- bilinear_process_four_pixels
+ \bilinear_process_four_pixels
200:
.endif
/* handle the remaining trailing pixels */
tst WIDTH, #2
beq 200f
- bilinear_process_two_pixels
+ \bilinear_process_two_pixels
200:
tst WIDTH, #1
beq 300f
- bilinear_process_last_pixel
+ \bilinear_process_last_pixel
300:
-.if ((flags) & BILINEAR_FLAG_USE_MASK) == 0
+.if ((\flags) & BILINEAR_FLAG_USE_MASK) == 0
sub x29, x29, 64
ld1 {v8.8b, v9.8b, v10.8b, v11.8b}, [x29], 32
ld1 {v12.8b, v13.8b, v14.8b, v15.8b}, [x29], 32
@@ -829,11 +830,11 @@ pixman_asm_function fname
.unreq TMP3
.unreq TMP4
.unreq STRIDE
-.if ((flags) & BILINEAR_FLAG_USE_MASK) != 0
+.if ((\flags) & BILINEAR_FLAG_USE_MASK) != 0
.unreq MASK
.endif
-.endfunc
+pixman_end_asm_function
.endm
diff --git a/pixman/pixman-arma64-neon-asm.S b/pixman/pixman-arma64-neon-asm.S
index 774d98d..107c133 100644
--- a/pixman/pixman-arma64-neon-asm.S
+++ b/pixman/pixman-arma64-neon-asm.S
@@ -267,54 +267,54 @@
sli v4.8h, v4.8h, #5
ushll v14.8h, v17.8b, #7
sli v14.8h, v14.8h, #1
- PF add PF_X, PF_X, #8
+ PF add, PF_X, PF_X, #8
ushll v8.8h, v19.8b, #7
sli v8.8h, v8.8h, #1
- PF tst PF_CTL, #0xF
+ PF tst, PF_CTL, #0xF
sri v6.8b, v6.8b, #5
- PF beq 10f
- PF add PF_X, PF_X, #8
+ PF beq, 10f
+ PF add, PF_X, PF_X, #8
10:
mvn v3.8b, v3.8b
- PF beq 10f
- PF sub PF_CTL, PF_CTL, #1
+ PF beq, 10f
+ PF sub, PF_CTL, PF_CTL, #1
10:
sri v7.8b, v7.8b, #6
shrn v30.8b, v4.8h, #2
umull v10.8h, v3.8b, v6.8b
- PF lsl DUMMY, PF_X, #src_bpp_shift
- PF prfm PREFETCH_MODE, [PF_SRC, DUMMY]
+ PF lsl, DUMMY, PF_X, #src_bpp_shift
+ PF prfm, PREFETCH_MODE, [PF_SRC, DUMMY]
umull v11.8h, v3.8b, v7.8b
umull v12.8h, v3.8b, v30.8b
- PF lsl DUMMY, PF_X, #dst_bpp_shift
- PF prfm PREFETCH_MODE, [PF_DST, DUMMY]
+ PF lsl, DUMMY, PF_X, #dst_bpp_shift
+ PF prfm, PREFETCH_MODE, [PF_DST, DUMMY]
sri v14.8h, v8.8h, #5
- PF cmp PF_X, ORIG_W
+ PF cmp, PF_X, ORIG_W
ushll v9.8h, v18.8b, #7
sli v9.8h, v9.8h, #1
urshr v17.8h, v10.8h, #8
- PF ble 10f
- PF sub PF_X, PF_X, ORIG_W
+ PF ble, 10f
+ PF sub, PF_X, PF_X, ORIG_W
10:
urshr v19.8h, v11.8h, #8
urshr v18.8h, v12.8h, #8
- PF ble 10f
- PF subs PF_CTL, PF_CTL, #0x10
+ PF ble, 10f
+ PF subs, PF_CTL, PF_CTL, #0x10
10:
sri v14.8h, v9.8h, #11
mov v28.d[0], v14.d[0]
mov v29.d[0], v14.d[1]
- PF ble 10f
- PF lsl DUMMY, SRC_STRIDE, #src_bpp_shift
- PF ldrsb DUMMY, [PF_SRC, DUMMY]
- PF add PF_SRC, PF_SRC, #1
+ PF ble, 10f
+ PF lsl, DUMMY, SRC_STRIDE, #src_bpp_shift
+ PF ldrsb, DUMMY, [PF_SRC, DUMMY]
+ PF add, PF_SRC, PF_SRC, #1
10:
raddhn v20.8b, v10.8h, v17.8h
raddhn v23.8b, v11.8h, v19.8h
- PF ble 10f
- PF lsl DUMMY, DST_STRIDE, #dst_bpp_shift
- PF ldrsb DUMMY, [PF_DST, DUMMY]
- PF add PF_DST, PF_SRC, #1
+ PF ble, 10f
+ PF lsl, DUMMY, DST_STRIDE, #dst_bpp_shift
+ PF ldrsb, DUMMY, [PF_DST, DUMMY]
+ PF add, PF_DST, PF_SRC, #1
10:
raddhn v22.8b, v12.8h, v18.8h
st1 {v14.8h}, [DST_W], #16
@@ -474,32 +474,32 @@ generate_composite_function \
.macro pixman_composite_src_8888_0565_process_pixblock_tail_head
sri v14.8h, v8.8h, #5
- PF add PF_X, PF_X, #8
- PF tst PF_CTL, #0xF
+ PF add, PF_X, PF_X, #8
+ PF tst, PF_CTL, #0xF
fetch_src_pixblock
- PF beq 10f
- PF add PF_X, PF_X, #8
- PF sub PF_CTL, PF_CTL, #1
+ PF beq, 10f
+ PF add, PF_X, PF_X, #8
+ PF sub, PF_CTL, PF_CTL, #1
10:
sri v14.8h, v9.8h, #11
mov v28.d[0], v14.d[0]
mov v29.d[0], v14.d[1]
- PF cmp PF_X, ORIG_W
- PF lsl DUMMY, PF_X, #src_bpp_shift
- PF prfm PREFETCH_MODE, [PF_SRC, DUMMY]
+ PF cmp, PF_X, ORIG_W
+ PF lsl, DUMMY, PF_X, #src_bpp_shift
+ PF prfm, PREFETCH_MODE, [PF_SRC, DUMMY]
ushll v8.8h, v1.8b, #7
sli v8.8h, v8.8h, #1
st1 {v14.8h}, [DST_W], #16
- PF ble 10f
- PF sub PF_X, PF_X, ORIG_W
- PF subs PF_CTL, PF_CTL, #0x10
+ PF ble, 10f
+ PF sub, PF_X, PF_X, ORIG_W
+ PF subs, PF_CTL, PF_CTL, #0x10
10:
ushll v14.8h, v2.8b, #7
sli v14.8h, v14.8h, #1
- PF ble 10f
- PF lsl DUMMY, SRC_STRIDE, #src_bpp_shift
- PF ldrsb DUMMY, [PF_SRC, DUMMY]
- PF add PF_SRC, PF_SRC, #1
+ PF ble, 10f
+ PF lsl, DUMMY, SRC_STRIDE, #src_bpp_shift
+ PF ldrsb, DUMMY, [PF_SRC, DUMMY]
+ PF add, PF_SRC, PF_SRC, #1
10:
ushll v9.8h, v0.8b, #7
sli v9.8h, v9.8h, #1
@@ -566,31 +566,31 @@ generate_composite_function \
.macro pixman_composite_add_8_8_process_pixblock_tail_head
fetch_src_pixblock
- PF add PF_X, PF_X, #32
- PF tst PF_CTL, #0xF
+ PF add, PF_X, PF_X, #32
+ PF tst, PF_CTL, #0xF
ld1 {v4.8b, v5.8b, v6.8b, v7.8b}, [DST_R], #32
- PF beq 10f
- PF add PF_X, PF_X, #32
- PF sub PF_CTL, PF_CTL, #1
+ PF beq, 10f
+ PF add, PF_X, PF_X, #32
+ PF sub, PF_CTL, PF_CTL, #1
10:
st1 {v28.8b, v29.8b, v30.8b, v31.8b}, [DST_W], #32
- PF cmp PF_X, ORIG_W
- PF lsl DUMMY, PF_X, #src_bpp_shift
- PF prfm PREFETCH_MODE, [PF_SRC, DUMMY]
- PF lsl DUMMY, PF_X, #dst_bpp_shift
- PF prfm PREFETCH_MODE, [PF_DST, DUMMY]
- PF ble 10f
- PF sub PF_X, PF_X, ORIG_W
- PF subs PF_CTL, PF_CTL, #0x10
+ PF cmp, PF_X, ORIG_W
+ PF lsl, DUMMY, PF_X, #src_bpp_shift
+ PF prfm, PREFETCH_MODE, [PF_SRC, DUMMY]
+ PF lsl, DUMMY, PF_X, #dst_bpp_shift
+ PF prfm, PREFETCH_MODE, [PF_DST, DUMMY]
+ PF ble, 10f
+ PF sub, PF_X, PF_X, ORIG_W
+ PF subs, PF_CTL, PF_CTL, #0x10
10:
uqadd v28.8b, v0.8b, v4.8b
- PF ble 10f
- PF lsl DUMMY, SRC_STRIDE, #src_bpp_shift
- PF ldrsb DUMMY, [PF_SRC, DUMMY]
- PF add PF_SRC, PF_SRC, #1
- PF lsl DUMMY, DST_STRIDE, #dst_bpp_shift
- PF ldrsb DUMMY, [PF_DST, DUMMY]
- PF add PF_DST, PF_DST, #1
+ PF ble, 10f
+ PF lsl, DUMMY, SRC_STRIDE, #src_bpp_shift
+ PF ldrsb, DUMMY, [PF_SRC, DUMMY]
+ PF add, PF_SRC, PF_SRC, #1
+ PF lsl, DUMMY, DST_STRIDE, #dst_bpp_shift
+ PF ldrsb, DUMMY, [PF_DST, DUMMY]
+ PF add, PF_DST, PF_DST, #1
10:
uqadd v29.8b, v1.8b, v5.8b
uqadd v30.8b, v2.8b, v6.8b
@@ -612,31 +612,31 @@ generate_composite_function \
.macro pixman_composite_add_8888_8888_process_pixblock_tail_head
fetch_src_pixblock
- PF add PF_X, PF_X, #8
- PF tst PF_CTL, #0xF
+ PF add, PF_X, PF_X, #8
+ PF tst, PF_CTL, #0xF
ld1 {v4.8b, v5.8b, v6.8b, v7.8b}, [DST_R], #32
- PF beq 10f
- PF add PF_X, PF_X, #8
- PF sub PF_CTL, PF_CTL, #1
+ PF beq, 10f
+ PF add, PF_X, PF_X, #8
+ PF sub, PF_CTL, PF_CTL, #1
10:
st1 {v28.8b, v29.8b, v30.8b, v31.8b}, [DST_W], #32
- PF cmp PF_X, ORIG_W
- PF lsl DUMMY, PF_X, #src_bpp_shift
- PF prfm PREFETCH_MODE, [PF_SRC, DUMMY]
- PF lsl DUMMY, PF_X, #dst_bpp_shift
- PF prfm PREFETCH_MODE, [PF_DST, DUMMY]
- PF ble 10f
- PF sub PF_X, PF_X, ORIG_W
- PF subs PF_CTL, PF_CTL, #0x10
+ PF cmp, PF_X, ORIG_W
+ PF lsl, DUMMY, PF_X, #src_bpp_shift
+ PF prfm, PREFETCH_MODE, [PF_SRC, DUMMY]
+ PF lsl, DUMMY, PF_X, #dst_bpp_shift
+ PF prfm, PREFETCH_MODE, [PF_DST, DUMMY]
+ PF ble, 10f
+ PF sub, PF_X, PF_X, ORIG_W
+ PF subs, PF_CTL, PF_CTL, #0x10
10:
uqadd v28.8b, v0.8b, v4.8b
- PF ble 10f
- PF lsl DUMMY, SRC_STRIDE, #src_bpp_shift
- PF ldrsb DUMMY, [PF_SRC, DUMMY]
- PF add PF_SRC, PF_SRC, #1
- PF lsl DUMMY, DST_STRIDE, #dst_bpp_shift
- PF ldrsb DUMMY, [PF_DST, DUMMY]
- PF add PF_DST, PF_DST, #1
+ PF ble, 10f
+ PF lsl, DUMMY, SRC_STRIDE, #src_bpp_shift
+ PF ldrsb, DUMMY, [PF_SRC, DUMMY]
+ PF add, PF_SRC, PF_SRC, #1
+ PF lsl, DUMMY, DST_STRIDE, #dst_bpp_shift
+ PF ldrsb, DUMMY, [PF_DST, DUMMY]
+ PF add, PF_DST, PF_DST, #1
10:
uqadd v29.8b, v1.8b, v5.8b
uqadd v30.8b, v2.8b, v6.8b
@@ -689,45 +689,45 @@ generate_composite_function_single_scanline \
.macro pixman_composite_out_reverse_8888_8888_process_pixblock_tail_head
ld4 {v4.8b, v5.8b, v6.8b, v7.8b}, [DST_R], #32
urshr v14.8h, v8.8h, #8
- PF add PF_X, PF_X, #8
- PF tst PF_CTL, #0xF
+ PF add, PF_X, PF_X, #8
+ PF tst, PF_CTL, #0xF
urshr v15.8h, v9.8h, #8
urshr v16.8h, v10.8h, #8
urshr v17.8h, v11.8h, #8
- PF beq 10f
- PF add PF_X, PF_X, #8
- PF sub PF_CTL, PF_CTL, #1
+ PF beq, 10f
+ PF add, PF_X, PF_X, #8
+ PF sub, PF_CTL, PF_CTL, #1
10:
raddhn v28.8b, v14.8h, v8.8h
raddhn v29.8b, v15.8h, v9.8h
- PF cmp PF_X, ORIG_W
+ PF cmp, PF_X, ORIG_W
raddhn v30.8b, v16.8h, v10.8h
raddhn v31.8b, v17.8h, v11.8h
fetch_src_pixblock
- PF lsl DUMMY, PF_X, #src_bpp_shift
- PF prfm PREFETCH_MODE, [PF_SRC, DUMMY]
+ PF lsl, DUMMY, PF_X, #src_bpp_shift
+ PF prfm, PREFETCH_MODE, [PF_SRC, DUMMY]
mvn v22.8b, v3.8b
- PF lsl DUMMY, PF_X, #dst_bpp_shift
- PF prfm PREFETCH_MODE, [PF_DST, DUMMY]
+ PF lsl, DUMMY, PF_X, #dst_bpp_shift
+ PF prfm, PREFETCH_MODE, [PF_DST, DUMMY]
st4 {v28.8b, v29.8b, v30.8b, v31.8b}, [DST_W], #32
- PF ble 10f
- PF sub PF_X, PF_X, ORIG_W
+ PF ble, 10f
+ PF sub, PF_X, PF_X, ORIG_W
10:
umull v8.8h, v22.8b, v4.8b
- PF ble 10f
- PF subs PF_CTL, PF_CTL, #0x10
+ PF ble, 10f
+ PF subs, PF_CTL, PF_CTL, #0x10
10:
umull v9.8h, v22.8b, v5.8b
- PF ble 10f
- PF lsl DUMMY, SRC_STRIDE, #src_bpp_shift
- PF ldrsb DUMMY, [PF_SRC, DUMMY]
- PF add PF_SRC, PF_SRC, #1
+ PF ble, 10f
+ PF lsl, DUMMY, SRC_STRIDE, #src_bpp_shift
+ PF ldrsb, DUMMY, [PF_SRC, DUMMY]
+ PF add, PF_SRC, PF_SRC, #1
10:
umull v10.8h, v22.8b, v6.8b
- PF ble 10f
- PF lsl DUMMY, DST_STRIDE, #dst_bpp_shift
- PF ldrsb DUMMY, [PF_DST, DUMMY]
- PF add PF_DST, PF_DST, #1
+ PF ble, 10f
+ PF lsl, DUMMY, DST_STRIDE, #dst_bpp_shift
+ PF ldrsb, DUMMY, [PF_DST, DUMMY]
+ PF add, PF_DST, PF_DST, #1
10:
umull v11.8h, v22.8b, v7.8b
.endm
@@ -759,18 +759,18 @@ generate_composite_function_single_scanline \
.macro pixman_composite_over_8888_8888_process_pixblock_tail_head
ld4 {v4.8b, v5.8b, v6.8b, v7.8b}, [DST_R], #32
urshr v14.8h, v8.8h, #8
- PF add PF_X, PF_X, #8
- PF tst PF_CTL, #0xF
+ PF add, PF_X, PF_X, #8
+ PF tst, PF_CTL, #0xF
urshr v15.8h, v9.8h, #8
urshr v16.8h, v10.8h, #8
urshr v17.8h, v11.8h, #8
- PF beq 10f
- PF add PF_X, PF_X, #8
- PF sub PF_CTL, PF_CTL, #1
+ PF beq, 10f
+ PF add, PF_X, PF_X, #8
+ PF sub, PF_CTL, PF_CTL, #1
10:
raddhn v28.8b, v14.8h, v8.8h
raddhn v29.8b, v15.8h, v9.8h
- PF cmp PF_X, ORIG_W
+ PF cmp, PF_X, ORIG_W
raddhn v30.8b, v16.8h, v10.8h
raddhn v31.8b, v17.8h, v11.8h
uqadd v28.8b, v0.8b, v28.8b
@@ -778,30 +778,30 @@ generate_composite_function_single_scanline \
uqadd v30.8b, v2.8b, v30.8b
uqadd v31.8b, v3.8b, v31.8b
fetch_src_pixblock
- PF lsl DUMMY, PF_X, #src_bpp_shift
- PF prfm PREFETCH_MODE, [PF_SRC, DUMMY]
+ PF lsl, DUMMY, PF_X, #src_bpp_shift
+ PF prfm, PREFETCH_MODE, [PF_SRC, DUMMY]
mvn v22.8b, v3.8b
- PF lsl DUMMY, PF_X, #dst_bpp_shift
- PF prfm PREFETCH_MODE, [PF_DST, DUMMY]
+ PF lsl, DUMMY, PF_X, #dst_bpp_shift
+ PF prfm, PREFETCH_MODE, [PF_DST, DUMMY]
st4 {v28.8b, v29.8b, v30.8b, v31.8b}, [DST_W], #32
- PF ble 10f
- PF sub PF_X, PF_X, ORIG_W
+ PF ble, 10f
+ PF sub, PF_X, PF_X, ORIG_W
10:
umull v8.8h, v22.8b, v4.8b
- PF ble 10f
- PF subs PF_CTL, PF_CTL, #0x10
+ PF ble, 10f
+ PF subs, PF_CTL, PF_CTL, #0x10
10:
umull v9.8h, v22.8b, v5.8b
- PF ble 10f
- PF lsl DUMMY, SRC_STRIDE, #src_bpp_shift
- PF ldrsb DUMMY, [PF_SRC, DUMMY]
- PF add PF_SRC, PF_SRC, #1
+ PF ble, 10f
+ PF lsl, DUMMY, SRC_STRIDE, #src_bpp_shift
+ PF ldrsb, DUMMY, [PF_SRC, DUMMY]
+ PF add, PF_SRC, PF_SRC, #1
10:
umull v10.8h, v22.8b, v6.8b
- PF ble 10f
- PF lsl DUMMY, DST_STRIDE, #dst_bpp_shift
- PF ldrsb DUMMY, [PF_DST, DUMMY]
- PF add PF_DST, PF_DST, #1
+ PF ble, 10f
+ PF lsl, DUMMY, DST_STRIDE, #dst_bpp_shift
+ PF ldrsb, DUMMY, [PF_DST, DUMMY]
+ PF add, PF_DST, PF_DST, #1
10:
umull v11.8h, v22.8b, v7.8b
.endm
@@ -865,30 +865,30 @@ generate_composite_function_single_scanline \
raddhn v31.8b, v17.8h, v11.8h
ld4 {v4.8b, v5.8b, v6.8b, v7.8b}, [DST_R], #32
uqadd v28.8b, v0.8b, v28.8b
- PF add PF_X, PF_X, #8
- PF tst PF_CTL, #0x0F
- PF beq 10f
- PF add PF_X, PF_X, #8
- PF sub PF_CTL, PF_CTL, #1
+ PF add, PF_X, PF_X, #8
+ PF tst, PF_CTL, #0x0F
+ PF beq, 10f
+ PF add, PF_X, PF_X, #8
+ PF sub, PF_CTL, PF_CTL, #1
10:
uqadd v29.8b, v1.8b, v29.8b
uqadd v30.8b, v2.8b, v30.8b
uqadd v31.8b, v3.8b, v31.8b
- PF cmp PF_X, ORIG_W
+ PF cmp, PF_X, ORIG_W
umull v8.8h, v24.8b, v4.8b
- PF lsl DUMMY, PF_X, #dst_bpp_shift
- PF prfm PREFETCH_MODE, [PF_DST, DUMMY]
+ PF lsl, DUMMY, PF_X, #dst_bpp_shift
+ PF prfm, PREFETCH_MODE, [PF_DST, DUMMY]
umull v9.8h, v24.8b, v5.8b
- PF ble 10f
- PF sub PF_X, PF_X, ORIG_W
+ PF ble, 10f
+ PF sub, PF_X, PF_X, ORIG_W
10:
umull v10.8h, v24.8b, v6.8b
- PF subs PF_CTL, PF_CTL, #0x10
+ PF subs, PF_CTL, PF_CTL, #0x10
umull v11.8h, v24.8b, v7.8b
- PF ble 10f
- PF lsl DUMMY, DST_STRIDE, #dst_bpp_shift
- PF ldrsb DUMMY, [PF_DST, DUMMY]
- PF add PF_DST, PF_DST, #1
+ PF ble, 10f
+ PF lsl, DUMMY, DST_STRIDE, #dst_bpp_shift
+ PF ldrsb, DUMMY, [PF_DST, DUMMY]
+ PF add, PF_DST, PF_DST, #1
10:
st4 {v28.8b, v29.8b, v30.8b, v31.8b}, [DST_W], #32
.endm
@@ -917,18 +917,18 @@ generate_composite_function \
.macro pixman_composite_over_reverse_n_8888_process_pixblock_tail_head
urshr v14.8h, v8.8h, #8
- PF add PF_X, PF_X, #8
- PF tst PF_CTL, #0xF
+ PF add, PF_X, PF_X, #8
+ PF tst, PF_CTL, #0xF
urshr v15.8h, v9.8h, #8
urshr v12.8h, v10.8h, #8
urshr v13.8h, v11.8h, #8
- PF beq 10f
- PF add PF_X, PF_X, #8
- PF sub PF_CTL, PF_CTL, #1
+ PF beq, 10f
+ PF add, PF_X, PF_X, #8
+ PF sub, PF_CTL, PF_CTL, #1
10:
raddhn v28.8b, v14.8h, v8.8h
raddhn v29.8b, v15.8h, v9.8h
- PF cmp PF_X, ORIG_W
+ PF cmp, PF_X, ORIG_W
raddhn v30.8b, v12.8h, v10.8h
raddhn v31.8b, v13.8h, v11.8h
uqadd v28.8b, v0.8b, v28.8b
@@ -937,22 +937,22 @@ generate_composite_function \
uqadd v31.8b, v3.8b, v31.8b
ld4 {v0.8b, v1.8b, v2.8b, v3.8b}, [DST_R], #32
mvn v22.8b, v3.8b
- PF lsl DUMMY, PF_X, #dst_bpp_shift
- PF prfm PREFETCH_MODE, [PF_DST, DUMMY]
+ PF lsl, DUMMY, PF_X, #dst_bpp_shift
+ PF prfm, PREFETCH_MODE, [PF_DST, DUMMY]
st4 {v28.8b, v29.8b, v30.8b, v31.8b}, [DST_W], #32
- PF blt 10f
- PF sub PF_X, PF_X, ORIG_W
+ PF blt, 10f
+ PF sub, PF_X, PF_X, ORIG_W
10:
umull v8.8h, v22.8b, v4.8b
- PF blt 10f
- PF subs PF_CTL, PF_CTL, #0x10
+ PF blt, 10f
+ PF subs, PF_CTL, PF_CTL, #0x10
10:
umull v9.8h, v22.8b, v5.8b
umull v10.8h, v22.8b, v6.8b
- PF blt 10f
- PF lsl DUMMY, DST_STRIDE, #dst_bpp_shift
- PF ldrsb DUMMY, [PF_DST, DUMMY]
- PF add PF_DST, PF_DST, #1
+ PF blt, 10f
+ PF lsl, DUMMY, DST_STRIDE, #dst_bpp_shift
+ PF ldrsb, DUMMY, [PF_DST, DUMMY]
+ PF add, PF_DST, PF_DST, #1
10:
umull v11.8h, v22.8b, v7.8b
.endm
@@ -1410,35 +1410,35 @@ generate_composite_function \
.macro pixman_composite_src_n_8_8888_process_pixblock_tail_head
fetch_mask_pixblock
- PF add PF_X, PF_X, #8
+ PF add, PF_X, PF_X, #8
rshrn v28.8b, v8.8h, #8
- PF tst PF_CTL, #0x0F
+ PF tst, PF_CTL, #0x0F
rshrn v29.8b, v9.8h, #8
- PF beq 10f
- PF add PF_X, PF_X, #8
+ PF beq, 10f
+ PF add, PF_X, PF_X, #8
10:
rshrn v30.8b, v10.8h, #8
- PF beq 10f
- PF sub PF_CTL, PF_CTL, #1
+ PF beq, 10f
+ PF sub, PF_CTL, PF_CTL, #1
10:
rshrn v31.8b, v11.8h, #8
- PF cmp PF_X, ORIG_W
+ PF cmp, PF_X, ORIG_W
umull v8.8h, v24.8b, v0.8b
- PF lsl DUMMY, PF_X, #mask_bpp_shift
- PF prfm PREFETCH_MODE, [PF_MASK, DUMMY]
+ PF lsl, DUMMY, PF_X, #mask_bpp_shift
+ PF prfm, PREFETCH_MODE, [PF_MASK, DUMMY]
umull v9.8h, v24.8b, v1.8b
- PF ble 10f
- PF sub PF_X, PF_X, ORIG_W
+ PF ble, 10f
+ PF sub, PF_X, PF_X, ORIG_W
10:
umull v10.8h, v24.8b, v2.8b
- PF ble 10f
- PF subs PF_CTL, PF_CTL, #0x10
+ PF ble, 10f
+ PF subs, PF_CTL, PF_CTL, #0x10
10:
umull v11.8h, v24.8b, v3.8b
- PF ble 10f
- PF lsl DUMMY, MASK_STRIDE, #mask_bpp_shift
- PF ldrsb DUMMY, [PF_MASK, DUMMY]
- PF add PF_MASK, PF_MASK, #1
+ PF ble, 10f
+ PF lsl, DUMMY, MASK_STRIDE, #mask_bpp_shift
+ PF ldrsb, DUMMY, [PF_MASK, DUMMY]
+ PF add, PF_MASK, PF_MASK, #1
10:
st4 {v28.8b, v29.8b, v30.8b, v31.8b}, [DST_W], #32
ursra v8.8h, v8.8h, #8
@@ -1491,35 +1491,35 @@ generate_composite_function \
.macro pixman_composite_src_n_8_8_process_pixblock_tail_head
fetch_mask_pixblock
- PF add PF_X, PF_X, #8
+ PF add, PF_X, PF_X, #8
rshrn v28.8b, v0.8h, #8
- PF tst PF_CTL, #0x0F
+ PF tst, PF_CTL, #0x0F
rshrn v29.8b, v1.8h, #8
- PF beq 10f
- PF add PF_X, PF_X, #8
+ PF beq, 10f
+ PF add, PF_X, PF_X, #8
10:
rshrn v30.8b, v2.8h, #8
- PF beq 10f
- PF sub PF_CTL, PF_CTL, #1
+ PF beq, 10f
+ PF sub, PF_CTL, PF_CTL, #1
10:
rshrn v31.8b, v3.8h, #8
- PF cmp PF_X, ORIG_W
+ PF cmp, PF_X, ORIG_W
umull v0.8h, v24.8b, v16.8b
- PF lsl DUMMY, PF_X, mask_bpp_shift
- PF prfm PREFETCH_MODE, [PF_MASK, DUMMY]
+ PF lsl, DUMMY, PF_X, mask_bpp_shift
+ PF prfm, PREFETCH_MODE, [PF_MASK, DUMMY]
umull v1.8h, v25.8b, v16.8b
- PF ble 10f
- PF sub PF_X, PF_X, ORIG_W
+ PF ble, 10f
+ PF sub, PF_X, PF_X, ORIG_W
10:
umull v2.8h, v26.8b, v16.8b
- PF ble 10f
- PF subs PF_CTL, PF_CTL, #0x10
+ PF ble, 10f
+ PF subs, PF_CTL, PF_CTL, #0x10
10:
umull v3.8h, v27.8b, v16.8b
- PF ble 10f
- PF lsl DUMMY, MASK_STRIDE, #mask_bpp_shift
- PF ldrsb DUMMY, [PF_MASK, DUMMY]
- PF add PF_MASK, PF_MASK, #1
+ PF ble, 10f
+ PF lsl, DUMMY, MASK_STRIDE, #mask_bpp_shift
+ PF ldrsb, DUMMY, [PF_MASK, DUMMY]
+ PF add, PF_MASK, PF_MASK, #1
10:
st1 {v28.8b, v29.8b, v30.8b, v31.8b}, [DST_W], #32
ursra v0.8h, v0.8h, #8
@@ -1599,44 +1599,44 @@ generate_composite_function \
urshr v17.8h, v13.8h, #8
fetch_mask_pixblock
urshr v18.8h, v14.8h, #8
- PF add PF_X, PF_X, #8
+ PF add, PF_X, PF_X, #8
urshr v19.8h, v15.8h, #8
- PF tst PF_CTL, #0x0F
+ PF tst, PF_CTL, #0x0F
raddhn v28.8b, v16.8h, v12.8h
- PF beq 10f
- PF add PF_X, PF_X, #8
+ PF beq, 10f
+ PF add, PF_X, PF_X, #8
10:
raddhn v29.8b, v17.8h, v13.8h
- PF beq 10f
- PF sub PF_CTL, PF_CTL, #1
+ PF beq, 10f
+ PF sub, PF_CTL, PF_CTL, #1
10:
raddhn v30.8b, v18.8h, v14.8h
- PF cmp PF_X, ORIG_W
+ PF cmp, PF_X, ORIG_W
raddhn v31.8b, v19.8h, v15.8h
- PF lsl DUMMY, PF_X, #dst_bpp_shift
- PF prfm PREFETCH_MODE, [PF_DST, DUMMY]
+ PF lsl, DUMMY, PF_X, #dst_bpp_shift
+ PF prfm, PREFETCH_MODE, [PF_DST, DUMMY]
umull v16.8h, v24.8b, v8.8b
- PF lsl DUMMY, PF_X, #mask_bpp_shift
- PF prfm PREFETCH_MODE, [PF_MASK, DUMMY]
+ PF lsl, DUMMY, PF_X, #mask_bpp_shift
+ PF prfm, PREFETCH_MODE, [PF_MASK, DUMMY]
umull v17.8h, v24.8b, v9.8b
- PF ble 10f
- PF sub PF_X, PF_X, ORIG_W
+ PF ble, 10f
+ PF sub, PF_X, PF_X, ORIG_W
10:
umull v18.8h, v24.8b, v10.8b
- PF ble 10f
- PF subs PF_CTL, PF_CTL, #0x10
+ PF ble, 10f
+ PF subs, PF_CTL, PF_CTL, #0x10
10:
umull v19.8h, v24.8b, v11.8b
- PF ble 10f
- PF lsl DUMMY, DST_STRIDE, #dst_bpp_shift
- PF ldrsb DUMMY, [PF_DST, DUMMY]
- PF add PF_DST, PF_DST, #1
+ PF ble, 10f
+ PF lsl, DUMMY, DST_STRIDE, #dst_bpp_shift
+ PF ldrsb, DUMMY, [PF_DST, DUMMY]
+ PF add, PF_DST, PF_DST, #1
10:
uqadd v28.8b, v0.8b, v28.8b
- PF ble 10f
- PF lsl DUMMY, MASK_STRIDE, #mask_bpp_shift
- PF ldrsb DUMMY, [PF_MASK, DUMMY]
- PF add PF_MASK, PF_MASK, #1
+ PF ble, 10f
+ PF lsl, DUMMY, MASK_STRIDE, #mask_bpp_shift
+ PF ldrsb, DUMMY, [PF_MASK, DUMMY]
+ PF add, PF_MASK, PF_MASK, #1
10:
uqadd v29.8b, v1.8b, v29.8b
uqadd v30.8b, v2.8b, v30.8b
@@ -2412,7 +2412,7 @@ generate_composite_function_single_scanline \
default_cleanup_need_all_regs, \
pixman_composite_out_reverse_8888_n_8888_process_pixblock_head, \
pixman_composite_out_reverse_8888_n_8888_process_pixblock_tail, \
- pixman_composite_out_reverse_8888_8888_8888_process_pixblock_tail_head \
+ pixman_composite_out_reverse_8888_8888_8888_process_pixblock_tail_head, \
28, /* dst_w_basereg */ \
4, /* dst_r_basereg */ \
0, /* src_basereg */ \
@@ -2487,7 +2487,7 @@ generate_composite_function \
default_cleanup_need_all_regs, \
pixman_composite_over_8888_n_8888_process_pixblock_head, \
pixman_composite_over_8888_n_8888_process_pixblock_tail, \
- pixman_composite_over_8888_8888_8888_process_pixblock_tail_head \
+ pixman_composite_over_8888_8888_8888_process_pixblock_tail_head, \
28, /* dst_w_basereg */ \
4, /* dst_r_basereg */ \
0, /* src_basereg */ \
@@ -2501,7 +2501,7 @@ generate_composite_function_single_scanline \
default_cleanup_need_all_regs, \
pixman_composite_over_8888_n_8888_process_pixblock_head, \
pixman_composite_over_8888_n_8888_process_pixblock_tail, \
- pixman_composite_over_8888_8888_8888_process_pixblock_tail_head \
+ pixman_composite_over_8888_8888_8888_process_pixblock_tail_head, \
28, /* dst_w_basereg */ \
4, /* dst_r_basereg */ \
0, /* src_basereg */ \
@@ -2529,7 +2529,7 @@ generate_composite_function \
default_cleanup_need_all_regs, \
pixman_composite_over_8888_n_8888_process_pixblock_head, \
pixman_composite_over_8888_n_8888_process_pixblock_tail, \
- pixman_composite_over_8888_8_8888_process_pixblock_tail_head \
+ pixman_composite_over_8888_8_8888_process_pixblock_tail_head, \
28, /* dst_w_basereg */ \
4, /* dst_r_basereg */ \
0, /* src_basereg */ \
@@ -2680,11 +2680,11 @@ generate_composite_function \
urshr v13.8h, v10.8h, #8
fetch_src_pixblock
raddhn v30.8b, v11.8h, v8.8h
- PF add PF_X, PF_X, #8
- PF tst PF_CTL, #0xF
- PF beq 10f
- PF add PF_X, PF_X, #8
- PF sub PF_CTL, PF_CTL, #1
+ PF add, PF_X, PF_X, #8
+ PF tst, PF_CTL, #0xF
+ PF beq, 10f
+ PF add, PF_X, PF_X, #8
+ PF sub, PF_CTL, PF_CTL, #1
10:
raddhn v29.8b, v12.8h, v9.8h
raddhn v28.8b, v13.8h, v10.8h
@@ -2692,16 +2692,16 @@ generate_composite_function \
umull v9.8h, v3.8b, v1.8b
umull v10.8h, v3.8b, v2.8b
st4 {v28.8b, v29.8b, v30.8b, v31.8b}, [DST_W], #32
- PF cmp PF_X, ORIG_W
- PF lsl DUMMY, PF_X, src_bpp_shift
- PF prfm PREFETCH_MODE, [PF_SRC, DUMMY]
- PF ble 10f
- PF sub PF_X, PF_X, ORIG_W
- PF subs PF_CTL, PF_CTL, #0x10
- PF ble 10f
- PF lsl DUMMY, SRC_STRIDE, #src_bpp_shift
- PF ldrsb DUMMY, [PF_SRC, DUMMY]
- PF add PF_SRC, PF_SRC, #1
+ PF cmp, PF_X, ORIG_W
+ PF lsl, DUMMY, PF_X, src_bpp_shift
+ PF prfm, PREFETCH_MODE, [PF_SRC, DUMMY]
+ PF ble, 10f
+ PF sub, PF_X, PF_X, ORIG_W
+ PF subs, PF_CTL, PF_CTL, #0x10
+ PF ble, 10f
+ PF lsl, DUMMY, SRC_STRIDE, #src_bpp_shift
+ PF ldrsb, DUMMY, [PF_SRC, DUMMY]
+ PF add, PF_SRC, PF_SRC, #1
10:
.endm
@@ -2749,11 +2749,11 @@ generate_composite_function \
urshr v13.8h, v10.8h, #8
fetch_src_pixblock
raddhn v28.8b, v11.8h, v8.8h
- PF add PF_X, PF_X, #8
- PF tst PF_CTL, #0xF
- PF beq 10f
- PF add PF_X, PF_X, #8
- PF sub PF_CTL, PF_CTL, #1
+ PF add, PF_X, PF_X, #8
+ PF tst, PF_CTL, #0xF
+ PF beq, 10f
+ PF add, PF_X, PF_X, #8
+ PF sub, PF_CTL, PF_CTL, #1
10:
raddhn v29.8b, v12.8h, v9.8h
raddhn v30.8b, v13.8h, v10.8h
@@ -2761,16 +2761,16 @@ generate_composite_function \
umull v9.8h, v3.8b, v1.8b
umull v10.8h, v3.8b, v2.8b
st4 {v28.8b, v29.8b, v30.8b, v31.8b}, [DST_W], #32
- PF cmp PF_X, ORIG_W
- PF lsl DUMMY, PF_X, src_bpp_shift
- PF prfm PREFETCH_MODE, [PF_SRC, DUMMY]
- PF ble 10f
- PF sub PF_X, PF_X, ORIG_W
- PF subs PF_CTL, PF_CTL, #0x10
- PF ble 10f
- PF lsl DUMMY, SRC_STRIDE, #src_bpp_shift
- PF ldrsb DUMMY, [PF_SRC, DUMMY]
- PF add PF_SRC, PF_SRC, #1
+ PF cmp, PF_X, ORIG_W
+ PF lsl, DUMMY, PF_X, src_bpp_shift
+ PF prfm, PREFETCH_MODE, [PF_SRC, DUMMY]
+ PF ble, 10f
+ PF sub, PF_X, PF_X, ORIG_W
+ PF subs, PF_CTL, PF_CTL, #0x10
+ PF ble, 10f
+ PF lsl, DUMMY, SRC_STRIDE, #src_bpp_shift
+ PF ldrsb, DUMMY, [PF_SRC, DUMMY]
+ PF add, PF_SRC, PF_SRC, #1
10:
.endm
@@ -3131,53 +3131,53 @@ generate_composite_function_nearest_scanline \
asr TMP1, X, #16
add X, X, UX
add TMP1, TOP, TMP1, lsl #2
- ld1 {&reg1&.2s}, [TMP1], STRIDE
- ld1 {&reg2&.2s}, [TMP1]
+ ld1 {\()\reg1\().2s}, [TMP1], STRIDE
+ ld1 {\()\reg2\().2s}, [TMP1]
.endm
.macro bilinear_load_0565 reg1, reg2, tmp
asr TMP1, X, #16
add X, X, UX
add TMP1, TOP, TMP1, lsl #1
- ld1 {&reg2&.s}[0], [TMP1], STRIDE
- ld1 {&reg2&.s}[1], [TMP1]
- convert_four_0565_to_x888_packed reg2, reg1, reg2, tmp
+ ld1 {\()\reg2\().s}[0], [TMP1], STRIDE
+ ld1 {\()\reg2\().s}[1], [TMP1]
+ convert_four_0565_to_x888_packed \reg2, \reg1, \reg2, \tmp
.endm
.macro bilinear_load_and_vertical_interpolate_two_8888 \
acc1, acc2, reg1, reg2, reg3, reg4, tmp1, tmp2
- bilinear_load_8888 reg1, reg2, tmp1
- umull &acc1&.8h, &reg1&.8b, v28.8b
- umlal &acc1&.8h, &reg2&.8b, v29.8b
- bilinear_load_8888 reg3, reg4, tmp2
- umull &acc2&.8h, &reg3&.8b, v28.8b
- umlal &acc2&.8h, &reg4&.8b, v29.8b
+ bilinear_load_8888 \reg1, \reg2, \tmp1
+ umull \()\acc1\().8h, \()\reg1\().8b, v28.8b
+ umlal \()\acc1\().8h, \()\reg2\().8b, v29.8b
+ bilinear_load_8888 \reg3, \reg4, \tmp2
+ umull \()\acc2\().8h, \()\reg3\().8b, v28.8b
+ umlal \()\acc2\().8h, \()\reg4\().8b, v29.8b
.endm
.macro bilinear_load_and_vertical_interpolate_four_8888 \
- xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi \
+ xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi, \
yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi
bilinear_load_and_vertical_interpolate_two_8888 \
- xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi
+ \xacc1, \xacc2, \xreg1, \xreg2, \xreg3, \xreg4, \xacc2lo, \xacc2hi
bilinear_load_and_vertical_interpolate_two_8888 \
- yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi
+ \yacc1, \yacc2, \yreg1, \yreg2, \yreg3, \yreg4, \yacc2lo, \yacc2hi
.endm
.macro vzip reg1, reg2
umov TMP4, v31.d[0]
- zip1 v31.8b, reg1, reg2
- zip2 reg2, reg1, reg2
- mov reg1, v31.8b
+ zip1 v31.8b, \reg1, \reg2
+ zip2 \reg2, \reg1, \reg2
+ mov \reg1, v31.8b
mov v31.d[0], TMP4
.endm
.macro vuzp reg1, reg2
umov TMP4, v31.d[0]
- uzp1 v31.8b, reg1, reg2
- uzp2 reg2, reg1, reg2
- mov reg1, v31.8b
+ uzp1 v31.8b, \reg1, \reg2
+ uzp2 \reg2, \reg1, \reg2
+ mov \reg1, v31.8b
mov v31.d[0], TMP4
.endm
@@ -3189,23 +3189,23 @@ generate_composite_function_nearest_scanline \
asr TMP2, X, #16
add X, X, UX
add TMP2, TOP, TMP2, lsl #1
- ld1 {&acc2&.s}[0], [TMP1], STRIDE
- ld1 {&acc2&.s}[2], [TMP2], STRIDE
- ld1 {&acc2&.s}[1], [TMP1]
- ld1 {&acc2&.s}[3], [TMP2]
- convert_0565_to_x888 acc2, reg3, reg2, reg1
- vzip &reg1&.8b, &reg3&.8b
- vzip &reg2&.8b, &reg4&.8b
- vzip &reg3&.8b, &reg4&.8b
- vzip &reg1&.8b, &reg2&.8b
- umull &acc1&.8h, &reg1&.8b, v28.8b
- umlal &acc1&.8h, &reg2&.8b, v29.8b
- umull &acc2&.8h, &reg3&.8b, v28.8b
- umlal &acc2&.8h, &reg4&.8b, v29.8b
+ ld1 {\()\acc2\().s}[0], [TMP1], STRIDE
+ ld1 {\()\acc2\().s}[2], [TMP2], STRIDE
+ ld1 {\()\acc2\().s}[1], [TMP1]
+ ld1 {\()\acc2\().s}[3], [TMP2]
+ convert_0565_to_x888 \acc2, \reg3, \reg2, \reg1
+ vzip \()\reg1\().8b, \()\reg3\().8b
+ vzip \()\reg2\().8b, \()\reg4\().8b
+ vzip \()\reg3\().8b, \()\reg4\().8b
+ vzip \()\reg1\().8b, \()\reg2\().8b
+ umull \()\acc1\().8h, \()\reg1\().8b, v28.8b
+ umlal \()\acc1\().8h, \()\reg2\().8b, v29.8b
+ umull \()\acc2\().8h, \()\reg3\().8b, v28.8b
+ umlal \()\acc2\().8h, \()\reg4\().8b, v29.8b
.endm
.macro bilinear_load_and_vertical_interpolate_four_0565 \
- xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi \
+ xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi, \
yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi
asr TMP1, X, #16
add X, X, UX
@@ -3213,49 +3213,49 @@ generate_composite_function_nearest_scanline \
asr TMP2, X, #16
add X, X, UX
add TMP2, TOP, TMP2, lsl #1
- ld1 {&xacc2&.s}[0], [TMP1], STRIDE
- ld1 {&xacc2&.s}[2], [TMP2], STRIDE
- ld1 {&xacc2&.s}[1], [TMP1]
- ld1 {&xacc2&.s}[3], [TMP2]
- convert_0565_to_x888 xacc2, xreg3, xreg2, xreg1
+ ld1 {\()\xacc2\().s}[0], [TMP1], STRIDE
+ ld1 {\()\xacc2\().s}[2], [TMP2], STRIDE
+ ld1 {\()\xacc2\().s}[1], [TMP1]
+ ld1 {\()\xacc2\().s}[3], [TMP2]
+ convert_0565_to_x888 \xacc2, \xreg3, \xreg2, \xreg1
asr TMP1, X, #16
add X, X, UX
add TMP1, TOP, TMP1, lsl #1
asr TMP2, X, #16
add X, X, UX
add TMP2, TOP, TMP2, lsl #1
- ld1 {&yacc2&.s}[0], [TMP1], STRIDE
- vzip &xreg1&.8b, &xreg3&.8b
- ld1 {&yacc2&.s}[2], [TMP2], STRIDE
- vzip &xreg2&.8b, &xreg4&.8b
- ld1 {&yacc2&.s}[1], [TMP1]
- vzip &xreg3&.8b, &xreg4&.8b
- ld1 {&yacc2&.s}[3], [TMP2]
- vzip &xreg1&.8b, &xreg2&.8b
- convert_0565_to_x888 yacc2, yreg3, yreg2, yreg1
- umull &xacc1&.8h, &xreg1&.8b, v28.8b
- vzip &yreg1&.8b, &yreg3&.8b
- umlal &xacc1&.8h, &xreg2&.8b, v29.8b
- vzip &yreg2&.8b, &yreg4&.8b
- umull &xacc2&.8h, &xreg3&.8b, v28.8b
- vzip &yreg3&.8b, &yreg4&.8b
- umlal &xacc2&.8h, &xreg4&.8b, v29.8b
- vzip &yreg1&.8b, &yreg2&.8b
- umull &yacc1&.8h, &yreg1&.8b, v28.8b
- umlal &yacc1&.8h, &yreg2&.8b, v29.8b
- umull &yacc2&.8h, &yreg3&.8b, v28.8b
- umlal &yacc2&.8h, &yreg4&.8b, v29.8b
+ ld1 {\()\yacc2\().s}[0], [TMP1], STRIDE
+ vzip \()\xreg1\().8b, \()\xreg3\().8b
+ ld1 {\()\yacc2\().s}[2], [TMP2], STRIDE
+ vzip \()\xreg2\().8b, \()\xreg4\().8b
+ ld1 {\()\yacc2\().s}[1], [TMP1]
+ vzip \()\xreg3\().8b, \()\xreg4\().8b
+ ld1 {\()\yacc2\().s}[3], [TMP2]
+ vzip \()\xreg1\().8b, \()\xreg2\().8b
+ convert_0565_to_x888 \yacc2, \yreg3, \yreg2, \yreg1
+ umull \()\xacc1\().8h, \()\xreg1\().8b, v28.8b
+ vzip \()\yreg1\().8b, \()\yreg3\().8b
+ umlal \()\xacc1\().8h, \()\xreg2\().8b, v29.8b
+ vzip \()\yreg2\().8b, \()\yreg4\().8b
+ umull \()\xacc2\().8h, \()\xreg3\().8b, v28.8b
+ vzip \()\yreg3\().8b, \()\yreg4\().8b
+ umlal \()\xacc2\().8h, \()\xreg4\().8b, v29.8b
+ vzip \()\yreg1\().8b, \()\yreg2\().8b
+ umull \()\yacc1\().8h, \()\yreg1\().8b, v28.8b
+ umlal \()\yacc1\().8h, \()\yreg2\().8b, v29.8b
+ umull \()\yacc2\().8h, \()\yreg3\().8b, v28.8b
+ umlal \()\yacc2\().8h, \()\yreg4\().8b, v29.8b
.endm
.macro bilinear_store_8888 numpix, tmp1, tmp2
-.if numpix == 4
+.if \numpix == 4
st1 {v0.2s, v1.2s}, [OUT], #16
-.elseif numpix == 2
+.elseif \numpix == 2
st1 {v0.2s}, [OUT], #8
-.elseif numpix == 1
+.elseif \numpix == 1
st1 {v0.s}[0], [OUT], #4
.else
- .error bilinear_store_8888 numpix is unsupported
+ .error bilinear_store_8888 \numpix is unsupported
.endif
.endm
@@ -3264,20 +3264,20 @@ generate_composite_function_nearest_scanline \
vuzp v2.8b, v3.8b
vuzp v1.8b, v3.8b
vuzp v0.8b, v2.8b
- convert_8888_to_0565 v2, v1, v0, v1, tmp1, tmp2
-.if numpix == 4
+ convert_8888_to_0565 v2, v1, v0, v1, \tmp1, \tmp2
+.if \numpix == 4
st1 {v1.4h}, [OUT], #8
-.elseif numpix == 2
+.elseif \numpix == 2
st1 {v1.s}[0], [OUT], #4
-.elseif numpix == 1
+.elseif \numpix == 1
st1 {v1.h}[0], [OUT], #2
.else
- .error bilinear_store_0565 numpix is unsupported
+ .error bilinear_store_0565 \numpix is unsupported
.endif
.endm
.macro bilinear_interpolate_last_pixel src_fmt, dst_fmt
- bilinear_load_&src_fmt v0, v1, v2
+ bilinear_load_\()\src_fmt v0, v1, v2
umull v2.8h, v0.8b, v28.8b
umlal v2.8h, v1.8b, v29.8b
/* 5 cycles bubble */
@@ -3289,11 +3289,11 @@ generate_composite_function_nearest_scanline \
/* 3 cycles bubble */
xtn v0.8b, v0.8h
/* 1 cycle bubble */
- bilinear_store_&dst_fmt 1, v3, v4
+ bilinear_store_\()\dst_fmt 1, v3, v4
.endm
.macro bilinear_interpolate_two_pixels src_fmt, dst_fmt
- bilinear_load_and_vertical_interpolate_two_&src_fmt \
+ bilinear_load_and_vertical_interpolate_two_\()\src_fmt \
v1, v11, v2, v3, v20, v21, v22, v23
ushll v0.4s, v1.4h, #BILINEAR_INTERPOLATION_BITS
umlsl v0.4s, v1.4h, v15.h[0]
@@ -3306,12 +3306,12 @@ generate_composite_function_nearest_scanline \
ushr v15.8h, v12.8h, #(16 - BILINEAR_INTERPOLATION_BITS)
add v12.8h, v12.8h, v13.8h
xtn v0.8b, v0.8h
- bilinear_store_&dst_fmt 2, v3, v4
+ bilinear_store_\()\dst_fmt 2, v3, v4
.endm
.macro bilinear_interpolate_four_pixels src_fmt, dst_fmt
- bilinear_load_and_vertical_interpolate_four_&src_fmt \
- v1, v11, v14, v20, v16, v17, v22, v23 \
+ bilinear_load_and_vertical_interpolate_four_\()\src_fmt \
+ v1, v11, v14, v20, v16, v17, v22, v23, \
v3, v9, v24, v25, v26, v27, v18, v19
prfm PREFETCH_MODE, [TMP1, PF_OFFS]
sub TMP1, TMP1, STRIDE
@@ -3338,54 +3338,54 @@ generate_composite_function_nearest_scanline \
xtn v0.8b, v0.8h
xtn v1.8b, v2.8h
add v12.8h, v12.8h, v13.8h
- bilinear_store_&dst_fmt 4, v3, v4
+ bilinear_store_\()\dst_fmt 4, v3, v4
.endm
.macro bilinear_interpolate_four_pixels_head src_fmt, dst_fmt
-.ifdef have_bilinear_interpolate_four_pixels_&src_fmt&_&dst_fmt
- bilinear_interpolate_four_pixels_&src_fmt&_&dst_fmt&_head
+.ifdef have_bilinear_interpolate_four_pixels_\()\src_fmt\()_\()\dst_fmt
+ bilinear_interpolate_four_pixels_\()\src_fmt\()_\()\dst_fmt\()_head
.else
- bilinear_interpolate_four_pixels src_fmt, dst_fmt
+ bilinear_interpolate_four_pixels \src_fmt, \dst_fmt
.endif
.endm
.macro bilinear_interpolate_four_pixels_tail src_fmt, dst_fmt
-.ifdef have_bilinear_interpolate_four_pixels_&src_fmt&_&dst_fmt
- bilinear_interpolate_four_pixels_&src_fmt&_&dst_fmt&_tail
+.ifdef have_bilinear_interpolate_four_pixels_\()\src_fmt\()_\()\dst_fmt
+ bilinear_interpolate_four_pixels_\()\src_fmt\()_\()\dst_fmt\()_tail
.endif
.endm
.macro bilinear_interpolate_four_pixels_tail_head src_fmt, dst_fmt
-.ifdef have_bilinear_interpolate_four_pixels_&src_fmt&_&dst_fmt
- bilinear_interpolate_four_pixels_&src_fmt&_&dst_fmt&_tail_head
+.ifdef have_bilinear_interpolate_four_pixels_\()\src_fmt\()_\()\dst_fmt
+ bilinear_interpolate_four_pixels_\()\src_fmt\()_\()\dst_fmt\()_tail_head
.else
- bilinear_interpolate_four_pixels src_fmt, dst_fmt
+ bilinear_interpolate_four_pixels \src_fmt, \dst_fmt
.endif
.endm
.macro bilinear_interpolate_eight_pixels_head src_fmt, dst_fmt
-.ifdef have_bilinear_interpolate_eight_pixels_&src_fmt&_&dst_fmt
- bilinear_interpolate_eight_pixels_&src_fmt&_&dst_fmt&_head
+.ifdef have_bilinear_interpolate_eight_pixels_\()\src_fmt\()_\()\dst_fmt
+ bilinear_interpolate_eight_pixels_\()\src_fmt\()_\()\dst_fmt\()_head
.else
- bilinear_interpolate_four_pixels_head src_fmt, dst_fmt
- bilinear_interpolate_four_pixels_tail_head src_fmt, dst_fmt
+ bilinear_interpolate_four_pixels_head \src_fmt, \dst_fmt
+ bilinear_interpolate_four_pixels_tail_head \src_fmt, \dst_fmt
.endif
.endm
.macro bilinear_interpolate_eight_pixels_tail src_fmt, dst_fmt
-.ifdef have_bilinear_interpolate_eight_pixels_&src_fmt&_&dst_fmt
- bilinear_interpolate_eight_pixels_&src_fmt&_&dst_fmt&_tail
+.ifdef have_bilinear_interpolate_eight_pixels_\()\src_fmt\()_\()\dst_fmt
+ bilinear_interpolate_eight_pixels_\()\src_fmt\()_\()\dst_fmt\()_tail
.else
- bilinear_interpolate_four_pixels_tail src_fmt, dst_fmt
+ bilinear_interpolate_four_pixels_tail \src_fmt, \dst_fmt
.endif
.endm
.macro bilinear_interpolate_eight_pixels_tail_head src_fmt, dst_fmt
-.ifdef have_bilinear_interpolate_eight_pixels_&src_fmt&_&dst_fmt
- bilinear_interpolate_eight_pixels_&src_fmt&_&dst_fmt&_tail_head
+.ifdef have_bilinear_interpolate_eight_pixels_\()\src_fmt\()_\()\dst_fmt
+ bilinear_interpolate_eight_pixels_\()\src_fmt\()_\()\dst_fmt\()_tail_head
.else
- bilinear_interpolate_four_pixels_tail_head src_fmt, dst_fmt
- bilinear_interpolate_four_pixels_tail_head src_fmt, dst_fmt
+ bilinear_interpolate_four_pixels_tail_head \src_fmt, \dst_fmt
+ bilinear_interpolate_four_pixels_tail_head \src_fmt, \dst_fmt
.endif
.endm
@@ -3410,7 +3410,7 @@ generate_composite_function_nearest_scanline \
src_bpp_shift, dst_bpp_shift, \
prefetch_distance, flags
-pixman_asm_function fname
+pixman_asm_function \fname
OUT .req x0
TOP .req x1
BOTTOM .req x2
@@ -3442,7 +3442,7 @@ pixman_asm_function fname
stp x10, x11, [x29, -96]
stp x12, x13, [x29, -112]
- mov PF_OFFS, #prefetch_distance
+ mov PF_OFFS, #\prefetch_distance
mul PF_OFFS, PF_OFFS, UX
subs STRIDE, BOTTOM, TOP
@@ -3463,11 +3463,11 @@ pixman_asm_function fname
/* ensure good destination alignment */
cmp WIDTH, #1
blt 100f
- tst OUT, #(1 << dst_bpp_shift)
+ tst OUT, #(1 << \dst_bpp_shift)
beq 100f
ushr v15.8h, v12.8h, #(16 - BILINEAR_INTERPOLATION_BITS)
add v12.8h, v12.8h, v13.8h
- bilinear_interpolate_last_pixel src_fmt, dst_fmt
+ bilinear_interpolate_last_pixel \src_fmt, \dst_fmt
sub WIDTH, WIDTH, #1
100:
add v13.8h, v13.8h, v13.8h
@@ -3476,62 +3476,62 @@ pixman_asm_function fname
cmp WIDTH, #2
blt 100f
- tst OUT, #(1 << (dst_bpp_shift + 1))
+ tst OUT, #(1 << (\dst_bpp_shift + 1))
beq 100f
- bilinear_interpolate_two_pixels src_fmt, dst_fmt
+ bilinear_interpolate_two_pixels \src_fmt, \dst_fmt
sub WIDTH, WIDTH, #2
100:
-.if ((flags) & BILINEAR_FLAG_UNROLL_8) != 0
+.if ((\flags) & BILINEAR_FLAG_UNROLL_8) != 0
/*********** 8 pixels per iteration *****************/
cmp WIDTH, #4
blt 100f
- tst OUT, #(1 << (dst_bpp_shift + 2))
+ tst OUT, #(1 << (\dst_bpp_shift + 2))
beq 100f
- bilinear_interpolate_four_pixels src_fmt, dst_fmt
+ bilinear_interpolate_four_pixels \src_fmt, \dst_fmt
sub WIDTH, WIDTH, #4
100:
subs WIDTH, WIDTH, #8
blt 100f
- asr PF_OFFS, PF_OFFS, #(16 - src_bpp_shift)
- bilinear_interpolate_eight_pixels_head src_fmt, dst_fmt
+ asr PF_OFFS, PF_OFFS, #(16 - \src_bpp_shift)
+ bilinear_interpolate_eight_pixels_head \src_fmt, \dst_fmt
subs WIDTH, WIDTH, #8
blt 500f
1000:
- bilinear_interpolate_eight_pixels_tail_head src_fmt, dst_fmt
+ bilinear_interpolate_eight_pixels_tail_head \src_fmt, \dst_fmt
subs WIDTH, WIDTH, #8
bge 1000b
500:
- bilinear_interpolate_eight_pixels_tail src_fmt, dst_fmt
+ bilinear_interpolate_eight_pixels_tail \src_fmt, \dst_fmt
100:
tst WIDTH, #4
beq 200f
- bilinear_interpolate_four_pixels src_fmt, dst_fmt
+ bilinear_interpolate_four_pixels \src_fmt, \dst_fmt
200:
.else
/*********** 4 pixels per iteration *****************/
subs WIDTH, WIDTH, #4
blt 100f
- asr PF_OFFS, PF_OFFS, #(16 - src_bpp_shift)
- bilinear_interpolate_four_pixels_head src_fmt, dst_fmt
+ asr PF_OFFS, PF_OFFS, #(16 - \src_bpp_shift)
+ bilinear_interpolate_four_pixels_head \src_fmt, \dst_fmt
subs WIDTH, WIDTH, #4
blt 500f
1000:
- bilinear_interpolate_four_pixels_tail_head src_fmt, dst_fmt
+ bilinear_interpolate_four_pixels_tail_head \src_fmt, \dst_fmt
subs WIDTH, WIDTH, #4
bge 1000b
500:
- bilinear_interpolate_four_pixels_tail src_fmt, dst_fmt
+ bilinear_interpolate_four_pixels_tail \src_fmt, \dst_fmt
100:
/****************************************************/
.endif
/* handle the remaining trailing pixels */
tst WIDTH, #2
beq 200f
- bilinear_interpolate_two_pixels src_fmt, dst_fmt
+ bilinear_interpolate_two_pixels \src_fmt, \dst_fmt
200:
tst WIDTH, #1
beq 300f
- bilinear_interpolate_last_pixel src_fmt, dst_fmt
+ bilinear_interpolate_last_pixel \src_fmt, \dst_fmt
300:
sub x29, x29, 64
ld1 {v8.8b, v9.8b, v10.8b, v11.8b}, [x29], #32
@@ -3556,7 +3556,7 @@ pixman_asm_function fname
.unreq TMP3
.unreq TMP4
.unreq STRIDE
-.endfunc
+pixman_end_asm_function
.endm
diff --git a/pixman/pixman-arma64-neon-asm.h b/pixman/pixman-arma64-neon-asm.h
index 5d93172..6aa6838 100644
--- a/pixman/pixman-arma64-neon-asm.h
+++ b/pixman/pixman-arma64-neon-asm.h
@@ -80,146 +80,146 @@
*/
.macro pixldst1 op, elem_size, reg1, mem_operand, abits
- op {v&reg1&.&elem_size}, [&mem_operand&], #8
+ \op {v\()\reg1\().\()\elem_size}, [\()\mem_operand\()], #8
.endm
.macro pixldst2 op, elem_size, reg1, reg2, mem_operand, abits
- op {v&reg1&.&elem_size, v&reg2&.&elem_size}, [&mem_operand&], #16
+ \op {v\()\reg1\().\()\elem_size, v\()\reg2\().\()\elem_size}, [\()\mem_operand\()], #16
.endm
.macro pixldst4 op, elem_size, reg1, reg2, reg3, reg4, mem_operand, abits
- op {v&reg1&.&elem_size, v&reg2&.&elem_size, v&reg3&.&elem_size, v&reg4&.&elem_size}, [&mem_operand&], #32
+ \op {v\()\reg1\().\()\elem_size, v\()\reg2\().\()\elem_size, v\()\reg3\().\()\elem_size, v\()\reg4\().\()\elem_size}, [\()\mem_operand\()], #32
.endm
.macro pixldst0 op, elem_size, reg1, idx, mem_operand, abits, bytes
- op {v&reg1&.&elem_size}[idx], [&mem_operand&], #&bytes&
+ \op {v\()\reg1\().\()\elem_size}[\idx], [\()\mem_operand\()], #\()\bytes\()
.endm
.macro pixldst3 op, elem_size, reg1, reg2, reg3, mem_operand
- op {v&reg1&.&elem_size, v&reg2&.&elem_size, v&reg3&.&elem_size}, [&mem_operand&], #24
+ \op {v\()\reg1\().\()\elem_size, v\()\reg2\().\()\elem_size, v\()\reg3\().\()\elem_size}, [\()\mem_operand\()], #24
.endm
.macro pixldst30 op, elem_size, reg1, reg2, reg3, idx, mem_operand
- op {v&reg1&.&elem_size, v&reg2&.&elem_size, v&reg3&.&elem_size}[idx], [&mem_operand&], #3
+ \op {v\()\reg1\().\()\elem_size, v\()\reg2\().\()\elem_size, v\()\reg3\().\()\elem_size}[\idx], [\()\mem_operand\()], #3
.endm
.macro pixldst numbytes, op, elem_size, basereg, mem_operand, abits
-.if numbytes == 32
- .if elem_size==32
- pixldst4 op, 2s, %(basereg+4), %(basereg+5), \
- %(basereg+6), %(basereg+7), mem_operand, abits
- .elseif elem_size==16
- pixldst4 op, 4h, %(basereg+4), %(basereg+5), \
- %(basereg+6), %(basereg+7), mem_operand, abits
+.if \numbytes == 32
+ .if \elem_size==32
+ pixldst4 \op, 2s, %(\basereg+4), %(\basereg+5), \
+ %(\basereg+6), %(\basereg+7), \mem_operand, \abits
+ .elseif \elem_size==16
+ pixldst4 \op, 4h, %(\basereg+4), %(\basereg+5), \
+ %(\basereg+6), %(\basereg+7), \mem_operand, \abits
.else
- pixldst4 op, 8b, %(basereg+4), %(basereg+5), \
- %(basereg+6), %(basereg+7), mem_operand, abits
+ pixldst4 \op, 8b, %(\basereg+4), %(\basereg+5), \
+ %(\basereg+6), %(\basereg+7), \mem_operand, \abits
.endif
-.elseif numbytes == 16
- .if elem_size==32
- pixldst2 op, 2s, %(basereg+2), %(basereg+3), mem_operand, abits
- .elseif elem_size==16
- pixldst2 op, 4h, %(basereg+2), %(basereg+3), mem_operand, abits
+.elseif \numbytes == 16
+ .if \elem_size==32
+ pixldst2 \op, 2s, %(\basereg+2), %(\basereg+3), \mem_operand, \abits
+ .elseif \elem_size==16
+ pixldst2 \op, 4h, %(\basereg+2), %(\basereg+3), \mem_operand, \abits
.else
- pixldst2 op, 8b, %(basereg+2), %(basereg+3), mem_operand, abits
+ pixldst2 \op, 8b, %(\basereg+2), %(\basereg+3), \mem_operand, \abits
.endif
-.elseif numbytes == 8
- .if elem_size==32
- pixldst1 op, 2s, %(basereg+1), mem_operand, abits
- .elseif elem_size==16
- pixldst1 op, 4h, %(basereg+1), mem_operand, abits
+.elseif \numbytes == 8
+ .if \elem_size==32
+ pixldst1 \op, 2s, %(\basereg+1), \mem_operand, \abits
+ .elseif \elem_size==16
+ pixldst1 \op, 4h, %(\basereg+1), \mem_operand, \abits
.else
- pixldst1 op, 8b, %(basereg+1), mem_operand, abits
+ pixldst1 \op, 8b, %(\basereg+1), \mem_operand, \abits
.endif
-.elseif numbytes == 4
- .if !RESPECT_STRICT_ALIGNMENT || (elem_size == 32)
- pixldst0 op, s, %(basereg+0), 1, mem_operand, abits, 4
- .elseif elem_size == 16
- pixldst0 op, h, %(basereg+0), 2, mem_operand, abits, 2
- pixldst0 op, h, %(basereg+0), 3, mem_operand, abits, 2
+.elseif \numbytes == 4
+ .if !RESPECT_STRICT_ALIGNMENT || (\elem_size == 32)
+ pixldst0 \op, s, %(\basereg+0), 1, \mem_operand, \abits, 4
+ .elseif \elem_size == 16
+ pixldst0 \op, h, %(\basereg+0), 2, \mem_operand, \abits, 2
+ pixldst0 \op, h, %(\basereg+0), 3, \mem_operand, \abits, 2
.else
- pixldst0 op, b, %(basereg+0), 4, mem_operand, abits, 1
- pixldst0 op, b, %(basereg+0), 5, mem_operand, abits, 1
- pixldst0 op, b, %(basereg+0), 6, mem_operand, abits, 1
- pixldst0 op, b, %(basereg+0), 7, mem_operand, abits, 1
+ pixldst0 \op, b, %(\basereg+0), 4, \mem_operand, \abits, 1
+ pixldst0 \op, b, %(\basereg+0), 5, \mem_operand, \abits, 1
+ pixldst0 \op, b, %(\basereg+0), 6, \mem_operand, \abits, 1
+ pixldst0 \op, b, %(\basereg+0), 7, \mem_operand, \abits, 1
.endif
-.elseif numbytes == 2
- .if !RESPECT_STRICT_ALIGNMENT || (elem_size == 16)
- pixldst0 op, h, %(basereg+0), 1, mem_operand, abits, 2
+.elseif \numbytes == 2
+ .if !RESPECT_STRICT_ALIGNMENT || (\elem_size == 16)
+ pixldst0 \op, h, %(\basereg+0), 1, \mem_operand, \abits, 2
.else
- pixldst0 op, b, %(basereg+0), 2, mem_operand, abits, 1
- pixldst0 op, b, %(basereg+0), 3, mem_operand, abits, 1
+ pixldst0 \op, b, %(\basereg+0), 2, \mem_operand, \abits, 1
+ pixldst0 \op, b, %(\basereg+0), 3, \mem_operand, \abits, 1
.endif
-.elseif numbytes == 1
- pixldst0 op, b, %(basereg+0), 1, mem_operand, abits, 1
+.elseif \numbytes == 1
+ pixldst0 \op, b, %(\basereg+0), 1, \mem_operand, \abits, 1
.else
- .error "unsupported size: numbytes"
+ .error "unsupported size: \numbytes"
.endif
.endm
.macro pixld numpix, bpp, basereg, mem_operand, abits=0
-.if bpp > 0
-.if (bpp == 32) && (numpix == 8) && (DEINTERLEAVE_32BPP_ENABLED != 0)
- pixldst4 ld4, 8b, %(basereg+4), %(basereg+5), \
- %(basereg+6), %(basereg+7), mem_operand, abits
-.elseif (bpp == 24) && (numpix == 8)
- pixldst3 ld3, 8b, %(basereg+3), %(basereg+4), %(basereg+5), mem_operand
-.elseif (bpp == 24) && (numpix == 4)
- pixldst30 ld3, b, %(basereg+0), %(basereg+1), %(basereg+2), 4, mem_operand
- pixldst30 ld3, b, %(basereg+0), %(basereg+1), %(basereg+2), 5, mem_operand
- pixldst30 ld3, b, %(basereg+0), %(basereg+1), %(basereg+2), 6, mem_operand
- pixldst30 ld3, b, %(basereg+0), %(basereg+1), %(basereg+2), 7, mem_operand
-.elseif (bpp == 24) && (numpix == 2)
- pixldst30 ld3, b, %(basereg+0), %(basereg+1), %(basereg+2), 2, mem_operand
- pixldst30 ld3, b, %(basereg+0), %(basereg+1), %(basereg+2), 3, mem_operand
-.elseif (bpp == 24) && (numpix == 1)
- pixldst30 ld3, b, %(basereg+0), %(basereg+1), %(basereg+2), 1, mem_operand
+.if \bpp > 0
+.if (\bpp == 32) && (\numpix == 8) && (DEINTERLEAVE_32BPP_ENABLED != 0)
+ pixldst4 ld4, 8b, %(\basereg+4), %(\basereg+5), \
+ %(\basereg+6), %(\basereg+7), \mem_operand, \abits
+.elseif (\bpp == 24) && (\numpix == 8)
+ pixldst3 ld3, 8b, %(\basereg+3), %(\basereg+4), %(\basereg+5), \mem_operand
+.elseif (\bpp == 24) && (\numpix == 4)
+ pixldst30 ld3, b, %(\basereg+0), %(\basereg+1), %(\basereg+2), 4, \mem_operand
+ pixldst30 ld3, b, %(\basereg+0), %(\basereg+1), %(\basereg+2), 5, \mem_operand
+ pixldst30 ld3, b, %(\basereg+0), %(\basereg+1), %(\basereg+2), 6, \mem_operand
+ pixldst30 ld3, b, %(\basereg+0), %(\basereg+1), %(\basereg+2), 7, \mem_operand
+.elseif (\bpp == 24) && (\numpix == 2)
+ pixldst30 ld3, b, %(\basereg+0), %(\basereg+1), %(\basereg+2), 2, \mem_operand
+ pixldst30 ld3, b, %(\basereg+0), %(\basereg+1), %(\basereg+2), 3, \mem_operand
+.elseif (\bpp == 24) && (\numpix == 1)
+ pixldst30 ld3, b, %(\basereg+0), %(\basereg+1), %(\basereg+2), 1, \mem_operand
.else
- pixldst %(numpix * bpp / 8), ld1, %(bpp), basereg, mem_operand, abits
+ pixldst %(\numpix * \bpp / 8), ld1, %(\bpp), \basereg, \mem_operand, \abits
.endif
.endif
.endm
.macro pixst numpix, bpp, basereg, mem_operand, abits=0
-.if bpp > 0
-.if (bpp == 32) && (numpix == 8) && (DEINTERLEAVE_32BPP_ENABLED != 0)
- pixldst4 st4, 8b, %(basereg+4), %(basereg+5), \
- %(basereg+6), %(basereg+7), mem_operand, abits
-.elseif (bpp == 24) && (numpix == 8)
- pixldst3 st3, 8b, %(basereg+3), %(basereg+4), %(basereg+5), mem_operand
-.elseif (bpp == 24) && (numpix == 4)
- pixldst30 st3, b, %(basereg+0), %(basereg+1), %(basereg+2), 4, mem_operand
- pixldst30 st3, b, %(basereg+0), %(basereg+1), %(basereg+2), 5, mem_operand
- pixldst30 st3, b, %(basereg+0), %(basereg+1), %(basereg+2), 6, mem_operand
- pixldst30 st3, b, %(basereg+0), %(basereg+1), %(basereg+2), 7, mem_operand
-.elseif (bpp == 24) && (numpix == 2)
- pixldst30 st3, b, %(basereg+0), %(basereg+1), %(basereg+2), 2, mem_operand
- pixldst30 st3, b, %(basereg+0), %(basereg+1), %(basereg+2), 3, mem_operand
-.elseif (bpp == 24) && (numpix == 1)
- pixldst30 st3, b, %(basereg+0), %(basereg+1), %(basereg+2), 1, mem_operand
-.elseif numpix * bpp == 32 && abits == 32
- pixldst 4, st1, 32, basereg, mem_operand, abits
-.elseif numpix * bpp == 16 && abits == 16
- pixldst 2, st1, 16, basereg, mem_operand, abits
+.if \bpp > 0
+.if (\bpp == 32) && (\numpix == 8) && (DEINTERLEAVE_32BPP_ENABLED != 0)
+ pixldst4 st4, 8b, %(\basereg+4), %(\basereg+5), \
+ %(\basereg+6), %(\basereg+7), \mem_operand, \abits
+.elseif (\bpp == 24) && (\numpix == 8)
+ pixldst3 st3, 8b, %(\basereg+3), %(\basereg+4), %(\basereg+5), \mem_operand
+.elseif (\bpp == 24) && (\numpix == 4)
+ pixldst30 st3, b, %(\basereg+0), %(\basereg+1), %(\basereg+2), 4, \mem_operand
+ pixldst30 st3, b, %(\basereg+0), %(\basereg+1), %(\basereg+2), 5, \mem_operand
+ pixldst30 st3, b, %(\basereg+0), %(\basereg+1), %(\basereg+2), 6, \mem_operand
+ pixldst30 st3, b, %(\basereg+0), %(\basereg+1), %(\basereg+2), 7, \mem_operand
+.elseif (\bpp == 24) && (\numpix == 2)
+ pixldst30 st3, b, %(\basereg+0), %(\basereg+1), %(\basereg+2), 2, \mem_operand
+ pixldst30 st3, b, %(\basereg+0), %(\basereg+1), %(\basereg+2), 3, \mem_operand
+.elseif (\bpp == 24) && (\numpix == 1)
+ pixldst30 st3, b, %(\basereg+0), %(\basereg+1), %(\basereg+2), 1, \mem_operand
+.elseif \numpix * \bpp == 32 && \abits == 32
+ pixldst 4, st1, 32, \basereg, \mem_operand, \abits
+.elseif \numpix * \bpp == 16 && \abits == 16
+ pixldst 2, st1, 16, \basereg, \mem_operand, \abits
.else
- pixldst %(numpix * bpp / 8), st1, %(bpp), basereg, mem_operand, abits
+ pixldst %(\numpix * \bpp / 8), st1, %(\bpp), \basereg, \mem_operand, \abits
.endif
.endif
.endm
.macro pixld_a numpix, bpp, basereg, mem_operand
-.if (bpp * numpix) <= 128
- pixld numpix, bpp, basereg, mem_operand, %(bpp * numpix)
+.if (\bpp * \numpix) <= 128
+ pixld \numpix, \bpp, \basereg, \mem_operand, %(\bpp * \numpix)
.else
- pixld numpix, bpp, basereg, mem_operand, 128
+ pixld \numpix, \bpp, \basereg, \mem_operand, 128
.endif
.endm
.macro pixst_a numpix, bpp, basereg, mem_operand
-.if (bpp * numpix) <= 128
- pixst numpix, bpp, basereg, mem_operand, %(bpp * numpix)
+.if (\bpp * \numpix) <= 128
+ pixst \numpix, \bpp, \basereg, \mem_operand, %(\bpp * \numpix)
.else
- pixst numpix, bpp, basereg, mem_operand, 128
+ pixst \numpix, \bpp, \basereg, \mem_operand, 128
.endif
.endm
@@ -228,96 +228,96 @@
* aliases to be defined)
*/
.macro pixld1_s elem_size, reg1, mem_operand
-.if elem_size == 16
+.if \elem_size == 16
asr TMP1, VX, #16
adds VX, VX, UNIT_X
bmi 55f
5: subs VX, VX, SRC_WIDTH_FIXED
bpl 5b
55:
- add TMP1, mem_operand, TMP1, lsl #1
+ add TMP1, \mem_operand, TMP1, lsl #1
asr TMP2, VX, #16
adds VX, VX, UNIT_X
bmi 55f
5: subs VX, VX, SRC_WIDTH_FIXED
bpl 5b
55:
- add TMP2, mem_operand, TMP2, lsl #1
- ld1 {v&reg1&.h}[0], [TMP1]
+ add TMP2, \mem_operand, TMP2, lsl #1
+ ld1 {v\()\reg1\().h}[0], [TMP1]
asr TMP1, VX, #16
adds VX, VX, UNIT_X
bmi 55f
5: subs VX, VX, SRC_WIDTH_FIXED
bpl 5b
55:
- add TMP1, mem_operand, TMP1, lsl #1
- ld1 {v&reg1&.h}[1], [TMP2]
+ add TMP1, \mem_operand, TMP1, lsl #1
+ ld1 {v\()\reg1\().h}[1], [TMP2]
asr TMP2, VX, #16
adds VX, VX, UNIT_X
bmi 55f
5: subs VX, VX, SRC_WIDTH_FIXED
bpl 5b
55:
- add TMP2, mem_operand, TMP2, lsl #1
- ld1 {v&reg1&.h}[2], [TMP1]
- ld1 {v&reg1&.h}[3], [TMP2]
-.elseif elem_size == 32
+ add TMP2, \mem_operand, TMP2, lsl #1
+ ld1 {v\()\reg1\().h}[2], [TMP1]
+ ld1 {v\()\reg1\().h}[3], [TMP2]
+.elseif \elem_size == 32
asr TMP1, VX, #16
adds VX, VX, UNIT_X
bmi 55f
5: subs VX, VX, SRC_WIDTH_FIXED
bpl 5b
55:
- add TMP1, mem_operand, TMP1, lsl #2
+ add TMP1, \mem_operand, TMP1, lsl #2
asr TMP2, VX, #16
adds VX, VX, UNIT_X
bmi 55f
5: subs VX, VX, SRC_WIDTH_FIXED
bpl 5b
55:
- add TMP2, mem_operand, TMP2, lsl #2
- ld1 {v&reg1&.s}[0], [TMP1]
- ld1 {v&reg1&.s}[1], [TMP2]
+ add TMP2, \mem_operand, TMP2, lsl #2
+ ld1 {v\()\reg1\().s}[0], [TMP1]
+ ld1 {v\()\reg1\().s}[1], [TMP2]
.else
.error "unsupported"
.endif
.endm
.macro pixld2_s elem_size, reg1, reg2, mem_operand
-.if 0 /* elem_size == 32 */
+.if 0 /* \elem_size == 32 */
mov TMP1, VX, asr #16
add VX, VX, UNIT_X, asl #1
- add TMP1, mem_operand, TMP1, asl #2
+ add TMP1, \mem_operand, TMP1, asl #2
mov TMP2, VX, asr #16
sub VX, VX, UNIT_X
- add TMP2, mem_operand, TMP2, asl #2
- ld1 {v&reg1&.s}[0], [TMP1]
+ add TMP2, \mem_operand, TMP2, asl #2
+ ld1 {v\()\reg1\().s}[0], [TMP1]
mov TMP1, VX, asr #16
add VX, VX, UNIT_X, asl #1
- add TMP1, mem_operand, TMP1, asl #2
- ld1 {v&reg2&.s}[0], [TMP2, :32]
+ add TMP1, \mem_operand, TMP1, asl #2
+ ld1 {v\()\reg2\().s}[0], [TMP2, :32]
mov TMP2, VX, asr #16
add VX, VX, UNIT_X
- add TMP2, mem_operand, TMP2, asl #2
- ld1 {v&reg1&.s}[1], [TMP1]
- ld1 {v&reg2&.s}[1], [TMP2]
+ add TMP2, \mem_operand, TMP2, asl #2
+ ld1 {v\()\reg1\().s}[1], [TMP1]
+ ld1 {v\()\reg2\().s}[1], [TMP2]
.else
- pixld1_s elem_size, reg1, mem_operand
- pixld1_s elem_size, reg2, mem_operand
+ pixld1_s \elem_size, \reg1, \mem_operand
+ pixld1_s \elem_size, \reg2, \mem_operand
.endif
.endm
.macro pixld0_s elem_size, reg1, idx, mem_operand
-.if elem_size == 16
+.if \elem_size == 16
asr TMP1, VX, #16
adds VX, VX, UNIT_X
bmi 55f
5: subs VX, VX, SRC_WIDTH_FIXED
bpl 5b
55:
- add TMP1, mem_operand, TMP1, lsl #1
- ld1 {v&reg1&.h}[idx], [TMP1]
-.elseif elem_size == 32
+ add TMP1, \mem_operand, TMP1, lsl #1
+ ld1 {v\()\reg1\().h}[\idx], [TMP1]
+.elseif \elem_size == 32
asr DUMMY, VX, #16
mov TMP1, DUMMY
adds VX, VX, UNIT_X
@@ -325,85 +325,85 @@
5: subs VX, VX, SRC_WIDTH_FIXED
bpl 5b
55:
- add TMP1, mem_operand, TMP1, lsl #2
- ld1 {v&reg1&.s}[idx], [TMP1]
+ add TMP1, \mem_operand, TMP1, lsl #2
+ ld1 {v\()\reg1\().s}[\idx], [TMP1]
.endif
.endm
.macro pixld_s_internal numbytes, elem_size, basereg, mem_operand
-.if numbytes == 32
- pixld2_s elem_size, %(basereg+4), %(basereg+5), mem_operand
- pixld2_s elem_size, %(basereg+6), %(basereg+7), mem_operand
- pixdeinterleave elem_size, %(basereg+4)
-.elseif numbytes == 16
- pixld2_s elem_size, %(basereg+2), %(basereg+3), mem_operand
-.elseif numbytes == 8
- pixld1_s elem_size, %(basereg+1), mem_operand
-.elseif numbytes == 4
- .if elem_size == 32
- pixld0_s elem_size, %(basereg+0), 1, mem_operand
- .elseif elem_size == 16
- pixld0_s elem_size, %(basereg+0), 2, mem_operand
- pixld0_s elem_size, %(basereg+0), 3, mem_operand
+.if \numbytes == 32
+ pixld2_s \elem_size, %(\basereg+4), %(\basereg+5), \mem_operand
+ pixld2_s \elem_size, %(\basereg+6), %(\basereg+7), \mem_operand
+ pixdeinterleave \elem_size, %(\basereg+4)
+.elseif \numbytes == 16
+ pixld2_s \elem_size, %(\basereg+2), %(\basereg+3), \mem_operand
+.elseif \numbytes == 8
+ pixld1_s \elem_size, %(\basereg+1), \mem_operand
+.elseif \numbytes == 4
+ .if \elem_size == 32
+ pixld0_s \elem_size, %(\basereg+0), 1, \mem_operand
+ .elseif \elem_size == 16
+ pixld0_s \elem_size, %(\basereg+0), 2, \mem_operand
+ pixld0_s \elem_size, %(\basereg+0), 3, \mem_operand
.else
- pixld0_s elem_size, %(basereg+0), 4, mem_operand
- pixld0_s elem_size, %(basereg+0), 5, mem_operand
- pixld0_s elem_size, %(basereg+0), 6, mem_operand
- pixld0_s elem_size, %(basereg+0), 7, mem_operand
+ pixld0_s \elem_size, %(\basereg+0), 4, \mem_operand
+ pixld0_s \elem_size, %(\basereg+0), 5, \mem_operand
+ pixld0_s \elem_size, %(\basereg+0), 6, \mem_operand
+ pixld0_s \elem_size, %(\basereg+0), 7, \mem_operand
.endif
-.elseif numbytes == 2
- .if elem_size == 16
- pixld0_s elem_size, %(basereg+0), 1, mem_operand
+.elseif \numbytes == 2
+ .if \elem_size == 16
+ pixld0_s \elem_size, %(\basereg+0), 1, \mem_operand
.else
- pixld0_s elem_size, %(basereg+0), 2, mem_operand
- pixld0_s elem_size, %(basereg+0), 3, mem_operand
+ pixld0_s \elem_size, %(\basereg+0), 2, \mem_operand
+ pixld0_s \elem_size, %(\basereg+0), 3, \mem_operand
.endif
-.elseif numbytes == 1
- pixld0_s elem_size, %(basereg+0), 1, mem_operand
+.elseif \numbytes == 1
+ pixld0_s \elem_size, %(\basereg+0), 1, \mem_operand
.else
- .error "unsupported size: numbytes"
+ .error "unsupported size: \numbytes"
.endif
.endm
.macro pixld_s numpix, bpp, basereg, mem_operand
-.if bpp > 0
- pixld_s_internal %(numpix * bpp / 8), %(bpp), basereg, mem_operand
+.if \bpp > 0
+ pixld_s_internal %(\numpix * \bpp / 8), %(\bpp), \basereg, \mem_operand
.endif
.endm
.macro vuzp8 reg1, reg2
umov DUMMY, v16.d[0]
- uzp1 v16.8b, v&reg1&.8b, v&reg2&.8b
- uzp2 v&reg2&.8b, v&reg1&.8b, v&reg2&.8b
- mov v&reg1&.8b, v16.8b
+ uzp1 v16.8b, v\()\reg1\().8b, v\()\reg2\().8b
+ uzp2 v\()\reg2\().8b, v\()\reg1\().8b, v\()\reg2\().8b
+ mov v\()\reg1\().8b, v16.8b
mov v16.d[0], DUMMY
.endm
.macro vzip8 reg1, reg2
umov DUMMY, v16.d[0]
- zip1 v16.8b, v&reg1&.8b, v&reg2&.8b
- zip2 v&reg2&.8b, v&reg1&.8b, v&reg2&.8b
- mov v&reg1&.8b, v16.8b
+ zip1 v16.8b, v\()\reg1\().8b, v\()\reg2\().8b
+ zip2 v\()\reg2\().8b, v\()\reg1\().8b, v\()\reg2\().8b
+ mov v\()\reg1\().8b, v16.8b
mov v16.d[0], DUMMY
.endm
/* deinterleave B, G, R, A channels for eight 32bpp pixels in 4 registers */
.macro pixdeinterleave bpp, basereg
-.if (bpp == 32) && (DEINTERLEAVE_32BPP_ENABLED != 0)
- vuzp8 %(basereg+0), %(basereg+1)
- vuzp8 %(basereg+2), %(basereg+3)
- vuzp8 %(basereg+1), %(basereg+3)
- vuzp8 %(basereg+0), %(basereg+2)
+.if (\bpp == 32) && (DEINTERLEAVE_32BPP_ENABLED != 0)
+ vuzp8 %(\basereg+0), %(\basereg+1)
+ vuzp8 %(\basereg+2), %(\basereg+3)
+ vuzp8 %(\basereg+1), %(\basereg+3)
+ vuzp8 %(\basereg+0), %(\basereg+2)
.endif
.endm
/* interleave B, G, R, A channels for eight 32bpp pixels in 4 registers */
.macro pixinterleave bpp, basereg
-.if (bpp == 32) && (DEINTERLEAVE_32BPP_ENABLED != 0)
- vzip8 %(basereg+0), %(basereg+2)
- vzip8 %(basereg+1), %(basereg+3)
- vzip8 %(basereg+2), %(basereg+3)
- vzip8 %(basereg+0), %(basereg+1)
+.if (\bpp == 32) && (DEINTERLEAVE_32BPP_ENABLED != 0)
+ vzip8 %(\basereg+0), %(\basereg+2)
+ vzip8 %(\basereg+1), %(\basereg+3)
+ vzip8 %(\basereg+2), %(\basereg+3)
+ vzip8 %(\basereg+0), %(\basereg+1)
.endif
.endm
@@ -437,52 +437,52 @@
*/
.macro PF a, x:vararg
.if (PREFETCH_TYPE_CURRENT == PREFETCH_TYPE_ADVANCED)
- a x
+ \a \x
.endif
.endm
.macro cache_preload std_increment, boost_increment
.if (src_bpp_shift >= 0) || (dst_r_bpp != 0) || (mask_bpp_shift >= 0)
-.if std_increment != 0
- PF add PF_X, PF_X, #std_increment
+.if \std_increment != 0
+ PF add, PF_X, PF_X, #\std_increment
.endif
- PF tst PF_CTL, #0xF
- PF beq 71f
- PF add PF_X, PF_X, #boost_increment
- PF sub PF_CTL, PF_CTL, #1
+ PF tst, PF_CTL, #0xF
+ PF beq, 71f
+ PF add, PF_X, PF_X, #\boost_increment
+ PF sub, PF_CTL, PF_CTL, #1
71:
- PF cmp PF_X, ORIG_W
+ PF cmp, PF_X, ORIG_W
.if src_bpp_shift >= 0
- PF lsl DUMMY, PF_X, #src_bpp_shift
- PF prfm PREFETCH_MODE, [PF_SRC, DUMMY]
+ PF lsl, DUMMY, PF_X, #src_bpp_shift
+ PF prfm, PREFETCH_MODE, [PF_SRC, DUMMY]
.endif
.if dst_r_bpp != 0
- PF lsl DUMMY, PF_X, #dst_bpp_shift
- PF prfm PREFETCH_MODE, [PF_DST, DUMMY]
+ PF lsl, DUMMY, PF_X, #dst_bpp_shift
+ PF prfm, PREFETCH_MODE, [PF_DST, DUMMY]
.endif
.if mask_bpp_shift >= 0
- PF lsl DUMMY, PF_X, #mask_bpp_shift
- PF prfm PREFETCH_MODE, [PF_MASK, DUMMY]
+ PF lsl, DUMMY, PF_X, #mask_bpp_shift
+ PF prfm, PREFETCH_MODE, [PF_MASK, DUMMY]
.endif
- PF ble 71f
- PF sub PF_X, PF_X, ORIG_W
- PF subs PF_CTL, PF_CTL, #0x10
+ PF ble, 71f
+ PF sub, PF_X, PF_X, ORIG_W
+ PF subs, PF_CTL, PF_CTL, #0x10
71:
- PF ble 72f
+ PF ble, 72f
.if src_bpp_shift >= 0
- PF lsl DUMMY, SRC_STRIDE, #src_bpp_shift
- PF ldrsb DUMMY, [PF_SRC, DUMMY]
- PF add PF_SRC, PF_SRC, #1
+ PF lsl, DUMMY, SRC_STRIDE, #src_bpp_shift
+ PF ldrsb, DUMMY, [PF_SRC, DUMMY]
+ PF add, PF_SRC, PF_SRC, #1
.endif
.if dst_r_bpp != 0
- PF lsl DUMMY, DST_STRIDE, #dst_bpp_shift
- PF ldrsb DUMMY, [PF_DST, DUMMY]
- PF add PF_DST, PF_DST, #1
+ PF lsl, DUMMY, DST_STRIDE, #dst_bpp_shift
+ PF ldrsb, DUMMY, [PF_DST, DUMMY]
+ PF add, PF_DST, PF_DST, #1
.endif
.if mask_bpp_shift >= 0
- PF lsl DUMMY, MASK_STRIDE, #mask_bpp_shift
- PF ldrsb DUMMY, [PF_MASK, DUMMY]
- PF add PF_MASK, PF_MASK, #1
+ PF lsl, DUMMY, MASK_STRIDE, #mask_bpp_shift
+ PF ldrsb, DUMMY, [PF_MASK, DUMMY]
+ PF add, PF_MASK, PF_MASK, #1
.endif
72:
.endif
@@ -521,21 +521,21 @@
.if src_bpp > 0 || mask_bpp > 0 || dst_r_bpp > 0
.irp lowbit, 1, 2, 4, 8, 16
-local skip1
-.if (dst_w_bpp <= (lowbit * 8)) && ((lowbit * 8) < (pixblock_size * dst_w_bpp))
-.if lowbit < 16 /* we don't need more than 16-byte alignment */
- tst DST_R, #lowbit
+
+.if (dst_w_bpp <= (\lowbit * 8)) && ((\lowbit * 8) < (pixblock_size * dst_w_bpp))
+.if \lowbit < 16 /* we don't need more than 16-byte alignment */
+ tst DST_R, #\lowbit
beq 51f
.endif
- pixld_src (lowbit * 8 / dst_w_bpp), src_bpp, src_basereg, SRC
- pixld (lowbit * 8 / dst_w_bpp), mask_bpp, mask_basereg, MASK
+ pixld_src (\lowbit * 8 / dst_w_bpp), src_bpp, src_basereg, SRC
+ pixld (\lowbit * 8 / dst_w_bpp), mask_bpp, mask_basereg, MASK
.if dst_r_bpp > 0
- pixld_a (lowbit * 8 / dst_r_bpp), dst_r_bpp, dst_r_basereg, DST_R
+ pixld_a (\lowbit * 8 / dst_r_bpp), dst_r_bpp, dst_r_basereg, DST_R
.else
- add DST_R, DST_R, #lowbit
+ add DST_R, DST_R, #\lowbit
.endif
- PF add PF_X, PF_X, #(lowbit * 8 / dst_w_bpp)
- sub W, W, #(lowbit * 8 / dst_w_bpp)
+ PF add, PF_X, PF_X, #(\lowbit * 8 / dst_w_bpp)
+ sub W, W, #(\lowbit * 8 / dst_w_bpp)
51:
.endif
.endr
@@ -544,23 +544,23 @@ local skip1
pixdeinterleave mask_bpp, mask_basereg
pixdeinterleave dst_r_bpp, dst_r_basereg
- process_pixblock_head
+ \process_pixblock_head
cache_preload 0, pixblock_size
cache_preload_simple
- process_pixblock_tail
+ \process_pixblock_tail
pixinterleave dst_w_bpp, dst_w_basereg
.irp lowbit, 1, 2, 4, 8, 16
-.if (dst_w_bpp <= (lowbit * 8)) && ((lowbit * 8) < (pixblock_size * dst_w_bpp))
-.if lowbit < 16 /* we don't need more than 16-byte alignment */
- tst DST_W, #lowbit
+.if (dst_w_bpp <= (\lowbit * 8)) && ((\lowbit * 8) < (pixblock_size * dst_w_bpp))
+.if \lowbit < 16 /* we don't need more than 16-byte alignment */
+ tst DST_W, #\lowbit
beq 51f
.endif
.if src_bpp == 0 && mask_bpp == 0 && dst_r_bpp == 0
- sub W, W, #(lowbit * 8 / dst_w_bpp)
+ sub W, W, #(\lowbit * 8 / dst_w_bpp)
.endif
- pixst_a (lowbit * 8 / dst_w_bpp), dst_w_bpp, dst_w_basereg, DST_W
+ pixst_a (\lowbit * 8 / dst_w_bpp), dst_w_bpp, dst_w_basereg, DST_W
51:
.endif
.endr
@@ -592,18 +592,18 @@ local skip1
beq 52f
.if src_bpp > 0 || mask_bpp > 0 || dst_r_bpp > 0
.irp chunk_size, 16, 8, 4, 2, 1
-.if pixblock_size > chunk_size
- tst W, #chunk_size
+.if pixblock_size > \chunk_size
+ tst W, #\chunk_size
beq 51f
- pixld_src chunk_size, src_bpp, src_basereg, SRC
- pixld chunk_size, mask_bpp, mask_basereg, MASK
-.if dst_aligned_flag != 0
- pixld_a chunk_size, dst_r_bpp, dst_r_basereg, DST_R
+ pixld_src \chunk_size, src_bpp, src_basereg, SRC
+ pixld \chunk_size, mask_bpp, mask_basereg, MASK
+.if \dst_aligned_flag != 0
+ pixld_a \chunk_size, dst_r_bpp, dst_r_basereg, DST_R
.else
- pixld chunk_size, dst_r_bpp, dst_r_basereg, DST_R
+ pixld \chunk_size, dst_r_bpp, dst_r_basereg, DST_R
.endif
-.if cache_preload_flag != 0
- PF add PF_X, PF_X, #chunk_size
+.if \cache_preload_flag != 0
+ PF add, PF_X, PF_X, #\chunk_size
.endif
51:
.endif
@@ -613,21 +613,21 @@ local skip1
pixdeinterleave mask_bpp, mask_basereg
pixdeinterleave dst_r_bpp, dst_r_basereg
- process_pixblock_head
-.if cache_preload_flag != 0
+ \process_pixblock_head
+.if \cache_preload_flag != 0
cache_preload 0, pixblock_size
cache_preload_simple
.endif
- process_pixblock_tail
+ \process_pixblock_tail
pixinterleave dst_w_bpp, dst_w_basereg
.irp chunk_size, 16, 8, 4, 2, 1
-.if pixblock_size > chunk_size
- tst W, #chunk_size
+.if pixblock_size > \chunk_size
+ tst W, #\chunk_size
beq 51f
-.if dst_aligned_flag != 0
- pixst_a chunk_size, dst_w_bpp, dst_w_basereg, DST_W
+.if \dst_aligned_flag != 0
+ pixst_a \chunk_size, dst_w_bpp, dst_w_basereg, DST_W
.else
- pixst chunk_size, dst_w_bpp, dst_w_basereg, DST_W
+ pixst \chunk_size, dst_w_bpp, dst_w_basereg, DST_W
.endif
51:
.endif
@@ -660,7 +660,7 @@ local skip1
.endif
subs H, H, #1
mov DST_R, DST_W
- bge start_of_loop_label
+ bge \start_of_loop_label
.endm
/*
@@ -687,7 +687,7 @@ local skip1
src_basereg_ = 0, \
mask_basereg_ = 24
- pixman_asm_function fname
+ pixman_asm_function \fname
stp x29, x30, [sp, -16]!
mov x29, sp
sub sp, sp, 232 /* push all registers */
@@ -712,10 +712,10 @@ local skip1
* has to be used instead of ADVANCED.
*/
.set PREFETCH_TYPE_CURRENT, PREFETCH_TYPE_DEFAULT
-.if prefetch_distance == 0
+.if \prefetch_distance == 0
.set PREFETCH_TYPE_CURRENT, PREFETCH_TYPE_NONE
.elseif (PREFETCH_TYPE_CURRENT > PREFETCH_TYPE_SIMPLE) && \
- ((src_bpp_ == 24) || (mask_bpp_ == 24) || (dst_w_bpp_ == 24))
+ ((\src_bpp_ == 24) || (\mask_bpp_ == 24) || (\dst_w_bpp_ == 24))
.set PREFETCH_TYPE_CURRENT, PREFETCH_TYPE_SIMPLE
.endif
@@ -723,17 +723,17 @@ local skip1
* Make some macro arguments globally visible and accessible
* from other macros
*/
- .set src_bpp, src_bpp_
- .set mask_bpp, mask_bpp_
- .set dst_w_bpp, dst_w_bpp_
- .set pixblock_size, pixblock_size_
- .set dst_w_basereg, dst_w_basereg_
- .set dst_r_basereg, dst_r_basereg_
- .set src_basereg, src_basereg_
- .set mask_basereg, mask_basereg_
+ .set src_bpp, \src_bpp_
+ .set mask_bpp, \mask_bpp_
+ .set dst_w_bpp, \dst_w_bpp_
+ .set pixblock_size, \pixblock_size_
+ .set dst_w_basereg, \dst_w_basereg_
+ .set dst_r_basereg, \dst_r_basereg_
+ .set src_basereg, \src_basereg_
+ .set mask_basereg, \mask_basereg_
.macro pixld_src x:vararg
- pixld x
+ pixld \x
.endm
.macro fetch_src_pixblock
pixld_src pixblock_size, src_bpp, \
@@ -810,22 +810,22 @@ local skip1
.error "requested dst bpp (dst_w_bpp) is not supported"
.endif
-.if (((flags) & FLAG_DST_READWRITE) != 0)
+.if (((\flags) & FLAG_DST_READWRITE) != 0)
.set dst_r_bpp, dst_w_bpp
.else
.set dst_r_bpp, 0
.endif
-.if (((flags) & FLAG_DEINTERLEAVE_32BPP) != 0)
+.if (((\flags) & FLAG_DEINTERLEAVE_32BPP) != 0)
.set DEINTERLEAVE_32BPP_ENABLED, 1
.else
.set DEINTERLEAVE_32BPP_ENABLED, 0
.endif
-.if prefetch_distance < 0 || prefetch_distance > 15
- .error "invalid prefetch distance (prefetch_distance)"
+.if \prefetch_distance < 0 || \prefetch_distance > 15
+ .error "invalid prefetch distance (\prefetch_distance)"
.endif
- PF mov PF_X, #0
+ PF mov, PF_X, #0
mov DST_R, DST_W
.if src_bpp == 24
@@ -844,15 +844,15 @@ local skip1
/*
* Setup advanced prefetcher initial state
*/
- PF mov PF_SRC, SRC
- PF mov PF_DST, DST_R
- PF mov PF_MASK, MASK
- /* PF_CTL = prefetch_distance | ((h - 1) << 4) */
- PF lsl DUMMY, H, #4
- PF mov PF_CTL, DUMMY
- PF add PF_CTL, PF_CTL, #(prefetch_distance - 0x10)
-
- init
+ PF mov, PF_SRC, SRC
+ PF mov, PF_DST, DST_R
+ PF mov, PF_MASK, MASK
+ /* PF_CTL = \prefetch_distance | ((h - 1) << 4) */
+ PF lsl, DUMMY, H, #4
+ PF mov, PF_CTL, DUMMY
+ PF add, PF_CTL, PF_CTL, #(\prefetch_distance - 0x10)
+
+ \init
subs H, H, #1
mov ORIG_W, W
blt 9f
@@ -863,9 +863,9 @@ local skip1
* long scanlines
*/
0:
- ensure_destination_ptr_alignment process_pixblock_head, \
- process_pixblock_tail, \
- process_pixblock_tail_head
+ ensure_destination_ptr_alignment \process_pixblock_head, \
+ \process_pixblock_tail, \
+ \process_pixblock_tail_head
/* Implement "head (tail_head) ... (tail_head) tail" loop pattern */
pixld_a pixblock_size, dst_r_bpp, \
@@ -873,32 +873,32 @@ local skip1
fetch_src_pixblock
pixld pixblock_size, mask_bpp, \
(mask_basereg - pixblock_size * mask_bpp / 64), MASK
- PF add PF_X, PF_X, #pixblock_size
- process_pixblock_head
+ PF add, PF_X, PF_X, #pixblock_size
+ \process_pixblock_head
cache_preload 0, pixblock_size
cache_preload_simple
subs W, W, #(pixblock_size * 2)
blt 200f
100:
- process_pixblock_tail_head
+ \process_pixblock_tail_head
cache_preload_simple
subs W, W, #pixblock_size
bge 100b
200:
- process_pixblock_tail
+ \process_pixblock_tail
pixst_a pixblock_size, dst_w_bpp, \
(dst_w_basereg - pixblock_size * dst_w_bpp / 64), DST_W
/* Process the remaining trailing pixels in the scanline */
process_trailing_pixels 1, 1, \
- process_pixblock_head, \
- process_pixblock_tail, \
- process_pixblock_tail_head
+ \process_pixblock_head, \
+ \process_pixblock_tail, \
+ \process_pixblock_tail_head
advance_to_next_scanline 0b
- cleanup
+ \cleanup
1000:
/* pop all registers */
sub x29, x29, 64
@@ -925,16 +925,16 @@ local skip1
*/
800:
.if src_bpp_shift >= 0
- PF lsl DUMMY, SRC_STRIDE, #src_bpp_shift
- PF prfm PREFETCH_MODE, [SRC, DUMMY]
+ PF lsl, DUMMY, SRC_STRIDE, #src_bpp_shift
+ PF prfm, PREFETCH_MODE, [SRC, DUMMY]
.endif
.if dst_r_bpp != 0
- PF lsl DUMMY, DST_STRIDE, #dst_bpp_shift
- PF prfm PREFETCH_MODE, [DST_R, DUMMY]
+ PF lsl, DUMMY, DST_STRIDE, #dst_bpp_shift
+ PF prfm, PREFETCH_MODE, [DST_R, DUMMY]
.endif
.if mask_bpp_shift >= 0
- PF lsl DUMMY, MASK_STRIDE, #mask_bpp_shift
- PF prfm PREFETCH_MODE, [MASK, DUMMY]
+ PF lsl, DUMMY, MASK_STRIDE, #mask_bpp_shift
+ PF prfm, PREFETCH_MODE, [MASK, DUMMY]
.endif
/* Process exactly pixblock_size pixels if needed */
tst W, #pixblock_size
@@ -944,19 +944,19 @@ local skip1
fetch_src_pixblock
pixld pixblock_size, mask_bpp, \
(mask_basereg - pixblock_size * mask_bpp / 64), MASK
- process_pixblock_head
- process_pixblock_tail
+ \process_pixblock_head
+ \process_pixblock_tail
pixst pixblock_size, dst_w_bpp, \
(dst_w_basereg - pixblock_size * dst_w_bpp / 64), DST_W
100:
/* Process the remaining trailing pixels in the scanline */
process_trailing_pixels 0, 0, \
- process_pixblock_head, \
- process_pixblock_tail, \
- process_pixblock_tail_head
+ \process_pixblock_head, \
+ \process_pixblock_tail, \
+ \process_pixblock_tail_head
advance_to_next_scanline 800b
9:
- cleanup
+ \cleanup
/* pop all registers */
sub x29, x29, 64
ld1 {v8.8b, v9.8b, v10.8b, v11.8b}, [x29], 32
@@ -995,7 +995,7 @@ local skip1
.unreq PF_DST
.unreq PF_MASK
.unreq DUMMY
- .endfunc
+ pixman_end_asm_function
.endm
/*
@@ -1019,23 +1019,23 @@ local skip1
src_basereg_ = 0, \
mask_basereg_ = 24
- pixman_asm_function fname
+ pixman_asm_function \fname
.set PREFETCH_TYPE_CURRENT, PREFETCH_TYPE_NONE
/*
* Make some macro arguments globally visible and accessible
* from other macros
*/
- .set src_bpp, src_bpp_
- .set mask_bpp, mask_bpp_
- .set dst_w_bpp, dst_w_bpp_
- .set pixblock_size, pixblock_size_
- .set dst_w_basereg, dst_w_basereg_
- .set dst_r_basereg, dst_r_basereg_
- .set src_basereg, src_basereg_
- .set mask_basereg, mask_basereg_
-
-.if use_nearest_scaling != 0
+ .set src_bpp, \src_bpp_
+ .set mask_bpp, \mask_bpp_
+ .set dst_w_bpp, \dst_w_bpp_
+ .set pixblock_size, \pixblock_size_
+ .set dst_w_basereg, \dst_w_basereg_
+ .set dst_r_basereg, \dst_r_basereg_
+ .set src_basereg, \src_basereg_
+ .set mask_basereg, \mask_basereg_
+
+.if \use_nearest_scaling != 0
/*
* Assign symbolic names to registers for nearest scaling
*/
@@ -1052,7 +1052,7 @@ local skip1
DUMMY .req x30
.macro pixld_src x:vararg
- pixld_s x
+ pixld_s \x
.endm
sxtw x0, w0
@@ -1080,7 +1080,7 @@ local skip1
DUMMY .req x30
.macro pixld_src x:vararg
- pixld x
+ pixld \x
.endm
sxtw x0, w0
@@ -1093,12 +1093,12 @@ local skip1
st1 {v12.8b, v13.8b, v14.8b, v15.8b}, [x29], 32
.endif
-.if (((flags) & FLAG_DST_READWRITE) != 0)
+.if (((\flags) & FLAG_DST_READWRITE) != 0)
.set dst_r_bpp, dst_w_bpp
.else
.set dst_r_bpp, 0
.endif
-.if (((flags) & FLAG_DEINTERLEAVE_32BPP) != 0)
+.if (((\flags) & FLAG_DEINTERLEAVE_32BPP) != 0)
.set DEINTERLEAVE_32BPP_ENABLED, 1
.else
.set DEINTERLEAVE_32BPP_ENABLED, 0
@@ -1109,15 +1109,15 @@ local skip1
(src_basereg - pixblock_size * src_bpp / 64), SRC
.endm
- init
+ \init
mov DST_R, DST_W
cmp W, #pixblock_size
blt 800f
- ensure_destination_ptr_alignment process_pixblock_head, \
- process_pixblock_tail, \
- process_pixblock_tail_head
+ ensure_destination_ptr_alignment \process_pixblock_head, \
+ \process_pixblock_tail, \
+ \process_pixblock_tail_head
subs W, W, #pixblock_size
blt 700f
@@ -1128,26 +1128,26 @@ local skip1
fetch_src_pixblock
pixld pixblock_size, mask_bpp, \
(mask_basereg - pixblock_size * mask_bpp / 64), MASK
- process_pixblock_head
+ \process_pixblock_head
subs W, W, #pixblock_size
blt 200f
100:
- process_pixblock_tail_head
+ \process_pixblock_tail_head
subs W, W, #pixblock_size
bge 100b
200:
- process_pixblock_tail
+ \process_pixblock_tail
pixst_a pixblock_size, dst_w_bpp, \
(dst_w_basereg - pixblock_size * dst_w_bpp / 64), DST_W
700:
/* Process the remaining trailing pixels in the scanline (dst aligned) */
process_trailing_pixels 0, 1, \
- process_pixblock_head, \
- process_pixblock_tail, \
- process_pixblock_tail_head
+ \process_pixblock_head, \
+ \process_pixblock_tail, \
+ \process_pixblock_tail_head
- cleanup
-.if use_nearest_scaling != 0
+ \cleanup
+.if \use_nearest_scaling != 0
sub x29, x29, 64
ld1 {v8.8b, v9.8b, v10.8b, v11.8b}, [x29], 32
ld1 {v12.8b, v13.8b, v14.8b, v15.8b}, [x29], 32
@@ -1167,12 +1167,12 @@ local skip1
800:
/* Process the remaining trailing pixels in the scanline (dst unaligned) */
process_trailing_pixels 0, 0, \
- process_pixblock_head, \
- process_pixblock_tail, \
- process_pixblock_tail_head
+ \process_pixblock_head, \
+ \process_pixblock_tail, \
+ \process_pixblock_tail_head
- cleanup
-.if use_nearest_scaling != 0
+ \cleanup
+.if \use_nearest_scaling != 0
sub x29, x29, 64
ld1 {v8.8b, v9.8b, v10.8b, v11.8b}, [x29], 32
ld1 {v12.8b, v13.8b, v14.8b, v15.8b}, [x29], 32
@@ -1213,15 +1213,15 @@ local skip1
.purgem fetch_src_pixblock
.purgem pixld_src
- .endfunc
+ pixman_end_asm_function
.endm
.macro generate_composite_function_single_scanline x:vararg
- generate_composite_function_scanline 0, x
+ generate_composite_function_scanline 0, \x
.endm
.macro generate_composite_function_nearest_scanline x:vararg
- generate_composite_function_scanline 1, x
+ generate_composite_function_scanline 1, \x
.endm
/* Default prologue/epilogue, nothing special needs to be done */
@@ -1255,22 +1255,22 @@ local skip1
* value (in) is lost.
*/
.macro convert_0565_to_8888 in, out_a, out_r, out_g, out_b
- shrn &out_r&.8b, &in&.8h, #8
- shrn &out_g&.8b, &in&.8h, #3
- sli &in&.8h, &in&.8h, #5
- movi &out_a&.8b, #255
- sri &out_r&.8b, &out_r&.8b, #5
- sri &out_g&.8b, &out_g&.8b, #6
- shrn &out_b&.8b, &in&.8h, #2
+ shrn \()\out_r\().8b, \()\in\().8h, #8
+ shrn \()\out_g\().8b, \()\in\().8h, #3
+ sli \()\in\().8h, \()\in\().8h, #5
+ movi \()\out_a\().8b, #255
+ sri \()\out_r\().8b, \()\out_r\().8b, #5
+ sri \()\out_g\().8b, \()\out_g\().8b, #6
+ shrn \()\out_b\().8b, \()\in\().8h, #2
.endm
.macro convert_0565_to_x888 in, out_r, out_g, out_b
- shrn &out_r&.8b, &in&.8h, #8
- shrn &out_g&.8b, &in&.8h, #3
- sli &in&.8h, &in&.8h, #5
- sri &out_r&.8b, &out_r&.8b, #5
- sri &out_g&.8b, &out_g&.8b, #6
- shrn &out_b&.8b, &in&.8h, #2
+ shrn \()\out_r\().8b, \()\in\().8h, #8
+ shrn \()\out_g\().8b, \()\in\().8h, #3
+ sli \()\in\().8h, \()\in\().8h, #5
+ sri \()\out_r\().8b, \()\out_r\().8b, #5
+ sri \()\out_g\().8b, \()\out_g\().8b, #6
+ shrn \()\out_b\().8b, \()\in\().8h, #2
.endm
/*
@@ -1280,14 +1280,14 @@ local skip1
* registers (tmp1, tmp2)
*/
.macro convert_8888_to_0565 in_r, in_g, in_b, out, tmp1, tmp2
- ushll &tmp1&.8h, &in_g&.8b, #7
- shl &tmp1&.8h, &tmp1&.8h, #1
- ushll &out&.8h, &in_r&.8b, #7
- shl &out&.8h, &out&.8h, #1
- ushll &tmp2&.8h, &in_b&.8b, #7
- shl &tmp2&.8h, &tmp2&.8h, #1
- sri &out&.8h, &tmp1&.8h, #5
- sri &out&.8h, &tmp2&.8h, #11
+ ushll \()\tmp1\().8h, \()\in_g\().8b, #7
+ shl \()\tmp1\().8h, \()\tmp1\().8h, #1
+ ushll \()\out\().8h, \()\in_r\().8b, #7
+ shl \()\out\().8h, \()\out\().8h, #1
+ ushll \()\tmp2\().8h, \()\in_b\().8b, #7
+ shl \()\tmp2\().8h, \()\tmp2\().8h, #1
+ sri \()\out\().8h, \()\tmp1\().8h, #5
+ sri \()\out\().8h, \()\tmp2\().8h, #11
.endm
/*
@@ -1297,14 +1297,14 @@ local skip1
* value from 'in' is lost
*/
.macro convert_four_0565_to_x888_packed in, out0, out1, tmp
- shl &out0&.4h, &in&.4h, #5 /* G top 6 bits */
- shl &tmp&.4h, &in&.4h, #11 /* B top 5 bits */
- sri &in&.4h, &in&.4h, #5 /* R is ready in top bits */
- sri &out0&.4h, &out0&.4h, #6 /* G is ready in top bits */
- sri &tmp&.4h, &tmp&.4h, #5 /* B is ready in top bits */
- ushr &out1&.4h, &in&.4h, #8 /* R is in place */
- sri &out0&.4h, &tmp&.4h, #8 /* G & B is in place */
- zip1 &tmp&.4h, &out0&.4h, &out1&.4h /* everything is in place */
- zip2 &out1&.4h, &out0&.4h, &out1&.4h
- mov &out0&.d[0], &tmp&.d[0]
+ shl \()\out0\().4h, \()\in\().4h, #5 /* G top 6 bits */
+ shl \()\tmp\().4h, \()\in\().4h, #11 /* B top 5 bits */
+ sri \()\in\().4h, \()\in\().4h, #5 /* R is ready in top bits */
+ sri \()\out0\().4h, \()\out0\().4h, #6 /* G is ready in top bits */
+ sri \()\tmp\().4h, \()\tmp\().4h, #5 /* B is ready in top bits */
+ ushr \()\out1\().4h, \()\in\().4h, #8 /* R is in place */
+ sri \()\out0\().4h, \()\tmp\().4h, #8 /* G & B is in place */
+ zip1 \()\tmp\().4h, \()\out0\().4h, \()\out1\().4h /* everything is in place */
+ zip2 \()\out1\().4h, \()\out0\().4h, \()\out1\().4h
+ mov \()\out0\().d[0], \()\tmp\().d[0]
.endm
--
2.41.0