diff options
author | Richard Sandiford <richard.sandiford@arm.com> | 2023-09-14 11:39:53 +0100 |
---|---|---|
committer | Richard Sandiford <richard.sandiford@arm.com> | 2023-09-14 11:39:53 +0100 |
commit | 0f1f6cf872a03d82ab1973780b37bb8572e96f58 (patch) | |
tree | c218666719d367c507373dac72928396b7e0cf30 /gcc | |
parent | 62b29347c38394ae32858f2301aa9aa65205984e (diff) | |
download | gcc-0f1f6cf872a03d82ab1973780b37bb8572e96f58.zip gcc-0f1f6cf872a03d82ab1973780b37bb8572e96f58.tar.gz gcc-0f1f6cf872a03d82ab1973780b37bb8572e96f58.tar.bz2 |
aarch64: Coerce addresses to be suitable for LD1RQ
In the following test:
    svuint8_t ld(uint8_t *ptr) { return svld1rq(svptrue_b8(), ptr + 2); }
ptr + 2 is a valid address for an Advanced SIMD load, but not for
an SVE load. We therefore ended up generating:
    ldr q0, [x0, 2]
    dup z0.q, z0.q[0]
This patch makes us generate LD1RQ for that case too. It takes the
slightly old-school approach of making the predicate broader than
the constraint. That is: any valid memory address is accepted as
an operand before RA. If the instruction remains during RA, LRA will
coerce the address to match the constraint. If the instruction gets
split before RA, the splitter will load invalid addresses into a
scratch register.
gcc/
* config/aarch64/aarch64-sve.md (@aarch64_vec_duplicate_vq<mode>_le):
Accept all nonimmediate_operands, but keep the existing constraints.
If the instruction is split before RA, load invalid addresses into
a temporary register.
* config/aarch64/predicates.md (aarch64_sve_dup_ld1rq_operand): Delete.
gcc/testsuite/
* gcc.target/aarch64/sve/acle/general/ld1rq_1.c: New test.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/aarch64/aarch64-sve.md | 15 | ||||
-rw-r--r-- | gcc/config/aarch64/predicates.md | 4 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/sve/acle/general/ld1rq_1.c | 33 |
3 files changed, 47 insertions, 5 deletions
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index da5534c..b223e7d 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -2611,11 +2611,18 @@ ) ;; Duplicate an Advanced SIMD vector to fill an SVE vector (LE version). +;; +;; The addressing mode range of LD1RQ does not match the addressing mode +;; range of LDR Qn. If the predicate enforced the LD1RQ range, we would +;; not be able to combine LDR Qns outside that range. The predicate +;; therefore accepts all memory operands, with only the constraints +;; enforcing the actual restrictions. If the instruction is split +;; before RA, we need to load invalid addresses into a temporary. (define_insn_and_split "@aarch64_vec_duplicate_vq<mode>_le" [(set (match_operand:SVE_FULL 0 "register_operand" "=w, w") (vec_duplicate:SVE_FULL - (match_operand:<V128> 1 "aarch64_sve_dup_ld1rq_operand" "w, UtQ"))) + (match_operand:<V128> 1 "nonimmediate_operand" "w, UtQ"))) (clobber (match_scratch:VNx16BI 2 "=X, Upl"))] "TARGET_SVE && !BYTES_BIG_ENDIAN" { @@ -2633,6 +2640,12 @@ "&& MEM_P (operands[1])" [(const_int 0)] { + if (can_create_pseudo_p () + && !aarch64_sve_ld1rq_operand (operands[1], <V128>mode)) + { + rtx addr = force_reg (Pmode, XEXP (operands[1], 0)); + operands[1] = replace_equiv_address (operands[1], addr); + } if (GET_CODE (operands[2]) == SCRATCH) operands[2] = gen_reg_rtx (VNx16BImode); emit_move_insn (operands[2], CONSTM1_RTX (VNx16BImode)); diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md index 2d8d1fe..01de474 100644 --- a/gcc/config/aarch64/predicates.md +++ b/gcc/config/aarch64/predicates.md @@ -732,10 +732,6 @@ (ior (match_operand 0 "register_operand") (match_operand 0 "aarch64_sve_ld1r_operand"))) -(define_predicate "aarch64_sve_dup_ld1rq_operand" - (ior (match_operand 0 "register_operand") - (match_operand 0 "aarch64_sve_ld1rq_operand"))) - (define_predicate "aarch64_sve_ptrue_svpattern_immediate" (and 
(match_code "const") (match_test "aarch64_sve_ptrue_svpattern_p (op, NULL)"))) diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ld1rq_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ld1rq_1.c new file mode 100644 index 0000000..9242c63 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ld1rq_1.c @@ -0,0 +1,33 @@ +/* { dg-options "-O2" } */ + +#include <arm_sve.h> + +#define TEST_OFFSET(TYPE, SUFFIX, OFFSET) \ + sv##TYPE##_t \ + test_##TYPE##_##SUFFIX (TYPE##_t *ptr) \ + { \ + return svld1rq(svptrue_b8(), ptr + OFFSET); \ + } + +#define TEST(TYPE) \ + TEST_OFFSET (TYPE, 0, 0) \ + TEST_OFFSET (TYPE, 1, 1) \ + TEST_OFFSET (TYPE, 2, 2) \ + TEST_OFFSET (TYPE, 16, 16) \ + TEST_OFFSET (TYPE, 0x10000, 0x10000) \ + TEST_OFFSET (TYPE, 0x10001, 0x10001) \ + TEST_OFFSET (TYPE, m1, -1) \ + TEST_OFFSET (TYPE, m2, -2) \ + TEST_OFFSET (TYPE, m16, -16) \ + TEST_OFFSET (TYPE, m0x10000, -0x10000) \ + TEST_OFFSET (TYPE, m0x10001, -0x10001) + +TEST (int8) +TEST (int16) +TEST (uint32) +TEST (uint64) + +/* { dg-final { scan-assembler-times {\tld1rqb\t} 11 { target aarch64_little_endian } } } */ +/* { dg-final { scan-assembler-times {\tld1rqh\t} 11 { target aarch64_little_endian } } } */ +/* { dg-final { scan-assembler-times {\tld1rqw\t} 11 { target aarch64_little_endian } } } */ +/* { dg-final { scan-assembler-times {\tld1rqd\t} 11 { target aarch64_little_endian } } } */ |