diff options
-rw-r--r-- | gcc/ChangeLog | 9 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64-sve.md | 10 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64.c | 19 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 6 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/sve/slp_2.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/sve/slp_3.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/sve/slp_4.c | 2 |
7 files changed, 30 insertions, 20 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index d665f3d..36d3f50 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,14 @@ 2018-02-01 Richard Sandiford <richard.sandiford@linaro.org> + * config/aarch64/aarch64-sve.md (sve_ld1rq): Replace with... + (*sve_ld1rq<Vesize>): ... this new pattern. Handle all element sizes, + not just bytes. + * config/aarch64/aarch64.c (aarch64_expand_sve_widened_duplicate): + Remove BSWAP handling for big-endian targets and use the form of + LD1RQ appropriate for the mode. + +2018-02-01 Richard Sandiford <richard.sandiford@linaro.org> + * config/aarch64/aarch64.c (aarch64_simd_valid_immediate): Handle all CONST_VECTOR_DUPLICATE_P vectors, not just those with a single duplicated element. diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index ee942df..068fd8c 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -652,14 +652,14 @@ ;; Load 128 bits from memory and duplicate to fill a vector. Since there ;; are so few operations on 128-bit "elements", we don't define a VNx1TI ;; and simply use vectors of bytes instead. 
-(define_insn "sve_ld1rq" - [(set (match_operand:VNx16QI 0 "register_operand" "=w") - (unspec:VNx16QI - [(match_operand:VNx16BI 1 "register_operand" "Upl") +(define_insn "*sve_ld1rq<Vesize>" + [(set (match_operand:SVE_ALL 0 "register_operand" "=w") + (unspec:SVE_ALL + [(match_operand:<VPRED> 1 "register_operand" "Upl") (match_operand:TI 2 "aarch64_sve_ld1r_operand" "Uty")] UNSPEC_LD1RQ))] "TARGET_SVE" - "ld1rqb\t%0.b, %1/z, %2" + "ld1rq<Vesize>\t%0.<Vetype>, %1/z, %2" ) ;; Implement a predicate broadcast by shifting the low bit of the scalar diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 1278f83..ae142b4 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -2787,16 +2787,7 @@ aarch64_expand_sve_widened_duplicate (rtx dest, scalar_int_mode src_mode, return true; } - /* The bytes are loaded in little-endian order, so do a byteswap on - big-endian targets. */ - if (BYTES_BIG_ENDIAN) - { - src = simplify_unary_operation (BSWAP, src_mode, src, src_mode); - if (!src) - return NULL_RTX; - } - - /* Use LD1RQ to load the 128 bits from memory. */ + /* Use LD1RQ[BHWD] to load the 128 bits from memory. 
*/ src = force_const_mem (src_mode, src); if (!src) return false; @@ -2808,8 +2799,12 @@ aarch64_expand_sve_widened_duplicate (rtx dest, scalar_int_mode src_mode, src = replace_equiv_address (src, addr); } - rtx ptrue = force_reg (VNx16BImode, CONSTM1_RTX (VNx16BImode)); - emit_insn (gen_sve_ld1rq (gen_lowpart (VNx16QImode, dest), ptrue, src)); + machine_mode mode = GET_MODE (dest); + unsigned int elem_bytes = GET_MODE_UNIT_SIZE (mode); + machine_mode pred_mode = aarch64_sve_pred_mode (elem_bytes).require (); + rtx ptrue = force_reg (pred_mode, CONSTM1_RTX (pred_mode)); + src = gen_rtx_UNSPEC (mode, gen_rtvec (2, ptrue, src), UNSPEC_LD1RQ); + emit_insn (gen_rtx_SET (dest, src)); return true; } diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 92d013e..f440d1e4 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,9 @@ +2018-02-01 Richard Sandiford <richard.sandiford@linaro.org> + + * gcc.target/aarch64/sve/slp_2.c: Expect LD1RQD rather than LD1RQB. + * gcc.target/aarch64/sve/slp_3.c: Expect LD1RQW rather than LD1RQB. + * gcc.target/aarch64/sve/slp_4.c: Expect LD1RQH rather than LD1RQB. 
+ 2018-02-01 Jakub Jelinek <jakub@redhat.com> PR tree-optimization/81661 diff --git a/gcc/testsuite/gcc.target/aarch64/sve/slp_2.c b/gcc/testsuite/gcc.target/aarch64/sve/slp_2.c index 4a219f2..657abb0 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/slp_2.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/slp_2.c @@ -32,7 +32,7 @@ TEST_ALL (VEC_PERM) /* { dg-final { scan-assembler-times {\tld1rh\tz[0-9]+\.h, } 2 } } */ /* { dg-final { scan-assembler-times {\tld1rw\tz[0-9]+\.s, } 3 } } */ /* { dg-final { scan-assembler-times {\tld1rd\tz[0-9]+\.d, } 3 } } */ -/* { dg-final { scan-assembler-times {\tld1rqb\tz[0-9]+\.b, } 3 } } */ +/* { dg-final { scan-assembler-times {\tld1rqd\tz[0-9]+\.d, } 3 } } */ /* { dg-final { scan-assembler-not {\tzip1\t} } } */ /* { dg-final { scan-assembler-not {\tzip2\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/slp_3.c b/gcc/testsuite/gcc.target/aarch64/sve/slp_3.c index cfe20a8..dd47502 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/slp_3.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/slp_3.c @@ -36,7 +36,7 @@ TEST_ALL (VEC_PERM) /* 1 for each 16-bit type and 4 for double. */ /* { dg-final { scan-assembler-times {\tld1rd\tz[0-9]+\.d, } 7 } } */ /* 1 for each 32-bit type. */ -/* { dg-final { scan-assembler-times {\tld1rqb\tz[0-9]+\.b, } 3 } } */ +/* { dg-final { scan-assembler-times {\tld1rqw\tz[0-9]+\.s, } 3 } } */ /* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #41\n} 2 } } */ /* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #25\n} 2 } } */ /* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #31\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/slp_4.c b/gcc/testsuite/gcc.target/aarch64/sve/slp_4.c index 98ff68f..026fa8c 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/slp_4.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/slp_4.c @@ -38,7 +38,7 @@ TEST_ALL (VEC_PERM) /* 1 for each 8-bit type, 4 for each 32-bit type and 8 for double. 
*/ /* { dg-final { scan-assembler-times {\tld1rd\tz[0-9]+\.d, } 22 } } */ /* 1 for each 16-bit type. */ -/* { dg-final { scan-assembler-times {\tld1rqb\tz[0-9]\.b, } 3 } } */ +/* { dg-final { scan-assembler-times {\tld1rqh\tz[0-9]\.h, } 3 } } */ /* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #99\n} 2 } } */ /* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #11\n} 2 } } */ /* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #17\n} 2 } } */ |