aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--gcc/ChangeLog9
-rw-r--r--gcc/config/aarch64/aarch64-sve.md10
-rw-r--r--gcc/config/aarch64/aarch64.c19
-rw-r--r--gcc/testsuite/ChangeLog6
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve/slp_2.c2
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve/slp_3.c2
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve/slp_4.c2
7 files changed, 30 insertions, 20 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index d665f3d..36d3f50 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,14 @@
2018-02-01 Richard Sandiford <richard.sandiford@linaro.org>
+ * config/aarch64/aarch64-sve.md (sve_ld1rq): Replace with...
+ (*sve_ld1rq<Vesize>): ... this new pattern. Handle all element sizes,
+ not just bytes.
+ * config/aarch64/aarch64.c (aarch64_expand_sve_widened_duplicate):
+ Remove BSWAP handling for big-endian targets and use the form of
+ LD1RQ appropriate for the mode.
+
+2018-02-01 Richard Sandiford <richard.sandiford@linaro.org>
+
* config/aarch64/aarch64.c (aarch64_simd_valid_immediate): Handle
all CONST_VECTOR_DUPLICATE_P vectors, not just those with a single
duplicated element.
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index ee942df..068fd8c 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -652,14 +652,14 @@
;; Load 128 bits from memory and duplicate to fill a vector. Since there
;; are so few operations on 128-bit "elements", we don't define a VNx1TI
;; and simply use vectors of bytes instead.
-(define_insn "sve_ld1rq"
- [(set (match_operand:VNx16QI 0 "register_operand" "=w")
- (unspec:VNx16QI
- [(match_operand:VNx16BI 1 "register_operand" "Upl")
+(define_insn "*sve_ld1rq<Vesize>"
+ [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
+ (unspec:SVE_ALL
+ [(match_operand:<VPRED> 1 "register_operand" "Upl")
(match_operand:TI 2 "aarch64_sve_ld1r_operand" "Uty")]
UNSPEC_LD1RQ))]
"TARGET_SVE"
- "ld1rqb\t%0.b, %1/z, %2"
+ "ld1rq<Vesize>\t%0.<Vetype>, %1/z, %2"
)
;; Implement a predicate broadcast by shifting the low bit of the scalar
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 1278f83..ae142b4 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -2787,16 +2787,7 @@ aarch64_expand_sve_widened_duplicate (rtx dest, scalar_int_mode src_mode,
return true;
}
- /* The bytes are loaded in little-endian order, so do a byteswap on
- big-endian targets. */
- if (BYTES_BIG_ENDIAN)
- {
- src = simplify_unary_operation (BSWAP, src_mode, src, src_mode);
- if (!src)
- return NULL_RTX;
- }
-
- /* Use LD1RQ to load the 128 bits from memory. */
+ /* Use LD1RQ[BHWD] to load the 128 bits from memory. */
src = force_const_mem (src_mode, src);
if (!src)
return false;
@@ -2808,8 +2799,12 @@ aarch64_expand_sve_widened_duplicate (rtx dest, scalar_int_mode src_mode,
src = replace_equiv_address (src, addr);
}
- rtx ptrue = force_reg (VNx16BImode, CONSTM1_RTX (VNx16BImode));
- emit_insn (gen_sve_ld1rq (gen_lowpart (VNx16QImode, dest), ptrue, src));
+ machine_mode mode = GET_MODE (dest);
+ unsigned int elem_bytes = GET_MODE_UNIT_SIZE (mode);
+ machine_mode pred_mode = aarch64_sve_pred_mode (elem_bytes).require ();
+ rtx ptrue = force_reg (pred_mode, CONSTM1_RTX (pred_mode));
+ src = gen_rtx_UNSPEC (mode, gen_rtvec (2, ptrue, src), UNSPEC_LD1RQ);
+ emit_insn (gen_rtx_SET (dest, src));
return true;
}
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 92d013e..f440d1e4 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,9 @@
+2018-02-01 Richard Sandiford <richard.sandiford@linaro.org>
+
+ * gcc.target/aarch64/sve/slp_2.c: Expect LD1RQD rather than LD1RQB.
+ * gcc.target/aarch64/sve/slp_3.c: Expect LD1RQW rather than LD1RQB.
+ * gcc.target/aarch64/sve/slp_4.c: Expect LD1RQH rather than LD1RQB.
+
2018-02-01 Jakub Jelinek <jakub@redhat.com>
PR tree-optimization/81661
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/slp_2.c b/gcc/testsuite/gcc.target/aarch64/sve/slp_2.c
index 4a219f2..657abb0 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/slp_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/slp_2.c
@@ -32,7 +32,7 @@ TEST_ALL (VEC_PERM)
/* { dg-final { scan-assembler-times {\tld1rh\tz[0-9]+\.h, } 2 } } */
/* { dg-final { scan-assembler-times {\tld1rw\tz[0-9]+\.s, } 3 } } */
/* { dg-final { scan-assembler-times {\tld1rd\tz[0-9]+\.d, } 3 } } */
-/* { dg-final { scan-assembler-times {\tld1rqb\tz[0-9]+\.b, } 3 } } */
+/* { dg-final { scan-assembler-times {\tld1rqd\tz[0-9]+\.d, } 3 } } */
/* { dg-final { scan-assembler-not {\tzip1\t} } } */
/* { dg-final { scan-assembler-not {\tzip2\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/slp_3.c b/gcc/testsuite/gcc.target/aarch64/sve/slp_3.c
index cfe20a8..dd47502 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/slp_3.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/slp_3.c
@@ -36,7 +36,7 @@ TEST_ALL (VEC_PERM)
/* 1 for each 16-bit type and 4 for double. */
/* { dg-final { scan-assembler-times {\tld1rd\tz[0-9]+\.d, } 7 } } */
/* 1 for each 32-bit type. */
-/* { dg-final { scan-assembler-times {\tld1rqb\tz[0-9]+\.b, } 3 } } */
+/* { dg-final { scan-assembler-times {\tld1rqw\tz[0-9]+\.s, } 3 } } */
/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #41\n} 2 } } */
/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #25\n} 2 } } */
/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #31\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/slp_4.c b/gcc/testsuite/gcc.target/aarch64/sve/slp_4.c
index 98ff68f..026fa8c 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/slp_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/slp_4.c
@@ -38,7 +38,7 @@ TEST_ALL (VEC_PERM)
/* 1 for each 8-bit type, 4 for each 32-bit type and 8 for double. */
/* { dg-final { scan-assembler-times {\tld1rd\tz[0-9]+\.d, } 22 } } */
/* 1 for each 16-bit type. */
-/* { dg-final { scan-assembler-times {\tld1rqb\tz[0-9]\.b, } 3 } } */
+/* { dg-final { scan-assembler-times {\tld1rqh\tz[0-9]\.h, } 3 } } */
/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #99\n} 2 } } */
/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #11\n} 2 } } */
/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #17\n} 2 } } */