-rw-r--r--  gcc/ChangeLog                                           |  27
-rw-r--r--  gcc/config/aarch64/aarch64-sve-builtins.cc              |   6
-rw-r--r--  gcc/config/aarch64/aarch64-sve.md                       | 107
-rw-r--r--  gcc/config/aarch64/aarch64.c                            |  46
-rw-r--r--  gcc/testsuite/ChangeLog                                 |  15
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/load_extend_1.c    |  31
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/load_extend_10.c   |  21
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/load_extend_2.c    |  31
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/load_extend_3.c    |  21
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/load_extend_4.c    |  21
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/load_extend_5.c    |  21
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/load_extend_6.c    |  21
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/load_extend_7.c    |  21
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/load_extend_8.c    |  21
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/load_extend_9.c    |  21
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/reduc_4.c          |   2

16 files changed, 358 insertions, 75 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index cccc042..83931c5 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,32 @@
 2019-11-16  Richard Sandiford  <richard.sandiford@arm.com>
 
+	* config/aarch64/aarch64-sve.md
+	(@aarch64_load_<ANY_EXTEND:optab><VNx8_WIDE:mode><VNx8_NARROW:mode>)
+	(@aarch64_load_<ANY_EXTEND:optab><VNx4_WIDE:mode><VNx4_NARROW:mode>)
+	(@aarch64_load_<ANY_EXTEND:optab><VNx2_WIDE:mode><VNx2_NARROW:mode>):
+	Combine into...
+	(@aarch64_load_<ANY_EXTEND:optab><SVE_HSDI:mode><SVE_PARTIAL_I:mode>):
+	...this new pattern, handling extension to partial modes as well
+	as full modes.  Describe the extension as a predicated rather than
+	unpredicated extension.
+	(@aarch64_ld<fn>f1_<ANY_EXTEND:optab><VNx8_WIDE:mode><VNx8_NARROW:mode>)
+	(@aarch64_ld<fn>f1_<ANY_EXTEND:optab><VNx4_WIDE:mode><VNx4_NARROW:mode>)
+	(@aarch64_ld<fn>f1_<ANY_EXTEND:optab><VNx2_WIDE:mode><VNx2_NARROW:mode>):
+	Combine into...
+	(@aarch64_ld<fn>f1_<ANY_EXTEND:optab><SVE_HSDI:mode><SVE_PARTIAL_I:mode>):
+	...this new pattern, handling extension to partial modes as well
+	as full modes.  Describe the extension as a predicated rather than
+	unpredicated extension.
+	* config/aarch64/aarch64-sve-builtins.cc
+	(function_expander::use_contiguous_load_insn): Add an extra
+	predicate for extending loads.
+	* config/aarch64/aarch64.c (aarch64_extending_load_p): New function.
+	(aarch64_sve_adjust_stmt_cost): Likewise.
+	(aarch64_add_stmt_cost): Use aarch64_sve_adjust_stmt_cost to adjust
+	the cost of SVE vector stmts.
+
+2019-11-16  Richard Sandiford  <richard.sandiford@arm.com>
+
 	* config/aarch64/iterators.md (SVE_HSDI): New mode iterator.
 	(narrower_mask): Handle VNx4HI, VNx2HI and VNx2SI.
 	* config/aarch64/aarch64-sve.md
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc
index 63d903d..27736b9 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
@@ -2790,7 +2790,9 @@ function_expander::use_vcond_mask_insn (insn_code icode,
 }
 
 /* Implement the call using instruction ICODE, which loads memory operand 1
-   into register operand 0 under the control of predicate operand 2.  */
+   into register operand 0 under the control of predicate operand 2.
+   Extending loads have a further predicate (operand 3) that nominally
+   controls the extension.  */
 rtx
 function_expander::use_contiguous_load_insn (insn_code icode)
 {
@@ -2799,6 +2801,8 @@ function_expander::use_contiguous_load_insn (insn_code icode)
   add_output_operand (icode);
   add_mem_operand (mem_mode, get_contiguous_base (mem_mode));
   add_input_operand (icode, args[0]);
+  if (GET_MODE_UNIT_BITSIZE (mem_mode) < type_suffix (0).element_bits)
+    add_input_operand (icode, CONSTM1_RTX (VNx16BImode));
   return generate_insn (icode);
 }
 
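For context, and not part of the patch itself: one way to reach
use_contiguous_load_insn with an extending load is through the ACLE
contiguous-load intrinsics.  A minimal sketch (the function name is
illustrative only):

#include <arm_sve.h>

/* Load bytes and zero-extend each one to a 32-bit element.  Because the
   memory elements (8 bits) are narrower than the vector elements (32 bits),
   the expander now passes an all-true VNx16BI predicate as the extra
   "extension" operand described above.  */
svuint32_t
load_u8_to_u32 (svbool_t pg, const uint8_t *base)
{
  return svld1ub_u32 (pg, base);
}
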
-(define_insn "@aarch64_load_<ANY_EXTEND:optab><VNx8_WIDE:mode><VNx8_NARROW:mode>" - [(set (match_operand:VNx8_WIDE 0 "register_operand" "=w") - (ANY_EXTEND:VNx8_WIDE - (unspec:VNx8_NARROW - [(match_operand:VNx8BI 2 "register_operand" "Upl") - (match_operand:VNx8_NARROW 1 "memory_operand" "m")] - UNSPEC_LD1_SVE)))] - "TARGET_SVE" - "ld1<ANY_EXTEND:s><VNx8_NARROW:Vesize>\t%0.<VNx8_WIDE:Vetype>, %2/z, %1" -) - -;; Predicated load and extend, with 4 elements per 128-bit block. -(define_insn "@aarch64_load_<ANY_EXTEND:optab><VNx4_WIDE:mode><VNx4_NARROW:mode>" - [(set (match_operand:VNx4_WIDE 0 "register_operand" "=w") - (ANY_EXTEND:VNx4_WIDE - (unspec:VNx4_NARROW - [(match_operand:VNx4BI 2 "register_operand" "Upl") - (match_operand:VNx4_NARROW 1 "memory_operand" "m")] - UNSPEC_LD1_SVE)))] - "TARGET_SVE" - "ld1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.<VNx4_WIDE:Vetype>, %2/z, %1" -) - -;; Predicated load and extend, with 2 elements per 128-bit block. -(define_insn "@aarch64_load_<ANY_EXTEND:optab><VNx2_WIDE:mode><VNx2_NARROW:mode>" - [(set (match_operand:VNx2_WIDE 0 "register_operand" "=w") - (ANY_EXTEND:VNx2_WIDE - (unspec:VNx2_NARROW - [(match_operand:VNx2BI 2 "register_operand" "Upl") - (match_operand:VNx2_NARROW 1 "memory_operand" "m")] - UNSPEC_LD1_SVE)))] - "TARGET_SVE" - "ld1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.<VNx2_WIDE:Vetype>, %2/z, %1" +(define_insn_and_rewrite "@aarch64_load_<ANY_EXTEND:optab><SVE_HSDI:mode><SVE_PARTIAL_I:mode>" + [(set (match_operand:SVE_HSDI 0 "register_operand" "=w") + (unspec:SVE_HSDI + [(match_operand:<SVE_HSDI:VPRED> 3 "general_operand" "UplDnm") + (ANY_EXTEND:SVE_HSDI + (unspec:SVE_PARTIAL_I + [(match_operand:<SVE_PARTIAL_I:VPRED> 2 "register_operand" "Upl") + (match_operand:SVE_PARTIAL_I 1 "memory_operand" "m")] + UNSPEC_LD1_SVE))] + UNSPEC_PRED_X))] + "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0" + "ld1<ANY_EXTEND:s><SVE_PARTIAL_I:Vesize>\t%0.<SVE_HSDI:Vctype>, %2/z, %1" + "&& !CONSTANT_P (operands[3])" + { + operands[3] = CONSTM1_RTX (<SVE_HSDI:VPRED>mode); + } ) ;; ------------------------------------------------------------------------- @@ -1268,46 +1251,24 @@ ;; - LDNF1W ;; ------------------------------------------------------------------------- -;; Predicated first-faulting or non-faulting load and extend, with 8 elements -;; per 128-bit block. -(define_insn "@aarch64_ld<fn>f1_<ANY_EXTEND:optab><VNx8_WIDE:mode><VNx8_NARROW:mode>" - [(set (match_operand:VNx8_WIDE 0 "register_operand" "=w") - (ANY_EXTEND:VNx8_WIDE - (unspec:VNx8_NARROW - [(match_operand:VNx8BI 2 "register_operand" "Upl") - (match_operand:VNx8_NARROW 1 "aarch64_sve_ld<fn>f1_operand" "Ut<fn>") - (reg:VNx16BI FFRT_REGNUM)] - SVE_LDFF1_LDNF1)))] - "TARGET_SVE" - "ld<fn>f1<ANY_EXTEND:s><VNx8_NARROW:Vesize>\t%0.<VNx8_WIDE:Vetype>, %2/z, %1" -) - -;; Predicated first-faulting or non-faulting load and extend, with 4 elements -;; per 128-bit block. -(define_insn "@aarch64_ld<fn>f1_<ANY_EXTEND:optab><VNx4_WIDE:mode><VNx4_NARROW:mode>" - [(set (match_operand:VNx4_WIDE 0 "register_operand" "=w") - (ANY_EXTEND:VNx4_WIDE - (unspec:VNx4_NARROW - [(match_operand:VNx4BI 2 "register_operand" "Upl") - (match_operand:VNx4_NARROW 1 "aarch64_sve_ld<fn>f1_operand" "Ut<fn>") - (reg:VNx16BI FFRT_REGNUM)] - SVE_LDFF1_LDNF1)))] - "TARGET_SVE" - "ld<fn>f1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.<VNx4_WIDE:Vetype>, %2/z, %1" -) - -;; Predicated first-faulting or non-faulting load and extend, with 2 elements -;; per 128-bit block. 
-(define_insn "@aarch64_ld<fn>f1_<ANY_EXTEND:optab><VNx2_WIDE:mode><VNx2_NARROW:mode>" - [(set (match_operand:VNx2_WIDE 0 "register_operand" "=w") - (ANY_EXTEND:VNx2_WIDE - (unspec:VNx2_NARROW - [(match_operand:VNx2BI 2 "register_operand" "Upl") - (match_operand:VNx2_NARROW 1 "aarch64_sve_ld<fn>f1_operand" "Ut<fn>") - (reg:VNx16BI FFRT_REGNUM)] - SVE_LDFF1_LDNF1)))] - "TARGET_SVE" - "ld<fn>f1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.<VNx2_WIDE:Vetype>, %2/z, %1" +;; Predicated first-faulting or non-faulting load and extend. +(define_insn_and_rewrite "@aarch64_ld<fn>f1_<ANY_EXTEND:optab><SVE_HSDI:mode><SVE_PARTIAL_I:mode>" + [(set (match_operand:SVE_HSDI 0 "register_operand" "=w") + (unspec:SVE_HSDI + [(match_operand:<SVE_HSDI:VPRED> 3 "general_operand" "UplDnm") + (ANY_EXTEND:SVE_HSDI + (unspec:SVE_PARTIAL_I + [(match_operand:<SVE_PARTIAL_I:VPRED> 2 "register_operand" "Upl") + (match_operand:SVE_PARTIAL_I 1 "aarch64_sve_ld<fn>f1_operand" "Ut<fn>") + (reg:VNx16BI FFRT_REGNUM)] + SVE_LDFF1_LDNF1))] + UNSPEC_PRED_X))] + "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0" + "ld<fn>f1<ANY_EXTEND:s><SVE_PARTIAL_I:Vesize>\t%0.<SVE_HSDI:Vctype>, %2/z, %1" + "&& !CONSTANT_P (operands[3])" + { + operands[3] = CONSTM1_RTX (<SVE_HSDI:VPRED>mode); + } ) ;; ------------------------------------------------------------------------- diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index d175e1f..305c6da 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -12879,6 +12879,49 @@ aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, } } +/* Return true if STMT_INFO extends the result of a load. */ +static bool +aarch64_extending_load_p (stmt_vec_info stmt_info) +{ + gassign *assign = dyn_cast <gassign *> (stmt_info->stmt); + if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (assign))) + return false; + + tree rhs = gimple_assign_rhs1 (stmt_info->stmt); + tree lhs_type = TREE_TYPE (gimple_assign_lhs (assign)); + tree rhs_type = TREE_TYPE (rhs); + if (!INTEGRAL_TYPE_P (lhs_type) + || !INTEGRAL_TYPE_P (rhs_type) + || TYPE_PRECISION (lhs_type) <= TYPE_PRECISION (rhs_type)) + return false; + + stmt_vec_info def_stmt_info = stmt_info->vinfo->lookup_def (rhs); + return (def_stmt_info + && STMT_VINFO_DATA_REF (def_stmt_info) + && DR_IS_READ (STMT_VINFO_DATA_REF (def_stmt_info))); +} + +/* STMT_COST is the cost calculated by aarch64_builtin_vectorization_cost + for STMT_INFO, which has cost kind KIND. Adjust the cost as necessary + for SVE targets. */ +static unsigned int +aarch64_sve_adjust_stmt_cost (vect_cost_for_stmt kind, stmt_vec_info stmt_info, + unsigned int stmt_cost) +{ + /* Unlike vec_promote_demote, vector_stmt conversions do not change the + vector register size or number of units. Integer promotions of this + type therefore map to SXT[BHW] or UXT[BHW]. + + Most loads have extending forms that can do the sign or zero extension + on the fly. Optimistically assume that a load followed by an extension + will fold to this form during combine, and that the extension therefore + comes for free. */ + if (kind == vector_stmt && aarch64_extending_load_p (stmt_info)) + stmt_cost = 0; + + return stmt_cost; +} + /* Implement targetm.vectorize.add_stmt_cost. 
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index d175e1f..305c6da 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -12879,6 +12879,49 @@ aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
     }
 }
 
+/* Return true if STMT_INFO extends the result of a load.  */
+static bool
+aarch64_extending_load_p (stmt_vec_info stmt_info)
+{
+  gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
+  if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (assign)))
+    return false;
+
+  tree rhs = gimple_assign_rhs1 (stmt_info->stmt);
+  tree lhs_type = TREE_TYPE (gimple_assign_lhs (assign));
+  tree rhs_type = TREE_TYPE (rhs);
+  if (!INTEGRAL_TYPE_P (lhs_type)
+      || !INTEGRAL_TYPE_P (rhs_type)
+      || TYPE_PRECISION (lhs_type) <= TYPE_PRECISION (rhs_type))
+    return false;
+
+  stmt_vec_info def_stmt_info = stmt_info->vinfo->lookup_def (rhs);
+  return (def_stmt_info
+          && STMT_VINFO_DATA_REF (def_stmt_info)
+          && DR_IS_READ (STMT_VINFO_DATA_REF (def_stmt_info)));
+}
+
+/* STMT_COST is the cost calculated by aarch64_builtin_vectorization_cost
+   for STMT_INFO, which has cost kind KIND.  Adjust the cost as necessary
+   for SVE targets.  */
+static unsigned int
+aarch64_sve_adjust_stmt_cost (vect_cost_for_stmt kind, stmt_vec_info stmt_info,
+                              unsigned int stmt_cost)
+{
+  /* Unlike vec_promote_demote, vector_stmt conversions do not change the
+     vector register size or number of units.  Integer promotions of this
+     type therefore map to SXT[BHW] or UXT[BHW].
+
+     Most loads have extending forms that can do the sign or zero extension
+     on the fly.  Optimistically assume that a load followed by an extension
+     will fold to this form during combine, and that the extension therefore
+     comes for free.  */
+  if (kind == vector_stmt && aarch64_extending_load_p (stmt_info))
+    stmt_cost = 0;
+
+  return stmt_cost;
+}
+
 /* Implement targetm.vectorize.add_stmt_cost.  */
 static unsigned
 aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
@@ -12894,6 +12937,9 @@ aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
       int stmt_cost =
            aarch64_builtin_vectorization_cost (kind, vectype, misalign);
 
+      if (stmt_info && vectype && aarch64_sve_mode_p (TYPE_MODE (vectype)))
+        stmt_cost = aarch64_sve_adjust_stmt_cost (kind, stmt_info, stmt_cost);
+
       /* Statements in an inner loop relative to the loop being vectorized
         are weighted more heavily.  The value here is arbitrary and could
         potentially be improved with analysis.  */
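As an illustration of the costing change, not part of the patch and similar
in spirit to the new load_extend tests below: in a loop such as the one
sketched here, the vectorizer's widening conversion of src[i] is costed as a
vector_stmt when the bytes are kept in wider containers (a partial SVE mode),
and the new hook now treats that conversion as free on the assumption that it
folds into an extending LD1B during combine:

#include <stdint.h>

void
accumulate (uint32_t *restrict dst, const uint8_t *restrict src, int n)
{
  for (int i = 0; i < n; ++i)
    dst[i] += src[i];   /* uint8_t -> uint32_t extension folds into LD1B.  */
}
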
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 6a753a2..e13aa9f 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,5 +1,20 @@
 2019-11-16  Richard Sandiford  <richard.sandiford@arm.com>
 
+	* gcc.target/aarch64/sve/load_extend_1.c: New test.
+	* gcc.target/aarch64/sve/load_extend_2.c: Likewise.
+	* gcc.target/aarch64/sve/load_extend_3.c: Likewise.
+	* gcc.target/aarch64/sve/load_extend_4.c: Likewise.
+	* gcc.target/aarch64/sve/load_extend_5.c: Likewise.
+	* gcc.target/aarch64/sve/load_extend_6.c: Likewise.
+	* gcc.target/aarch64/sve/load_extend_7.c: Likewise.
+	* gcc.target/aarch64/sve/load_extend_8.c: Likewise.
+	* gcc.target/aarch64/sve/load_extend_9.c: Likewise.
+	* gcc.target/aarch64/sve/load_extend_10.c: Likewise.
+	* gcc.target/aarch64/sve/reduc_4.c: Add
+	--param aarch64-sve-compare-costs=0.
+
+2019-11-16  Richard Sandiford  <richard.sandiford@arm.com>
+
 	* gcc.target/aarch64/sve/cost_model_1.c: Expect the loop to be
 	vectorized with bytes stored in 32-bit containers.
 	* gcc.target/aarch64/sve/extend_1.c: New test.
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/load_extend_1.c b/gcc/testsuite/gcc.target/aarch64/sve/load_extend_1.c
new file mode 100644
index 0000000..d881fc0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/load_extend_1.c
@@ -0,0 +1,31 @@
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include <stdint.h>
+
+#define TEST_LOOP(TYPE1, TYPE2)					\
+  void								\
+  f_##TYPE1##_##TYPE2 (TYPE1 *restrict dst, TYPE1 *restrict src1,	\
+		       TYPE2 *restrict src2, int n)		\
+  {								\
+    for (int i = 0; i < n; ++i)					\
+      dst[i] += src1[i] + src2[i];				\
+  }
+
+#define TEST_ALL(T)			\
+  T (uint16_t, uint8_t)			\
+  T (uint32_t, uint8_t)			\
+  T (uint64_t, uint8_t)			\
+  T (uint32_t, uint16_t)		\
+  T (uint64_t, uint16_t)		\
+  T (uint64_t, uint32_t)
+
+TEST_ALL (TEST_LOOP)
+
+/* { dg-final { scan-assembler-times {\tld1b\tz[0-9]+\.h,} 1 } } */
+/* { dg-final { scan-assembler-times {\tld1b\tz[0-9]+\.s,} 1 } } */
+/* { dg-final { scan-assembler-times {\tld1b\tz[0-9]+\.d,} 1 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s,} 1 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d,} 1 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d,} 1 } } */
+
+/* { dg-final { scan-assembler-not {\tuxt.\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/load_extend_10.c b/gcc/testsuite/gcc.target/aarch64/sve/load_extend_10.c
new file mode 100644
index 0000000..494c4af
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/load_extend_10.c
@@ -0,0 +1,21 @@
+/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=512" } */
+
+#include <stdint.h>
+
+void
+f1 (uint64_t *dst, uint32_t *src1, int16_t *src2)
+{
+  for (int i = 0; i < 7; ++i)
+    dst[i] += (int32_t) (src1[i] + src2[i]);
+}
+
+/* { dg-final { scan-assembler-times {\tld1sh\tz[0-9]+\.d,} 1 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d,} 1 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tadd\tz} 2 } } */
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tsxt.\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tsxtw\tz[0-9]+\.d,} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/load_extend_2.c b/gcc/testsuite/gcc.target/aarch64/sve/load_extend_2.c
new file mode 100644
index 0000000..a21f2f9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/load_extend_2.c
@@ -0,0 +1,31 @@
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include <stdint.h>
+
+#define TEST_LOOP(TYPE1, TYPE2)					\
+  void								\
+  f_##TYPE1##_##TYPE2 (TYPE1 *restrict dst, TYPE1 *restrict src1,	\
+		       TYPE2 *restrict src2, int n)		\
+  {								\
+    for (int i = 0; i < n; ++i)					\
+      dst[i] += src1[i] + src2[i];				\
+  }
+
+#define TEST_ALL(T)			\
+  T (int16_t, int8_t)			\
+  T (int32_t, int8_t)			\
+  T (int64_t, int8_t)			\
+  T (int32_t, int16_t)			\
+  T (int64_t, int16_t)			\
+  T (int64_t, int32_t)
+
+TEST_ALL (TEST_LOOP)
+
+/* { dg-final { scan-assembler-times {\tld1sb\tz[0-9]+\.h,} 1 } } */
+/* { dg-final { scan-assembler-times {\tld1sb\tz[0-9]+\.s,} 1 } } */
+/* { dg-final { scan-assembler-times {\tld1sb\tz[0-9]+\.d,} 1 } } */
+/* { dg-final { scan-assembler-times {\tld1sh\tz[0-9]+\.s,} 1 } } */
+/* { dg-final { scan-assembler-times {\tld1sh\tz[0-9]+\.d,} 1 } } */
+/* { dg-final { scan-assembler-times {\tld1sw\tz[0-9]+\.d,} 1 } } */
+
+/* { dg-final { scan-assembler-not {\tsxt.\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/load_extend_3.c b/gcc/testsuite/gcc.target/aarch64/sve/load_extend_3.c
new file mode 100644
index 0000000..defd24d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/load_extend_3.c
@@ -0,0 +1,21 @@
+/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=512" } */
+
+#include <stdint.h>
+
+void
+f1 (uint32_t *dst, uint16_t *src1, uint8_t *src2)
+{
+  for (int i = 0; i < 7; ++i)
+    dst[i] += (uint16_t) (src1[i] + src2[i]);
+}
+
+/* { dg-final { scan-assembler-times {\tld1b\tz[0-9]+\.s,} 1 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s,} 1 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tadd\tz} 2 } } */
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tuxt.\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tuxth\tz[0-9]+\.s,} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/load_extend_4.c b/gcc/testsuite/gcc.target/aarch64/sve/load_extend_4.c
new file mode 100644
index 0000000..9fe31ef
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/load_extend_4.c
@@ -0,0 +1,21 @@
+/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=512" } */
+
+#include <stdint.h>
+
+void
+f1 (uint64_t *dst, uint16_t *src1, uint8_t *src2)
+{
+  for (int i = 0; i < 7; ++i)
+    dst[i] += (uint16_t) (src1[i] + src2[i]);
+}
+
+/* { dg-final { scan-assembler-times {\tld1b\tz[0-9]+\.d,} 1 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d,} 1 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tadd\tz} 2 } } */
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tuxt.\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tuxth\tz[0-9]+\.d,} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/load_extend_5.c b/gcc/testsuite/gcc.target/aarch64/sve/load_extend_5.c
new file mode 100644
index 0000000..9876293
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/load_extend_5.c
@@ -0,0 +1,21 @@
+/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=512" } */
+
+#include <stdint.h>
+
+void
+f1 (uint64_t *dst, uint32_t *src1, uint8_t *src2)
+{
+  for (int i = 0; i < 7; ++i)
+    dst[i] += (uint32_t) (src1[i] + src2[i]);
+}
+
+/* { dg-final { scan-assembler-times {\tld1b\tz[0-9]+\.d,} 1 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d,} 1 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tadd\tz} 2 } } */
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tuxt.\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tuxtw\tz[0-9]+\.d,} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/load_extend_6.c b/gcc/testsuite/gcc.target/aarch64/sve/load_extend_6.c
new file mode 100644
index 0000000..8aee86f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/load_extend_6.c
@@ -0,0 +1,21 @@
+/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=512" } */
+
+#include <stdint.h>
+
+void
+f1 (uint64_t *dst, uint32_t *src1, uint16_t *src2)
+{
+  for (int i = 0; i < 7; ++i)
+    dst[i] += (uint32_t) (src1[i] + src2[i]);
+}
+
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d,} 1 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d,} 1 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tadd\tz} 2 } } */
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tuxt.\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tuxtw\tz[0-9]+\.d,} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/load_extend_7.c b/gcc/testsuite/gcc.target/aarch64/sve/load_extend_7.c
new file mode 100644
index 0000000..397f4f6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/load_extend_7.c
@@ -0,0 +1,21 @@
+/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=512" } */
+
+#include <stdint.h>
+
+void
+f1 (uint32_t *dst, uint16_t *src1, int8_t *src2)
+{
+  for (int i = 0; i < 7; ++i)
+    dst[i] += (int16_t) (src1[i] + src2[i]);
+}
+
+/* { dg-final { scan-assembler-times {\tld1sb\tz[0-9]+\.s,} 1 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s,} 1 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tadd\tz} 2 } } */
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tsxt.\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tsxth\tz[0-9]+\.s,} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/load_extend_8.c b/gcc/testsuite/gcc.target/aarch64/sve/load_extend_8.c
new file mode 100644
index 0000000..cb81f79
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/load_extend_8.c
@@ -0,0 +1,21 @@
+/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=512" } */
+
+#include <stdint.h>
+
+void
+f1 (uint64_t *dst, uint16_t *src1, int8_t *src2)
+{
+  for (int i = 0; i < 7; ++i)
+    dst[i] += (int16_t) (src1[i] + src2[i]);
+}
+
+/* { dg-final { scan-assembler-times {\tld1sb\tz[0-9]+\.d,} 1 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d,} 1 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tadd\tz} 2 } } */
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tsxt.\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tsxth\tz[0-9]+\.d,} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/load_extend_9.c b/gcc/testsuite/gcc.target/aarch64/sve/load_extend_9.c
new file mode 100644
index 0000000..10a6b8a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/load_extend_9.c
@@ -0,0 +1,21 @@
+/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=512" } */
+
+#include <stdint.h>
+
+void
+f1 (uint64_t *dst, uint32_t *src1, int8_t *src2)
+{
+  for (int i = 0; i < 7; ++i)
+    dst[i] += (int32_t) (src1[i] + src2[i]);
+}
+
+/* { dg-final { scan-assembler-times {\tld1sb\tz[0-9]+\.d,} 1 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d,} 1 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d,} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tadd\tz} 2 } } */
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tsxt.\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tsxtw\tz[0-9]+\.d,} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/reduc_4.c b/gcc/testsuite/gcc.target/aarch64/sve/reduc_4.c
index eb4b231..b0260c9 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/reduc_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/reduc_4.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math --param aarch64-sve-compare-costs=0" } */
 
 double
 f (double *restrict a, double *restrict b, int *lookup)