author     Andre Simoes Dias Vieira <andre.simoesdiasvieira@arm.com>  2021-10-20 13:19:10 +0100
committer  Andre Vieira <andre.simoesdiasvieira@arm.com>  2021-10-20 13:22:27 +0100
commit     ad44c6a56c777bd1eddb214095fff36c8dba9246 (patch)
tree       2a2df53ba2c33f382cfeb88da41b8c1c473183a6 /gcc/config
parent     914045dff10fbd27de27b90a0ac78a0058b2c86e (diff)
[Patch][GCC][AArch64] - Lower store and load neon builtins to gimple
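This fold lets the compiler treat the vld1/vst1 Neon load and store builtins as ordinary GIMPLE memory references (on little-endian targets only), so later tree passes can optimise through them instead of seeing opaque builtin calls. A minimal illustration of the kind of code affected follows; the snippet and the GIMPLE quoted in its comment are editorial sketches, not part of the patch:

/* Illustrative only.  After this change, on little-endian AArch64 the
   ld1/st1 builtins behind these intrinsics are expected to fold to
   plain vector memory accesses in the GIMPLE dumps, roughly:

     _1 = MEM <int32x4_t> [(int *)in];
     MEM <int32x4_t> [(int *)out] = _1;

   rather than remaining opaque builtin calls.  */
#include <arm_neon.h>

void
copy_q_s32 (int32_t *out, const int32_t *in)
{
  vst1q_s32 (out, vld1q_s32 (in));
}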
20-10-2021 Andre Vieira <andre.simoesdiasvieira@arm.com>
Jirui Wu <jirui.wu@arm.com>
gcc/ChangeLog:
* config/aarch64/aarch64-builtins.c
(aarch64_general_gimple_fold_builtin): Lower vld1 and vst1 variants
of the neon builtins.
* config/aarch64/aarch64-protos.h
(aarch64_general_gimple_fold_builtin): Add gsi parameter.
* config/aarch64/aarch64.c (aarch64_general_gimple_fold_builtin):
Likewise.
gcc/testsuite/ChangeLog:
* gcc.target/aarch64/fmla_intrinsic_1.c: Prevent over-optimization.
* gcc.target/aarch64/fmls_intrinsic_1.c: Likewise.
* gcc.target/aarch64/fmul_intrinsic_1.c: Likewise.
* gcc.target/aarch64/mla_intrinsic_1.c: Likewise.
* gcc.target/aarch64/mls_intrinsic_1.c: Likewise.
* gcc.target/aarch64/mul_intrinsic_1.c: Likewise.
* gcc.target/aarch64/simd/vmul_elem_1.c: Likewise.
* gcc.target/aarch64/vclz.c: Likewise.
* gcc.target/aarch64/vneg_s.c: Likewise.
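(On "prevent over-optimization" above: the testsuite hunks fall outside the gcc/config diffstat shown below. As a purely illustrative sketch of the usual technique, not the actual change made to those files, inputs to the intrinsic under test are routed through a wrapper the IPA optimisers cannot look through, so the expected instruction still reaches the assembly that dg-final scans:)

/* Hypothetical sketch, not taken from this patch: a noipa wrapper keeps
   constant propagation and inlining from folding the intrinsic away now
   that its load/store builtins are visible as plain GIMPLE.  */
#include <arm_neon.h>

__attribute__ ((noipa)) int32x4_t
mla_wrapper (int32x4_t acc, int32x4_t a, int32x4_t b)
{
  return vmlaq_s32 (acc, a, b);   /* expect an mla instruction here */
}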
Diffstat (limited to 'gcc/config')
-rw-r--r--   gcc/config/aarch64/aarch64-builtins.c | 103
-rw-r--r--   gcc/config/aarch64/aarch64-protos.h   |   3
-rw-r--r--   gcc/config/aarch64/aarch64.c          |   2
3 files changed, 105 insertions, 3 deletions
diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
index 1a507ea..a815e4c 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -46,6 +46,7 @@
 #include "emit-rtl.h"
 #include "stringpool.h"
 #include "attribs.h"
+#include "gimple-fold.h"
 
 #define v8qi_UP  E_V8QImode
 #define v4hi_UP  E_V4HImode
@@ -2399,11 +2400,65 @@ aarch64_general_fold_builtin (unsigned int fcode, tree type,
   return NULL_TREE;
 }
 
+enum aarch64_simd_type
+get_mem_type_for_load_store (unsigned int fcode)
+{
+  switch (fcode)
+  {
+    VAR1 (LOAD1, ld1 , 0, LOAD, v8qi)
+    VAR1 (STORE1, st1 , 0, STORE, v8qi)
+      return Int8x8_t;
+    VAR1 (LOAD1, ld1 , 0, LOAD, v16qi)
+    VAR1 (STORE1, st1 , 0, STORE, v16qi)
+      return Int8x16_t;
+    VAR1 (LOAD1, ld1 , 0, LOAD, v4hi)
+    VAR1 (STORE1, st1 , 0, STORE, v4hi)
+      return Int16x4_t;
+    VAR1 (LOAD1, ld1 , 0, LOAD, v8hi)
+    VAR1 (STORE1, st1 , 0, STORE, v8hi)
+      return Int16x8_t;
+    VAR1 (LOAD1, ld1 , 0, LOAD, v2si)
+    VAR1 (STORE1, st1 , 0, STORE, v2si)
+      return Int32x2_t;
+    VAR1 (LOAD1, ld1 , 0, LOAD, v4si)
+    VAR1 (STORE1, st1 , 0, STORE, v4si)
+      return Int32x4_t;
+    VAR1 (LOAD1, ld1 , 0, LOAD, v2di)
+    VAR1 (STORE1, st1 , 0, STORE, v2di)
+      return Int64x2_t;
+    VAR1 (LOAD1, ld1 , 0, LOAD, v4hf)
+    VAR1 (STORE1, st1 , 0, STORE, v4hf)
+      return Float16x4_t;
+    VAR1 (LOAD1, ld1 , 0, LOAD, v8hf)
+    VAR1 (STORE1, st1 , 0, STORE, v8hf)
+      return Float16x8_t;
+    VAR1 (LOAD1, ld1 , 0, LOAD, v4bf)
+    VAR1 (STORE1, st1 , 0, STORE, v4bf)
+      return Bfloat16x4_t;
+    VAR1 (LOAD1, ld1 , 0, LOAD, v8bf)
+    VAR1 (STORE1, st1 , 0, STORE, v8bf)
+      return Bfloat16x8_t;
+    VAR1 (LOAD1, ld1 , 0, LOAD, v2sf)
+    VAR1 (STORE1, st1 , 0, STORE, v2sf)
+      return Float32x2_t;
+    VAR1 (LOAD1, ld1 , 0, LOAD, v4sf)
+    VAR1 (STORE1, st1 , 0, STORE, v4sf)
+      return Float32x4_t;
+    VAR1 (LOAD1, ld1 , 0, LOAD, v2df)
+    VAR1 (STORE1, st1 , 0, STORE, v2df)
+      return Float64x2_t;
+    default:
+      gcc_unreachable ();
+      break;
+  }
+}
+
 /* Try to fold STMT, given that it's a call to the built-in function
    with subcode FCODE.  Return the new statement on success and null
    on failure.  */
 gimple *
-aarch64_general_gimple_fold_builtin (unsigned int fcode, gcall *stmt)
+aarch64_general_gimple_fold_builtin (unsigned int fcode, gcall *stmt,
+                                     gimple_stmt_iterator *gsi)
 {
   gimple *new_stmt = NULL;
   unsigned nargs = gimple_call_num_args (stmt);
@@ -2421,6 +2476,52 @@ aarch64_general_gimple_fold_builtin (unsigned int fcode, gcall *stmt)
                                              1, args[0]);
       gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt));
       break;
+
+      /*lower store and load neon builtins to gimple.  */
+    BUILTIN_VALL_F16 (LOAD1, ld1, 0, LOAD)
+      if (!BYTES_BIG_ENDIAN)
+        {
+          enum aarch64_simd_type mem_type
+            = get_mem_type_for_load_store(fcode);
+          aarch64_simd_type_info simd_type
+            = aarch64_simd_types[mem_type];
+          tree elt_ptr_type = build_pointer_type (simd_type.eltype);
+          tree zero = build_zero_cst (elt_ptr_type);
+          gimple_seq stmts = NULL;
+          tree base = gimple_convert (&stmts, elt_ptr_type,
+                                      args[0]);
+          if (stmts)
+            gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
+          new_stmt
+            = gimple_build_assign (gimple_get_lhs (stmt),
+                                   fold_build2 (MEM_REF,
+                                                simd_type.itype,
+                                                base, zero));
+        }
+      break;
+
+    BUILTIN_VALL_F16 (STORE1, st1, 0, STORE)
+      if (!BYTES_BIG_ENDIAN)
+        {
+          enum aarch64_simd_type mem_type
+            = get_mem_type_for_load_store(fcode);
+          aarch64_simd_type_info simd_type
+            = aarch64_simd_types[mem_type];
+          tree elt_ptr_type = build_pointer_type (simd_type.eltype);
+          tree zero = build_zero_cst (elt_ptr_type);
+          gimple_seq stmts = NULL;
+          tree base = gimple_convert (&stmts, elt_ptr_type,
+                                      args[0]);
+          if (stmts)
+            gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
+          new_stmt
+            = gimple_build_assign (fold_build2 (MEM_REF,
+                                                simd_type.itype,
+                                                base,
+                                                zero), args[1]);
+        }
+      break;
+
     BUILTIN_VDQIF (UNOP, reduc_smax_scal_, 10, ALL)
     BUILTIN_VDQ_BHSI (UNOPU, reduc_umax_scal_, 10, ALL)
       new_stmt = gimple_build_call_internal (IFN_REDUC_MAX,
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index b91eeeb..768e8fa 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -962,7 +962,8 @@ void aarch64_override_options_internal (struct gcc_options *);
 const char *aarch64_general_mangle_builtin_type (const_tree);
 void aarch64_general_init_builtins (void);
 tree aarch64_general_fold_builtin (unsigned int, tree, unsigned int, tree *);
-gimple *aarch64_general_gimple_fold_builtin (unsigned int, gcall *);
+gimple *aarch64_general_gimple_fold_builtin (unsigned int, gcall *,
+                                             gimple_stmt_iterator *);
 rtx aarch64_general_expand_builtin (unsigned int, tree, rtx, int);
 tree aarch64_general_builtin_decl (unsigned, bool);
 tree aarch64_general_builtin_rsqrt (unsigned int);
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index fdf3418..730607f 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -14156,7 +14156,7 @@ aarch64_gimple_fold_builtin (gimple_stmt_iterator *gsi)
   switch (code & AARCH64_BUILTIN_CLASS)
     {
     case AARCH64_BUILTIN_GENERAL:
-      new_stmt = aarch64_general_gimple_fold_builtin (subcode, stmt);
+      new_stmt = aarch64_general_gimple_fold_builtin (subcode, stmt, gsi);
       break;
     case AARCH64_BUILTIN_SVE:
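One way to observe the new folding (a hypothetical test sketch in dejagnu style, not a file touched by this commit) is to scan the optimized tree dump for the vector MEM access that replaces the ld1 builtin call:

/* { dg-do compile } */
/* { dg-options "-O2 -fdump-tree-optimized" } */

#include <arm_neon.h>

float32x4_t
load_one (const float32_t *p)
{
  return vld1q_f32 (p);   /* folded to a vector MEM read on little-endian */
}

/* The exact spelling of the vector type in the dump may vary, so only
   the MEM access itself is scanned for here.  */
/* { dg-final { scan-tree-dump "MEM" "optimized" } } */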