author     Tamar Christina <tamar.christina@arm.com>    2021-11-04 17:36:08 +0000
committer  Tamar Christina <tamar.christina@arm.com>    2021-11-04 17:36:08 +0000
commit     1b4a63593bc6b9770789816b205039fdf3cfd3fc (patch)
tree       896a17cffe705e24f789af4b40b694c509b666e1 /gcc/config
parent     d70720c2382e687e192a9d666e80acb41bfda856 (diff)
AArch64: Lower intrinsics shift to GIMPLE when possible.
This lowers shifts to GIMPLE when the C semantics of the shift operation
match those of AArch64.  In C, shifting right by BITSIZE is undefined, but
the behavior is defined in AArch64.  Additionally, a left shift by a
negative amount is undefined in C, but the register variants of the
instruction (SSHL, USHL) define it as being a right shift.  Since we have
a right shift by immediate, I rewrite those cases into right shifts.

So:

  int64x1_t foo3 (int64x1_t a)
  {
    return vshl_s64 (a, vdup_n_s64 (-6));
  }

produces:

  foo3:
          sshr    d0, d0, 6
          ret

instead of:

  foo3:
          mov     x0, -6
          fmov    d1, x0
          sshl    d0, d0, d1
          ret

This behavior isn't specifically mentioned for a left shift by immediate,
but I believe that's only the case because we do have a right shift by
immediate but not a right shift by register.  As such I do the same for
left shift by immediate.

gcc/ChangeLog:

        * config/aarch64/aarch64-builtins.c
        (aarch64_general_gimple_fold_builtin): Add ashl, sshl, ushl, ashr,
        ashr_simd, lshr, lshr_simd.
        * config/aarch64/aarch64-simd-builtins.def (lshr): Use USHIFTIMM.
        * config/aarch64/arm_neon.h (vshr_n_u8, vshr_n_u16, vshr_n_u32,
        vshrq_n_u8, vshrq_n_u16, vshrq_n_u32, vshrq_n_u64): Fix type hack.

gcc/testsuite/ChangeLog:

        * gcc.target/aarch64/advsimd-intrinsics/vshl-opt-1.c: New test.
        * gcc.target/aarch64/advsimd-intrinsics/vshl-opt-2.c: New test.
        * gcc.target/aarch64/advsimd-intrinsics/vshl-opt-3.c: New test.
        * gcc.target/aarch64/advsimd-intrinsics/vshl-opt-4.c: New test.
        * gcc.target/aarch64/advsimd-intrinsics/vshl-opt-5.c: New test.
        * gcc.target/aarch64/advsimd-intrinsics/vshl-opt-6.c: New test.
        * gcc.target/aarch64/advsimd-intrinsics/vshl-opt-7.c: New test.
        * gcc.target/aarch64/advsimd-intrinsics/vshl-opt-8.c: New test.
        * gcc.target/aarch64/signbit-2.c: New test.
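As an illustrative sketch (these are not the new testsuite files, and the
function names are made up): a constant in-range shift like the first
function below is the kind of call the new fold can lower to a plain GIMPLE
shift, while a shift amount equal to the element width, which is undefined
in C, is deliberately left as a builtin call so the AArch64 instruction
semantics still apply.

  #include <arm_neon.h>

  /* In-range immediate: can be lowered to a GIMPLE LSHIFT_EXPR,
     so later passes can see through it.  */
  int32x4_t shl_in_range (int32x4_t a)
  {
    return vshlq_n_s32 (a, 3);
  }

  /* Shift amount >= element width (32): undefined in C, so the
     fold keeps the builtin call and the SSHL semantics.  */
  int32x4_t shl_out_of_range (int32x4_t a)
  {
    return vshlq_s32 (a, vdupq_n_s32 (32));
  }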
Diffstat (limited to 'gcc/config')
-rw-r--r--  gcc/config/aarch64/aarch64-builtins.c          48
-rw-r--r--  gcc/config/aarch64/aarch64-simd-builtins.def    2
-rw-r--r--  gcc/config/aarch64/arm_neon.h                  14
3 files changed, 56 insertions, 8 deletions
diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
index ed91c2b..5053bf0 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -2719,6 +2719,54 @@ aarch64_general_gimple_fold_builtin (unsigned int fcode, gcall *stmt,
1, args[0]);
gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt));
break;
+ BUILTIN_VSDQ_I_DI (BINOP, ashl, 3, NONE)
+ if (TREE_CODE (args[1]) == INTEGER_CST
+ && wi::ltu_p (wi::to_wide (args[1]), element_precision (args[0])))
+ new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
+ LSHIFT_EXPR, args[0], args[1]);
+ break;
+ BUILTIN_VSDQ_I_DI (BINOP, sshl, 0, NONE)
+ BUILTIN_VSDQ_I_DI (BINOP_UUS, ushl, 0, NONE)
+ {
+ tree cst = args[1];
+ tree ctype = TREE_TYPE (cst);
+ /* Left shifts can be both scalar or vector, e.g. uint64x1_t is
+ treated as a scalar type not a vector one. */
+ if ((cst = uniform_integer_cst_p (cst)) != NULL_TREE)
+ {
+ wide_int wcst = wi::to_wide (cst);
+ tree unit_ty = TREE_TYPE (cst);
+
+ wide_int abs_cst = wi::abs (wcst);
+ if (wi::geu_p (abs_cst, element_precision (args[0])))
+ break;
+
+ if (wi::neg_p (wcst, TYPE_SIGN (ctype)))
+ {
+ tree final_cst;
+ final_cst = wide_int_to_tree (unit_ty, abs_cst);
+ if (TREE_CODE (cst) != INTEGER_CST)
+ final_cst = build_uniform_cst (ctype, final_cst);
+
+ new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
+ RSHIFT_EXPR, args[0],
+ final_cst);
+ }
+ else
+ new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
+ LSHIFT_EXPR, args[0], args[1]);
+ }
+ }
+ break;
+ BUILTIN_VDQ_I (SHIFTIMM, ashr, 3, NONE)
+ VAR1 (SHIFTIMM, ashr_simd, 0, NONE, di)
+ BUILTIN_VDQ_I (USHIFTIMM, lshr, 3, NONE)
+ VAR1 (USHIFTIMM, lshr_simd, 0, NONE, di)
+ if (TREE_CODE (args[1]) == INTEGER_CST
+ && wi::ltu_p (wi::to_wide (args[1]), element_precision (args[0])))
+ new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
+ RSHIFT_EXPR, args[0], args[1]);
+ break;
BUILTIN_GPF (BINOP, fmulx, 0, ALL)
{
gcc_assert (nargs == 2);
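As a rough illustration of the sshl/ushl case above (the function names and
the expected instructions are mine, not taken from the patch): a uniform
negative constant shift amount is rewritten into a right shift by its
absolute value, while a variable amount has nothing to fold and keeps the
builtin call.

  #include <arm_neon.h>

  /* Uniform negative amount: folded to an unsigned RSHIFT_EXPR, so
     this should now emit something like "ushr v0.2s, v0.2s, 3"
     instead of materialising a -3 vector and using USHL.  */
  uint32x2_t shr_via_ushl (uint32x2_t a)
  {
    return vshl_u32 (a, vdup_n_s32 (-3));
  }

  /* Variable amount: not a uniform constant, so the USHL builtin
     call is left untouched.  */
  uint32x2_t shl_variable (uint32x2_t a, int32x2_t n)
  {
    return vshl_u32 (a, n);
  }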
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index 6546e91..4a7e2cf 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -436,7 +436,7 @@
BUILTIN_VDQ_I (SHIFTIMM, ashr, 3, NONE)
VAR1 (SHIFTIMM, ashr_simd, 0, NONE, di)
- BUILTIN_VDQ_I (SHIFTIMM, lshr, 3, NONE)
+ BUILTIN_VDQ_I (USHIFTIMM, lshr, 3, NONE)
VAR1 (USHIFTIMM, lshr_simd, 0, NONE, di)
/* Implemented by aarch64_<sur>shr_n<mode>. */
BUILTIN_VSDQ_I_DI (SHIFTIMM, srshr_n, 0, NONE)
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 9838c39..398a2e3 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -24128,21 +24128,21 @@ __extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vshr_n_u8 (uint8x8_t __a, const int __b)
{
- return (uint8x8_t) __builtin_aarch64_lshrv8qi ((int8x8_t) __a, __b);
+ return __builtin_aarch64_lshrv8qi_uus (__a, __b);
}
__extension__ extern __inline uint16x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vshr_n_u16 (uint16x4_t __a, const int __b)
{
- return (uint16x4_t) __builtin_aarch64_lshrv4hi ((int16x4_t) __a, __b);
+ return __builtin_aarch64_lshrv4hi_uus (__a, __b);
}
__extension__ extern __inline uint32x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vshr_n_u32 (uint32x2_t __a, const int __b)
{
- return (uint32x2_t) __builtin_aarch64_lshrv2si ((int32x2_t) __a, __b);
+ return __builtin_aarch64_lshrv2si_uus (__a, __b);
}
__extension__ extern __inline uint64x1_t
@@ -24184,28 +24184,28 @@ __extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vshrq_n_u8 (uint8x16_t __a, const int __b)
{
- return (uint8x16_t) __builtin_aarch64_lshrv16qi ((int8x16_t) __a, __b);
+ return __builtin_aarch64_lshrv16qi_uus (__a, __b);
}
__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vshrq_n_u16 (uint16x8_t __a, const int __b)
{
- return (uint16x8_t) __builtin_aarch64_lshrv8hi ((int16x8_t) __a, __b);
+ return __builtin_aarch64_lshrv8hi_uus (__a, __b);
}
__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vshrq_n_u32 (uint32x4_t __a, const int __b)
{
- return (uint32x4_t) __builtin_aarch64_lshrv4si ((int32x4_t) __a, __b);
+ return __builtin_aarch64_lshrv4si_uus (__a, __b);
}
__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vshrq_n_u64 (uint64x2_t __a, const int __b)
{
- return (uint64x2_t) __builtin_aarch64_lshrv2di ((int64x2_t) __a, __b);
+ return __builtin_aarch64_lshrv2di_uus (__a, __b);
}
__extension__ extern __inline int64_t
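To illustrate the arm_neon.h change (the example function is mine): the
unsigned vshr_n/vshrq_n intrinsics now call the _uus builtins directly
instead of casting through the signed vector types, so together with the
fold above a constant unsigned right shift is visible to the middle end as
a plain unsigned shift.

  #include <arm_neon.h>

  /* Extracts the most significant bit of each byte; with the fold
     above this should reduce to a single "ushr v0.16b, v0.16b, 7"
     and be known to later passes to produce only 0 or 1.  */
  uint8x16_t msb_only (uint8x16_t a)
  {
    return vshrq_n_u8 (a, 7);
  }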