aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJonathan Wright <jonathan.wright@arm.com>2021-06-14 13:16:35 +0100
committerJonathan Wright <jonathan.wright@arm.com>2021-06-16 14:22:08 +0100
commitc86a3039683a8d2bb1006c1a0277678de3786ceb (patch)
treed13b0ffb47513e9680d11ecf13fbcace8cc1f25f
parentd8a88cdae9c0c42ab7c5c65a5043c4f8bad349d2 (diff)
downloadgcc-c86a3039683a8d2bb1006c1a0277678de3786ceb.zip
gcc-c86a3039683a8d2bb1006c1a0277678de3786ceb.tar.gz
gcc-c86a3039683a8d2bb1006c1a0277678de3786ceb.tar.bz2
aarch64: Model zero-high-half semantics of SQXTUN instruction in RTL
Split the aarch64_sqmovun<mode> pattern into separate scalar and vector variants. Further split the vector pattern into big/little endian variants that model the zero-high-half semantics of the underlying instruction. Modeling these semantics allows for better RTL combinations while also removing some register allocation issues as the compiler now knows that the operation is totally destructive. Add new tests to narrow_zero_high_half.c to verify the benefit of this change. gcc/ChangeLog: 2021-06-14 Jonathan Wright <jonathan.wright@arm.com> * config/aarch64/aarch64-simd-builtins.def: Split generator for aarch64_sqmovun builtins into scalar and vector variants. * config/aarch64/aarch64-simd.md (aarch64_sqmovun<mode>): Split into scalar and vector variants. Change vector variant to an expander that emits the correct instruction depending on endianness. (aarch64_sqmovun<mode>_insn_le): Define. (aarch64_sqmovun<mode>_insn_be): Define. gcc/testsuite/ChangeLog: * gcc.target/aarch64/narrow_zero_high_half.c: Add new tests.
-rw-r--r--gcc/config/aarch64/aarch64-simd-builtins.def4
-rw-r--r--gcc/config/aarch64/aarch64-simd.md66
-rw-r--r--gcc/testsuite/gcc.target/aarch64/narrow_zero_high_half.c5
3 files changed, 63 insertions, 12 deletions
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index 18baa67..2adb4b1 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -263,7 +263,9 @@
BUILTIN_VQ_HSI (TERNOP, smlal_hi_n, 0, NONE)
BUILTIN_VQ_HSI (TERNOPU, umlal_hi_n, 0, NONE)
- BUILTIN_VSQN_HSDI (UNOPUS, sqmovun, 0, NONE)
+ /* Implemented by aarch64_sqmovun<mode>. */
+ BUILTIN_VQN (UNOPUS, sqmovun, 0, NONE)
+ BUILTIN_SD_HSDI (UNOPUS, sqmovun, 0, NONE)
/* Implemented by aarch64_sqxtun2<mode>. */
BUILTIN_VQN (BINOP_UUS, sqxtun2, 0, NONE)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index b23556b..59779b8 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -4870,17 +4870,6 @@
[(set_attr "type" "neon_qadd<q>")]
)
-;; sqmovun
-
-(define_insn "aarch64_sqmovun<mode>"
- [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
- (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
- UNSPEC_SQXTUN))]
- "TARGET_SIMD"
- "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
- [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
-)
-
;; sqmovn and uqmovn
(define_insn "aarch64_<su>qmovn<mode>"
@@ -4931,6 +4920,61 @@
}
)
+;; sqmovun
+
+(define_insn "aarch64_sqmovun<mode>"
+ [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
+ (unspec:<VNARROWQ> [(match_operand:SD_HSDI 1 "register_operand" "w")]
+ UNSPEC_SQXTUN))]
+ "TARGET_SIMD"
+ "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
+ [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
+)
+
+(define_insn "aarch64_sqmovun<mode>_insn_le"
+ [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
+ (vec_concat:<VNARROWQ2>
+ (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")]
+ UNSPEC_SQXTUN)
+ (match_operand:<VNARROWQ> 2 "aarch64_simd_or_scalar_imm_zero")))]
+ "TARGET_SIMD && !BYTES_BIG_ENDIAN"
+ "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
+ [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
+)
+
+(define_insn "aarch64_sqmovun<mode>_insn_be"
+ [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
+ (vec_concat:<VNARROWQ2>
+ (match_operand:<VNARROWQ> 2 "aarch64_simd_or_scalar_imm_zero")
+ (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")]
+ UNSPEC_SQXTUN)))]
+ "TARGET_SIMD && BYTES_BIG_ENDIAN"
+ "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
+ [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
+)
+
+(define_expand "aarch64_sqmovun<mode>"
+ [(set (match_operand:<VNARROWQ> 0 "register_operand")
+ (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand")]
+ UNSPEC_SQXTUN))]
+ "TARGET_SIMD"
+ {
+ rtx tmp = gen_reg_rtx (<VNARROWQ2>mode);
+ if (BYTES_BIG_ENDIAN)
+ emit_insn (gen_aarch64_sqmovun<mode>_insn_be (tmp, operands[1],
+ CONST0_RTX (<VNARROWQ>mode)));
+ else
+ emit_insn (gen_aarch64_sqmovun<mode>_insn_le (tmp, operands[1],
+ CONST0_RTX (<VNARROWQ>mode)));
+
+ /* The intrinsic expects a narrow result, so emit a subreg that will get
+ optimized away as appropriate. */
+ emit_move_insn (operands[0], lowpart_subreg (<VNARROWQ>mode, tmp,
+ <VNARROWQ2>mode));
+ DONE;
+ }
+)
+
(define_insn "aarch64_sqxtun2<mode>_le"
[(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
(vec_concat:<VNARROWQ2>
diff --git a/gcc/testsuite/gcc.target/aarch64/narrow_zero_high_half.c b/gcc/testsuite/gcc.target/aarch64/narrow_zero_high_half.c
index 451b011..53e03d3 100644
--- a/gcc/testsuite/gcc.target/aarch64/narrow_zero_high_half.c
+++ b/gcc/testsuite/gcc.target/aarch64/narrow_zero_high_half.c
@@ -63,6 +63,10 @@ TEST_UNARY (vmovn, uint8x16_t, uint16x8_t, u16, u8)
TEST_UNARY (vmovn, uint16x8_t, uint32x4_t, u32, u16)
TEST_UNARY (vmovn, uint32x4_t, uint64x2_t, u64, u32)
+TEST_UNARY (vqmovun, uint8x16_t, int16x8_t, s16, u8)
+TEST_UNARY (vqmovun, uint16x8_t, int32x4_t, s32, u16)
+TEST_UNARY (vqmovun, uint32x4_t, int64x2_t, s64, u32)
+
/* { dg-final { scan-assembler-not "dup\\t" } } */
/* { dg-final { scan-assembler-times "\\tshrn\\tv" 6} } */
@@ -74,3 +78,4 @@ TEST_UNARY (vmovn, uint32x4_t, uint64x2_t, u64, u32)
/* { dg-final { scan-assembler-times "\\tsqshrun\\tv" 3} } */
/* { dg-final { scan-assembler-times "\\tsqrshrun\\tv" 3} } */
/* { dg-final { scan-assembler-times "\\txtn\\tv" 6} } */
+/* { dg-final { scan-assembler-times "\\tsqxtun\\tv" 3} } */