aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Jonathan Wright <jonathan.wright@arm.com> 2021-03-04 12:36:09 +0000
committer: Jonathan Wright <jonathan.wright@arm.com> 2021-05-19 14:44:10 +0100
commit: 4e26303e0b90038473e3d7490dc0369a74866b1b (patch)
tree: e478d5f9b8d2f3e54b637500bb381e3d96b459a1
parent: 3eddaad02dcce21fb67c42cc6e1e8f951a630ac1 (diff)
download: gcc-4e26303e0b90038473e3d7490dc0369a74866b1b.zip
gcc-4e26303e0b90038473e3d7490dc0369a74866b1b.tar.gz
gcc-4e26303e0b90038473e3d7490dc0369a74866b1b.tar.bz2
aarch64: Relax aarch64_<sur>q<r>shr<u>n2_n<mode> RTL pattern
Implement saturating right-shift and narrow high Neon intrinsic RTL
patterns using a vec_concat of a register_operand and a VQSHRN_N
unspec - instead of just a VQSHRN_N unspec. This more relaxed pattern
allows for more aggressive combinations and ultimately better code
generation.

gcc/ChangeLog:

2021-03-04  Jonathan Wright  <jonathan.wright@arm.com>

	* config/aarch64/aarch64-simd.md (aarch64_<sur>q<r>shr<u>n2_n<mode>):
	Implement as an expand emitting a big/little endian
	instruction pattern.
	(aarch64_<sur>q<r>shr<u>n2_n<mode>_insn_le): Define.
	(aarch64_<sur>q<r>shr<u>n2_n<mode>_insn_be): Define.
-rw-r--r-- gcc/config/aarch64/aarch64-simd.md 49
1 files changed, 43 insertions, 6 deletions
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 1efc854..5473d61 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -6054,17 +6054,54 @@
[(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)
-(define_insn "aarch64_<sur>q<r>shr<u>n2_n<mode>"
+(define_insn "aarch64_<sur>q<r>shr<u>n2_n<mode>_insn_le"
[(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
- (unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
- (match_operand:VQN 2 "register_operand" "w")
- (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
- VQSHRN_N))]
- "TARGET_SIMD"
+ (vec_concat:<VNARROWQ2>
+ (match_operand:<VNARROWQ> 1 "register_operand" "0")
+ (unspec:<VNARROWQ> [(match_operand:VQN 2 "register_operand" "w")
+ (match_operand:VQN 3
+ "aarch64_simd_shift_imm_vec_<vn_mode>")]
+ VQSHRN_N)))]
+ "TARGET_SIMD && !BYTES_BIG_ENDIAN"
"<sur>q<r>shr<u>n2\\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
[(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)
+(define_insn "aarch64_<sur>q<r>shr<u>n2_n<mode>_insn_be"
+ [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
+ (vec_concat:<VNARROWQ2>
+ (unspec:<VNARROWQ> [(match_operand:VQN 2 "register_operand" "w")
+ (match_operand:VQN 3
+ "aarch64_simd_shift_imm_vec_<vn_mode>")]
+ VQSHRN_N)
+ (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
+ "TARGET_SIMD && BYTES_BIG_ENDIAN"
+ "<sur>q<r>shr<u>n2\\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
+ [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
+)
+
+(define_expand "aarch64_<sur>q<r>shr<u>n2_n<mode>"
+ [(match_operand:<VNARROWQ2> 0 "register_operand")
+ (match_operand:<VNARROWQ> 1 "register_operand")
+ (unspec:<VNARROWQ>
+ [(match_operand:VQN 2 "register_operand")
+ (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<vn_mode>")]
+ VQSHRN_N)]
+ "TARGET_SIMD"
+ {
+ operands[3] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
+ INTVAL (operands[3]));
+
+ if (BYTES_BIG_ENDIAN)
+ emit_insn (gen_aarch64_<sur>q<r>shr<u>n2_n<mode>_insn_be (operands[0],
+ operands[1], operands[2], operands[3]));
+ else
+ emit_insn (gen_aarch64_<sur>q<r>shr<u>n2_n<mode>_insn_le (operands[0],
+ operands[1], operands[2], operands[3]));
+ DONE;
+ }
+)
+
;; cm(eq|ge|gt|lt|le)
;; Note, we have constraints for Dz and Z as different expanders