author     Richard Sandiford <richard.sandiford@arm.com>    2020-09-30 11:52:06 +0100
committer  Richard Sandiford <richard.sandiford@arm.com>    2020-09-30 11:52:06 +0100
commit     d4f9e81976066e1d67c8dd5ddaf24ebe3b0695ed (patch)
tree       8707fbaee9b6f6dba8166470df3b42c3aa1e01b5 /gcc
parent     f63023fafbbc13545fef67e6b32a55b48a945fcf (diff)
aarch64: Tweak movti and movtf patterns
movti lacked a way of zeroing an FPR, meaning that we'd do:

        mov     x0, 0
        mov     x1, 0
        fmov    d0, x0
        fmov    v0.d[1], x1

instead of just:

        movi    v0.2d, #0

movtf had the opposite problem for GPRs: we'd generate:

        movi    v0.2d, #0
        fmov    x0, d0
        fmov    x1, v0.d[1]

instead of just:

        mov     x0, 0
        mov     x1, 0

Also, there was an unnecessary earlyclobber on the GPR<-GPR movtf
alternative (but not the movti one).  The splitter handles overlap
correctly.

The TF splitter used aarch64_reg_or_imm, but the _imm part only
accepts integer constants, not floating-point ones.  The patch
changes it to nonmemory_operand instead.

gcc/
	* config/aarch64/aarch64.c (aarch64_split_128bit_move_p): Add a
	function comment.  Tighten check for FP moves.
	* config/aarch64/aarch64.md (*movti_aarch64): Add a w<-Z
	alternative.
	(*movtf_aarch64): Handle r<-Y like r<-r.  Remove unnecessary
	earlyclobber.  Change splitter predicate from aarch64_reg_or_imm
	to nonmemory_operand.

gcc/testsuite/
	* gcc.target/aarch64/movtf_1.c: New test.
	* gcc.target/aarch64/movti_1.c: Likewise.
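An illustrative reproducer for the FPR-zeroing case (not part of the patch;
it mirrors the zero_q test added below): compiled with -O for aarch64, the
body should now assemble to the single movi above rather than the
four-instruction sequence.

	/* Force a zeroed __int128 into an FP/SIMD register.  With the new
	   w<-Z movti alternative, the zeroing is done directly with
	   "movi v0.2d, #0" instead of two GPR moves plus two fmovs.  */
	void
	zero_q (void)
	{
	  register __int128_t q0 asm ("q0");
	  q0 = 0;
	  asm volatile ("" :: "w" (q0));
	}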
Diffstat (limited to 'gcc')
-rw-r--r--  gcc/config/aarch64/aarch64.c               |  9
-rw-r--r--  gcc/config/aarch64/aarch64.md              | 17
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/movtf_1.c | 87
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/movti_1.c | 87
4 files changed, 190 insertions, 10 deletions
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 491fc58..9e88438 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -3422,11 +3422,16 @@ aarch64_split_128bit_move (rtx dst, rtx src)
     }
 }
 
+/* Return true if we should split a move from 128-bit value SRC
+   to 128-bit register DEST.  */
+
 bool
 aarch64_split_128bit_move_p (rtx dst, rtx src)
 {
-  return (! REG_P (src)
-	  || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
+  if (FP_REGNUM_P (REGNO (dst)))
+    return REG_P (src) && !FP_REGNUM_P (REGNO (src));
+  /* All moves to GPRs need to be split.  */
+  return true;
 }
 
 /* Split a complex SIMD combine.  */
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 19ec9e3..78fe7c43 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -1361,13 +1361,14 @@
 
 (define_insn "*movti_aarch64"
   [(set (match_operand:TI 0
-	 "nonimmediate_operand"  "=   r,w, r,w,r,m,m,w,m")
+	 "nonimmediate_operand"  "=   r,w,w, r,w,r,m,m,w,m")
 	(match_operand:TI 1
-	 "aarch64_movti_operand" " rUti,r, w,w,m,r,Z,m,w"))]
+	 "aarch64_movti_operand" " rUti,Z,r, w,w,m,r,Z,m,w"))]
   "(register_operand (operands[0], TImode)
     || aarch64_reg_or_zero (operands[1], TImode))"
   "@
    #
+   movi\\t%0.2d, #0
    #
    #
    mov\\t%0.16b, %1.16b
@@ -1376,11 +1377,11 @@
    stp\\txzr, xzr, %0
    ldr\\t%q0, %1
    str\\t%q1, %0"
-  [(set_attr "type" "multiple,f_mcr,f_mrc,neon_logic_q, \
+  [(set_attr "type" "multiple,neon_move,f_mcr,f_mrc,neon_logic_q, \
 		     load_16,store_16,store_16,\
 		     load_16,store_16")
-   (set_attr "length" "8,8,8,4,4,4,4,4,4")
-   (set_attr "arch" "*,*,*,simd,*,*,*,fp,fp")]
+   (set_attr "length" "8,4,8,8,4,4,4,4,4,4")
+   (set_attr "arch" "*,simd,*,*,simd,*,*,*,fp,fp")]
 )
 
 ;; Split a TImode register-register or register-immediate move into
@@ -1511,9 +1512,9 @@
 
 (define_insn "*movtf_aarch64"
   [(set (match_operand:TF 0
-	 "nonimmediate_operand" "=w,?&r,w ,?r,w,?w,w,m,?r,m ,m")
+	 "nonimmediate_operand" "=w,?r ,w ,?r,w,?w,w,m,?r,m ,m")
 	(match_operand:TF 1
-	 "general_operand"      " w,?r, ?r,w ,Y,Y ,m,w,m ,?r,Y"))]
+	 "general_operand"      " w,?rY,?r,w ,Y,Y ,m,w,m ,?r,Y"))]
   "TARGET_FLOAT && (register_operand (operands[0], TFmode)
     || aarch64_reg_or_fp_zero (operands[1], TFmode))"
   "@
@@ -1536,7 +1537,7 @@
 
 (define_split
   [(set (match_operand:TF 0 "register_operand" "")
-	(match_operand:TF 1 "aarch64_reg_or_imm" ""))]
+	(match_operand:TF 1 "nonmemory_operand" ""))]
   "reload_completed && aarch64_split_128bit_move_p (operands[0], operands[1])"
   [(const_int 0)]
 {
diff --git a/gcc/testsuite/gcc.target/aarch64/movtf_1.c b/gcc/testsuite/gcc.target/aarch64/movtf_1.c
new file mode 100644
index 0000000..570de93
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/movtf_1.c
@@ -0,0 +1,87 @@
+/* { dg-do compile } */
+/* { dg-options "-O" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+/*
+** zero_q:
+**	movi	v0.2d, #0
+**	ret
+*/
+void
+zero_q ()
+{
+  register _Float128 q0 asm ("q0");
+  q0 = 0;
+  asm volatile ("" :: "w" (q0));
+}
+
+/*
+** load_q:
+**	ldr	q0, \[x0\]
+**	ret
+*/
+void
+load_q (_Float128 *ptr)
+{
+  register _Float128 q0 asm ("q0");
+  q0 = *ptr;
+  asm volatile ("" :: "w" (q0));
+}
+
+/*
+** store_q:
+**	str	q0, \[x0\]
+**	ret
+*/
+void
+store_q (_Float128 *ptr)
+{
+  register _Float128 q0 asm ("q0");
+  asm volatile ("" : "=w" (q0));
+  *ptr = q0;
+}
+
+/*
+** zero_x:
+** (
+**	mov	x0, #?0
+**	mov	x1, #?0
+** |
+**	mov	x1, #?0
+**	mov	x0, #?0
+** )
+**	ret
+*/
+void
+zero_x ()
+{
+  register _Float128 x0 asm ("x0");
+  x0 = 0;
+  asm volatile ("" :: "r" (x0));
+}
+
+/*
+** load_x:
+**	ldp	x2, x3, \[x0\]
+**	ret
+*/
+void
+load_x (_Float128 *ptr)
+{
+  register _Float128 x2 asm ("x2");
+  x2 = *ptr;
+  asm volatile ("" :: "r" (x2));
+}
+
+/*
+** store_x:
+**	stp	x2, x3, \[x0\]
+**	ret
+*/
+void
+store_x (_Float128 *ptr)
+{
+  register _Float128 x2 asm ("x2");
+  asm volatile ("" : "=r" (x2));
+  *ptr = x2;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/movti_1.c b/gcc/testsuite/gcc.target/aarch64/movti_1.c
new file mode 100644
index 0000000..160e1ac
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/movti_1.c
@@ -0,0 +1,87 @@
+/* { dg-do compile } */
+/* { dg-options "-O" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+/*
+** zero_q:
+**	movi	v0.2d, #0
+**	ret
+*/
+void
+zero_q ()
+{
+  register __int128_t q0 asm ("q0");
+  q0 = 0;
+  asm volatile ("" :: "w" (q0));
+}
+
+/*
+** load_q:
+**	ldr	q0, \[x0\]
+**	ret
+*/
+void
+load_q (__int128_t *ptr)
+{
+  register __int128_t q0 asm ("q0");
+  q0 = *ptr;
+  asm volatile ("" :: "w" (q0));
+}
+
+/*
+** store_q:
+**	str	q0, \[x0\]
+**	ret
+*/
+void
+store_q (__int128_t *ptr)
+{
+  register __int128_t q0 asm ("q0");
+  asm volatile ("" : "=w" (q0));
+  *ptr = q0;
+}
+
+/*
+** zero_x:
+** (
+**	mov	x0, #?0
+**	mov	x1, #?0
+** |
+**	mov	x1, #?0
+**	mov	x0, #?0
+** )
+**	ret
+*/
+void
+zero_x ()
+{
+  register __int128_t x0 asm ("x0");
+  x0 = 0;
+  asm volatile ("" :: "r" (x0));
+}
+
+/*
+** load_x:
+**	ldp	x2, x3, \[x0\]
+**	ret
+*/
+void
+load_x (__int128_t *ptr)
+{
+  register __int128_t x2 asm ("x2");
+  x2 = *ptr;
+  asm volatile ("" :: "r" (x2));
+}
+
+/*
+** store_x:
+**	stp	x2, x3, \[x0\]
+**	ret
+*/
+void
+store_x (__int128_t *ptr)
+{
+  register __int128_t x2 asm ("x2");
+  asm volatile ("" : "=r" (x2));
+  *ptr = x2;
+}