AArch64: Block combine_and_move from creating FP literal loads

The IRA combine_and_move pass runs if the scheduler is disabled and aggressively combines moves. The movsf/df patterns allow all FP immediates since they rely on a split pattern. However splits do not happen during IRA, so the result is extra literal loads. To avoid this, split early during expand and block creation of FP immediates that need this split. Mark a few testcases that rely on late splitting as xfail. double f(void) { return 128.0; } -O2 -fno-schedule-insns gives: adrp x0, .LC0 ldr d0, [x0, #:lo12:.LC0] ret After patch: mov x0, 4638707616191610880 fmov d0, x0 ret Passes bootstrap & regress, OK for commit? gcc: * config/aarch64/aarch64.md (movhf_aarch64): Use aarch64_valid_fp_move. (movsf_aarch64): Likewise. (movdf_aarch64): Likewise. * config/aarch64/aarch64.cc (aarch64_valid_fp_move): New function. * config/aarch64/aarch64-protos.h (aarch64_valid_fp_move): Likewise. gcc/testsuite: * gcc.target/aarch64/dbl_mov_immediate_1.c: Add xfail for -0.0. * gcc.target/aarch64/fmul_scvtf_1.c: Fixup test cases, add xfail, reduce duplication.
author: Wilco Dijkstra <wilco.dijkstra@arm.com> 2024-11-01 14:44:56 +0000
committer: Wilco Dijkstra <wilco.dijkstra@arm.com> 2025-01-07 18:09:15 +0000
commit: 45d306a835cb3f865a897dc7c04efbe1f9f46c28 (patch)
tree: d1bf42f244e91f25db3cbdae466588fc8b7d1403 /gcc
parent: d3ccd89fa0d34d44226af47fe82c27ba7833fe65 (diff)
download: gcc-45d306a835cb3f865a897dc7c04efbe1f9f46c28.zip
gcc-45d306a835cb3f865a897dc7c04efbe1f9f46c28.tar.gz
gcc-45d306a835cb3f865a897dc7c04efbe1f9f46c28.tar.bz2
5 files changed, 78 insertions, 79 deletions
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 70a134f..fa7bc802 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -857,6 +857,7 @@ opt_machine_mode aarch64_v64_mode (scalar_mode);
 opt_machine_mode aarch64_v128_mode (scalar_mode);
 opt_machine_mode aarch64_full_sve_mode (scalar_mode);
 bool aarch64_can_const_movi_rtx_p (rtx x, machine_mode mode);
+bool aarch64_valid_fp_move (rtx, rtx, machine_mode);
 bool aarch64_const_vec_all_same_int_p (rtx, HOST_WIDE_INT);
 bool aarch64_const_vec_all_same_in_range_p (rtx, HOST_WIDE_INT,
 					    HOST_WIDE_INT);
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index d6a8e4c..3e700ed 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -11299,6 +11299,36 @@ aarch64_can_const_movi_rtx_p (rtx x, machine_mode mode)
   return aarch64_simd_valid_mov_imm (v_op);
 }
 
+/* Return TRUE if DST and SRC with mode MODE is a valid fp move.  */
+bool
+aarch64_valid_fp_move (rtx dst, rtx src, machine_mode mode)
+{
+  if (!TARGET_FLOAT)
+    return false;
+
+  if (aarch64_reg_or_fp_zero (src, mode))
+    return true;
+
+  if (!register_operand (dst, mode))
+    return false;
+
+  if (MEM_P (src))
+    return true;
+
+  if (!DECIMAL_FLOAT_MODE_P (mode))
+    {
+      if (aarch64_can_const_movi_rtx_p (src, mode)
+	  || aarch64_float_const_representable_p (src)
+	  || aarch64_float_const_zero_rtx_p (src))
+	return true;
+
+      /* Block FP immediates which are split during expand.  */
+      if (aarch64_float_const_rtx_p (src))
+	return false;
+    }
+
+  return can_create_pseudo_p ();
+}
 
 /* Return the fixed registers used for condition codes.  */
 
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 6758a1d..0ed3c93 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -1760,14 +1760,33 @@
         && ! (GET_CODE (operands[1]) == CONST_DOUBLE
 	      && aarch64_float_const_zero_rtx_p (operands[1])))
       operands[1] = force_reg (<MODE>mode, operands[1]);
+
+    if (!DECIMAL_FLOAT_MODE_P (<MODE>mode)
+	&& GET_CODE (operands[1]) == CONST_DOUBLE
+	&& can_create_pseudo_p ()
+	&& !aarch64_can_const_movi_rtx_p (operands[1], <MODE>mode)
+	&& !aarch64_float_const_representable_p (operands[1])
+	&& !aarch64_float_const_zero_rtx_p (operands[1])
+	&&  aarch64_float_const_rtx_p (operands[1]))
+      {
+	unsigned HOST_WIDE_INT ival;
+	bool res = aarch64_reinterpret_float_as_int (operands[1], &ival);
+	gcc_assert (res);
+
+	machine_mode intmode
+	  = int_mode_for_size (GET_MODE_BITSIZE (<MODE>mode), 0).require ();
+	rtx tmp = gen_reg_rtx (intmode);
+	emit_move_insn (tmp, gen_int_mode (ival, intmode));
+	emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
+	DONE;
+      }
   }
 )
 
 (define_insn "*mov<mode>_aarch64"
   [(set (match_operand:HFBF 0 "nonimmediate_operand")
 	(match_operand:HFBF 1 "general_operand"))]
-  "TARGET_FLOAT && (register_operand (operands[0], <MODE>mode)
-    || aarch64_reg_or_fp_zero (operands[1], <MODE>mode))"
+  "aarch64_valid_fp_move (operands[0], operands[1], <MODE>mode)"
   {@ [ cons: =0 , 1   ; attrs: type , arch  ]
      [ w        , Y   ; neon_move   , simd  ] movi\t%0.4h, #0
      [ w        , ?rY ; f_mcr       , fp16  ] fmov\t%h0, %w1
@@ -1790,8 +1809,7 @@
 (define_insn "*mov<mode>_aarch64"
   [(set (match_operand:SFD 0 "nonimmediate_operand")
 	(match_operand:SFD 1 "general_operand"))]
-  "TARGET_FLOAT && (register_operand (operands[0], <MODE>mode)
-    || aarch64_reg_or_fp_zero (operands[1], <MODE>mode))"
+  "aarch64_valid_fp_move (operands[0], operands[1], <MODE>mode)"
   {@ [ cons: =0 , 1   ; attrs: type , arch  ]
      [ w        , Y   ; neon_move   , simd  ] movi\t%0.2s, #0
      [ w        , ?rY ; f_mcr       , *     ] fmov\t%s0, %w1
@@ -1811,8 +1829,7 @@
 (define_insn "*mov<mode>_aarch64"
   [(set (match_operand:DFD 0 "nonimmediate_operand")
 	(match_operand:DFD 1 "general_operand"))]
-  "TARGET_FLOAT && (register_operand (operands[0], <MODE>mode)
-    || aarch64_reg_or_fp_zero (operands[1], <MODE>mode))"
+  "aarch64_valid_fp_move (operands[0], operands[1], <MODE>mode)"
   {@ [ cons: =0 , 1   ; attrs: type , arch  ]
      [ w        , Y   ; neon_move   , simd  ] movi\t%d0, #0
      [ w        , ?rY ; f_mcr       , *     ] fmov\t%d0, %x1
@@ -1829,27 +1846,6 @@
   }
 )
 
-(define_split
-  [(set (match_operand:GPF_HF 0 "nonimmediate_operand")
-	(match_operand:GPF_HF 1 "const_double_operand"))]
-  "can_create_pseudo_p ()
-   && !aarch64_can_const_movi_rtx_p (operands[1], <MODE>mode)
-   && !aarch64_float_const_representable_p (operands[1])
-   && !aarch64_float_const_zero_rtx_p (operands[1])
-   &&  aarch64_float_const_rtx_p (operands[1])"
-  [(const_int 0)]
-  {
-    unsigned HOST_WIDE_INT ival;
-    if (!aarch64_reinterpret_float_as_int (operands[1], &ival))
-      FAIL;
-
-    rtx tmp = gen_reg_rtx (<FCVT_TARGET>mode);
-    emit_move_insn (tmp, gen_int_mode (ival, <FCVT_TARGET>mode));
-    emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
-    DONE;
-  }
-)
-
 (define_insn "*mov<mode>_aarch64"
   [(set (match_operand:TFD 0
 	 "nonimmediate_operand" "=w,w,?r ,w ,?r,w,?w,w,m,?r,m ,m")
diff --git a/gcc/testsuite/gcc.target/aarch64/dbl_mov_immediate_1.c b/gcc/testsuite/gcc.target/aarch64/dbl_mov_immediate_1.c
index 4838f74..8332035d 100644
--- a/gcc/testsuite/gcc.target/aarch64/dbl_mov_immediate_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/dbl_mov_immediate_1.c
@@ -48,8 +48,8 @@ double d4(void)
 
 /* { dg-final { scan-assembler-times "mov\tx\[0-9\]+, 25838523252736"       1 } } */
 /* { dg-final { scan-assembler-times "movk\tx\[0-9\]+, 0x40fe, lsl 48"      1 } } */
-/* { dg-final { scan-assembler-times "mov\tx\[0-9\]+, -9223372036854775808" 0 } } */
-/* { dg-final { scan-assembler-times {movi\tv[0-9]+.4s, #?0} 1 } } */
-/* { dg-final { scan-assembler-times {fneg\tv[0-9]+.2d, v[0-9]+.2d} 1 } } */
-/* { dg-final { scan-assembler-times "fmov\td\[0-9\]+, x\[0-9\]+"           1 } } */
+/* { dg-final { scan-assembler-times "mov\tx\[0-9\]+, -9223372036854775808" 0 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {movi\tv[0-9]+.4s, #?0} 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {fneg\tv[0-9]+.2d, v[0-9]+.2d} 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "fmov\td\[0-9\]+, x\[0-9\]+"           1 { xfail *-*-* } } } */
 
diff --git a/gcc/testsuite/gcc.target/aarch64/fmul_scvtf_1.c b/gcc/testsuite/gcc.target/aarch64/fmul_scvtf_1.c
index 8bfe06a..198ba0a 100644
--- a/gcc/testsuite/gcc.target/aarch64/fmul_scvtf_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/fmul_scvtf_1.c
@@ -45,53 +45,29 @@ dulfoo##__a (unsigned long long x)		\
   return ((double) x)/(1lu << __a);		\
 }
 
-FUNC_DEFS (4)
-	/* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], w\[0-9\]*.*#4" 1 } } */
-	/* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], w\[0-9\]*.*#4" 1 } } */
-	/* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], x\[0-9\]*.*#4" 1 } } */
-	/* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], x\[0-9\]*.*#4" 1 } } */
+FUNC_DEFS (2)
+	/* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], w\[0-9\]*.*#2" 1 } } */
+	/* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], w\[0-9\]*.*#2" 1 } } */
+	/* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], x\[0-9\]*.*#2" 1 } } */
+	/* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], x\[0-9\]*.*#2" 1 } } */
 
-FUNC_DEFD (4)
-	/* { dg-final { scan-assembler-times "scvtf\td\[0-9\], w\[0-9\]*.*#4" 1 } } */
-	/* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], w\[0-9\]*.*#4" 1 } } */
-	/* { dg-final { scan-assembler-times "scvtf\td\[0-9\], x\[0-9\]*.*#4" 1 } } */
-	/* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], x\[0-9\]*.*#4" 1 } } */
-
-FUNC_DEFS (8)
-	/* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], w\[0-9\]*.*#8" 1 } } */
-	/* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], w\[0-9\]*.*#8" 1 } } */
-	/* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], x\[0-9\]*.*#8" 1 } } */
-	/* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], x\[0-9\]*.*#8" 1 } } */
-
-FUNC_DEFD (8)
-	/* { dg-final { scan-assembler-times "scvtf\td\[0-9\], w\[0-9\]*.*#8" 1 } } */
-	/* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], w\[0-9\]*.*#8" 1 } } */
-	/* { dg-final { scan-assembler-times "scvtf\td\[0-9\], x\[0-9\]*.*#8" 1 } } */
-	/* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], x\[0-9\]*.*#8" 1 } } */
-
-FUNC_DEFS (16)
-	/* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], w\[0-9\]*.*#16" 1 } } */
-	/* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], w\[0-9\]*.*#16" 1 } } */
-	/* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], x\[0-9\]*.*#16" 1 } } */
-	/* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], x\[0-9\]*.*#16" 1 } } */
-
-FUNC_DEFD (16)
-	/* { dg-final { scan-assembler-times "scvtf\td\[0-9\], w\[0-9\]*.*#16" 1 } } */
-	/* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], w\[0-9\]*.*#16" 1 } } */
-	/* { dg-final { scan-assembler-times "scvtf\td\[0-9\], x\[0-9\]*.*#16" 1 } } */
-	/* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], x\[0-9\]*.*#16" 1 } } */
+FUNC_DEFD (2)
+	/* { dg-final { scan-assembler-times "scvtf\td\[0-9\], w\[0-9\]*.*#2" 1 } } */
+	/* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], w\[0-9\]*.*#2" 1 } } */
+	/* { dg-final { scan-assembler-times "scvtf\td\[0-9\], x\[0-9\]*.*#2" 1 } } */
+	/* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], x\[0-9\]*.*#2" 1 } } */
 
 FUNC_DEFS (32)
-	/* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], w\[0-9\]*.*#32" 1 } } */
-	/* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], w\[0-9\]*.*#32" 1 } } */
-	/* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], x\[0-9\]*.*#32" 1 } } */
-	/* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], x\[0-9\]*.*#32" 1 } } */
+	/* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], w\[0-9\]*.*#32" 1 { xfail *-*-* } } } */
+	/* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], w\[0-9\]*.*#32" 1 { xfail *-*-* } } } */
+	/* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], x\[0-9\]*.*#32" 1 { xfail *-*-* } } } */
+	/* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], x\[0-9\]*.*#32" 1 { xfail *-*-* } } } */
 
 FUNC_DEFD (32)
-	/* { dg-final { scan-assembler-times "scvtf\td\[0-9\], w\[0-9\]*.*#32" 1 } } */
-	/* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], w\[0-9\]*.*#32" 1 } } */
-	/* { dg-final { scan-assembler-times "scvtf\td\[0-9\], x\[0-9\]*.*#32" 1 } } */
-	/* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], x\[0-9\]*.*#32" 1 } } */
+	/* { dg-final { scan-assembler-times "scvtf\td\[0-9\], w\[0-9\]*.*#32" 1 { xfail *-*-* } } } */
+	/* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], w\[0-9\]*.*#32" 1 { xfail *-*-* } } } */
+	/* { dg-final { scan-assembler-times "scvtf\td\[0-9\], x\[0-9\]*.*#32" 1 { xfail *-*-* } } } */
+	/* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], x\[0-9\]*.*#32" 1 { xfail *-*-* } } } */
 
 #define FUNC_TESTS(__a, __b)					\
 do								\
@@ -126,14 +102,10 @@ main (void)
 
 	for (i = 0; i < 32; i ++)
 	{
-		FUNC_TESTS (4, i);
-		FUNC_TESTS (8, i);
-		FUNC_TESTS (16, i);
+		FUNC_TESTS (2, i);
 		FUNC_TESTS (32, i);
 
-		FUNC_TESTD (4, i);
-		FUNC_TESTD (8, i);
-		FUNC_TESTD (16, i);
+		FUNC_TESTD (2, i);
 		FUNC_TESTD (32, i);
 	}
 	return 0;
author	Wilco Dijkstra <wilco.dijkstra@arm.com>	2024-11-01 14:44:56 +0000
committer	Wilco Dijkstra <wilco.dijkstra@arm.com>	2025-01-07 18:09:15 +0000
commit	45d306a835cb3f865a897dc7c04efbe1f9f46c28 (patch)
tree	d1bf42f244e91f25db3cbdae466588fc8b7d1403 /gcc
parent	d3ccd89fa0d34d44226af47fe82c27ba7833fe65 (diff)
download	gcc-45d306a835cb3f865a897dc7c04efbe1f9f46c28.zip gcc-45d306a835cb3f865a897dc7c04efbe1f9f46c28.tar.gz gcc-45d306a835cb3f865a897dc7c04efbe1f9f46c28.tar.bz2