aboutsummaryrefslogtreecommitdiff
path: root/gcc/config/aarch64/aarch64.cc
diff options
context:
space:
mode:
author     Wilco Dijkstra <wilco.dijkstra@arm.com>   2023-09-13 13:21:50 +0100
committer  Wilco Dijkstra <wilco.dijkstra@arm.com>   2023-09-18 13:27:44 +0100
commit    fc7070025d1a6668ff6cb4391f84771a7662def7 (patch)
tree      66a40b6263d5ca9ecc7fc04a9e032f1085bad85d /gcc/config/aarch64/aarch64.cc
parent    64d5bc35c8c2a66ac133a3e6ace820b0ad8a63fb (diff)
download  gcc-fc7070025d1a6668ff6cb4391f84771a7662def7.zip
          gcc-fc7070025d1a6668ff6cb4391f84771a7662def7.tar.gz
          gcc-fc7070025d1a6668ff6cb4391f84771a7662def7.tar.bz2
AArch64: Improve immediate expansion [PR105928]
Support immediate expansion of immediates which can be created from 2 MOVKs
and a shifted ORR or BIC instruction.  Change aarch64_split_dimode_const_store
to apply if we save one instruction.  This reduces the number of 4-instruction
immediates in SPECINT/FP by 5%.

gcc/ChangeLog:
	PR target/105928
	* config/aarch64/aarch64.cc (aarch64_internal_mov_immediate):
	Add support for immediates using shifted ORR/BIC.
	(aarch64_split_dimode_const_store): Apply if we save one instruction.
	* config/aarch64/aarch64.md (<LOGICAL:optab>_<SHIFT:optab><mode>3):
	Make pattern global.

gcc/testsuite/ChangeLog:
	PR target/105928
	* gcc.target/aarch64/pr105928.c: Add new test.
	* gcc.target/aarch64/vect-cse-codegen.c: Fix test.
Diffstat (limited to 'gcc/config/aarch64/aarch64.cc')
-rw-r--r--  gcc/config/aarch64/aarch64.cc  43
1 file changed, 32 insertions(+), 11 deletions(-)
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 7bb1161..219c4ee 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -5640,7 +5640,7 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
machine_mode mode)
{
int i;
- unsigned HOST_WIDE_INT val, val2, mask;
+ unsigned HOST_WIDE_INT val, val2, val3, mask;
int one_match, zero_match;
int num_insns;
@@ -5722,6 +5722,35 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
}
return 3;
}
+
+ /* Try shifting and inserting the bottom 32-bits into the top bits. */
+ val2 = val & 0xffffffff;
+ val3 = 0xffffffff;
+ val3 = val2 | (val3 << 32);
+ for (i = 17; i < 48; i++)
+ if ((val2 | (val2 << i)) == val)
+ {
+ if (generate)
+ {
+ emit_insn (gen_rtx_SET (dest, GEN_INT (val2 & 0xffff)));
+ emit_insn (gen_insv_immdi (dest, GEN_INT (16),
+ GEN_INT (val2 >> 16)));
+ emit_insn (gen_ior_ashldi3 (dest, dest, GEN_INT (i), dest));
+ }
+ return 3;
+ }
+ else if ((val3 & ~(val3 << i)) == val)
+ {
+ if (generate)
+ {
+ emit_insn (gen_rtx_SET (dest, GEN_INT (val3 | 0xffff0000)));
+ emit_insn (gen_insv_immdi (dest, GEN_INT (16),
+ GEN_INT (val2 >> 16)));
+ emit_insn (gen_and_one_cmpl_ashldi3 (dest, dest, GEN_INT (i),
+ dest));
+ }
+ return 3;
+ }
}
/* Generate 2-4 instructions, skipping 16 bits of all zeroes or ones which
@@ -25540,8 +25569,6 @@ aarch64_split_dimode_const_store (rtx dst, rtx src)
rtx lo = gen_lowpart (SImode, src);
rtx hi = gen_highpart_mode (SImode, DImode, src);
- bool size_p = optimize_function_for_size_p (cfun);
-
if (!rtx_equal_p (lo, hi))
return false;
@@ -25560,14 +25587,8 @@ aarch64_split_dimode_const_store (rtx dst, rtx src)
MOV w1, 49370
MOVK w1, 0x140, lsl 16
STP w1, w1, [x0]
- So we want to perform this only when we save two instructions
- or more. When optimizing for size, however, accept any code size
- savings we can. */
- if (size_p && orig_cost <= lo_cost)
- return false;
-
- if (!size_p
- && (orig_cost <= lo_cost + 1))
+ So we want to perform this when we save at least one instruction. */
+ if (orig_cost <= lo_cost)
return false;
rtx mem_lo = adjust_address (dst, SImode, 0);