recog: Handle some mode-changing hardreg propagations

insn_propagation would previously only replace (reg:M H) with X for some hard register H if the uses of H were also in mode M. This patch extends it to handle simple mode punning too. The original motivation was to try to get rid of the execution frequency test in aarch64_split_simd_shift_p, but doing that is follow-up work. I tried this on at least one target per CPU directory (as for the late-combine patches) and it seems to be a small win for all of them. The patch includes a couple of updates to the ia32 results. In pr105033.c, foo3 replaced: vmovq 8(%esp), %xmm1 vpunpcklqdq %xmm1, %xmm0, %xmm0 with: vmovhps 8(%esp), %xmm0, %xmm0 In vect-bfloat16-2b.c, 5 of the vec_extract_v32bf_* routines (specifically the ones with nonzero even indices) replaced things like: movl 28(%esp), %eax vmovd %eax, %xmm0 with: vpinsrw $0, 28(%esp), %xmm0, %xmm0 (These functions return a bf16, and so only the low 16 bits matter.) gcc/ * recog.cc (insn_propagation::apply_to_rvalue_1): Handle simple cases of hardreg propagation in which the register is set and used in different modes. gcc/testsuite/ * gcc.target/i386/pr105033.c: Expect vmovhps for the ia32 version of foo. * gcc.target/i386/vect-bfloat16-2b.c: Expect more vpinsrws.
author: Richard Sandiford <richard.sandiford@arm.com> 2024-07-10 17:01:29 +0100
committer: Richard Sandiford <richard.sandiford@arm.com> 2024-07-10 17:01:29 +0100
commit: 9d20529d94b23275885f380d155fe8671ab5353a (patch)
tree: 359bdd40cbbfece37d81423527ee117cf053b47d /gcc
parent: e08ebd7d77a216ee2313b585c370333c66497b53 (diff)
download: gcc-9d20529d94b23275885f380d155fe8671ab5353a.zip
gcc-9d20529d94b23275885f380d155fe8671ab5353a.tar.gz
gcc-9d20529d94b23275885f380d155fe8671ab5353a.tar.bz2
3 files changed, 29 insertions, 8 deletions
diff --git a/gcc/recog.cc b/gcc/recog.cc
index 56370e4..36507f3 100644
--- a/gcc/recog.cc
+++ b/gcc/recog.cc
@@ -1055,7 +1055,11 @@ insn_propagation::apply_to_rvalue_1 (rtx *loc)
   machine_mode mode = GET_MODE (x);
 
   auto old_num_changes = num_validated_changes ();
-  if (from && GET_CODE (x) == GET_CODE (from) && rtx_equal_p (x, from))
+  if (from
+      && GET_CODE (x) == GET_CODE (from)
+      && (REG_P (x)
+	  ? REGNO (x) == REGNO (from)
+	  : rtx_equal_p (x, from)))
     {
       /* Don't replace register asms in asm statements; we mustn't
 	 change the user's register allocation.  */
@@ -1065,11 +1069,26 @@ insn_propagation::apply_to_rvalue_1 (rtx *loc)
 	  && asm_noperands (PATTERN (insn)) > 0)
 	return false;
 
+      rtx newval = to;
+      if (GET_MODE (x) != GET_MODE (from))
+	{
+	  gcc_assert (REG_P (x) && HARD_REGISTER_P (x));
+	  if (REG_NREGS (x) != REG_NREGS (from)
+	      || !REG_CAN_CHANGE_MODE_P (REGNO (x), GET_MODE (from),
+					 GET_MODE (x)))
+	    return false;
+	  newval = simplify_subreg (GET_MODE (x), to, GET_MODE (from),
+				    subreg_lowpart_offset (GET_MODE (x),
+							   GET_MODE (from)));
+	  if (!newval)
+	    return false;
+	}
+
       if (should_unshare)
-	validate_unshare_change (insn, loc, to, 1);
+	validate_unshare_change (insn, loc, newval, 1);
       else
-	validate_change (insn, loc, to, 1);
-      if (mem_depth && !REG_P (to) && !CONSTANT_P (to))
+	validate_change (insn, loc, newval, 1);
+      if (mem_depth && !REG_P (newval) && !CONSTANT_P (newval))
 	{
 	  /* We're substituting into an address, but TO will have the
 	     form expected outside an address.  Canonicalize it if
@@ -1083,9 +1102,9 @@ insn_propagation::apply_to_rvalue_1 (rtx *loc)
 	    {
 	      /* TO is owned by someone else, so create a copy and
 		 return TO to its original form.  */
-	      rtx to = copy_rtx (*loc);
+	      newval = copy_rtx (*loc);
 	      cancel_changes (old_num_changes);
-	      validate_change (insn, loc, to, 1);
+	      validate_change (insn, loc, newval, 1);
 	    }
 	}
       num_replacements += 1;
diff --git a/gcc/testsuite/gcc.target/i386/pr105033.c b/gcc/testsuite/gcc.target/i386/pr105033.c
index ab05e3b..10e3978 100644
--- a/gcc/testsuite/gcc.target/i386/pr105033.c
+++ b/gcc/testsuite/gcc.target/i386/pr105033.c
@@ -1,6 +1,8 @@
 /* { dg-do compile } */
 /* { dg-options "-march=sapphirerapids -O2" } */
-/* { dg-final { scan-assembler-times {vpunpcklqdq[ \t]+} 3 } } */
+/* { dg-final { scan-assembler-times {vpunpcklqdq[ \t]+} 3 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times {vpunpcklqdq[ \t]+} 2 { target ia32 } } } */
+/* { dg-final { scan-assembler-times {vmovhps[ \t]+} 1 { target ia32 } } } */
 /* { dg-final { scan-assembler-not {vpermi2[wb][ \t]+} } } */
 
 typedef _Float16 v8hf __attribute__((vector_size (16)));
diff --git a/gcc/testsuite/gcc.target/i386/vect-bfloat16-2b.c b/gcc/testsuite/gcc.target/i386/vect-bfloat16-2b.c
index 29bf601..0d1e14d 100644
--- a/gcc/testsuite/gcc.target/i386/vect-bfloat16-2b.c
+++ b/gcc/testsuite/gcc.target/i386/vect-bfloat16-2b.c
@@ -17,6 +17,6 @@
 
 /* { dg-final { scan-assembler-times "vpbroadcastw" 6 { target ia32 } } } */
 /* { dg-final { scan-assembler-times "vpblendw" 3 { target ia32 } } } */
-/* { dg-final { scan-assembler-times "vpinsrw" 63 { target ia32 } } } */
+/* { dg-final { scan-assembler-times "vpinsrw" 68 { target ia32 } } } */
 
 /* { dg-final { scan-assembler-times "vpblendd" 3 } } */
author	Richard Sandiford <richard.sandiford@arm.com>	2024-07-10 17:01:29 +0100
committer	Richard Sandiford <richard.sandiford@arm.com>	2024-07-10 17:01:29 +0100
commit	9d20529d94b23275885f380d155fe8671ab5353a (patch)
tree	359bdd40cbbfece37d81423527ee117cf053b47d /gcc
parent	e08ebd7d77a216ee2313b585c370333c66497b53 (diff)
download	gcc-9d20529d94b23275885f380d155fe8671ab5353a.zip gcc-9d20529d94b23275885f380d155fe8671ab5353a.tar.gz gcc-9d20529d94b23275885f380d155fe8671ab5353a.tar.bz2