author	Richard Henderson <richard.henderson@linaro.org>	2025-08-06 09:21:55 +0000
committer	Richard Henderson <richard.henderson@linaro.org>	2025-08-11 23:25:09 +0000
commit	21d94bf16f124e22fc7ad3c2e4083374eba03c67
tree	17a50139ee425b33258bbd8ba774cd496b35b0a4
parent	ed101b98e25314a5a544297e7224d9572b489804
aarch64: Fix aarch64_split_imm24 patterns
Both patterns used !reload_completed as a condition, which is
questionable at best.  The branch pattern failed to include a clobber
of CC_REGNUM.  Both problems were unlikely to trigger in practice, due
to how the optimization pipeline is organized, but let's fix them
anyway.

gcc:
	* config/aarch64/aarch64.cc (aarch64_gen_compare_split_imm24): New.
	* config/aarch64/aarch64-protos.h: Update.
	* config/aarch64/aarch64.md (*aarch64_bcond_wide_imm<GPI>): Use it.
	Add match_scratch and cc clobbers.  Use match_operator instead of
	iterator expansion.
	(*compare_cstore<GPI>_insn): Likewise.
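As a point of reference, here is a minimal C example (hypothetical, not
part of the patch) that should exercise both affected patterns, since
0x123456 is a 24-bit constant accepted by the aarch64_split_imm24
predicate:

	extern void g (void);

	void
	branch_case (long x)
	{
	  if (x == 0x123456)	/* *aarch64_bcond_wide_imm<GPI:mode> */
	    g ();
	}

	int
	cstore_case (long x)
	{
	  return x != 0x123456;	/* *compare_cstore<GPI>_insn */
	}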
-rw-r--r--	gcc/config/aarch64/aarch64-protos.h	1
-rw-r--r--	gcc/config/aarch64/aarch64.cc	37
-rw-r--r--	gcc/config/aarch64/aarch64.md	74
3 files changed, 63 insertions(+), 49 deletions(-)
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 7b9b16b..d26e1d5 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -1098,6 +1098,7 @@ bool aarch64_legitimate_address_p (machine_mode, rtx, bool,
aarch64_addr_query_type = ADDR_QUERY_M);
machine_mode aarch64_select_cc_mode (RTX_CODE, rtx, rtx);
rtx aarch64_gen_compare_reg (RTX_CODE, rtx, rtx);
+rtx aarch64_gen_compare_split_imm24 (rtx, rtx, rtx);
bool aarch64_maxmin_plus_const (rtx_code, rtx *, bool);
rtx aarch64_load_tp (rtx);
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index cd66f55..d527229 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -2882,6 +2882,43 @@ aarch64_gen_compare_reg_maybe_ze (RTX_CODE code, rtx x, rtx y,
return aarch64_gen_compare_reg (code, x, y);
}
+/* Split IMM into two 12-bit halves, producing an EQ/NE comparison vs X.
+ TMP may be a scratch. This optimizes a sequence from
+ mov x0, #imm1
+ movk x0, #imm2, lsl 16 -- x0 contains CST
+ cmp x1, x0
+ into the shorter:
+ sub tmp, x1, #(CST & 0xfff000)
+ subs tmp, tmp, #(CST & 0x000fff)
+*/
+rtx
+aarch64_gen_compare_split_imm24 (rtx x, rtx imm, rtx tmp)
+{
+ HOST_WIDE_INT lo_imm = UINTVAL (imm) & 0xfff;
+ HOST_WIDE_INT hi_imm = UINTVAL (imm) & 0xfff000;
+ machine_mode mode = GET_MODE (x);
+
+ if (GET_CODE (tmp) == SCRATCH)
+ tmp = gen_reg_rtx (mode);
+
+ emit_insn (gen_add3_insn (tmp, x, GEN_INT (-hi_imm)));
+ /* TODO: We don't need the gpr result of the second insn. */
+ switch (mode)
+ {
+ case E_SImode:
+ tmp = gen_addsi3_compare0 (tmp, tmp, GEN_INT (-lo_imm));
+ break;
+ case E_DImode:
+ tmp = gen_adddi3_compare0 (tmp, tmp, GEN_INT (-lo_imm));
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ emit_insn (tmp);
+
+ return gen_rtx_REG (CC_NZmode, CC_REGNUM);
+}
+
/* Generate conditional branch to LABEL, comparing X to 0 using CODE.
Return the jump instruction. */
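Per the comment above, for CST = 0x123456 the helper should emit a
sequence along these lines (illustrative only; "tmp" is whatever
register the scratch resolves to):

	sub	tmp, x1, #0x123000	// CST & 0xfff000
	subs	tmp, tmp, #0x456	// CST & 0x000fff

Only the NZ flags set by the subs matter for the EQ/NE test, which is
why the function returns the CC register in CC_NZmode.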
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index c97dbe6..6f6e3a9 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -978,35 +978,24 @@
(const_string "yes")))]
)
-;; For a 24-bit immediate CST we can optimize the compare for equality
-;; and branch sequence from:
-;; mov x0, #imm1
-;; movk x0, #imm2, lsl 16 /* x0 contains CST. */
-;; cmp x1, x0
-;; b<ne,eq> .Label
-;; into the shorter:
-;; sub x0, x1, #(CST & 0xfff000)
-;; subs x0, x0, #(CST & 0x000fff)
-;; b<ne,eq> .Label
+;; For a 24-bit immediate CST we can optimize the compare for equality.
(define_insn_and_split "*aarch64_bcond_wide_imm<GPI:mode>"
- [(set (pc) (if_then_else (EQL (match_operand:GPI 0 "register_operand" "r")
- (match_operand:GPI 1 "aarch64_split_imm24" "n"))
- (label_ref:P (match_operand 2))
- (pc)))]
- "!reload_completed"
+ [(set (pc) (if_then_else
+ (match_operator 0 "aarch64_equality_operator"
+ [(match_operand:GPI 1 "register_operand" "r")
+ (match_operand:GPI 2 "aarch64_split_imm24" "n")])
+ (label_ref (match_operand 3))
+ (pc)))
+ (clobber (reg:CC CC_REGNUM))
+ (clobber (match_scratch:GPI 4 "=r"))]
+ ""
"#"
- "&& true"
+ ""
[(const_int 0)]
{
- HOST_WIDE_INT lo_imm = UINTVAL (operands[1]) & 0xfff;
- HOST_WIDE_INT hi_imm = UINTVAL (operands[1]) & 0xfff000;
- rtx tmp = gen_reg_rtx (<GPI:MODE>mode);
- emit_insn (gen_add<GPI:mode>3 (tmp, operands[0], GEN_INT (-hi_imm)));
- emit_insn (gen_add<GPI:mode>3_compare0 (tmp, tmp, GEN_INT (-lo_imm)));
- rtx cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
- rtx cmp_rtx = gen_rtx_fmt_ee (<EQL:CMP>, <GPI:MODE>mode,
- cc_reg, const0_rtx);
- emit_jump_insn (gen_aarch64_bcond (cmp_rtx, cc_reg, operands[2]));
+ rtx cc_reg = aarch64_gen_compare_split_imm24 (operands[1], operands[2],
+ operands[4]);
+ emit_jump_insn (gen_aarch64_bcond (operands[0], cc_reg, operands[3]));
DONE;
}
)
@@ -4649,37 +4638,24 @@
[(set_attr "type" "csel")]
)
-;; For a 24-bit immediate CST we can optimize the compare for equality
-;; and branch sequence from:
-;; mov x0, #imm1
-;; movk x0, #imm2, lsl 16 /* x0 contains CST. */
-;; cmp x1, x0
-;; cset x2, <ne,eq>
-;; into the shorter:
-;; sub x0, x1, #(CST & 0xfff000)
-;; subs x0, x0, #(CST & 0x000fff)
-;; cset x2, <ne, eq>.
+;; For a 24-bit immediate CST we can optimize the compare for equality.
(define_insn_and_split "*compare_cstore<mode>_insn"
[(set (match_operand:GPI 0 "register_operand" "=r")
- (EQL:GPI (match_operand:GPI 1 "register_operand" "r")
- (match_operand:GPI 2 "aarch64_split_imm24" "n")))
- (clobber (reg:CC CC_REGNUM))]
- "!reload_completed"
+ (match_operator:GPI 1 "aarch64_equality_operator"
+ [(match_operand:GPI 2 "register_operand" "r")
+ (match_operand:GPI 3 "aarch64_split_imm24" "n")]))
+ (clobber (reg:CC CC_REGNUM))
+ (clobber (match_scratch:GPI 4 "=r"))]
+ ""
"#"
- "&& true"
+ ""
[(const_int 0)]
{
- HOST_WIDE_INT lo_imm = UINTVAL (operands[2]) & 0xfff;
- HOST_WIDE_INT hi_imm = UINTVAL (operands[2]) & 0xfff000;
- rtx tmp = gen_reg_rtx (<MODE>mode);
- emit_insn (gen_add<mode>3 (tmp, operands[1], GEN_INT (-hi_imm)));
- emit_insn (gen_add<mode>3_compare0 (tmp, tmp, GEN_INT (-lo_imm)));
- rtx cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
- rtx cmp_rtx = gen_rtx_fmt_ee (<EQL:CMP>, <MODE>mode, cc_reg, const0_rtx);
- emit_insn (gen_aarch64_cstore<mode> (operands[0], cmp_rtx, cc_reg));
+ rtx cc_reg = aarch64_gen_compare_split_imm24 (operands[2], operands[3],
+ operands[4]);
+ emit_insn (gen_aarch64_cstore<mode> (operands[0], operands[1], cc_reg));
DONE;
}
- [(set_attr "type" "csel")]
)
;; zero_extend version of the above
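For the cstore pattern, the same split should yield the sequence
sketched in the old comment (again illustrative, with CST = 0x123456):

	sub	x0, x1, #0x123000
	subs	x0, x0, #0x456
	cset	x2, eq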