aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorGeorg-Johann Lay <avr@gjlay.de>2025-03-15 20:53:52 +0100
committerGeorg-Johann Lay <avr@gjlay.de>2025-03-22 17:49:57 +0100
commit94355acc2debe03eb3b0a85229e340675a1ff6bd (patch)
tree4d2f00b9f3b407b26093570ad3b2c7216df36c93 /gcc
parent8736edca77a59157a3dae5b3aa5ca59f4fe4b4a4 (diff)
downloadgcc-94355acc2debe03eb3b0a85229e340675a1ff6bd.zip
gcc-94355acc2debe03eb3b0a85229e340675a1ff6bd.tar.gz
gcc-94355acc2debe03eb3b0a85229e340675a1ff6bd.tar.bz2
AVR: target/119421 Better optimize some bit operations.
There are occasions where knowledge about nonzero bits makes some optimizations possible. For example, Rd |= Rn << Off can be implemented as SBRC Rn, 0 ORI Rd, 1 << Off when Rn in { 0, 1 }, i.e. nonzero_bits (Rn) == 1. This patch adds some patterns that exploit nonzero_bits() in some combiner patterns. As insn conditions are not supposed to contain nonzero_bits(), the patch splits such insns right after pass insn combine. PR target/119421 gcc/ * config/avr/avr.opt (-muse-nonzero-bits): New option. * config/avr/avr-protos.h (avr_nonzero_bits_lsr_operands_p): New. (make_avr_pass_split_nzb): New. * config/avr/avr.cc (avr_nonzero_bits_lsr_operands_p): New function. (avr_rtx_costs_1): Return costs for the new insns. * config/avr/avr.md (nzb): New insn attribute. (*nzb=1.<code>...): New insns to better support some bit operations for <code> in AND, IOR, XOR. * config/avr/avr-passes.def (avr_pass_split_nzb): Insert pass atfer combine. * config/avr/avr-passes.cc (avr_pass_data_split_nzb). New pass data. (avr_pass_split_nzb): New pass. (make_avr_pass_split_nzb): New function. * common/config/avr/avr-common.cc (avr_option_optimization_table): Enable -muse-nonzero-bits for -O2 and higher. * doc/invoke.texi (AVR Options): Document -muse-nonzero-bits. gcc/testsuite/ * gcc.target/avr/torture/pr119421-sreg.c: New test.
Diffstat (limited to 'gcc')
-rw-r--r--gcc/common/config/avr/avr-common.cc1
-rw-r--r--gcc/config/avr/avr-passes.cc71
-rw-r--r--gcc/config/avr/avr-passes.def12
-rw-r--r--gcc/config/avr/avr-protos.h2
-rw-r--r--gcc/config/avr/avr.cc106
-rw-r--r--gcc/config/avr/avr.md215
-rw-r--r--gcc/config/avr/avr.opt4
-rw-r--r--gcc/doc/invoke.texi9
-rw-r--r--gcc/testsuite/gcc.target/avr/torture/pr119421-sreg.c301
9 files changed, 719 insertions, 2 deletions
diff --git a/gcc/common/config/avr/avr-common.cc b/gcc/common/config/avr/avr-common.cc
index 06c6cc8..203a965 100644
--- a/gcc/common/config/avr/avr-common.cc
+++ b/gcc/common/config/avr/avr-common.cc
@@ -42,6 +42,7 @@ static const struct default_options avr_option_optimization_table[] =
{ OPT_LEVELS_2_PLUS, OPT_mfuse_move_, NULL, 23 },
{ OPT_LEVELS_2_PLUS, OPT_msplit_bit_shift, NULL, 1 },
{ OPT_LEVELS_2_PLUS, OPT_msplit_ldst, NULL, 1 },
+ { OPT_LEVELS_2_PLUS, OPT_muse_nonzero_bits, NULL, 1 },
// Stick to the "old" placement of the subreg lowering pass.
{ OPT_LEVELS_1_PLUS, OPT_fsplit_wide_types_early, NULL, 1 },
/* Allow optimizer to introduce store data races. This used to be the
diff --git a/gcc/config/avr/avr-passes.cc b/gcc/config/avr/avr-passes.cc
index 184619a..2c21e7b 100644
--- a/gcc/config/avr/avr-passes.cc
+++ b/gcc/config/avr/avr-passes.cc
@@ -29,6 +29,7 @@
#include "target.h"
#include "rtl.h"
#include "tree.h"
+#include "diagnostic-core.h"
#include "cfghooks.h"
#include "cfganal.h"
#include "df.h"
@@ -4848,6 +4849,70 @@ avr_pass_fuse_add::execute1 (function *func)
//////////////////////////////////////////////////////////////////////////////
+// Split insns with nonzero_bits() after combine.
+
+static const pass_data avr_pass_data_split_nzb =
+{
+ RTL_PASS, // type
+ "", // name (will be patched)
+ OPTGROUP_NONE, // optinfo_flags
+ TV_DF_SCAN, // tv_id
+ 0, // properties_required
+ 0, // properties_provided
+ 0, // properties_destroyed
+ 0, // todo_flags_start
+ 0 // todo_flags_finish
+};
+
+class avr_pass_split_nzb : public rtl_opt_pass
+{
+public:
+ avr_pass_split_nzb (gcc::context *ctxt, const char *name)
+ : rtl_opt_pass (avr_pass_data_split_nzb, ctxt)
+ {
+ this->name = name;
+ }
+
+ unsigned int execute (function *) final override
+ {
+ if (avropt_use_nonzero_bits)
+ split_nzb_insns ();
+ return 0;
+ }
+
+ void split_nzb_insns ();
+
+}; // avr_pass_split_nzb
+
+
+void
+avr_pass_split_nzb::split_nzb_insns ()
+{
+ rtx_insn *next;
+
+ for (rtx_insn *insn = get_insns (); insn; insn = next)
+ {
+ next = NEXT_INSN (insn);
+
+ if (INSN_P (insn)
+ && single_set (insn)
+ && get_attr_nzb (insn) == NZB_YES)
+ {
+ rtx_insn *last = try_split (PATTERN (insn), insn, 1 /*last*/);
+
+ // The nonzero_bits() insns *must* split. If not: ICE.
+ if (last == insn)
+ {
+ debug_rtx (insn);
+ internal_error ("failed to split insn");
+ }
+ }
+ }
+}
+
+
+
+//////////////////////////////////////////////////////////////////////////////
// Split shift insns after peephole2 / before avr-fuse-move.
static const pass_data avr_pass_data_split_after_peephole2 =
@@ -5645,6 +5710,12 @@ make_avr_pass_casesi (gcc::context *ctxt)
return new avr_pass_casesi (ctxt, "avr-casesi");
}
+rtl_opt_pass *
+make_avr_pass_split_nzb (gcc::context *ctxt)
+{
+ return new avr_pass_split_nzb (ctxt, "avr-split-nzb");
+}
+
// Try to replace 2 cbranch insns with 1 comparison and 2 branches.
rtl_opt_pass *
diff --git a/gcc/config/avr/avr-passes.def b/gcc/config/avr/avr-passes.def
index 091005e..eb60a93 100644
--- a/gcc/config/avr/avr-passes.def
+++ b/gcc/config/avr/avr-passes.def
@@ -74,6 +74,18 @@ INSERT_PASS_BEFORE (pass_free_cfg, 1, avr_pass_recompute_notes);
INSERT_PASS_AFTER (pass_expand, 1, avr_pass_casesi);
+/* Some combine insns have nonzero_bits() in their condition, though insns
+ should not use such stuff in their condition. Therefore, we split such
+ insns into something without nonzero_bits() in their condition right after
+ insn combine.
+
+ Since neither split_all_insns() nor split_all_insns_noflow() work at that
+ point (presumably since there are splits involving branches), we split
+ respective insns (and only such insns) by hand. Respective insns are
+ tagged with insn attribute nzb = "yes" so that they are easy to spot. */
+
+INSERT_PASS_AFTER (pass_combine, 1, avr_pass_split_nzb);
+
/* If-else decision trees generated for switch / case may produce sequences
like
diff --git a/gcc/config/avr/avr-protos.h b/gcc/config/avr/avr-protos.h
index 83137c7..ca30136 100644
--- a/gcc/config/avr/avr-protos.h
+++ b/gcc/config/avr/avr-protos.h
@@ -136,6 +136,7 @@ extern bool reg_unused_after (rtx_insn *insn, rtx reg);
extern int avr_jump_mode (rtx x, rtx_insn *insn, int = 0);
extern bool test_hard_reg_class (enum reg_class rclass, rtx x);
extern bool jump_over_one_insn_p (rtx_insn *insn, rtx dest);
+extern bool avr_nonzero_bits_lsr_operands_p (rtx_code, rtx *);
extern void avr_final_prescan_insn (rtx_insn *insn, rtx *operand,
int num_operands);
@@ -205,6 +206,7 @@ extern rtl_opt_pass *make_avr_pass_pre_proep (gcc::context *);
extern rtl_opt_pass *make_avr_pass_recompute_notes (gcc::context *);
extern rtl_opt_pass *make_avr_pass_casesi (gcc::context *);
extern rtl_opt_pass *make_avr_pass_ifelse (gcc::context *);
+extern rtl_opt_pass *make_avr_pass_split_nzb (gcc::context *);
extern rtl_opt_pass *make_avr_pass_split_after_peephole2 (gcc::context *);
#ifdef RTX_CODE
extern bool avr_casei_sequence_check_operands (rtx *xop);
diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc
index 0ce06a1..d94df84 100644
--- a/gcc/config/avr/avr.cc
+++ b/gcc/config/avr/avr.cc
@@ -12706,6 +12706,50 @@ avr_rtx_costs_1 (rtx x, machine_mode mode, int outer_code,
}
}
+ // Insns with nonzero_bits() == 1 in the condition.
+ if (avropt_use_nonzero_bits
+ && mode == QImode
+ && (code == AND || code == IOR || code == XOR)
+ && REG_P (XEXP (x, 1)))
+ {
+ // "*nzb=1.<code>.lsr_split"
+ // "*nzb=1.<code>.lsr.not_split"
+ bool is_nzb = (GET_CODE (XEXP (x, 0)) == LSHIFTRT
+ && (REG_P (XEXP (XEXP (x, 0), 0))
+ || GET_CODE (XEXP (XEXP (x, 0), 0)) == XOR)
+ && const_0_to_7_operand (XEXP (XEXP (x, 0), 1), QImode));
+ // "*nzb=1.<code>.zerox_split"
+ // "*nzb=1.<code>.zerox.not_split"
+ is_nzb |= (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
+ && (REG_P (XEXP (XEXP (x, 0), 0))
+ || GET_CODE (XEXP (XEXP (x, 0), 0)) == XOR)
+ && const1_operand (XEXP (XEXP (x, 0), 1), QImode)
+ && const_0_to_7_operand (XEXP (XEXP (x, 0), 2), QImode));
+ // "*nzb=1.<code>.ge0_split"
+ is_nzb |= (GET_CODE (XEXP (x, 0)) == GE
+ && REG_P (XEXP (XEXP (x, 0), 0))
+ && const0_operand (XEXP (XEXP (x, 0), 1), QImode));
+ if (is_nzb)
+ {
+ *total = COSTS_N_INSNS (code == XOR ? 3 : 2);
+ return true;
+ }
+ }
+
+ // Insn "*nzb=1.ior.ashift_split" with nonzero_bits() == 1 in the condition.
+ if (avropt_use_nonzero_bits
+ && mode == QImode
+ && code == IOR
+ && REG_P (XEXP (x, 1))
+ && GET_CODE (XEXP (x, 0)) == ASHIFT
+ && REG_P (XEXP (XEXP (x, 0), 0))
+ && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
+ {
+ *total = COSTS_N_INSNS (2);
+ return true;
+ }
+
+
switch (code)
{
case CONST_INT:
@@ -13684,6 +13728,28 @@ avr_rtx_costs_1 (rtx x, machine_mode mode, int outer_code,
*total += avr_operand_rtx_cost (XEXP (x, 0), mode, code, 0, speed);
return true;
+ case GE:
+ if (mode == QImode
+ && REG_P (XEXP (x, 0))
+ && XEXP (x, 1) == const0_rtx)
+ {
+ *total = COSTS_N_INSNS (3);
+ return true;
+ }
+ break;
+
+ case ZERO_EXTRACT:
+ if (mode == QImode
+ && REG_P (XEXP (x, 0))
+ && XEXP (x, 1) == const1_rtx
+ && CONST_INT_P (XEXP (x, 2)))
+ {
+ int bpos = INTVAL (XEXP (x, 2));
+ *total = COSTS_N_INSNS (bpos == 0 ? 1 : bpos == 1 ? 2 : 3);
+ return true;
+ }
+ break;
+
case COMPARE:
switch (GET_MODE (XEXP (x, 0)))
{
@@ -15171,6 +15237,46 @@ avr_emit3_fix_outputs (rtx (*gen)(rtx,rtx,rtx), rtx *op,
}
+/* A helper for the insn condition of "*nzb=1.<code>.lsr[.not]_split"
+ where <code> is AND, IOR or XOR. Return true when
+
+ OP[0] <code>= OP[1] >> OP[2]
+
+ can be performed by means of the code of "*nzb=1.<code>.zerox", i.e.
+
+ OP[0] <code>= OP[1].OP[2]
+
+ For example, when OP[0] is in { 0, 1 }, then R24 &= R10.4
+ can be performed by means of SBRS R10,4 $ CLR R24.
+ Notice that the constraint of OP[3] is "0". */
+
+bool
+avr_nonzero_bits_lsr_operands_p (rtx_code code, rtx *op)
+{
+ if (reload_completed)
+ return false;
+
+ const auto offs = INTVAL (op[2]);
+ const auto op1_non0 = nonzero_bits (op[1], QImode);
+ const auto op3_non0 = nonzero_bits (op[3], QImode);
+
+ switch (code)
+ {
+ default:
+ gcc_unreachable ();
+
+ case IOR:
+ case XOR:
+ return op1_non0 >> offs == 1;
+
+ case AND:
+ return op3_non0 == 1;
+ }
+
+ return false;
+}
+
+
/* Worker function for cpymemhi expander.
XOP[0] Destination as MEM:BLK
XOP[1] Source " "
diff --git a/gcc/config/avr/avr.md b/gcc/config/avr/avr.md
index 06e31aa..1c4e44d 100644
--- a/gcc/config/avr/avr.md
+++ b/gcc/config/avr/avr.md
@@ -84,6 +84,7 @@
[UNSPEC_STRLEN
UNSPEC_CPYMEM
UNSPEC_INDEX_JMP
+ UNSPEC_NZB
UNSPEC_FMUL
UNSPEC_FMULS
UNSPEC_FMULSU
@@ -175,6 +176,10 @@
no"
(const_string "no"))
+(define_attr "nzb"
+ "yes, no"
+ (const_string "no"))
+
;; Flavours of instruction set architecture (ISA), used in enabled attribute
;; mov : ISA has no MOVW movw : ISA has MOVW
@@ -10916,6 +10921,216 @@
DONE;
})
+;; Patterns for -muse-nonzero-bits use nonzero_bits() in their condition,
+;; which makes possible some more optimizations.
+;; Since combine may add clobber of REG_CC, we must make sure that there are
+;; no other routes to synthesize such patterns. We use an UNSPEC for that.
+;; As insns are not supposed to use stuff like nonzero_bits() in their
+;; condition, we split the insns right after insn combine. For CFG reasons we
+;; to do the splits by hand in avr_pass_split_nzb. All insns that must be
+;; split by that pass must have insn attribute "nzb" set to "yes". Moreover,
+;; the insns to split must be single_sets and must not touch control flow.
+
+(define_code_attr nzb_constr_rdr [(and "r") (ior "d") (xor "r")])
+(define_code_attr nzb_use1_nnr [(and "n") (ior "n") (xor "r")])
+
+(define_insn_and_split "*nzb=1.<code>.zerox_split"
+ [(set (match_operand:QI 0 "register_operand")
+ (bitop:QI (zero_extract:QI (match_operand:QI 1 "register_operand")
+ (const_int 1)
+ (match_operand:QI 2 "const_0_to_7_operand"))
+ (match_operand:QI 3 "register_operand")))]
+ "optimize && avropt_use_nonzero_bits
+ && !reload_completed
+ && (<CODE> == IOR || <CODE> == XOR
+ || nonzero_bits (operands[3], QImode) == 1)"
+ { gcc_unreachable (); }
+ "optimize && avropt_use_nonzero_bits
+ && !reload_completed"
+ [(parallel [(set (match_dup 0)
+ (bitop:QI (zero_extract:QI (match_dup 1)
+ (const_int 1)
+ (match_dup 2))
+ (unspec:QI [(match_dup 3)
+ ] UNSPEC_NZB)))
+ (use (const_int 1))
+ (clobber (reg:CC REG_CC))])]
+ ""
+ [(set_attr "nzb" "yes")])
+
+(define_insn "*nzb=1.<code>.zerox"
+ [(set (match_operand:QI 0 "register_operand" "=<nzb_constr_rdr>")
+ (bitop:QI (zero_extract:QI (match_operand:QI 1 "register_operand" "r")
+ (const_int 1)
+ (match_operand:QI 2 "const_0_to_7_operand" "n"))
+ (unspec:QI [(match_operand:QI 3 "register_operand" "0")
+ ] UNSPEC_NZB)))
+ (use (match_operand:QI 4 "nonmemory_operand" "<nzb_use1_nnr>"))
+ (clobber (reg:CC REG_CC))]
+ "optimize && avropt_use_nonzero_bits"
+ {
+ if (<CODE> == AND)
+ return "sbrs %1,%2\;clr %0";
+ else if (<CODE> == IOR)
+ return "sbrc %1,%2\;ori %0,1";
+ else if (<CODE> == XOR)
+ return "sbrc %1,%2\;eor %0,%4";
+ else
+ gcc_unreachable ();
+ }
+ [(set_attr "length" "2")])
+
+(define_insn_and_split "*nzb=1.<code>.lsr_split"
+ [(set (match_operand:QI 0 "register_operand")
+ (bitop:QI (lshiftrt:QI (match_operand:QI 1 "register_operand")
+ (match_operand:QI 2 "const_0_to_7_operand"))
+ (match_operand:QI 3 "register_operand")))]
+ "optimize && avropt_use_nonzero_bits
+ && !reload_completed
+ && avr_nonzero_bits_lsr_operands_p (<CODE>, operands)"
+ { gcc_unreachable (); }
+ "optimize && avropt_use_nonzero_bits
+ && !reload_completed"
+ [(parallel [(set (match_dup 0)
+ (bitop:QI (zero_extract:QI (match_dup 1)
+ (const_int 1)
+ (match_dup 2))
+ (unspec:QI [(match_dup 3)
+ ] UNSPEC_NZB)))
+ (use (const_int 1))
+ (clobber (reg:CC REG_CC))])]
+ ""
+ [(set_attr "nzb" "yes")])
+
+(define_insn_and_split "*nzb=1.<code>.zerox.not_split"
+ [(set (match_operand:QI 0 "register_operand")
+ (bitop:QI (zero_extract:QI (xor:QI (match_operand:QI 1 "register_operand")
+ (match_operand:QI 4 "const_int_operand"))
+ (const_int 1)
+ (match_operand:QI 2 "const_0_to_7_operand"))
+ (match_operand:QI 3 "register_operand")))]
+ "optimize && avropt_use_nonzero_bits
+ && !reload_completed
+ && INTVAL (operands[2]) == exact_log2 (0xff & INTVAL (operands[4]))
+ && (<CODE> == IOR
+ || nonzero_bits (operands[3], QImode) == 1)"
+ { gcc_unreachable (); }
+ "optimize && avropt_use_nonzero_bits
+ && !reload_completed"
+ ; "*nzb=1.<code>.zerox.not"
+ [(parallel [(set (match_dup 0)
+ (bitop:QI (zero_extract:QI (not:QI (match_dup 1))
+ (const_int 1)
+ (match_dup 2))
+ (unspec:QI [(match_dup 3)
+ ] UNSPEC_NZB)))
+ (use (const_int 1))
+ (clobber (reg:CC REG_CC))])]
+ ""
+ [(set_attr "nzb" "yes")])
+
+(define_insn_and_split "*nzb=1.<code>.lsr.not_split"
+ [(set (match_operand:QI 0 "register_operand")
+ (bitop:QI (lshiftrt:QI (xor:QI (match_operand:QI 1 "register_operand")
+ (match_operand:QI 4 "const_int_operand"))
+ (match_operand:QI 2 "const_0_to_7_operand"))
+ (match_operand:QI 3 "register_operand")))]
+ "optimize && avropt_use_nonzero_bits
+ && !reload_completed
+ && INTVAL (operands[2]) == exact_log2 (0xff & INTVAL (operands[4]))
+ && avr_nonzero_bits_lsr_operands_p (<CODE>, operands)"
+ { gcc_unreachable (); }
+ "optimize && avropt_use_nonzero_bits
+ && !reload_completed"
+ ; "*nzb=1.<code>.zerox.not"
+ [(parallel [(set (match_dup 0)
+ (bitop:QI (zero_extract:QI (not:QI (match_dup 1))
+ (const_int 1)
+ (match_dup 2))
+ (unspec:QI [(match_dup 3)
+ ] UNSPEC_NZB)))
+ (use (const_int 1))
+ (clobber (reg:CC REG_CC))])]
+ ""
+ [(set_attr "nzb" "yes")])
+
+(define_insn_and_split "*nzb=1.<code>.ge0_split"
+ [(set (match_operand:QI 0 "register_operand")
+ (bitop:QI (ge:QI (match_operand:QI 1 "register_operand")
+ (const_int 0))
+ (match_operand:QI 2 "register_operand")))]
+ "optimize && avropt_use_nonzero_bits
+ && !reload_completed
+ && (<CODE> == IOR || <CODE> == XOR
+ || nonzero_bits (operands[2], QImode) == 1)"
+ { gcc_unreachable (); }
+ "optimize && avropt_use_nonzero_bits
+ && !reload_completed"
+ ; "*nzb=1.<code>.zerox.not"
+ [(parallel [(set (match_dup 0)
+ (bitop:QI (zero_extract:QI (not:QI (match_dup 1))
+ (const_int 1)
+ (const_int 7))
+ (unspec:QI [(match_dup 2)
+ ] UNSPEC_NZB)))
+ (use (const_int 1))
+ (clobber (reg:CC REG_CC))])]
+ ""
+ [(set_attr "nzb" "yes")])
+
+(define_insn "*nzb=1.<code>.zerox.not"
+ [(set (match_operand:QI 0 "register_operand" "=<nzb_constr_rdr>")
+ (bitop:QI (zero_extract:QI (not:QI (match_operand:QI 1 "register_operand" "r"))
+ (const_int 1)
+ (match_operand:QI 2 "const_0_to_7_operand" "n"))
+ (unspec:QI [(match_operand:QI 3 "register_operand" "0")
+ ] UNSPEC_NZB)))
+ (use (match_operand:QI 4 "nonmemory_operand" "<nzb_use1_nnr>"))
+ (clobber (reg:CC REG_CC))]
+ "optimize && avropt_use_nonzero_bits"
+ {
+ if (<CODE> == AND)
+ return "sbrc %1,%2\;clr %0";
+ else if (<CODE> == IOR)
+ return "sbrs %1,%2\;ori %0,1";
+ else if (<CODE> == XOR)
+ return "sbrs %1,%2\;eor %0,%4";
+ else
+ gcc_unreachable ();
+ }
+ [(set_attr "length" "2")])
+
+(define_insn_and_split "*nzb=1.ior.ashift_split"
+ [(set (match_operand:QI 0 "register_operand" "=d")
+ (ior:QI (ashift:QI (match_operand:QI 1 "register_operand" "r")
+ (match_operand:QI 2 "const_0_to_7_operand" "n"))
+ (match_operand:QI 3 "register_operand" "0")))]
+ "optimize && avropt_use_nonzero_bits
+ && !reload_completed
+ && nonzero_bits (operands[1], QImode) == 1"
+ { gcc_unreachable (); }
+ "optimize && avropt_use_nonzero_bits
+ && !reload_completed"
+ [(parallel [(set (match_dup 0)
+ (unspec:QI [(ior:QI (ashift:QI (match_dup 1)
+ (match_dup 2))
+ (match_dup 3))
+ ] UNSPEC_NZB))
+ (clobber (reg:CC REG_CC))])]
+ ""
+ [(set_attr "nzb" "yes")])
+
+(define_insn "*nzb=1.ior.ashift"
+ [(set (match_operand:QI 0 "register_operand" "=d")
+ (unspec:QI [(ior:QI (ashift:QI (match_operand:QI 1 "register_operand" "r")
+ (match_operand:QI 2 "const_0_to_7_operand" "n"))
+ (match_operand:QI 3 "register_operand" "0"))
+ ] UNSPEC_NZB))
+ (clobber (reg:CC REG_CC))]
+ "optimize && avropt_use_nonzero_bits"
+ "sbrc %1,0\;ori %0,1<<%2"
+ [(set_attr "length" "2")])
+
;; Work around PR115307: Early passes expand isinf/f/l to a bloat.
;; These passes do not consider costs, and there is no way to
diff --git a/gcc/config/avr/avr.opt b/gcc/config/avr/avr.opt
index d22a118..fcd2bf6 100644
--- a/gcc/config/avr/avr.opt
+++ b/gcc/config/avr/avr.opt
@@ -65,6 +65,10 @@ mpr118012
Target Var(avropt_pr118012) UInteger Init(1) Undocumented
This option is on per default in order to work around PR118012.
+muse-nonzero-bits
+Target Var(avropt_use_nonzero_bits) UInteger Init(0) Optimization
+Optimization. Allow to use nonzero_bits() in some insn conditions.
+
mshort-calls
Target RejectNegative Mask(SHORT_CALLS)
This option is used internally for multilib generation and selection. Assume RJMP / RCALL can target all program memory.
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 515d91a..81bfacf 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -912,8 +912,8 @@ Objective-C and Objective-C++ Dialects}.
-mdouble=@var{bits} -mlong-double=@var{bits} -mno-call-main
-mn_flash=@var{size} -mfract-convert-truncate -mno-interrupts
-mmain-is-OS_task -mrelax -mrmw -mstrict-X -mtiny-stack
--mrodata-in-ram -msplit-bit-shift -msplit-ldst
--mshort-calls -mskip-bug -nodevicelib -nodevicespecs
+-mrodata-in-ram -msplit-bit-shift -msplit-ldst -mshort-calls
+-mskip-bug -muse-nonzero-bits -nodevicelib -nodevicespecs
-Waddr-space-convert -Wmisspelled-isr}
@emph{Blackfin Options} (@ref{Blackfin Options})
@@ -24630,6 +24630,11 @@ a multiple of 8 is controlled by @option{-mfuse-move}.
Split multi-byte loads and stores into several byte loads and stores.
This optimization is turned on per default for @option{-O2} and higher.
+@opindex muse-nonzero-bits
+@item -muse-nonzero-bits
+Enable some patterns for bit optimizations that depend on specific values.
+This optimization is turned on per default for @option{-O2} and higher.
+
@end table
@anchor{eind}
diff --git a/gcc/testsuite/gcc.target/avr/torture/pr119421-sreg.c b/gcc/testsuite/gcc.target/avr/torture/pr119421-sreg.c
new file mode 100644
index 0000000..3752d4f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/avr/torture/pr119421-sreg.c
@@ -0,0 +1,301 @@
+/* { dg-do run } */
+/* { dg-additional-options "-std=gnu99 -Wno-pedantic" } */
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdbool.h>
+
+#define BITNO_I 7
+#define BITNO_T 6
+#define BITNO_H 5
+#define BITNO_S 4
+#define BITNO_V 3
+#define BITNO_N 2
+#define BITNO_Z 1
+#define BITNO_C 0
+
+#define I (1u << BITNO_I)
+#define T (1u << BITNO_T)
+#define H (1u << BITNO_H)
+#define S (1u << BITNO_S)
+#define V (1u << BITNO_V)
+#define N (1u << BITNO_N)
+#define Z (1u << BITNO_Z)
+#define C (1u << BITNO_C)
+
+#define bit(a, x) ((bool) ((a) & (1u << (x))))
+
+typedef union
+{
+ uint8_t val;
+ struct
+ {
+ bool c:1;
+ bool z:1;
+ bool n:1;
+ bool v:1;
+ bool s:1;
+ bool h:1;
+ bool t:1;
+ bool i:1;
+ };
+} sreg_t;
+
+
+typedef struct
+{
+ sreg_t sreg;
+ uint8_t mask;
+ uint16_t result;
+} flags_t;
+
+flags_t flags_sub (uint8_t d, uint8_t r)
+{
+ uint8_t res = d - r;
+ bool R7 = bit (res, 7);
+
+ bool Rd7 = bit (d, 7);
+ bool Rd3 = bit (d, 3);
+
+ bool R3 = bit (res, 3);
+ bool Rr7 = bit (r, 7);
+ bool Rr3 = bit (r, 3);
+
+ sreg_t s = { 0 };
+
+ s.v = (Rd7 & !Rr7 & !R7) | (!Rd7 & Rr7 & R7);
+ s.n = R7;
+ s.z = res == 0;
+ s.c = (!Rd7 & Rr7) | (Rr7 & R7) | (R7 & !Rd7);
+ s.h = (!Rd3 & Rr3) | (Rr3 & R3) | (R3 & !Rd3);
+ s.s = s.n ^ s.v;
+
+ return (flags_t) { s, H | S | V | N | Z | C, res };
+}
+
+flags_t flags_sbc (uint8_t d, uint8_t r, sreg_t sreg)
+{
+ uint8_t res = d - r - sreg.c;
+ bool R7 = bit (res, 7);
+
+ bool Rd7 = bit (d, 7);
+ bool Rd3 = bit (d, 3);
+
+ bool R3 = bit (res, 3);
+ bool Rr7 = bit (r, 7);
+ bool Rr3 = bit (r, 3);
+
+ sreg_t s = { 0 };
+
+ s.v = (Rd7 & !Rr7 & !R7) | (!Rd7 & Rr7 & R7);
+ s.n = R7;
+ s.z = (res == 0) & sreg.z;
+ s.c = (!Rd7 & Rr7) | (Rr7 & R7) | (R7 & !Rd7);
+ s.h = (!Rd3 & Rr3) | (Rr3 & R3) | (R3 & !Rd3);
+ s.s = s.n ^ s.v;
+
+ return (flags_t) { s, H | S | V | N | Z | C, res };
+}
+
+flags_t flags_neg (uint8_t d)
+{
+ uint8_t res = -d;
+ bool R7 = bit (res, 7);
+ bool R6 = bit (res, 6);
+ bool R5 = bit (res, 5);
+ bool R4 = bit (res, 4);
+ bool R3 = bit (res, 3);
+ bool R2 = bit (res, 2);
+ bool R1 = bit (res, 1);
+ bool R0 = bit (res, 0);
+
+ bool Rd3 = bit (d, 3);
+
+ sreg_t s = { 0 };
+
+ s.v = R7 & !R6 & !R5 & !R4 & !R3 & !R2 & !R1 & !R0;
+ s.n = R7;
+ s.z = res == 0;
+ s.c = R7 | R6 | R5 | R4 | R3 | R2 | R1 | R0;
+ s.h = R3 | Rd3;
+ s.s = s.n ^ s.v;
+
+ return (flags_t) { s, H | S | V | N | Z | C, res };
+}
+
+flags_t flags_ror (uint8_t d, sreg_t sreg)
+{
+ uint8_t res = (d + 0x100 * sreg.c) >> 1;
+
+ sreg_t s = { 0 };
+
+ s.c = bit (d, 0);
+ s.z = res == 0;
+ s.n = bit (res, 7);
+ s.v = s.n ^ s.c;
+ s.s = s.n ^ s.v;
+
+ return (flags_t) { s, S | V | N | Z | C, res };
+}
+
+flags_t flags_add (uint8_t d, uint8_t r)
+{
+ uint8_t res = d + r;
+ bool R7 = bit (res, 7);
+
+ bool Rd7 = bit (d, 7);
+ bool Rd3 = bit (d, 3);
+
+ bool R3 = bit (res, 3);
+ bool Rr7 = bit (r, 7);
+ bool Rr3 = bit (r, 3);
+
+ sreg_t s = { 0 };
+
+ s.v = (Rd7 & Rr7 & !R7) | (!Rd7 & !Rr7 & R7);
+ s.n = R7;
+ s.z = res == 0;
+ s.c = (Rd7 & Rr7) | (Rr7 & !R7) | (!R7 & Rd7);
+ s.h = (Rd3 & Rr3) | (Rr3 & !R3) | (!R3 & Rd3);
+ s.s = s.n ^ s.v;
+
+ return (flags_t) { s, H | S | V | N | Z | C, res };
+}
+
+static inline
+sreg_t sreg_sub (uint8_t d, uint8_t r, uint8_t sreg, uint8_t result)
+{
+ __asm ("out __SREG__,%[sreg]" "\n\t"
+ "sub %[d],%[r]" "\n\t"
+ "in %[sreg],__SREG__"
+ : [sreg] "+r" (sreg), [d] "+r" (d)
+ : [r] "r" (r));
+ if (d != result)
+ exit (__LINE__);
+ return (sreg_t) sreg;
+}
+
+static inline
+sreg_t sreg_sbc (uint8_t d, uint8_t r, uint8_t sreg, uint8_t result)
+{
+ __asm ("out __SREG__,%[sreg]" "\n\t"
+ "sbc %[d],%[r]" "\n\t"
+ "in %[sreg],__SREG__"
+ : [sreg] "+r" (sreg), [d] "+r" (d)
+ : [r] "r" (r));
+ if (d != result)
+ exit (__LINE__);
+ return (sreg_t) sreg;
+}
+
+static inline
+sreg_t sreg_neg (uint8_t d, uint8_t sreg, uint8_t result)
+{
+ __asm ("out __SREG__,%[sreg]" "\n\t"
+ "neg %[d]" "\n\t"
+ "in %[sreg],__SREG__"
+ : [sreg] "+r" (sreg), [d] "+r" (d));
+ if (d != result)
+ exit (__LINE__);
+ return (sreg_t) sreg;
+}
+
+static inline
+sreg_t sreg_ror (uint8_t d, uint8_t sreg, uint8_t result)
+{
+ __asm ("out __SREG__,%[sreg]" "\n\t"
+ "ror %[d]" "\n\t"
+ "in %[sreg],__SREG__"
+ : [sreg] "+r" (sreg), [d] "+r" (d));
+ if (d != result)
+ exit (__LINE__);
+ return (sreg_t) sreg;
+}
+
+static inline
+sreg_t sreg_add (uint8_t d, uint8_t r, uint8_t sreg, uint8_t result)
+{
+ __asm ("out __SREG__,%[sreg]" "\n\t"
+ "add %[d],%[r]" "\n\t"
+ "in %[sreg],__SREG__"
+ : [sreg] "+r" (sreg), [d] "+r" (d)
+ : [r] "r" (r));
+ if (d != result)
+ exit (__LINE__);
+ return (sreg_t) sreg;
+}
+
+void test_sub (uint8_t d, uint8_t r, sreg_t sreg)
+{
+ sreg_t s0 = sreg_sub (d, r, sreg.val, d - r);
+ flags_t f = flags_sub (d, r);
+ if ((f.sreg.val & f.mask) != (s0.val & f.mask))
+ exit (__LINE__);
+}
+
+void test_sbc (uint8_t d, uint8_t r, sreg_t sreg)
+{
+ sreg_t s0 = sreg_sbc (d, r, sreg.val, d - r - sreg.c);
+ flags_t f = flags_sbc (d, r, sreg);
+ if ((f.sreg.val & f.mask) != (s0.val & f.mask))
+ exit (__LINE__);
+}
+
+void test_neg (uint8_t d, sreg_t sreg)
+{
+ sreg_t s0 = sreg_neg (d, sreg.val, -d);
+ flags_t f = flags_neg (d);
+ if ((f.sreg.val & f.mask) != (s0.val & f.mask))
+ exit (__LINE__);
+}
+
+void test_add (uint8_t d, uint8_t r, sreg_t sreg)
+{
+ sreg_t s0 = sreg_add (d, r, sreg.val, d + r);
+ flags_t f = flags_add (d, r);
+ if ((f.sreg.val & f.mask) != (s0.val & f.mask))
+ exit (__LINE__);
+}
+
+void test_ror (uint8_t d, sreg_t sreg)
+{
+ sreg_t s0 = sreg_ror (d, sreg.val, (d + 0x100 * sreg.c) >> 1);
+ flags_t f = flags_ror (d, sreg);
+ if ((f.sreg.val & f.mask) != (s0.val & f.mask))
+ exit (__LINE__);
+}
+
+void test_sreg (void)
+{
+ uint8_t d = 0;
+
+ do
+ {
+ uint8_t r = 0;
+ test_neg (d, (sreg_t) { 0x00 });
+ test_neg (d, (sreg_t) { 0xff });
+
+ test_ror (d, (sreg_t) { 0 });
+ test_ror (d, (sreg_t) { C });
+
+ do
+ {
+ test_add (d, r, (sreg_t) { 0x00 });
+ test_add (d, r, (sreg_t) { 0xff });
+
+ test_sub (d, r, (sreg_t) { 0x00 });
+ test_sub (d, r, (sreg_t) { 0xff });
+
+ test_sbc (d, r, (sreg_t) { 0 });
+ test_sbc (d, r, (sreg_t) { C });
+ test_sbc (d, r, (sreg_t) { Z });
+ test_sbc (d, r, (sreg_t) { C | Z });
+ } while (++r);
+ } while (++d);
+}
+
+int main (void)
+{
+ test_sreg();
+ return 0;
+}