author    Georg-Johann Lay <avr@gjlay.de>  2024-09-15 16:58:09 +0200
committer Georg-Johann Lay <avr@gjlay.de>  2024-09-16 14:05:23 +0200
commit    7fb1117310def905f8d151196b8655247dff68ee (patch)
tree      cc38602938e39f4fd9f255dde2427bc02d4e6339
parent    952df9c50b30cc6f849c422b84592a81524f8ef7 (diff)
AVR: Tweak >= and < compares with consts that are 0 mod 256.
The >= and < comparisons may skip comparing the lower bytes when the
corresponding bytes of the constant are all zeros.  For example,
uint16 >= 0x1200 is true iff hi8 (uint16) >= hi8 (0x1200), and similarly
for uint16 < 0x1200.

Some comparisons against constants that are an integral power of 256
were already handled in the split preparation.  That code has been
outsourced to the new avr_maybe_cmp_lsr() which may change the operands
such that the resulting insns become a comparison of the high bytes
against 0 plus an EQ / NE branch.  For example, uint32 >= 0x10000 can be
rewritten as (uint32 >> 16) != 0.  The corresponding asm output is
performed by the new avr_out_cmp_lsr().

gcc/
	* config/avr/avr-protos.h (avr_out_cmp_lsr, avr_maybe_cmp_lsr): New.
	* config/avr/avr.cc (avr_maybe_cmp_lsr, avr_out_cmp_lsr): New functions.
	(avr_out_compare) [GEU, LTU]: Start output at byte CTZ(xval) / 8.
	(avr_adjust_insn_length) [ADJUST_LEN_CMP_LSR]: Handle case.
	* config/avr/avr.md (adjust_len) <cmp_lsr>: New attr value.
	(*cmp<mode>_lsr): New define_insn_and_split.
	(cbranch<mode>4_insn): When splitting, run avr_maybe_cmp_lsr()
	which may map the operands to *cmp<mode>_lsr.

gcc/testsuite/
	* gcc.target/avr/torture/cmp-lsr-i32.c: New test.
	* gcc.target/avr/torture/cmp-lsr-u16.c: New test.
	* gcc.target/avr/torture/cmp-lsr-u24.c: New test.
	* gcc.target/avr/torture/cmp-lsr-u32.c: New test.
	* gcc.target/avr/torture/cmp-lsr-u64.c: New test.
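The transformation is easy to sanity-check at the C level.  The snippet
below is illustrative only (it is not part of the patch); it verifies the
two identities the commit message relies on: low bytes that are zero in
the constant cannot affect an unsigned >= or <, and a compare against
256^n is the same as a shifted test against 0.

    /* Illustrative sanity check, not part of the patch.  */
    #include <stdint.h>
    #include <assert.h>

    int main (void)
    {
      for (uint32_t i = 0; i < 0x40000u; i += 0xfb)
        {
          uint16_t x16 = (uint16_t) i;
          /* uint16 >= 0x1200  iff  hi8 (uint16) >= hi8 (0x1200).  */
          assert ((x16 >= 0x1200) == ((x16 >> 8) >= 0x12));
          /* uint32 >= 0x10000  iff  (uint32 >> 16) != 0.  */
          assert ((i >= 0x10000u) == ((i >> 16) != 0));
        }
      return 0;
    }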
-rw-r--r--  gcc/config/avr/avr-protos.h                          |   3
-rw-r--r--  gcc/config/avr/avr.cc                                | 147
-rw-r--r--  gcc/config/avr/avr.md                                |  70
-rw-r--r--  gcc/testsuite/gcc.target/avr/torture/cmp-lsr-i32.c   |  82
-rw-r--r--  gcc/testsuite/gcc.target/avr/torture/cmp-lsr-u16.c   |  74
-rw-r--r--  gcc/testsuite/gcc.target/avr/torture/cmp-lsr-u24.c   |  76
-rw-r--r--  gcc/testsuite/gcc.target/avr/torture/cmp-lsr-u32.c   |  78
-rw-r--r--  gcc/testsuite/gcc.target/avr/torture/cmp-lsr-u64.c   |  84
8 files changed, 573 insertions, 41 deletions
diff --git a/gcc/config/avr/avr-protos.h b/gcc/config/avr/avr-protos.h
index c8aa7c7..96708eb 100644
--- a/gcc/config/avr/avr-protos.h
+++ b/gcc/config/avr/avr-protos.h
@@ -115,7 +115,8 @@ extern const char* output_reload_inhi (rtx*, rtx, int*);
extern const char* output_reload_insisf (rtx*, rtx, int*);
extern const char* avr_out_reload_inpsi (rtx*, rtx, int*);
extern const char* avr_out_lpm (rtx_insn *, rtx*, int*);
-extern void avr_notice_update_cc (rtx body, rtx_insn *insn);
+extern const char* avr_out_cmp_lsr (rtx_insn *, rtx*, int*);
+extern void avr_maybe_cmp_lsr (rtx *);
extern int reg_unused_after (rtx_insn *insn, rtx reg);
extern int avr_jump_mode (rtx x, rtx_insn *insn, int = 0);
extern int test_hard_reg_class (enum reg_class rclass, rtx x);
diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc
index cf17be0..c0bf132 100644
--- a/gcc/config/avr/avr.cc
+++ b/gcc/config/avr/avr.cc
@@ -5943,6 +5943,118 @@ avr_canonicalize_comparison (int *icode, rtx *op0, rtx *op1, bool op0_fixed)
}
+/* Try to turn a GEU or LTU comparison of register XOP[1] into an
+ NE / EQ comparison of the higher bytes of XOP[1] against 0.
+ XOP[1] has scalar int or scalar fixed-point mode of 2, 3 or 4 bytes.
+ XOP[2] is a compile-time constant, and XOP[0] = XOP[1] <comp> XOP[2]
+ is the comparison operator. XOP[3] is the branch label, and XOP[4]
+ is a QImode scratch operand.
+   When XOP[2] (viewed as a CONST_INT) is an integral power of 256,
+   then a GEU or LTU comparison can be turned into an NE or EQ comparison
+ of the high bytes against zero. For example, the C code
+
+ if (x >= 1)
+ ccc = 0;
+
+   where x is an unsigned _Accum, may be compiled as:
+
+ or r24,r25 ; *cmpsi_lsr
+ breq .L1 ; branch
+ sts ccc,__zero_reg__ ; movqi_insn
+ .L1:
+
+ In the case of success, the operands will be such that they comprise
+ a *cmp<mode>_lsr insn, where mode is HI, PSI or SI, and XOP[0] will be
+ a NE or EQ branch condition. Otherwise, XOP[] is unchanged. */
+
+void
+avr_maybe_cmp_lsr (rtx *xop)
+{
+ rtx_code comp = GET_CODE (xop[0]);
+
+ if ((comp == GEU || comp == LTU)
+ && (CONST_INT_P (xop[2]) || CONST_FIXED_P (xop[2])))
+ {
+ rtx xreg = avr_to_int_mode (xop[1]);
+ rtx xval = avr_to_int_mode (xop[2]);
+ machine_mode imode = GET_MODE (xreg);
+ auto uval = UINTVAL (xval) & GET_MODE_MASK (imode);
+ int shift = exact_log2 (uval);
+
+ if (shift == 8 || shift == 16 || shift == 24)
+ {
+ // Operands such that the compare becomes *cmp<mode>_lsr.
+ xop[1] = gen_rtx_LSHIFTRT (imode, xreg, GEN_INT (shift));
+ xop[2] = const0_rtx;
+ xop[4] = gen_rtx_SCRATCH (QImode);
+ // Branch condition.
+ xop[0] = gen_rtx_fmt_ee (comp == GEU ? NE : EQ,
+ VOIDmode, xop[1], xop[2]);
+ }
+ }
+}
+
+
+/* Output an EQ / NE compare of HI, PSI or SI register XOP[0] against 0,
+ where only the bits starting at XOP[1] are relevant. XOP[1] is a
+ const_int that is 8, 16 or 24. Return "".
+ PLEN == 0: Output instructions.
+ PLEN != 0: Set *PLEN to the length of the sequence in words. */
+
+const char *
+avr_out_cmp_lsr (rtx_insn *insn, rtx *xop, int *plen)
+{
+ rtx xreg = xop[0];
+ const int n_bytes = GET_MODE_SIZE (GET_MODE (xreg));
+ const int shift = INTVAL (xop[1]);
+ const rtx_code cond = compare_condition (insn);
+
+ gcc_assert (shift == 8 || shift == 16 || shift == 24);
+ gcc_assert (shift < 8 * n_bytes);
+ gcc_assert (cond == UNKNOWN || cond == NE || cond == EQ);
+
+ const bool used_p = ! reg_unused_after (insn, xreg);
+
+ if (plen)
+ *plen = 0;
+
+ if (shift / 8 == n_bytes - 1)
+ {
+ rtx xmsb = avr_byte (xreg, n_bytes - 1);
+ avr_asm_len ("tst %0", &xmsb, plen, 1);
+ }
+ else if (n_bytes == 4
+ && shift <= 16
+ && AVR_HAVE_ADIW
+ && REGNO (xreg) >= REG_22
+ // The sequence also works when xreg is unused after,
+ // but SBIW is slower than OR.
+ && used_p)
+ {
+ avr_asm_len ("sbiw %C0,0", &xreg, plen, 1);
+ if (shift == 8)
+ avr_asm_len ("cpc %B0,__zero_reg__", &xreg, plen, 1);
+ }
+ else
+ {
+ rtx op[2] = { avr_byte (xreg, shift / 8), tmp_reg_rtx };
+ if (used_p)
+ {
+ avr_asm_len ("mov %1,%0", op, plen, 1);
+ op[0] = tmp_reg_rtx;
+ }
+
+ for (int i = 1 + shift / 8; i < n_bytes; ++i)
+ {
+ op[1] = avr_byte (xreg, i);
+ avr_asm_len ("or %0,%1", op, plen, 1);
+ }
+ }
+
+ return "";
+}
+
+
/* Output compare instruction
compare (XOP[0], XOP[1])
@@ -5983,7 +6095,8 @@ avr_out_compare (rtx_insn *insn, rtx *xop, int *plen)
if (plen)
*plen = 0;
- const bool eqne_p = compare_eq_p (insn);
+ const rtx_code cond = compare_condition (insn);
+ const bool eqne_p = cond == EQ || cond == NE;
/* Comparisons == +/-1 and != +/-1 can be done similarly to comparing
against 0 by ORing the bytes.  This is one instruction shorter.
@@ -6029,6 +6142,7 @@ avr_out_compare (rtx_insn *insn, rtx *xop, int *plen)
&& REGNO (xreg) >= REG_22
&& (xval == const0_rtx
|| (IN_RANGE (avr_int16 (xval, 2), 0, 63)
+ && eqne_p
&& reg_unused_after (insn, xreg))))
{
xop[2] = avr_word (xval, 2);
@@ -6039,7 +6153,16 @@ avr_out_compare (rtx_insn *insn, rtx *xop, int *plen)
bool changed[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
- for (int i = 0; i < n_bytes; i++)
+  /* The >= and < comparisons may skip the lower bytes when the corresponding bytes
+ of the constant are all zeros. In that case, the comparison may start
+ at a byte other than the LSB. */
+
+ const int start = ((cond == GEU || cond == LTU || cond == GE || cond == LT)
+ && INTVAL (xval) != 0)
+ ? ctz_hwi (INTVAL (xval)) / 8
+ : 0;
+
+ for (int i = start; i < n_bytes; i++)
{
/* We compare byte-wise. */
xop[0] = avr_byte (xreg, i);
@@ -6050,25 +6173,26 @@ avr_out_compare (rtx_insn *insn, rtx *xop, int *plen)
/* Word registers >= R24 can use SBIW/ADIW with 0..63. */
- if (i == 0
- && n_bytes >= 2
+ if (i == start
+ && i % 2 == 0
+ && n_bytes - start >= 2
&& avr_adiw_reg_p (xop[0]))
{
- int val16 = avr_int16 (xval, 0);
+ int val16 = avr_int16 (xval, i);
if (IN_RANGE (val16, 0, 63)
&& (val8 == 0
|| reg_unused_after (insn, xreg)))
{
avr_asm_len ("sbiw %0,%1", xop, plen, 1);
- changed[0] = changed[1] = val8 != 0;
+ changed[i] = changed[i + 1] = val8 != 0;
i++;
continue;
}
if (IN_RANGE (val16, -63, -1)
&& eqne_p
- && n_bytes == 2
+ && n_bytes - start == 2
&& reg_unused_after (insn, xreg))
{
return avr_asm_len ("adiw %0,%n1", xop, plen, 1);
@@ -6079,7 +6203,7 @@ avr_out_compare (rtx_insn *insn, rtx *xop, int *plen)
if (val8 == 0)
{
- avr_asm_len (i == 0
+ avr_asm_len (i == start
? "cp %0,__zero_reg__"
: "cpc %0,__zero_reg__", xop, plen, 1);
continue;
@@ -6092,7 +6216,7 @@ avr_out_compare (rtx_insn *insn, rtx *xop, int *plen)
if (test_hard_reg_class (LD_REGS, xop[0]))
{
- if (i == 0)
+ if (i == start)
{
avr_asm_len ("cpi %0,%1", xop, plen, 1);
continue;
@@ -6117,7 +6241,7 @@ avr_out_compare (rtx_insn *insn, rtx *xop, int *plen)
{
bool found = false;
- for (int j = 0; j < i && ! found; ++j)
+ for (int j = start; j < i && ! found; ++j)
if (val8 == avr_uint8 (xval, j)
// Make sure that we didn't clobber x[j] above.
&& ! changed[j])
@@ -6139,7 +6263,7 @@ avr_out_compare (rtx_insn *insn, rtx *xop, int *plen)
avr_asm_len ("ldi %2,%1", xop, plen, 1);
clobber_val = (int) val8;
- avr_asm_len (i == 0
+ avr_asm_len (i == start
? "cp %0,%2"
: "cpc %0,%2", xop, plen, 1);
}
@@ -10326,6 +10450,7 @@ avr_adjust_insn_length (rtx_insn *insn, int len)
case ADJUST_LEN_COMPARE64: avr_out_compare64 (insn, op, &len); break;
case ADJUST_LEN_CMP_UEXT: avr_out_cmp_ext (op, ZERO_EXTEND, &len); break;
case ADJUST_LEN_CMP_SEXT: avr_out_cmp_ext (op, SIGN_EXTEND, &len); break;
+ case ADJUST_LEN_CMP_LSR: avr_out_cmp_lsr (insn, op, &len); break;
case ADJUST_LEN_LSHRQI: lshrqi3_out (insn, op, &len); break;
case ADJUST_LEN_LSHRHI: lshrhi3_out (insn, op, &len); break;
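Aside (not part of the patch): the fallback branch of avr_out_cmp_lsr()
can be modeled in portable C.  The helper high_bytes_zero() below is a
made-up name for this sketch; it mirrors the emitted sequence, ORing
together the bytes from index shift/8 upwards so that the Z flag after
the last OR decides the EQ / NE branch.  When shift/8 == n_bytes - 1 the
loop never runs and the test degenerates to a single-byte test, which
the split in avr.md below turns into a plain cmpqi3.

    /* Illustrative C model of the OR-accumulation in avr_out_cmp_lsr().
       The helper name high_bytes_zero() is hypothetical.  */
    #include <stdint.h>
    #include <assert.h>

    static int
    high_bytes_zero (uint32_t x, int shift)   /* shift is 8, 16 or 24 */
    {
      uint8_t acc = (uint8_t) (x >> shift);   /* first relevant byte */
      for (int b = shift / 8 + 1; b < 4; ++b)
        acc |= (uint8_t) (x >> (8 * b));      /* one "or %0,%1" per byte */
      return acc == 0;                        /* Z flag after the ORs */
    }

    int main (void)
    {
      /* uint32 >= 0x10000  iff  (uint32 >> 16) != 0.  */
      assert (!high_bytes_zero (0x00010000u, 16));
      assert ( high_bytes_zero (0x0000ffffu, 16));
      /* shift == 24: a single-byte test of the MSB.  */
      assert (!high_bytes_zero (0xff000000u, 24));
      return 0;
    }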
diff --git a/gcc/config/avr/avr.md b/gcc/config/avr/avr.md
index 9d79028..aae8a69 100644
--- a/gcc/config/avr/avr.md
+++ b/gcc/config/avr/avr.md
@@ -170,7 +170,7 @@
ashlsi, ashrsi, lshrsi,
ashlpsi, ashrpsi, lshrpsi,
insert_bits, insv_notbit, insv,
- add_set_ZN, add_set_N, cmp_uext, cmp_sext,
+ add_set_ZN, add_set_N, cmp_uext, cmp_sext, cmp_lsr,
no"
(const_string "no"))
@@ -6631,6 +6631,34 @@
(set_attr "adjust_len" "tstsi,*,compare,compare")])
+;; "*cmphi_lsr"
+;; "*cmpsi_lsr"
+;; "*cmppsi_lsr"
+(define_insn_and_split "*cmp<mode>_lsr"
+ [(set (reg:CC REG_CC)
+ (compare:CC (lshiftrt:HISI (match_operand:HISI 0 "register_operand" "r")
+ (match_operand:QI 1 "const_8_16_24_operand" "n"))
+ (const_int 0)))
+ (clobber (scratch:QI))]
+ "reload_completed"
+ {
+ return avr_out_cmp_lsr (insn, operands, NULL);
+ }
+ "&& 1"
+ [;; "cmpqi3"
+ (set (reg:CC REG_CC)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ {
+    // When the comparison is just one byte, use cmpqi3.
+ if (INTVAL (operands[1]) / 8 == <SIZE> - 1)
+ operands[0] = simplify_gen_subreg (QImode, operands[0], <MODE>mode, <SIZE> - 1);
+ else
+ FAIL;
+ }
+ [(set_attr "adjust_len" "cmp_lsr")])
+
+
;; A helper for avr_pass_ifelse::avr_rest_of_handle_ifelse().
(define_expand "gen_compare<mode>"
[(parallel [(set (reg:CC REG_CC)
@@ -6724,20 +6752,9 @@
(label_ref (match_dup 3))
(pc)))]
{
- // Unsigned >= 65536 and < 65536 can be performed by testing the
- // high word against 0.
- if ((GET_CODE (operands[0]) == LTU
- || GET_CODE (operands[0]) == GEU)
- && const_operand (operands[2], <MODE>mode)
- && INTVAL (avr_to_int_mode (operands[2])) == 65536)
- {
- // "cmphi3" of the high word against 0.
- operands[0] = copy_rtx (operands[0]);
- PUT_CODE (operands[0], GET_CODE (operands[0]) == GEU ? NE : EQ);
- operands[1] = simplify_gen_subreg (HImode, operands[1], <MODE>mode, 2);
- operands[2] = const0_rtx;
- operands[4] = gen_rtx_SCRATCH (QImode);
- }
+ // Unsigned >= 256^n and < 256^n can be performed by testing the
+ // higher bytes against 0 (*cmpsi_lsr).
+ avr_maybe_cmp_lsr (operands);
})
;; "cbranchpsi4_insn"
@@ -6760,7 +6777,12 @@
(if_then_else (match_op_dup 0
[(reg:CC REG_CC) (const_int 0)])
(label_ref (match_dup 3))
- (pc)))])
+ (pc)))]
+ {
+ // Unsigned >= 256^n and < 256^n can be performed by testing the
+ // higher bytes against 0 (*cmppsi_lsr).
+ avr_maybe_cmp_lsr (operands);
+ })
;; "cbranchhi4_insn"
;; "cbranchhq4_insn" "cbranchuhq4_insn" "cbranchha4_insn" "cbranchuha4_insn"
@@ -6786,21 +6808,11 @@
(pc)))]
{
// Unsigned >= 256 and < 256 can be performed by testing the
- // high byte against 0.
- if ((GET_CODE (operands[0]) == LTU
- || GET_CODE (operands[0]) == GEU)
- && const_operand (operands[2], <MODE>mode)
- && INTVAL (avr_to_int_mode (operands[2])) == 256)
- {
- rtx_code code = GET_CODE (operands[0]) == GEU ? NE : EQ;
- rtx hi8 = simplify_gen_subreg (QImode, operands[1], <MODE>mode, 1);
- rtx cmp = gen_rtx_fmt_ee (code, VOIDmode, cc_reg_rtx, const0_rtx);
- emit (gen_cmpqi3 (hi8, const0_rtx));
- emit (gen_branch (operands[3], cmp));
- DONE;
- }
+ // high byte against 0 (*cmphi_lsr).
+ avr_maybe_cmp_lsr (operands);
})
+
;; Combiner pattern to compare sign- or zero-extended register against
;; a wider register, like comparing uint8_t against uint16_t.
(define_insn_and_split "*cbranch<HISI:mode>.<code><QIPSI:mode>.0"
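Aside (not part of the patch): the start-byte skip added to
avr_out_compare() above also covers signed >= and <, since constant
bytes that are zero cannot influence those comparisons either.  A
minimal check, assuming GCC's arithmetic right shift of negative values;
the torture test cmp-lsr-i32.c below exercises the same case via
MK_FUN (10, -0x40000000):

    /* Illustrative check of the signed GE / LT start-byte skip.  */
    #include <stdint.h>
    #include <assert.h>

    int main (void)
    {
      const int32_t c = (int32_t) -0x40000000;  /* 0xC0000000, ctz/8 == 3 */
      for (int32_t v = -3; v <= 3; ++v)
        {
          int32_t x = c + v;
          int full = x >= c;                    /* full-width signed >= */
          int msb_only = (int8_t) (x >> 24) >= (int8_t) (c >> 24);
          assert (full == msb_only);            /* the MSB alone decides */
        }
      return 0;
    }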
diff --git a/gcc/testsuite/gcc.target/avr/torture/cmp-lsr-i32.c b/gcc/testsuite/gcc.target/avr/torture/cmp-lsr-i32.c
new file mode 100644
index 0000000..15e1268
--- /dev/null
+++ b/gcc/testsuite/gcc.target/avr/torture/cmp-lsr-i32.c
@@ -0,0 +1,82 @@
+/* Test comparisons against constants that are a multiple of 256. */
+/* { dg-do run } */
+/* { dg-additional-options { -std=c99 } } */
+
+#define T i32
+
+#ifdef __OPTIMIZE__
+
+typedef __INT32_TYPE__ i32;
+typedef __UINT8_TYPE__ u8;
+typedef __INT8_TYPE__ i8;
+
+u8 volatile cc;
+
+#define NI __attribute__((noipa))
+#define AI static __inline__ __attribute__((always_inline))
+
+#define MK_FUN(id, val) \
+NI void fun_geu_##id (T x) \
+{ \
+ if (x >= val) \
+ cc = 0; \
+} \
+ \
+NI T fun_ltu_##id (T x) \
+{ \
+ if (x < val) \
+ cc = 0; \
+ return x; \
+} \
+ \
+NI void test_##id (void) \
+{ \
+ for (i8 v = -2; v <= 2; ++v) \
+ { \
+ const u8 lt0 = !! (v & 0x80); \
+ const T x = val + (T) v; \
+ \
+ cc = 1; \
+ fun_geu_##id (x); \
+ if (cc != lt0) \
+ __builtin_exit (__LINE__); \
+ \
+ cc = 1; \
+ T y = fun_ltu_##id (x); \
+ if (y != x) \
+ __builtin_exit (__LINE__); \
+ if (cc != ! lt0) \
+ __builtin_exit (__LINE__); \
+ } \
+}
+
+MK_FUN (01, 0x100)
+MK_FUN (02, 0x1200)
+MK_FUN (03, 0x8000)
+MK_FUN (04, 0x10000)
+MK_FUN (05, 0x110000)
+MK_FUN (06, 0x1000000)
+MK_FUN (07, -256)
+MK_FUN (08, -512)
+MK_FUN (09, -32000)
+MK_FUN (10, -0x40000000)
+
+#endif /* OPTIMIZE */
+
+int main (void)
+{
+#ifdef __OPTIMIZE__
+ test_01 ();
+ test_02 ();
+ test_03 ();
+ test_04 ();
+ test_05 ();
+ test_06 ();
+ test_07 ();
+ test_08 ();
+ test_09 ();
+ test_10 ();
+#endif /* OPTIMIZE */
+
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/avr/torture/cmp-lsr-u16.c b/gcc/testsuite/gcc.target/avr/torture/cmp-lsr-u16.c
new file mode 100644
index 0000000..268164e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/avr/torture/cmp-lsr-u16.c
@@ -0,0 +1,74 @@
+/* Test comparisons against constants that are a multiple of 256. */
+/* { dg-do run } */
+/* { dg-additional-options { -std=c99 } } */
+
+#define T u16
+
+#ifdef __OPTIMIZE__
+
+typedef __UINT64_TYPE__ u64;
+typedef __UINT32_TYPE__ u32;
+typedef __uint24 u24;
+typedef __UINT16_TYPE__ u16;
+typedef __UINT8_TYPE__ u8;
+
+typedef __INT8_TYPE__ i8;
+
+u8 volatile cc;
+
+#define NI __attribute__((noipa))
+#define AI static __inline__ __attribute__((always_inline))
+
+#define MK_FUN(id, val) \
+NI void fun_geu_##id (T x) \
+{ \
+ if (x >= val) \
+ cc = 0; \
+} \
+ \
+NI T fun_ltu_##id (T x) \
+{ \
+ if (x < val) \
+ cc = 0; \
+ return x; \
+} \
+ \
+NI void test_##id (void) \
+{ \
+ for (i8 v = -2; v <= 2; ++v) \
+ { \
+ const u8 lt0 = !! (v & 0x80); \
+ const T x = val + (T) v; \
+ \
+ cc = 1; \
+ fun_geu_##id (x); \
+ if (cc != lt0) \
+ __builtin_exit (__LINE__); \
+ \
+ cc = 1; \
+ T y = fun_ltu_##id (x); \
+ if (y != x) \
+ __builtin_exit (__LINE__); \
+ if (cc != ! lt0) \
+ __builtin_exit (__LINE__); \
+ } \
+}
+
+MK_FUN (01, 0x100)
+MK_FUN (02, 0x1200)
+MK_FUN (03, 0x8000)
+MK_FUN (04, 0xff00)
+
+#endif /* OPTIMIZE */
+
+int main (void)
+{
+#ifdef __OPTIMIZE__
+ test_01 ();
+ test_02 ();
+ test_03 ();
+ test_04 ();
+#endif /* OPTIMIZE */
+
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/avr/torture/cmp-lsr-u24.c b/gcc/testsuite/gcc.target/avr/torture/cmp-lsr-u24.c
new file mode 100644
index 0000000..d68433b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/avr/torture/cmp-lsr-u24.c
@@ -0,0 +1,76 @@
+/* Test comparisons against constants that are a multiple of 256. */
+/* { dg-do run } */
+/* { dg-additional-options { -std=c99 } } */
+
+#define T u24
+
+#ifdef __OPTIMIZE__
+
+typedef __UINT64_TYPE__ u64;
+typedef __UINT32_TYPE__ u32;
+typedef __uint24 u24;
+typedef __UINT16_TYPE__ u16;
+typedef __UINT8_TYPE__ u8;
+
+typedef __INT8_TYPE__ i8;
+
+u8 volatile cc;
+
+#define NI __attribute__((noipa))
+#define AI static __inline__ __attribute__((always_inline))
+
+#define MK_FUN(id, val) \
+NI void fun_geu_##id (T x) \
+{ \
+ if (x >= val) \
+ cc = 0; \
+} \
+ \
+NI T fun_ltu_##id (T x) \
+{ \
+ if (x < val) \
+ cc = 0; \
+ return x; \
+} \
+ \
+NI void test_##id (void) \
+{ \
+ for (i8 v = -2; v <= 2; ++v) \
+ { \
+ const u8 lt0 = !! (v & 0x80); \
+ const T x = val + (T) v; \
+ \
+ cc = 1; \
+ fun_geu_##id (x); \
+ if (cc != lt0) \
+ __builtin_exit (__LINE__); \
+ \
+ cc = 1; \
+ T y = fun_ltu_##id (x); \
+ if (y != x) \
+ __builtin_exit (__LINE__); \
+ if (cc != ! lt0) \
+ __builtin_exit (__LINE__); \
+ } \
+}
+
+MK_FUN (01, 0x100)
+MK_FUN (02, 0x1200)
+MK_FUN (03, 0x8000)
+MK_FUN (04, 0x10000)
+MK_FUN (05, 0x110000)
+
+#endif /* OPTIMIZE */
+
+int main (void)
+{
+#ifdef __OPTIMIZE__
+ test_01 ();
+ test_02 ();
+ test_03 ();
+ test_04 ();
+ test_05 ();
+#endif /* OPTIMIZE */
+
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/avr/torture/cmp-lsr-u32.c b/gcc/testsuite/gcc.target/avr/torture/cmp-lsr-u32.c
new file mode 100644
index 0000000..8868662
--- /dev/null
+++ b/gcc/testsuite/gcc.target/avr/torture/cmp-lsr-u32.c
@@ -0,0 +1,78 @@
+/* Test comparisons against constants that are a multiple of 256. */
+/* { dg-do run } */
+/* { dg-additional-options { -std=c99 } } */
+
+#define T u32
+
+#ifdef __OPTIMIZE__
+
+typedef __UINT64_TYPE__ u64;
+typedef __UINT32_TYPE__ u32;
+typedef __uint24 u24;
+typedef __UINT16_TYPE__ u16;
+typedef __UINT8_TYPE__ u8;
+
+typedef __INT8_TYPE__ i8;
+
+u8 volatile cc;
+
+#define NI __attribute__((noipa))
+#define AI static __inline__ __attribute__((always_inline))
+
+#define MK_FUN(id, val) \
+NI void fun_geu_##id (T x) \
+{ \
+ if (x >= val) \
+ cc = 0; \
+} \
+ \
+NI T fun_ltu_##id (T x) \
+{ \
+ if (x < val) \
+ cc = 0; \
+ return x; \
+} \
+ \
+NI void test_##id (void) \
+{ \
+ for (i8 v = -2; v <= 2; ++v) \
+ { \
+ const u8 lt0 = !! (v & 0x80); \
+ const T x = val + (T) v; \
+ \
+ cc = 1; \
+ fun_geu_##id (x); \
+ if (cc != lt0) \
+ __builtin_exit (__LINE__); \
+ \
+ cc = 1; \
+ T y = fun_ltu_##id (x); \
+ if (y != x) \
+ __builtin_exit (__LINE__); \
+ if (cc != ! lt0) \
+ __builtin_exit (__LINE__); \
+ } \
+}
+
+MK_FUN (01, 0x100)
+MK_FUN (02, 0x1200)
+MK_FUN (03, 0x8000)
+MK_FUN (04, 0x10000)
+MK_FUN (05, 0x110000)
+MK_FUN (06, 0x1000000)
+
+#endif /* OPTIMIZE */
+
+int main (void)
+{
+#ifdef __OPTIMIZE__
+ test_01 ();
+ test_02 ();
+ test_03 ();
+ test_04 ();
+ test_05 ();
+ test_06 ();
+#endif /* OPTIMIZE */
+
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/avr/torture/cmp-lsr-u64.c b/gcc/testsuite/gcc.target/avr/torture/cmp-lsr-u64.c
new file mode 100644
index 0000000..928c442
--- /dev/null
+++ b/gcc/testsuite/gcc.target/avr/torture/cmp-lsr-u64.c
@@ -0,0 +1,84 @@
+/* Test comparisons against constants that are a multiple of 256. */
+/* { dg-do run } */
+/* { dg-additional-options { -std=c99 } } */
+
+#define T u64
+
+#ifdef __OPTIMIZE__
+
+typedef __UINT64_TYPE__ u64;
+typedef __UINT32_TYPE__ u32;
+typedef __uint24 u24;
+typedef __UINT16_TYPE__ u16;
+typedef __UINT8_TYPE__ u8;
+
+typedef __INT8_TYPE__ i8;
+
+u8 volatile cc;
+
+#define NI __attribute__((noipa))
+#define AI static __inline__ __attribute__((always_inline))
+
+#define MK_FUN(id, val) \
+NI void fun_geu_##id (T x) \
+{ \
+ if (x >= val) \
+ cc = 0; \
+} \
+ \
+NI T fun_ltu_##id (T x) \
+{ \
+ if (x < val) \
+ cc = 0; \
+ return x; \
+} \
+ \
+NI void test_##id (void) \
+{ \
+ for (i8 v = -2; v <= 2; ++v) \
+ { \
+ const u8 lt0 = !! (v & 0x80); \
+ const T x = val + (T) v; \
+ \
+ cc = 1; \
+ fun_geu_##id (x); \
+ if (cc != lt0) \
+ __builtin_exit (__LINE__); \
+ \
+ cc = 1; \
+ T y = fun_ltu_##id (x); \
+ if (y != x) \
+ __builtin_exit (__LINE__); \
+ if (cc != ! lt0) \
+ __builtin_exit (__LINE__); \
+ } \
+}
+
+MK_FUN (01, 0x100)
+MK_FUN (02, 0x1200)
+MK_FUN (03, 0x8000)
+MK_FUN (04, 0x10000)
+MK_FUN (05, 0x110000)
+MK_FUN (06, 0x1000000)
+MK_FUN (07, 0x8080000000000000)
+MK_FUN (08, 0x0100000000000000)
+MK_FUN (09, 0x0001000000000000)
+
+#endif /* OPTIMIZE */
+
+int main (void)
+{
+#ifdef __OPTIMIZE__
+ test_01 ();
+ test_02 ();
+ test_03 ();
+ test_04 ();
+ test_05 ();
+ test_06 ();
+ test_07 ();
+ test_08 ();
+ test_09 ();
+#endif /* OPTIMIZE */
+
+ return 0;
+}