diff options
author | Georg-Johann Lay <avr@gjlay.de> | 2024-09-06 11:23:06 +0200 |
---|---|---|
committer | Georg-Johann Lay <avr@gjlay.de> | 2024-09-18 11:17:23 +0200 |
commit | 5bfb91c14f98f6750281217f737b3d95c4e73584 (patch) | |
tree | 746d0d63127ddcf462208266fe160a0393e6d1ad | |
parent | cdeebc71c48db922b14d34c361e15660c1e31fc1 (diff) | |
download | gcc-5bfb91c14f98f6750281217f737b3d95c4e73584.zip gcc-5bfb91c14f98f6750281217f737b3d95c4e73584.tar.gz gcc-5bfb91c14f98f6750281217f737b3d95c4e73584.tar.bz2 |
reload1.cc: rtl-optimization/116326 - Use RELOAD_ELIMINABLE_REGS.
The new macro is required because reload and LRA are using different
representations for a multi-register frame pointer. As ELIMINABLE_REGS
is used to initialize static const objects, it can't depend on -mlra.
PR rtl-optimization/116326
gcc/
* reload1.cc (reg_eliminate_1): Initialize from
RELOAD_ELIMINABLE_REGS if defined.
* config/avr/avr.h (RELOAD_ELIMINABLE_REGS): Copy from ELIMINABLE_REGS.
(ELIMINABLE_REGS): Don't mention sub-regnos of the frame pointer.
* doc/tm.texi.in (Eliminating Frame Pointer and Arg Pointer)
<RELOAD_ELIMINABLE_REGS>: Add documentation.
* doc/tm.texi: Rebuild.
gcc/testsuite/
* gcc.target/avr/torture/lra-pr116324.c: New test.
* gcc.target/avr/torture/lra-pr116325.c: New test.
-rw-r--r-- | gcc/config/avr/avr.h | 9 | ||||
-rw-r--r-- | gcc/doc/tm.texi | 8 | ||||
-rw-r--r-- | gcc/doc/tm.texi.in | 8 | ||||
-rw-r--r-- | gcc/reload1.cc | 6 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/avr/torture/lra-pr116324.c | 86 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/avr/torture/lra-pr116325.c | 117 |
6 files changed, 233 insertions, 1 deletions
diff --git a/gcc/config/avr/avr.h b/gcc/config/avr/avr.h index 1cf4180..3fa2ee7 100644 --- a/gcc/config/avr/avr.h +++ b/gcc/config/avr/avr.h @@ -308,12 +308,19 @@ enum reg_class { #define STATIC_CHAIN_REGNUM ((AVR_TINY) ? 18 :2) -#define ELIMINABLE_REGS { \ +#define RELOAD_ELIMINABLE_REGS { \ { ARG_POINTER_REGNUM, STACK_POINTER_REGNUM }, \ { ARG_POINTER_REGNUM, FRAME_POINTER_REGNUM }, \ { FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM }, \ { FRAME_POINTER_REGNUM + 1, STACK_POINTER_REGNUM + 1 } } +#define ELIMINABLE_REGS \ + { \ + { ARG_POINTER_REGNUM, STACK_POINTER_REGNUM }, \ + { ARG_POINTER_REGNUM, FRAME_POINTER_REGNUM }, \ + { FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM } \ + } + #define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \ OFFSET = avr_initial_elimination_offset (FROM, TO) diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi index cc33084..9e52042 100644 --- a/gcc/doc/tm.texi +++ b/gcc/doc/tm.texi @@ -4005,6 +4005,14 @@ Note that the elimination of the argument pointer with the stack pointer is specified first since that is the preferred elimination. @end defmac +@defmac RELOAD_ELIMINABLE_REGS +Like @code{ELIMINABLE_REGS}, but only used in the old reload framework where +it takes precedence over @code{ELIMINABLE_REGS}. This macro can be useful +during the transition to LRA because there are cases where reload and LRA +disagree on how eliminable registers should be represented. For an example, +see @file{avr.h}. +@end defmac + @deftypefn {Target Hook} bool TARGET_CAN_ELIMINATE (const int @var{from_reg}, const int @var{to_reg}) This target hook should return @code{true} if the compiler is allowed to try to replace register number @var{from_reg} with register number diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in index 8af3f41..a34674e 100644 --- a/gcc/doc/tm.texi.in +++ b/gcc/doc/tm.texi.in @@ -3179,6 +3179,14 @@ Note that the elimination of the argument pointer with the stack pointer is specified first since that is the preferred elimination. @end defmac +@defmac RELOAD_ELIMINABLE_REGS +Like @code{ELIMINABLE_REGS}, but only used in the old reload framework where +it takes precedence over @code{ELIMINABLE_REGS}. This macro can be useful +during the transition to LRA because there are cases where reload and LRA +disagree on how eliminable registers should be represented. For an example, +see @file{avr.h}. +@end defmac + @hook TARGET_CAN_ELIMINATE @defmac INITIAL_ELIMINATION_OFFSET (@var{from-reg}, @var{to-reg}, @var{offset-var}) diff --git a/gcc/reload1.cc b/gcc/reload1.cc index 2e059b0..120328e 100644 --- a/gcc/reload1.cc +++ b/gcc/reload1.cc @@ -283,7 +283,13 @@ static const struct elim_table_1 const int to; } reg_eliminate_1[] = + /* Reload and LRA don't agree on how a multi-register frame pointer + is represented for elimination. See avr.h for a use case. */ +#ifdef RELOAD_ELIMINABLE_REGS + RELOAD_ELIMINABLE_REGS; +#else ELIMINABLE_REGS; +#endif #define NUM_ELIMINABLE_REGS ARRAY_SIZE (reg_eliminate_1) diff --git a/gcc/testsuite/gcc.target/avr/torture/lra-pr116324.c b/gcc/testsuite/gcc.target/avr/torture/lra-pr116324.c new file mode 100644 index 0000000..b54eb40 --- /dev/null +++ b/gcc/testsuite/gcc.target/avr/torture/lra-pr116324.c @@ -0,0 +1,86 @@ +/* { dg-options { -std=gnu99 } } */ + +void f7_clr (void *cc) +{ + __asm ("%~call __f7_clr_asm" :: "z" (cc) : "memory"); +} + +void* f7_copy (void *cc, const void *aa) +{ + extern void __f7_copy_asm (void); + __asm ("%~call __f7_copy_asm" :: "z" (cc), "x" (aa) : "memory"); + return cc; +} + +typedef _Bool bool; +typedef unsigned int uint16_t; +typedef unsigned char uint8_t; +typedef int int16_t; + +typedef struct f7_t +{ + union + { + struct + { + uint8_t sign :1; + uint8_t reserved1 :1; + uint8_t is_nan :1; + uint8_t reserved2 :4; + uint8_t is_inf :1; + }; + uint8_t flags; + }; + + uint8_t mant[7]; + int16_t expo; +} f7_t; + + +static inline __attribute__((__always_inline__)) +void __f7_clr (f7_t *cc) +{ + extern void __f7_clr_asm (void); + __asm ("%~call %x[f]" + : + : [f] "i" (__f7_clr_asm), "z" (cc) + : "memory"); +} + +static inline __attribute__((__always_inline__)) +bool __f7_signbit (const f7_t *aa) +{ + return aa->flags & (1 << 0); +} + +static inline __attribute__((__always_inline__)) +int16_t sub_ssat16 (int16_t a, int16_t b) +{ + _Sat _Fract sa = __builtin_avr_rbits (a); + _Sat _Fract sb = __builtin_avr_rbits (b); + return __builtin_avr_bitsr (sa - sb); +} + +extern void __f7_Iadd (f7_t*, const f7_t*); +extern void __f7_addsub (f7_t*, const f7_t*, const f7_t*, bool neg_b); +extern uint8_t __f7_mulx (f7_t*, const f7_t*, const f7_t*, bool); +extern f7_t* __f7_normalize_asm (f7_t*); + +void __f7_madd_msub (f7_t *cc, const f7_t *aa, const f7_t *bb, const f7_t *dd, + bool neg_d) +{ + f7_t xx7, *xx = &xx7; + uint8_t x_lsb = __f7_mulx (xx, aa, bb, 1 ); + uint8_t x_sign = __f7_signbit (xx); + int16_t x_expo = xx->expo; + __f7_addsub (xx, xx, dd, neg_d); + + + __f7_clr (cc); + cc->expo = sub_ssat16 (x_expo, (8 * 7)); + cc->mant[7 - 1] = x_lsb; + cc = __f7_normalize_asm (cc); + cc->flags = x_sign; + __f7_Iadd (cc, xx); +} + diff --git a/gcc/testsuite/gcc.target/avr/torture/lra-pr116325.c b/gcc/testsuite/gcc.target/avr/torture/lra-pr116325.c new file mode 100644 index 0000000..747e9a0 --- /dev/null +++ b/gcc/testsuite/gcc.target/avr/torture/lra-pr116325.c @@ -0,0 +1,117 @@ +typedef __SIZE_TYPE__ size_t; +typedef __UINT8_TYPE__ uint8_t; + +void swapfunc (char *a, char *b, int n) +{ + do + { + char t = *a; + *a++ = *b; + *b++ = t; + } while (--n > 0); +} + + +typedef int cmp_t (const void*, const void*); + +#define min(a, b) ((a) < (b) ? (a) : (b)) + +#define swap(a, b) \ + swapfunc (a, b, es) + +#define vecswap(a, b, n) \ + if ((n) > 0) swapfunc (a, b, n) + +static char* +med3 (char *a, char *b, char *c, cmp_t *cmp) +{ + return cmp (a, b) < 0 + ? (cmp (b, c) < 0 ? b : (cmp (a, c) < 0 ? c : a )) + : (cmp (b, c) > 0 ? b : (cmp (a, c) < 0 ? a : c )); +} + +void +qsort (void *a, size_t n, size_t es, cmp_t *cmp) +{ + char *pa, *pb, *pc, *pd, *pl, *pm, *pn; + int d, r, swap_cnt; + +loop: + swap_cnt = 0; + if (n < 7) + { + for (pm = (char*) a + es; pm < (char*) a + n * es; pm += es) + for (pl = pm; pl > (char*) a && cmp (pl - es, pl) > 0; pl -= es) + swap (pl, pl - es); + return; + } + pm = (char*) a + (n / 2) * es; + if (n > 7) + { + pl = a; + pn = (char*) a + (n - 1) * es; + if (n > 40) + { + d = (n / 8) * es; + pl = med3 (pl, pl + d, pl + 2 * d, cmp); + pm = med3 (pm - d, pm, pm + d, cmp); + pn = med3 (pn - 2 * d, pn - d, pn, cmp); + } + pm = med3 (pl, pm, pn, cmp); + } + swap (a, pm); + pa = pb = (char*) a + es; + + pc = pd = (char*) a + (n - 1) * es; + for (;;) + { + while (pb <= pc && (r = cmp (pb, a)) <= 0) + { + if (r == 0) + { + swap_cnt = 1; + swap (pa, pb); + pa += es; + } + pb += es; + } + while (pb <= pc && (r = cmp (pc, a)) >= 0) + { + if (r == 0) + { + swap_cnt = 1; + swap (pc, pd); + pd -= es; + } + pc -= es; + } + if (pb > pc) + break; + swap (pb, pc); + swap_cnt = 1; + pb += es; + pc -= es; + } + if (swap_cnt == 0) + { + for (pm = (char*) a + es; pm < (char*) a + n * es; pm += es) + for (pl = pm; pl > (char*) a && cmp (pl - es, pl) > 0; pl -= es) + swap (pl, pl - es); + return; + } + + pn = (char*) a + n * es; + r = min (pa - (char*) a, pb - pa); + vecswap (a, pb - r, r); + r = min (pd - pc, (int) (pn - pd - es)); + vecswap (pb, pn - r, r); + if ((r = pb - pa) > (int) es) + qsort(a, r / es, es, cmp); + if ((r = pd - pc) > (int) es) + { + /* Iterate rather than recurse to save stack space */ + a = pn - r; + n = r / es; + goto loop; + } +} |