diff options
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/regcprop.c | 29 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr98694.c | 41 |
2 files changed, 70 insertions, 0 deletions
diff --git a/gcc/regcprop.c b/gcc/regcprop.c index dd62cb3..e1342f5 100644 --- a/gcc/regcprop.c +++ b/gcc/regcprop.c @@ -358,6 +358,35 @@ copy_value (rtx dest, rtx src, struct value_data *vd) else if (sn > hard_regno_nregs (sr, vd->e[sr].mode)) return; + /* It is not safe to link DEST into the chain if SRC was defined in some + narrower mode M and if M is also narrower than the mode of the first + register in the chain. For example: + (set (reg:DI r1) (reg:DI r0)) + (set (reg:HI r2) (reg:HI r1)) + (set (reg:SI r3) (reg:SI r2)) // Should be a new chain starting at r3 + (set (reg:SI r4) (reg:SI r1)) + (set (reg:SI r5) (reg:SI r4)) + + the upper part of r3 is undefined. If we added it to the chain, + it may be used to replace r5, which has defined upper bits. + See PR98694 for details. + + [A] partial_subreg_p (vd->e[sr].mode, GET_MODE (src)) + [B] partial_subreg_p (vd->e[sr].mode, vd->e[vd->e[sr].oldest_regno].mode) + Condition B is added to catch optimization opportunities of + + (set (reg:HI R1) (reg:HI R0)) + (set (reg:SI R2) (reg:SI R1)) // [A] + (set (reg:DI R3) (reg:DI R2)) // [A] + (set (reg:SI R4) (reg:SI R[0-3])) + (set (reg:HI R5) (reg:HI R[0-4])) + + in which all registers have only 16 defined bits. */ + else if (partial_subreg_p (vd->e[sr].mode, GET_MODE (src)) + && partial_subreg_p (vd->e[sr].mode, + vd->e[vd->e[sr].oldest_regno].mode)) + return; + /* Link DR at the end of the value chain used by SR. */ vd->e[dr].oldest_regno = vd->e[sr].oldest_regno; diff --git a/gcc/testsuite/gcc.target/i386/pr98694.c b/gcc/testsuite/gcc.target/i386/pr98694.c new file mode 100644 index 0000000..45889d4 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr98694.c @@ -0,0 +1,41 @@ +/* PR rtl-optimization/98694 */ +/* { dg-do run { target { ! 
ia32 } } } */ +/* { dg-options "-O2 -mavx512bw" } */ +/* { dg-require-effective-target avx512bw } */ + +#include<immintrin.h> +typedef short v4hi __attribute__ ((vector_size (8))); +typedef int v2si __attribute__ ((vector_size (8))); +v4hi b; + +__attribute__ ((noipa)) +v2si +foo (__m512i src1, __m512i src2) +{ + __mmask64 m = _mm512_cmpeq_epu8_mask (src1, src2); + short s = (short) m; + int i = (int)m; + b = __extension__ (v4hi) {s, s, s, s}; + return __extension__ (v2si) {i, i}; +} + +int main () +{ + if (!__builtin_cpu_supports ("avx512bw")) + return 0; + + __m512i src1 = _mm512_setzero_si512 (); + __m512i src2 = _mm512_set_epi8 (0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1); + __mmask64 m = _mm512_cmpeq_epu8_mask (src1, src2); + v2si a = foo (src1, src2); + if (a[0] != (int)m) + __builtin_abort (); + return 0; +} |