aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHaochen Jiang <haochen.jiang@intel.com>2023-10-19 16:39:16 +0800
committerHaochen Jiang <haochen.jiang@intel.com>2023-10-23 10:02:37 +0800
commit1df490edd48042b07aa780b088148a9118cbcb46 (patch)
tree7b0c3903e058356f0d377e2a3800370922c0211d
parent85858c71a89dfe0d9e7d96fc9f7bdf1a18df65ba (diff)
downloadgcc-1df490edd48042b07aa780b088148a9118cbcb46.zip
gcc-1df490edd48042b07aa780b088148a9118cbcb46.tar.gz
gcc-1df490edd48042b07aa780b088148a9118cbcb46.tar.bz2
i386: Prevent splitting to xmm16+ when !TARGET_AVX512VL
Currently, the split may end up using x/ymm16+ without AVX512VL, which eventually leads to an ICE, as seen in PR111753. This patch prevents that. gcc/ChangeLog: PR target/111753 * config/i386/i386.cc (ix86_standard_x87sse_constant_load_p): Do not split to xmm16+ when !TARGET_AVX512VL. gcc/testsuite/ChangeLog: PR target/111753 * gcc.target/i386/pr111753.c: New test.
-rw-r--r-- gcc/config/i386/i386.cc 3
-rw-r--r-- gcc/testsuite/gcc.target/i386/pr111753.c 69
2 files changed, 72 insertions, 0 deletions
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index f4772e0..faa13b3 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -5481,6 +5481,9 @@ ix86_standard_x87sse_constant_load_p (const rtx_insn *insn, rtx dst)
if (src == NULL
|| (SSE_REGNO_P (REGNO (dst))
&& standard_sse_constant_p (src, GET_MODE (dst)) != 1)
+ || (!TARGET_AVX512VL
+ && EXT_REX_SSE_REGNO_P (REGNO (dst))
+ && standard_sse_constant_p (src, GET_MODE (dst)) == 1)
|| (STACK_REGNO_P (REGNO (dst))
&& standard_80387_constant_p (src) < 1))
return false;
diff --git a/gcc/testsuite/gcc.target/i386/pr111753.c b/gcc/testsuite/gcc.target/i386/pr111753.c
new file mode 100644
index 0000000..16ceca6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr111753.c
@@ -0,0 +1,69 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavx512bw -fno-tree-ter -Wno-div-by-zero" } */
+/* Regression test for PR target/111753: built with -mavx512bw but without -mavx512vl.  */
+typedef int __attribute__((__vector_size__ (8))) v64u8; /* NOTE: typedef names do not describe the element types; this is an 8-byte vector of int.  */
+typedef char __attribute__((__vector_size__ (16))) v128u8;
+typedef int __attribute__((__vector_size__ (16))) v128u32;
+typedef int __attribute__((__vector_size__ (32))) v256u8;
+typedef int __attribute__((__vector_size__ (64))) v512u8;
+typedef short __attribute__((__vector_size__ (4))) v32s16;
+typedef short __attribute__((__vector_size__ (16))) v128s16;
+typedef short __attribute__((__vector_size__ (32))) v256s16;
+typedef _Float16 __attribute__((__vector_size__ (16))) f16;
+typedef _Float32 f32;
+typedef double __attribute__((__vector_size__ (64))) v512f64;
+typedef _Decimal32 d32;
+typedef _Decimal64 __attribute__((__vector_size__ (32))) v256d64;
+typedef _Decimal64 __attribute__((__vector_size__ (64))) v512d64;
+d32 foo0_d32_0, foo0_ret;
+v256d64 foo0_v256d64_0;
+v128s16 foo0_v128s16_0;
+int foo0_v256d128_0;
+
+extern void bar(int); /* Declared only; no definition in this file.  */
+/* Compile-only test (see dg-do above); foo mixes operations on 4-, 8-, 16-, 32- and 64-byte vectors.  */
+void
+foo (v64u8, v128u8 v128u8_0, v128u8 v128s8_0,
+ v256u8 v256u8_0, int v256s8_0, v512u8 v512u8_0, int v512s8_0,
+ v256s16 v256s16_0,
+ v512u8 v512s16_0,
+ v128u32 v128u64_0,
+ v128u32 v128s64_0,
+ int, int, __int128 v128u128_0, __int128 v128s128_0, v128u32 v128f64_0)
+{
+ v512d64 v512d64_0;
+ v256u8 v256f32_0, v256d64_1 = foo0_v256d64_0 == foo0_d32_0;
+ f32 f32_0;
+ f16 v128f16_0;
+ f32_0 /= 0; /* Divides by literal zero; the warning is silenced by -Wno-div-by-zero in dg-options.  */
+ v128u8 v128u8_1 = v128u8_0 != 0;
+ int v256d32_1;
+ v256f32_0 /= 0;
+ v32s16 v32s16_1 = __builtin_shufflevector ((v128s16) { }, v256s16_0, 5, 10);
+ v512f64 v512f64_1 = __builtin_convertvector (v512d64_0, v512f64);
+ v512u8 v512d128_1 = v512s16_0;
+ v128s16 v128s16_2 =
+ __builtin_shufflevector ((v32s16) { }, v32s16_1, 0, 3, 2, 1,
+ 0, 0, 0, 3), v128s16_3 = foo0_v128s16_0 > 0;
+ v128f16_0 /= 0;
+ __int128 v128s128_1 = 0 == v128s128_0;
+ v512u8 v512u8_r = v512u8_0 + v512s8_0 + (v512u8) v512f64_1 + v512s16_0; /* Accumulate the 64-byte operands.  */
+ v256u8 v256u8_r = ((union { /* Cast to an anonymous union and read member b: keeps a 32-byte part of the 64-byte sum.  */
+ v512u8 a;
+ v256u8 b;}) v512u8_r).b +
+ v256u8_0 + v256s8_0 + v256f32_0 + v256d32_1 +
+ (v256u8) v256d64_1 + foo0_v256d128_0;
+ v128u8 v128u8_r = ((union { /* Same union-cast narrowing, 32 bytes to 16 bytes.  */
+ v256u8 a;
+ v128u8 b;}) v256u8_r).b +
+ v128u8_0 + v128u8_1 + v128s8_0 + (v128u8) v128s16_2 +
+ (v128u8) v128s16_3 + (v128u8) v128u64_0 + (v128u8) v128s64_0 +
+ (v128u8) v128u128_0 + (v128u8) v128s128_1 +
+ (v128u8) v128f16_0 + (v128u8) v128f64_0;
+ bar (f32_0 + (int) foo0_d32_0);
+ foo0_ret = ((union { /* Narrow 16 -> 8 bytes -> int via nested union casts, then store to the global result.  */
+ v64u8 a;
+ int b;}) ((union {
+ v128u8 a;
+ v64u8 b;}) v128u8_r).b).b;
+}