i386: Fix TARGET_USE_VECTOR_FP_CONVERTS SF->DF float_extend splitter [PR113133]

The post-reload splitter currently allows xmm16+ registers with
TARGET_EVEX512.  The splitter changes SFmode of the output operand to
V4SFmode, but the vector mode is currently unsupported in xmm16+
without TARGET_AVX512VL.  lowpart_subreg returns NULL_RTX in this case
and the compilation fails with invalid RTX.

The patch removes support for x/ymm16+ registers with TARGET_EVEX512.
The support should be restored once ix86_hard_regno_mode_ok is fixed
to allow 16-byte modes in x/ymm16+ with TARGET_EVEX512.

	PR target/113133

gcc/ChangeLog:

	* config/i386/i386.md
	(TARGET_USE_VECTOR_FP_CONVERTS SF->DF float_extend splitter):
	Do not handle xmm16+ with TARGET_EVEX512.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/pr113133-1.c: New test.
	* gcc.target/i386/pr113133-2.c: New test.
author: Uros Bizjak <ubizjak@gmail.com> 2023-12-29 09:47:43 +0100
committer: Uros Bizjak <ubizjak@gmail.com> 2023-12-29 09:53:01 +0100
commit: 1e7f9abb892443719c82bb17910caa8fb5eeec15 (patch)
tree: c86b2baf022cef8783c23e9d1ce291dd6ad6e5cd
parent: 200531d5b9fb99eca2b0d6b8d1e42d176413225f (diff)
download: gcc-1e7f9abb892443719c82bb17910caa8fb5eeec15.zip
gcc-1e7f9abb892443719c82bb17910caa8fb5eeec15.tar.gz
gcc-1e7f9abb892443719c82bb17910caa8fb5eeec15.tar.bz2
3 files changed, 95 insertions, 3 deletions
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index ca6dbf4..e693d93 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -5210,7 +5210,7 @@
    && optimize_insn_for_speed_p ()
    && reload_completed
    && (!EXT_REX_SSE_REG_P (operands[0])
-       || TARGET_AVX512VL || TARGET_EVEX512)"
+       || TARGET_AVX512VL)"
    [(set (match_dup 2)
 	 (float_extend:V2DF
 	   (vec_select:V2SF
@@ -5226,8 +5226,7 @@
       /* If it is unsafe to overwrite upper half of source, we need
 	 to move to destination and unpack there.  */
       if (REGNO (operands[0]) != REGNO (operands[1])
-	  || (EXT_REX_SSE_REG_P (operands[1])
-	      && !TARGET_AVX512VL))
+	  || (EXT_REX_SSE_REG_P (operands[1]) && !TARGET_AVX512VL))
 	{
 	  rtx tmp = lowpart_subreg (SFmode, operands[0], DFmode);
 	  emit_move_insn (tmp, operands[1]);
diff --git a/gcc/testsuite/gcc.target/i386/pr113133-1.c b/gcc/testsuite/gcc.target/i386/pr113133-1.c
new file mode 100644
index 0000000..63a1a41
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr113133-1.c
@@ -0,0 +1,21 @@
+/* PR target/113133 */
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavx512f -mtune=barcelona" } */
+
+void
+foo1 (double *d, float f)
+{
+  register float x __asm ("xmm16") = f;
+  asm volatile ("" : "+v" (x));
+
+  *d = x;
+}
+
+void
+foo2 (float *f, double d)
+{
+  register double x __asm ("xmm16") = d;
+  asm volatile ("" : "+v" (x));
+
+  *f = x;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr113133-2.c b/gcc/testsuite/gcc.target/i386/pr113133-2.c
new file mode 100644
index 0000000..8974d8c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr113133-2.c
@@ -0,0 +1,72 @@
+/* PR target/113133 */
+/* { dg-do compile { target lp64 } } */
+/* { dg-options "-O -fno-tree-ter -mavx512f -mtune=barcelona" } */
+
+typedef char v8u8;
+typedef unsigned char __attribute__((__vector_size__(2))) v16u8;
+typedef signed char __attribute__((__vector_size__(2))) v16s8;
+typedef char __attribute__((__vector_size__(4))) v32u8;
+typedef unsigned char __attribute__((__vector_size__(8))) v64u8;
+typedef char __attribute__((__vector_size__(16))) v128u8;
+typedef signed char __attribute__((__vector_size__(16))) v128s8;
+typedef short __attribute__((__vector_size__(8))) v64u16;
+typedef int __attribute__((__vector_size__(16))) v128u32;
+typedef _Float16 __attribute__((__vector_size__(8))) v64f16;
+typedef _Float32 f32;
+char foo0_u8_0, foo0_ret;
+v16s8 foo0_v16s8_0;
+v64u8 foo0_v64u8_0;
+v128u8 foo0_v128u8_0;
+v128s8 foo0_v128s8_0;
+__attribute__((__vector_size__(2 * sizeof(int)))) int foo0_v64s32_0;
+v128u32 foo0_v128u32_0, foo0_v128f32_0;
+f32 foo0_f32_0, foo0_f128_0;
+v16u8 foo0_v16u8_0;
+v64u16 foo0_v64u16_1;
+void foo0(__attribute__((__vector_size__(4 * sizeof(int)))) int v128s32_0,
+          __attribute__((__vector_size__(sizeof(long)))) long v64s64_0,
+          __attribute__((__vector_size__(2 * sizeof(long)))) long v128u64_0,
+          __attribute__((__vector_size__(2 * sizeof(long)))) long v128s64_0,
+          _Float16 f16_0) {
+  v64f16 v64f16_1 = __builtin_convertvector(foo0_v128f32_0, v64f16);
+  v128u32 v128u32_1 = 0 != foo0_v128u32_0;
+  v16s8 v16s8_1 = __builtin_shufflevector(
+      __builtin_convertvector(foo0_v128s8_0, v128s8), foo0_v16s8_0, 2, 3);
+  v128u8 v128u8_1 = foo0_v128u8_0;
+  v64f16 v64f16_2 = __builtin_convertvector(v128s32_0, v64f16);
+  __attribute__((__vector_size__(2 * sizeof(int)))) int v64u32_1 =
+      -foo0_v64s32_0;
+  __attribute__((__vector_size__(4))) signed char v32s8_1 =
+      __builtin_shufflevector((v16s8){}, v16s8_1, 2, 2, 3, 0);
+  v64u16 v64u16_2 = foo0_v64u16_1 ^ foo0_u8_0;
+  v64u8 v64u8_1 = __builtin_shufflevector(foo0_v64u8_0, foo0_v16u8_0, 6, 7, 4,
+                                          7, 0, 2, 6, 0);
+  foo0_f32_0 *= __builtin_asinh(foo0_f128_0);
+  v128u8 v128u8_r = foo0_v128u8_0 + v128u8_1 + foo0_v128s8_0 +
+                    (v128u8)foo0_v128u32_0 + (v128u8)v128u32_1 +
+                    (v128u8)v128s32_0 + (v128u8)v128u64_0 + (v128u8)v128s64_0 +
+                    (v128u8)foo0_v128f32_0;
+  v64u8 v64u8_r = ((union {
+                    v128u8 a;
+                    v64u8 b;
+                  })v128u8_r)
+                      .b +
+                  foo0_v64u8_0 + v64u8_1 + (v64u8)v64u16_2 + (v64u8)v64u32_1 +
+                  (v64u8)v64s64_0 + (v64u8)v64f16_1 + (v64u8)v64f16_2;
+  v32u8 v32u8_r = ((union {
+                    v64u8 a;
+                    v32u8 b;
+                  })v64u8_r)
+                      .b +
+                  v32s8_1;
+  foo0_ret = ((union {
+                v16u8 a;
+                v8u8 b;
+              })((union {
+                v32u8 a;
+                v16u8 b;
+              })v32u8_r)
+                  .b)
+                 .b +
+             f16_0;
+}
author	Uros Bizjak <ubizjak@gmail.com>	2023-12-29 09:47:43 +0100
committer	Uros Bizjak <ubizjak@gmail.com>	2023-12-29 09:53:01 +0100
commit	1e7f9abb892443719c82bb17910caa8fb5eeec15 (patch)
tree	c86b2baf022cef8783c23e9d1ce291dd6ad6e5cd
parent	200531d5b9fb99eca2b0d6b8d1e42d176413225f (diff)
download	gcc-1e7f9abb892443719c82bb17910caa8fb5eeec15.zip gcc-1e7f9abb892443719c82bb17910caa8fb5eeec15.tar.gz gcc-1e7f9abb892443719c82bb17910caa8fb5eeec15.tar.bz2