aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author H.J. Lu <hjl.tools@gmail.com> 2020-04-03 11:49:10 -0700
committer H.J. Lu <hjl.tools@gmail.com> 2020-04-03 11:49:29 -0700
commit bbcdf9bb3fd04adc59f41e4e1ff6293c84cbecc4 (patch)
tree 5c1ef890db487b0e125329f83bdff7c4a856315d
parent b949f8e2acb49273b2f08ecaa3bc7128baaad850 (diff)
download gcc-bbcdf9bb3fd04adc59f41e4e1ff6293c84cbecc4.zip
gcc-bbcdf9bb3fd04adc59f41e4e1ff6293c84cbecc4.tar.gz
gcc-bbcdf9bb3fd04adc59f41e4e1ff6293c84cbecc4.tar.bz2
x86: Mark scratch operand in ssse3_pshufbv8qi3 as earlyclobber
commit 16ed2601ad0a4aa82f11e9df86ea92183f94f979
Author: H.J. Lu <hongjiu.lu@intel.com>
Date:   Wed May 15 15:26:19 2019 +0000

    i386: Emulate MMX pshufb with SSE version

has

+(define_insn_and_split "ssse3_pshufbv8qi3"
+  [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv")
+       (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0,0,Yv")
+                     (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv")]
+                    UNSPEC_PSHUFB))
+   (clobber (match_scratch:V4SI 3 "=X,x,Yv"))]
                                   ^^^ These alternatives are missing the earlyclobber modifier.
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
+  "@
+   pshufb\t{%2, %0|%0, %2}
+   #
+   #"
+  "TARGET_MMX_WITH_SSE && reload_completed"
+  [(set (match_dup 3) (match_dup 5))
+   (set (match_dup 3)
+       (and:V4SI (match_dup 3) (match_dup 2)))
+   (set (match_dup 0)
+       (unspec:V16QI [(match_dup 1) (match_dup 4)] UNSPEC_PSHUFB))]

If input register operand 2 is dead after this insn, RA may choose it as the scratch operand. Since the scratch operand isn't marked as earlyclobber, operand 2 becomes unused after the split and then gets optimized out. Marking the scratch operand as earlyclobber fixes the issue.

gcc/

    PR target/94467
    * config/i386/sse.md (ssse3_pshufbv8qi3): Mark scratch operand
    as earlyclobber.

gcc/testsuite/

    PR target/94467
    * gcc.target/i386/pr94467-1.c: New test.
    * gcc.target/i386/pr94467-2.c: Likewise.
-rw-r--r-- gcc/ChangeLog 6
-rw-r--r-- gcc/config/i386/sse.md 2
-rw-r--r-- gcc/testsuite/ChangeLog 6
-rw-r--r-- gcc/testsuite/gcc.target/i386/pr94467-1.c 40
-rw-r--r-- gcc/testsuite/gcc.target/i386/pr94467-2.c 48
5 files changed, 101 insertions, 1 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index e9dfa71..6317e38 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,9 @@
+2020-04-03 H.J. Lu <hongjiu.lu@intel.com>
+
+ PR target/94467
+ * config/i386/sse.md (ssse3_pshufbv8qi3): Mark scratch operand
+ as earlyclobber.
+
2020-04-03 Jeff Law <law@redhat.com>
PR rtl-optimization/92264
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 24b3acd..fef6065 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -16695,7 +16695,7 @@
(unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0,0,Yv")
(match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv")]
UNSPEC_PSHUFB))
- (clobber (match_scratch:V4SI 3 "=X,x,Yv"))]
+ (clobber (match_scratch:V4SI 3 "=X,&x,&Yv"))]
"(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
"@
pshufb\t{%2, %0|%0, %2}
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 67a14db..aad627d 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,9 @@
+2020-04-03 H.J. Lu <hongjiu.lu@intel.com>
+
+ PR target/94467
+ * gcc.target/i386/pr94467-1.c: New test.
+ * gcc.target/i386/pr94467-2.c: Likewise.
+
2020-04-03 Jakub Jelinek <jakub@redhat.com>
PR target/94460
diff --git a/gcc/testsuite/gcc.target/i386/pr94467-1.c b/gcc/testsuite/gcc.target/i386/pr94467-1.c
new file mode 100644
index 0000000..a51c3a8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr94467-1.c
@@ -0,0 +1,40 @@
+/* { dg-do run } */
+/* { dg-require-effective-target avx } */
+/* { dg-options "-O -mavx" } */
+
+#include "avx-check.h"
+
+typedef char __attribute__ ((__vector_size__ (8))) v8qi;
+typedef short __attribute__ ((__vector_size__ (8))) v4hi;
+typedef int __attribute__ ((__vector_size__ (8))) v2si;
+typedef long long __attribute__ ((__vector_size__ (8))) v1di;
+typedef unsigned long long u64;
+u64 k, c;
+
+v8qi g, h, p, q;
+v4hi d, e, f, l, n, o;
+v2si j;
+
+u64
+foo (v4hi r)
+{
+ v8qi s;
+ f = (v4hi) j;
+ e = __builtin_ia32_psrlwi ((v4hi) k, c);
+ s = __builtin_ia32_pavgb (h, h);
+ n = __builtin_ia32_pabsw (f);
+ o = __builtin_ia32_psubusw (n, l);
+ p = __builtin_ia32_packsswb (r, o);
+ q = __builtin_ia32_pshufb (p, s);
+ g = __builtin_ia32_punpcklbw (q, (v8qi) r);
+ d = r;
+ return (u64) g + (u64) h + (u64) j;
+}
+
+static void
+avx_test (void)
+{
+ u64 x = foo ((v4hi) { 5 });
+ if (x != 0x0005000500050505)
+ __builtin_abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr94467-2.c b/gcc/testsuite/gcc.target/i386/pr94467-2.c
new file mode 100644
index 0000000..8128be3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr94467-2.c
@@ -0,0 +1,48 @@
+/* { dg-do run } */
+/* { dg-require-effective-target ssse3 } */
+/* { dg-options "-O -mssse3" } */
+
+#ifndef CHECK_H
+#define CHECK_H "ssse3-check.h"
+#endif
+
+#ifndef TEST
+#define TEST ssse3_test
+#endif
+
+#include CHECK_H
+
+typedef char __attribute__ ((__vector_size__ (8))) v8qi;
+typedef short __attribute__ ((__vector_size__ (8))) v4hi;
+typedef int __attribute__ ((__vector_size__ (8))) v2si;
+typedef long long __attribute__ ((__vector_size__ (8))) v1di;
+typedef unsigned long long u64;
+u64 k, c;
+
+v8qi g, h, p, q;
+v4hi d, e, f, l, n, o;
+v2si j;
+
+u64
+foo (v4hi r)
+{
+ v8qi s;
+ f = (v4hi) j;
+ e = __builtin_ia32_psrlwi ((v4hi) k, c);
+ s = __builtin_ia32_pavgb (h, h);
+ n = __builtin_ia32_pabsw (f);
+ o = __builtin_ia32_psubusw (n, l);
+ p = __builtin_ia32_packsswb (r, o);
+ q = __builtin_ia32_pshufb (p, s);
+ g = __builtin_ia32_punpcklbw (q, (v8qi) r);
+ d = r;
+ return (u64) g + (u64) h + (u64) j;
+}
+
+static void
+ssse3_test (void)
+{
+ u64 x = foo ((v4hi) { 5 });
+ if (x != 0x0005000500050505)
+ __builtin_abort ();
+}