aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
author: Uros Bizjak <ubizjak@gmail.com> 2021-07-06 19:27:34 +0200
committer: Uros Bizjak <ubizjak@gmail.com> 2021-07-06 19:28:35 +0200
commit: f65878178ab05180a5937f11f8fdb755678a82ce (patch)
tree: 7ac7267296f21e9246f3ac19a4b5b81687cf633e /gcc
parent: 6b096c17314a46f285fa26670048f287a399573f (diff)
download: gcc-f65878178ab05180a5937f11f8fdb755678a82ce.zip
gcc-f65878178ab05180a5937f11f8fdb755678a82ce.tar.gz
gcc-f65878178ab05180a5937f11f8fdb755678a82ce.tar.bz2
i386: Add variable vec_set for 32bit vectors [PR97194]
To generate sane code, an SSE4.1 variable PBLENDV instruction is needed.
Also enable variable vec_set through the vec_setm_operand predicate for
TARGET_SSE4_1 instead of TARGET_AVX2.  ix86_expand_vector_init_duplicate
is able to emulate vpbroadcast{b,w} with pxor/pshufb.

2021-07-06  Uroš Bizjak  <ubizjak@gmail.com>

gcc/
    PR target/97194
    * config/i386/predicates.md (vec_setm_operand): Enable
    register_operand for TARGET_SSE4_1.
    * config/i386/mmx.md (vec_setv2hi): Use vec_setm_operand as
    operand 2 predicate.  Call ix86_expand_vector_set_var for
    non-constant index operand.
    (vec_setv4qi): Use vec_setm_mmx_operand as operand 2 predicate.
    Call ix86_expand_vector_set_var for non-constant index operand.

gcc/testsuite/
    PR target/97194
    * gcc.target/i386/sse4_1-vec-set-1a.c: New test.
    * gcc.target/i386/sse4_1-vec-set-2a.c: Ditto.
Diffstat (limited to 'gcc')
-rw-r--r--  gcc/config/i386/mmx.md  18
-rw-r--r--  gcc/config/i386/predicates.md  2
-rw-r--r--  gcc/testsuite/gcc.target/i386/sse4_1-vec-set-1a.c  20
-rw-r--r--  gcc/testsuite/gcc.target/i386/sse4_1-vec-set-2a.c  44
4 files changed, 77 insertions, 7 deletions
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 4ead8be..7e83b64 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -3534,11 +3534,14 @@
(define_expand "vec_setv2hi"
[(match_operand:V2HI 0 "register_operand")
(match_operand:HI 1 "register_operand")
- (match_operand 2 "const_int_operand")]
+ (match_operand 2 "vec_setm_operand")]
"TARGET_SSE2"
{
- ix86_expand_vector_set (false, operands[0], operands[1],
- INTVAL (operands[2]));
+ if (CONST_INT_P (operands[2]))
+ ix86_expand_vector_set (false, operands[0], operands[1],
+ INTVAL (operands[2]));
+ else
+ ix86_expand_vector_set_var (operands[0], operands[1], operands[2]);
DONE;
})
@@ -3556,11 +3559,14 @@
(define_expand "vec_setv4qi"
[(match_operand:V4QI 0 "register_operand")
(match_operand:QI 1 "register_operand")
- (match_operand 2 "const_int_operand")]
+ (match_operand 2 "vec_setm_mmx_operand")]
"TARGET_SSE4_1"
{
- ix86_expand_vector_set (false, operands[0], operands[1],
- INTVAL (operands[2]));
+ if (CONST_INT_P (operands[2]))
+ ix86_expand_vector_set (false, operands[0], operands[1],
+ INTVAL (operands[2]));
+ else
+ ix86_expand_vector_set_var (operands[0], operands[1], operands[2]);
DONE;
})
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index c4b35c8..9488632 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -1023,7 +1023,7 @@
;; True for registers, or const_int_operand, used to vec_setm expander.
(define_predicate "vec_setm_operand"
(ior (and (match_operand 0 "register_operand")
- (match_test "TARGET_AVX2"))
+ (match_test "TARGET_SSE4_1"))
(match_code "const_int")))
(define_predicate "vec_setm_mmx_operand"
diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-vec-set-1a.c b/gcc/testsuite/gcc.target/i386/sse4_1-vec-set-1a.c
new file mode 100644
index 0000000..e2a67a6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse4_1-vec-set-1a.c
@@ -0,0 +1,20 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-msse4.1 -O2" } */
+/* { dg-final { scan-assembler-times {(?n)v?pcmpeq[bwd]} 2 } } */
+/* { dg-final { scan-assembler-times {(?n)v?p?blendv} 2 } } */
+
+typedef char v4qi __attribute__ ((vector_size (4)));
+typedef short v2hi __attribute__ ((vector_size (4)));
+
+#define FOO(VTYPE, TYPE) \
+ VTYPE \
+ __attribute__ ((noipa)) \
+ foo_##VTYPE (VTYPE a, TYPE b, unsigned int c) \
+ { \
+ a[c] = b; \
+ return a; \
+ } \
+
+FOO (v4qi, char);
+
+FOO (v2hi, short);
diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-vec-set-2a.c b/gcc/testsuite/gcc.target/i386/sse4_1-vec-set-2a.c
new file mode 100644
index 0000000..5a945be
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse4_1-vec-set-2a.c
@@ -0,0 +1,44 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-require-effective-target sse4 } */
+/* { dg-options "-O2 -msse4.1" } */
+
+
+#ifndef CHECK
+#define CHECK "sse4_1-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse4_1_test
+#endif
+
+#include CHECK
+
+#include "sse4_1-vec-set-1a.c"
+
+#define CALC_TEST(vtype, type, N, idx) \
+do \
+ { \
+ int i,val = idx * idx - idx * 3 + 16; \
+ type res[N],exp[N]; \
+ vtype resv; \
+ for (i = 0; i < N; i++) \
+ { \
+ res[i] = i * i - i * 3 + 15; \
+ exp[i] = res[i]; \
+ } \
+ exp[idx] = val; \
+ resv = foo_##vtype (*(vtype *)&res[0], val, idx); \
+ for (i = 0; i < N; i++) \
+ { \
+ if (resv[i] != exp[i]) \
+ abort (); \
+ } \
+ } \
+while (0)
+
+static void
+TEST (void)
+{
+ CALC_TEST (v4qi, char, 4, 2);
+ CALC_TEST (v2hi, short, 2, 1);
+}