diff options
author | Uros Bizjak <ubizjak@gmail.com> | 2021-07-06 19:27:34 +0200 |
---|---|---|
committer | Uros Bizjak <ubizjak@gmail.com> | 2021-07-06 19:28:35 +0200 |
commit | f65878178ab05180a5937f11f8fdb755678a82ce (patch) | |
tree | 7ac7267296f21e9246f3ac19a4b5b81687cf633e /gcc | |
parent | 6b096c17314a46f285fa26670048f287a399573f (diff) | |
download | gcc-f65878178ab05180a5937f11f8fdb755678a82ce.zip gcc-f65878178ab05180a5937f11f8fdb755678a82ce.tar.gz gcc-f65878178ab05180a5937f11f8fdb755678a82ce.tar.bz2 |
i386: Add variable vec_set for 32bit vectors [PR97194]
To generate sane code, an SSE4.1 variable PBLENDV instruction is needed.
Also enable variable vec_set through the vec_setm_operand predicate
for TARGET_SSE4_1 instead of TARGET_AVX2. ix86_expand_vector_init_duplicate
is able to emulate vpbroadcast{b,w} with pxor/pshufb.
2021-07-06 Uroš Bizjak <ubizjak@gmail.com>
gcc/
PR target/97194
* config/i386/predicates.md (vec_setm_operand): Enable
register_operand for TARGET_SSE4_1.
* config/i386/mmx.md (vec_setv2hi): Use vec_setm_operand
as operand 2 predicate. Call ix86_expand_vector_set_var
for non-constant index operand.
(vec_setv4qi): Use vec_setm_mmx_operand as operand 2 predicate.
Call ix86_expand_vector_set_var for non-constant index operand.
gcc/testsuite/
PR target/97194
* gcc.target/i386/sse4_1-vec-set-1a.c: New test.
* gcc.target/i386/sse4_1-vec-set-2a.c: Ditto.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/i386/mmx.md | 18 | ||||
-rw-r--r-- | gcc/config/i386/predicates.md | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/sse4_1-vec-set-1a.c | 20 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/sse4_1-vec-set-2a.c | 44 |
4 files changed, 77 insertions, 7 deletions
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index 4ead8be..7e83b64 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -3534,11 +3534,14 @@ (define_expand "vec_setv2hi" [(match_operand:V2HI 0 "register_operand") (match_operand:HI 1 "register_operand") - (match_operand 2 "const_int_operand")] + (match_operand 2 "vec_setm_operand")] "TARGET_SSE2" { - ix86_expand_vector_set (false, operands[0], operands[1], - INTVAL (operands[2])); + if (CONST_INT_P (operands[2])) + ix86_expand_vector_set (false, operands[0], operands[1], + INTVAL (operands[2])); + else + ix86_expand_vector_set_var (operands[0], operands[1], operands[2]); DONE; }) @@ -3556,11 +3559,14 @@ (define_expand "vec_setv4qi" [(match_operand:V4QI 0 "register_operand") (match_operand:QI 1 "register_operand") - (match_operand 2 "const_int_operand")] + (match_operand 2 "vec_setm_mmx_operand")] "TARGET_SSE4_1" { - ix86_expand_vector_set (false, operands[0], operands[1], - INTVAL (operands[2])); + if (CONST_INT_P (operands[2])) + ix86_expand_vector_set (false, operands[0], operands[1], + INTVAL (operands[2])); + else + ix86_expand_vector_set_var (operands[0], operands[1], operands[2]); DONE; }) diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md index c4b35c8..9488632 100644 --- a/gcc/config/i386/predicates.md +++ b/gcc/config/i386/predicates.md @@ -1023,7 +1023,7 @@ ;; True for registers, or const_int_operand, used to vec_setm expander. (define_predicate "vec_setm_operand" (ior (and (match_operand 0 "register_operand") - (match_test "TARGET_AVX2")) + (match_test "TARGET_SSE4_1")) (match_code "const_int"))) (define_predicate "vec_setm_mmx_operand" diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-vec-set-1a.c b/gcc/testsuite/gcc.target/i386/sse4_1-vec-set-1a.c new file mode 100644 index 0000000..e2a67a6 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-vec-set-1a.c @@ -0,0 +1,20 @@ +/* { dg-do compile { target { ! 
ia32 } } } */ +/* { dg-options "-msse4.1 -O2" } */ +/* { dg-final { scan-assembler-times {(?n)v?pcmpeq[bwd]} 2 } } */ +/* { dg-final { scan-assembler-times {(?n)v?p?blendv} 2 } } */ + +typedef char v4qi __attribute__ ((vector_size (4))); +typedef short v2hi __attribute__ ((vector_size (4))); + +#define FOO(VTYPE, TYPE) \ + VTYPE \ + __attribute__ ((noipa)) \ + foo_##VTYPE (VTYPE a, TYPE b, unsigned int c) \ + { \ + a[c] = b; \ + return a; \ + } \ + +FOO (v4qi, char); + +FOO (v2hi, short); diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-vec-set-2a.c b/gcc/testsuite/gcc.target/i386/sse4_1-vec-set-2a.c new file mode 100644 index 0000000..5a945be --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-vec-set-2a.c @@ -0,0 +1,44 @@ +/* { dg-do run { target { ! ia32 } } } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.1" } */ + + +#ifndef CHECK +#define CHECK "sse4_1-check.h" +#endif + +#ifndef TEST +#define TEST sse4_1_test +#endif + +#include CHECK + +#include "sse4_1-vec-set-1a.c" + +#define CALC_TEST(vtype, type, N, idx) \ +do \ + { \ + int i,val = idx * idx - idx * 3 + 16; \ + type res[N],exp[N]; \ + vtype resv; \ + for (i = 0; i < N; i++) \ + { \ + res[i] = i * i - i * 3 + 15; \ + exp[i] = res[i]; \ + } \ + exp[idx] = val; \ + resv = foo_##vtype (*(vtype *)&res[0], val, idx); \ + for (i = 0; i < N; i++) \ + { \ + if (resv[i] != exp[i]) \ + abort (); \ + } \ + } \ +while (0) + +static void +TEST (void) +{ + CALC_TEST (v4qi, char, 4, 2); + CALC_TEST (v2hi, short, 2, 1); +} |