aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
author: Uros Bizjak <ubizjak@gmail.com> 2021-06-17 15:19:12 +0200
committer: Uros Bizjak <ubizjak@gmail.com> 2021-06-17 15:19:54 +0200
commit: 20a2c8ace0ab56c147fd995432abd5e7cf89b0e3 (patch)
tree: 4afae4736cbfbb3f4854511e7316a52fce061481 /gcc
parent: f1555d4013ed3cae2589270436387063d1c2f1a3 (diff)
download: gcc-20a2c8ace0ab56c147fd995432abd5e7cf89b0e3.zip
gcc-20a2c8ace0ab56c147fd995432abd5e7cf89b0e3.tar.gz
gcc-20a2c8ace0ab56c147fd995432abd5e7cf89b0e3.tar.bz2
i386: Add variable vec_set for 64bit vectors [PR97194]
To generate sane code, an SSE4.1 variable PBLENDV instruction is needed.

2021-06-17  Uroš Bizjak  <ubizjak@gmail.com>

gcc/
	PR target/97194
	* config/i386/i386-expand.c (expand_vector_set_var):
	Handle V2SF mode remapping.  Pass TARGET_MMX_WITH_SSE to
	ix86_expand_vector_init_duplicate.
	(ix86_expand_vector_init_duplicate): Emit insv_1 for
	QImode for !TARGET_PARTIAL_REG_STALL.
	* config/i386/predicates.md (vec_setm_mmx_operand): New predicate.
	* config/i386/mmx.md (vec_setv2sf): Use vec_setm_mmx_operand
	as operand 2 predicate.  Call ix86_expand_vector_set_var
	for non-constant index operand.
	(vec_setv2si): Ditto.
	(vec_setv4hi): Ditto.
	(vec_setv8qi): Ditto.

gcc/testsuite/
	PR target/97194
	* gcc.target/i386/sse4_1-vec-set-1.c: New test.
	* gcc.target/i386/sse4_1-vec-set-2.c: Ditto.
Diffstat (limited to 'gcc')
-rw-r--r--  gcc/config/i386/i386-expand.c  24
-rw-r--r--  gcc/config/i386/mmx.md  36
-rw-r--r--  gcc/config/i386/predicates.md  6
-rw-r--r--  gcc/testsuite/gcc.target/i386/sse4_1-vec-set-1.c  26
-rw-r--r--  gcc/testsuite/gcc.target/i386/sse4_1-vec-set-2.c  45
5 files changed, 119 insertions, 18 deletions
diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
index eb6f9b0..8f4e4e4 100644
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -13811,10 +13811,17 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
wsmode = GET_MODE_INNER (wvmode);
val = convert_modes (wsmode, smode, val, true);
- x = expand_simple_binop (wsmode, ASHIFT, val,
- GEN_INT (GET_MODE_BITSIZE (smode)),
- NULL_RTX, 1, OPTAB_LIB_WIDEN);
- val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
+
+ if (smode == QImode && !TARGET_PARTIAL_REG_STALL)
+ emit_insn (gen_insv_1 (wsmode, val, val));
+ else
+ {
+ x = expand_simple_binop (wsmode, ASHIFT, val,
+ GEN_INT (GET_MODE_BITSIZE (smode)),
+ NULL_RTX, 1, OPTAB_LIB_WIDEN);
+ val = expand_simple_binop (wsmode, IOR, val, x, x, 1,
+ OPTAB_LIB_WIDEN);
+ }
x = gen_reg_rtx (wvmode);
ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
@@ -14788,6 +14795,9 @@ ix86_expand_vector_set_var (rtx target, rtx val, rtx idx)
case E_V8DFmode:
cmp_mode = V8DImode;
break;
+ case E_V2SFmode:
+ cmp_mode = V2SImode;
+ break;
case E_V4SFmode:
cmp_mode = V4SImode;
break;
@@ -14809,9 +14819,11 @@ ix86_expand_vector_set_var (rtx target, rtx val, rtx idx)
idxv = gen_reg_rtx (cmp_mode);
idx_tmp = convert_to_mode (GET_MODE_INNER (cmp_mode), idx, 1);
- ok = ix86_expand_vector_init_duplicate (false, mode, valv, val);
+ ok = ix86_expand_vector_init_duplicate (TARGET_MMX_WITH_SSE,
+ mode, valv, val);
gcc_assert (ok);
- ok = ix86_expand_vector_init_duplicate (false, cmp_mode, idxv, idx_tmp);
+ ok = ix86_expand_vector_init_duplicate (TARGET_MMX_WITH_SSE,
+ cmp_mode, idxv, idx_tmp);
gcc_assert (ok);
vec[0] = target;
vec[1] = valv;
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 59a16f4..a107ac5 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1279,11 +1279,14 @@
(define_expand "vec_setv2sf"
[(match_operand:V2SF 0 "register_operand")
(match_operand:SF 1 "register_operand")
- (match_operand 2 "const_int_operand")]
+ (match_operand 2 "vec_setm_mmx_operand")]
"TARGET_MMX || TARGET_MMX_WITH_SSE"
{
- ix86_expand_vector_set (TARGET_MMX_WITH_SSE, operands[0], operands[1],
- INTVAL (operands[2]));
+ if (CONST_INT_P (operands[2]))
+ ix86_expand_vector_set (TARGET_MMX_WITH_SSE, operands[0], operands[1],
+ INTVAL (operands[2]));
+ else
+ ix86_expand_vector_set_var (operands[0], operands[1], operands[2]);
DONE;
})
@@ -2989,11 +2992,14 @@
(define_expand "vec_setv2si"
[(match_operand:V2SI 0 "register_operand")
(match_operand:SI 1 "register_operand")
- (match_operand 2 "const_int_operand")]
+ (match_operand 2 "vec_setm_mmx_operand")]
"TARGET_MMX || TARGET_MMX_WITH_SSE"
{
- ix86_expand_vector_set (TARGET_MMX_WITH_SSE, operands[0], operands[1],
- INTVAL (operands[2]));
+ if (CONST_INT_P (operands[2]))
+ ix86_expand_vector_set (TARGET_MMX_WITH_SSE, operands[0], operands[1],
+ INTVAL (operands[2]));
+ else
+ ix86_expand_vector_set_var (operands[0], operands[1], operands[2]);
DONE;
})
@@ -3145,11 +3151,14 @@
(define_expand "vec_setv4hi"
[(match_operand:V4HI 0 "register_operand")
(match_operand:HI 1 "register_operand")
- (match_operand 2 "const_int_operand")]
+ (match_operand 2 "vec_setm_mmx_operand")]
"TARGET_MMX || TARGET_MMX_WITH_SSE"
{
- ix86_expand_vector_set (TARGET_MMX_WITH_SSE, operands[0], operands[1],
- INTVAL (operands[2]));
+ if (CONST_INT_P (operands[2]))
+ ix86_expand_vector_set (TARGET_MMX_WITH_SSE, operands[0], operands[1],
+ INTVAL (operands[2]));
+ else
+ ix86_expand_vector_set_var (operands[0], operands[1], operands[2]);
DONE;
})
@@ -3177,11 +3186,14 @@
(define_expand "vec_setv8qi"
[(match_operand:V8QI 0 "register_operand")
(match_operand:QI 1 "register_operand")
- (match_operand 2 "const_int_operand")]
+ (match_operand 2 "vec_setm_mmx_operand")]
"TARGET_SSE4_1 && TARGET_MMX_WITH_SSE"
{
- ix86_expand_vector_set (TARGET_MMX_WITH_SSE, operands[0], operands[1],
- INTVAL (operands[2]));
+ if (CONST_INT_P (operands[2]))
+ ix86_expand_vector_set (TARGET_MMX_WITH_SSE, operands[0], operands[1],
+ INTVAL (operands[2]));
+ else
+ ix86_expand_vector_set_var (operands[0], operands[1], operands[2]);
DONE;
})
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index 3dd134e..e7a8968 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -1026,6 +1026,12 @@
(match_test "TARGET_AVX2"))
(match_code "const_int")))
+(define_predicate "vec_setm_mmx_operand"
+ (ior (and (match_operand 0 "register_operand")
+ (match_test "TARGET_SSE4_1")
+ (match_test "TARGET_MMX_WITH_SSE"))
+ (match_code "const_int")))
+
;; True for registers, or 1 or -1. Used to optimize double-word shifts.
(define_predicate "reg_or_pm1_operand"
(ior (match_operand 0 "register_operand")
diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-vec-set-1.c b/gcc/testsuite/gcc.target/i386/sse4_1-vec-set-1.c
new file mode 100644
index 0000000..7c7fd34
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse4_1-vec-set-1.c
@@ -0,0 +1,26 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-msse4.1 -O2" } */
+/* { dg-final { scan-assembler-times {(?n)v?pcmpeq[bwd]} 4 } } */
+/* { dg-final { scan-assembler-times {(?n)v?p?blendv} 4 } } */
+
+typedef char v8qi __attribute__ ((vector_size (8)));
+typedef short v4hi __attribute__ ((vector_size (8)));
+typedef int v2si __attribute__ ((vector_size (8)));
+typedef float v2sf __attribute__ ((vector_size (8)));
+
+#define FOO(VTYPE, TYPE) \
+ VTYPE \
+ __attribute__ ((noipa)) \
+ foo_##VTYPE (VTYPE a, TYPE b, unsigned int c) \
+ { \
+ a[c] = b; \
+ return a; \
+ } \
+
+FOO (v8qi, char);
+
+FOO (v4hi, short);
+
+FOO (v2si, int);
+
+FOO (v2sf, float);
diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-vec-set-2.c b/gcc/testsuite/gcc.target/i386/sse4_1-vec-set-2.c
new file mode 100644
index 0000000..24f8041
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse4_1-vec-set-2.c
@@ -0,0 +1,45 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-require-effective-target sse4 } */
+/* { dg-options "-O2 -msse4.1" } */
+
+
+#ifndef CHECK
+#define CHECK "sse4_1-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse4_1_test
+#endif
+
+#include CHECK
+
+#include "sse4_1-vec-set-1.c"
+
+#define CALC_TEST(vtype, type, N, idx) \
+do \
+ { \
+ int i,val = idx * idx - idx * 3 + 16; \
+ type res[N],exp[N]; \
+ vtype resv; \
+ for (i = 0; i < N; i++) \
+ { \
+ res[i] = i * i - i * 3 + 15; \
+ exp[i] = res[i]; \
+ } \
+ exp[idx] = val; \
+ resv = foo_##vtype (*(vtype *)&res[0], val, idx); \
+ for (i = 0; i < N; i++) \
+ { \
+ if (resv[i] != exp[i]) \
+ abort (); \
+ } \
+ } \
+while (0)
+
+static void
+TEST (void)
+{
+ CALC_TEST (v8qi, char, 8, 5);
+ CALC_TEST (v4hi, short, 4, 2);
+ CALC_TEST (v2si, int, 2, 1);
+}