aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Uros Bizjak <ubizjak@gmail.com> 2023-03-15 20:33:48 +0100
committer: Uros Bizjak <ubizjak@gmail.com> 2023-03-15 20:35:37 +0100
commit: 57052c6ed59c1a2ee4a67982f960e08593956955 (patch)
tree: 7666451538edb09a30194ba56181216fafd37eba
parent: 901edd99b44976b3c2b13a7d525d9e315540186a (diff)
downloadgcc-57052c6ed59c1a2ee4a67982f960e08593956955.zip
gcc-57052c6ed59c1a2ee4a67982f960e08593956955.tar.gz
gcc-57052c6ed59c1a2ee4a67982f960e08593956955.tar.bz2
i386: Fix blend vector permutation for 8-byte modes
8-byte modes should be processed only for TARGET_MMX_WITH_SSE.  Handle
V2SFmode and fix V2HImode handling.  The resulting BLEND instructions
are always faster than MOVSS/MOVSD, so prioritize them w.r.t. MOVSS/MOVSD
for TARGET_SSE4_1.

gcc/ChangeLog:

	* config/i386/i386-expand.cc (expand_vec_perm_blend):
	Handle 8-byte modes only with TARGET_MMX_WITH_SSE.  Handle V2SFmode
	and fix V2HImode handling.
	(expand_vec_perm_1): Try to emit BLEND instruction
	before MOVSS/MOVSD.
	* config/i386/mmx.md (*mmx_blendps): New insn pattern.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/merge-1.c (dg-options): Use -mno-sse4.
	* gcc.target/i386/sse2-mmx-21.c (dg-options): Ditto.
	* gcc.target/i386/sse-movss-4.c (dg-options):
	Use -mno-sse4.  Simplify scan-assembler-not strings.
	* gcc.target/i386/sse2-movsd-3.c (dg-options): Ditto.
	* gcc.target/i386/sse2-mmx-movss-1.c: New test.
-rw-r--r-- gcc/config/i386/i386-expand.cc 19
-rw-r--r-- gcc/config/i386/mmx.md 19
-rw-r--r-- gcc/testsuite/gcc.target/i386/merge-1.c 2
-rw-r--r-- gcc/testsuite/gcc.target/i386/sse-movss-4.c 6
-rw-r--r-- gcc/testsuite/gcc.target/i386/sse2-mmx-21.c 2
-rw-r--r-- gcc/testsuite/gcc.target/i386/sse2-mmx-movss-1.c 13
-rw-r--r-- gcc/testsuite/gcc.target/i386/sse2-movsd-3.c 10
7 files changed, 53 insertions, 18 deletions
diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index e89abf2..1545d43 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -19007,9 +19007,10 @@ expand_vec_perm_blend (struct expand_vec_perm_d *d)
;
else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
;
- else if (TARGET_SSE4_1 && (GET_MODE_SIZE (vmode) == 16
- || GET_MODE_SIZE (vmode) == 8
- || GET_MODE_SIZE (vmode) == 4))
+ else if (TARGET_SSE4_1
+ && (GET_MODE_SIZE (vmode) == 16
+ || (TARGET_MMX_WITH_SSE && GET_MODE_SIZE (vmode) == 8)
+ || GET_MODE_SIZE (vmode) == 4))
;
else
return false;
@@ -19042,6 +19043,8 @@ expand_vec_perm_blend (struct expand_vec_perm_d *d)
case E_V8SFmode:
case E_V2DFmode:
case E_V4SFmode:
+ case E_V2SFmode:
+ case E_V2HImode:
case E_V4HImode:
case E_V8HImode:
case E_V8SImode:
@@ -19897,11 +19900,15 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d)
}
}
+ /* Try the SSE4.1 blend variable merge instructions. */
+ if (expand_vec_perm_blend (d))
+ return true;
+
/* Try movss/movsd instructions. */
if (expand_vec_perm_movs (d))
return true;
- /* Finally, try the fully general two operand permute. */
+ /* Try the fully general two operand permute. */
if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
d->testing_p))
return true;
@@ -19924,10 +19931,6 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d)
return true;
}
- /* Try the SSE4.1 blend variable merge instructions. */
- if (expand_vec_perm_blend (d))
- return true;
-
/* Try one of the AVX vpermil variable permutations. */
if (expand_vec_perm_vpermil (d))
return true;
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index f9c6611..18dae03 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1154,6 +1154,25 @@
DONE;
})
+(define_insn "*mmx_blendps"
+ [(set (match_operand:V2SF 0 "register_operand" "=Yr,*x,x")
+ (vec_merge:V2SF
+ (match_operand:V2SF 2 "register_operand" "Yr,*x,x")
+ (match_operand:V2SF 1 "register_operand" "0,0,x")
+ (match_operand:SI 3 "const_0_to_3_operand")))]
+ "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE"
+ "@
+ blendps\t{%3, %2, %0|%0, %2, %3}
+ blendps\t{%3, %2, %0|%0, %2, %3}
+ vblendps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "isa" "noavx,noavx,avx")
+ (set_attr "type" "ssemov")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix_data16" "1,1,*")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "orig,orig,vex")
+ (set_attr "mode" "V4SF")])
+
(define_insn "mmx_blendvps"
[(set (match_operand:V2SF 0 "register_operand" "=Yr,*x,x")
(unspec:V2SF
diff --git a/gcc/testsuite/gcc.target/i386/merge-1.c b/gcc/testsuite/gcc.target/i386/merge-1.c
index d525685..b018eb1 100644
--- a/gcc/testsuite/gcc.target/i386/merge-1.c
+++ b/gcc/testsuite/gcc.target/i386/merge-1.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O1 -msse2" } */
+/* { dg-options "-O1 -msse2 -mno-sse4" } */
#include <x86intrin.h>
diff --git a/gcc/testsuite/gcc.target/i386/sse-movss-4.c b/gcc/testsuite/gcc.target/i386/sse-movss-4.c
index ec3019c..d8a8a03 100644
--- a/gcc/testsuite/gcc.target/i386/sse-movss-4.c
+++ b/gcc/testsuite/gcc.target/i386/sse-movss-4.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -msse" } */
+/* { dg-options "-O2 -msse -mno-sse4" } */
typedef unsigned int v4si __attribute__((vector_size(16)));
typedef float v4sf __attribute__((vector_size(16)));
@@ -7,7 +7,7 @@ typedef float v4sf __attribute__((vector_size(16)));
v4si foo(v4si x,v4si y) { return (v4si){y[0],x[1],x[2],x[3]}; }
v4sf bar(v4sf x,v4sf y) { return (v4sf){y[0],x[1],x[2],x[3]}; }
-/* { dg-final { scan-assembler-times "\tv?movss\t" 2 } } */
+/* { dg-final { scan-assembler-times "\tmovss\t" 2 } } */
/* { dg-final { scan-assembler-not "movaps" } } */
/* { dg-final { scan-assembler-not "shufps" } } */
-/* { dg-final { scan-assembler-not "vpblendw" } } */
+/* { dg-final { scan-assembler-not "pblendw" } } */
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-21.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-21.c
index 8f5341e..7f8098a 100644
--- a/gcc/testsuite/gcc.target/i386/sse2-mmx-21.c
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-21.c
@@ -1,5 +1,5 @@
/* { dg-do compile { target { ! ia32 } } } */
-/* { dg-options "-O2 -msse2 -mno-mmx" } */
+/* { dg-options "-O2 -msse2 -mno-mmx -mno-sse4" } */
/* { dg-final { scan-assembler-times "pshufd" 1 } } */
/* { dg-final { scan-assembler-times "movd" 1 } } */
/* { dg-final { scan-assembler-not "%mm" } } */
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-movss-1.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-movss-1.c
new file mode 100644
index 0000000..bb79628
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-movss-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -msse2 -mno-sse4" } */
+
+typedef unsigned int v2si __attribute__((vector_size(8)));
+typedef float v2sf __attribute__((vector_size(8)));
+
+v2si foo(v2si x,v2si y) { return (v2si){y[0],x[1]}; }
+v2sf bar(v2sf x,v2sf y) { return (v2sf){y[0],x[1]}; }
+
+/* { dg-final { scan-assembler-times "\tmovss\t" 2 } } */
+/* { dg-final { scan-assembler-not "movaps" } } */
+/* { dg-final { scan-assembler-not "shufps" } } */
+/* { dg-final { scan-assembler-not "pblendw" } } */
diff --git a/gcc/testsuite/gcc.target/i386/sse2-movsd-3.c b/gcc/testsuite/gcc.target/i386/sse2-movsd-3.c
index fadbe2b..edd4a44 100644
--- a/gcc/testsuite/gcc.target/i386/sse2-movsd-3.c
+++ b/gcc/testsuite/gcc.target/i386/sse2-movsd-3.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -msse2" } */
+/* { dg-options "-O2 -msse2 -mno-sse4" } */
typedef unsigned long long v2di __attribute__((vector_size(16)));
typedef double v2df __attribute__((vector_size(16)));
@@ -7,9 +7,9 @@ typedef double v2df __attribute__((vector_size(16)));
v2di foo(v2di x,v2di y) { return (v2di){y[0],x[1]}; }
v2df bar(v2df x,v2df y) { return (v2df){y[0],x[1]}; }
-/* { dg-final { scan-assembler-times "\tv?movsd\t" 2 } } */
-/* { dg-final { scan-assembler-not "v?shufpd" } } */
+/* { dg-final { scan-assembler-times "\tmovsd\t" 2 } } */
+/* { dg-final { scan-assembler-not "shufpd" } } */
/* { dg-final { scan-assembler-not "movdqa" } } */
/* { dg-final { scan-assembler-not "pshufd" } } */
-/* { dg-final { scan-assembler-not "v?punpckldq" } } */
-/* { dg-final { scan-assembler-not "v?movq" } } */
+/* { dg-final { scan-assembler-not "punpckldq" } } */
+/* { dg-final { scan-assembler-not "movq" } } */