aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
author Uros Bizjak <ubizjak@gmail.com> 2020-05-27 16:32:10 +0200
committer Uros Bizjak <ubizjak@gmail.com> 2020-05-27 16:32:10 +0200
commit 240198fe0812402e6085033e43d34decdec3c0cf (patch)
tree bd3a88260c85a26089921557c8ba1b12af5c45ac /gcc
parent 6c66c692043d680a5efcd9616b94f9629c417038 (diff)
download gcc-240198fe0812402e6085033e43d34decdec3c0cf.zip
gcc-240198fe0812402e6085033e43d34decdec3c0cf.tar.gz
gcc-240198fe0812402e6085033e43d34decdec3c0cf.tar.bz2
i386: Implement V2SF shuffles
2020-05-27  Uroš Bizjak  <ubizjak@gmail.com>

gcc/ChangeLog:
	* config/i386/mmx.md (mmx_pswapdv2sf2): Add SSE alternatives.
	Enable insn pattern for TARGET_MMX_WITH_SSE.
	(*mmx_movshdup): New insn pattern.
	(*mmx_movsldup): Ditto.
	(*mmx_movss): Ditto.
	* config/i386/i386-expand.c (ix86_vectorize_vec_perm_const):
	Handle E_V2SFmode.
	(expand_vec_perm_movs): Handle E_V2SFmode.
	(expand_vec_perm_even_odd_1): Ditto.
	(expand_vec_perm_broadcast_1): Assert that E_V2SFmode is
	already handled by standard shuffle patterns.

gcc/testsuite/ChangeLog:
	* gcc.target/i386/vperm-v2sf.c: New test.
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/config/i386/i386-expand.c 14
-rw-r--r-- gcc/config/i386/mmx.md 89
-rw-r--r-- gcc/testsuite/gcc.target/i386/vperm-v2sf.c 41
3 files changed, 124 insertions, 20 deletions
diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
index 338b4f7..96f70ae 100644
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -16319,6 +16319,7 @@ expand_vec_perm_movs (struct expand_vec_perm_d *d)
return false;
if (!(TARGET_SSE && vmode == V4SFmode)
+ && !(TARGET_MMX_WITH_SSE && vmode == V2SFmode)
&& !(TARGET_SSE2 && vmode == V2DFmode))
return false;
@@ -18639,6 +18640,13 @@ expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
/* These are always directly implementable by expand_vec_perm_1. */
gcc_unreachable ();
+ case E_V2SFmode:
+ gcc_assert (TARGET_MMX_WITH_SSE);
+ /* We have no suitable instructions. */
+ if (d->testing_p)
+ return false;
+ break;
+
case E_V4HImode:
if (d->testing_p)
break;
@@ -18834,8 +18842,9 @@ expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
gcc_unreachable ();
case E_V2DFmode:
- case E_V2DImode:
+ case E_V2SFmode:
case E_V4SFmode:
+ case E_V2DImode:
case E_V2SImode:
case E_V4SImode:
/* These are always implementable using standard shuffle patterns. */
@@ -19329,6 +19338,7 @@ ix86_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
if (d.testing_p && TARGET_SSSE3)
return true;
break;
+ case E_V2SFmode:
case E_V2SImode:
case E_V4HImode:
if (!TARGET_MMX_WITH_SSE)
@@ -19367,7 +19377,7 @@ ix86_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
/* Implementable with shufps or pshufd. */
if (d.one_operand_p
- && (d.vmode == V4SFmode
+ && (d.vmode == V4SFmode || d.vmode == V2SFmode
|| d.vmode == V4SImode || d.vmode == V2SImode))
return true;
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 215162d..271c1c2 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -938,32 +938,85 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(define_insn "mmx_pswapdv2sf2"
- [(set (match_operand:V2SF 0 "register_operand" "=y")
- (vec_select:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "ym")
- (parallel [(const_int 1) (const_int 0)])))]
- "TARGET_3DNOW_A"
- "pswapd\t{%1, %0|%0, %1}"
- [(set_attr "type" "mmxcvt")
- (set_attr "prefix_extra" "1")
- (set_attr "mode" "V2SF")])
+ [(set (match_operand:V2SF 0 "register_operand" "=y,x,Yv")
+ (vec_select:V2SF
+ (match_operand:V2SF 1 "register_mmxmem_operand" "ym,0,Yv")
+ (parallel [(const_int 1) (const_int 0)])))]
+ "TARGET_3DNOW_A || TARGET_MMX_WITH_SSE"
+ "@
+ pswapd\t{%1, %0|%0, %1}
+ shufps\t{$0xe1, %1, %0|%0, %1, 0xe1}
+ vshufps\t{$0xe1, %1, %1, %0|%0, %1, %1, 0xe1}"
+ [(set_attr "isa" "*,sse_noavx,avx")
+ (set_attr "mmx_isa" "native,*,*")
+ (set_attr "type" "mmxcvt,ssemov,ssemov")
+ (set_attr "prefix_extra" "1,*,*")
+ (set_attr "mode" "V2SF,V4SF,V4SF")])
+
+(define_insn "*mmx_movshdup"
+ [(set (match_operand:V2SF 0 "register_operand" "=v,x")
+ (vec_select:V2SF
+ (match_operand:V2SF 1 "register_operand" "v,0")
+ (parallel [(const_int 1) (const_int 1)])))]
+ "TARGET_MMX_WITH_SSE"
+ "@
+ %vmovshdup\t{%1, %0|%0, %1}
+ shufps\t{$0xe5, %0, %0|%0, %0, 0xe5}"
+ [(set_attr "isa" "sse3,*")
+ (set_attr "type" "sse,sseshuf1")
+ (set_attr "length_immediate" "*,1")
+ (set_attr "prefix_rep" "1,*")
+ (set_attr "prefix" "maybe_vex,orig")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "*mmx_movsldup"
+ [(set (match_operand:V2SF 0 "register_operand" "=v,x")
+ (vec_select:V2SF
+ (match_operand:V2SF 1 "register_operand" "v,0")
+ (parallel [(const_int 0) (const_int 0)])))]
+ "TARGET_MMX_WITH_SSE"
+ "@
+ %vmovsldup\t{%1, %0|%0, %1}
+ shufps\t{$0xe0, %0, %0|%0, %0, 0xe0}"
+ [(set_attr "isa" "sse3,*")
+ (set_attr "type" "sse,sseshuf1")
+ (set_attr "length_immediate" "*,1")
+ (set_attr "prefix_rep" "1,*")
+ (set_attr "prefix" "maybe_vex,orig")
+ (set_attr "mode" "V4SF")])
(define_insn "*vec_dupv2sf"
- [(set (match_operand:V2SF 0 "register_operand" "=y,x,Yv")
+ [(set (match_operand:V2SF 0 "register_operand" "=y,Yv,x")
(vec_duplicate:V2SF
- (match_operand:SF 1 "register_operand" "0,0,Yv")))]
+ (match_operand:SF 1 "register_operand" "0,Yv,0")))]
"TARGET_MMX || TARGET_MMX_WITH_SSE"
"@
punpckldq\t%0, %0
- shufps\t{$0xe0, %0, %0|%0, %0, 0xe0}
- %vmovsldup\t{%1, %0|%0, %1}"
- [(set_attr "isa" "*,sse_noavx,sse3")
+ %vmovsldup\t{%1, %0|%0, %1}
+ shufps\t{$0xe0, %0, %0|%0, %0, 0xe0}"
+ [(set_attr "isa" "*,sse3,sse_noavx")
(set_attr "mmx_isa" "native,*,*")
- (set_attr "type" "mmxcvt,sseshuf1,sse")
- (set_attr "length_immediate" "*,1,*")
- (set_attr "prefix_rep" "*,*,1")
- (set_attr "prefix" "*,orig,maybe_vex")
+ (set_attr "type" "mmxcvt,sse,sseshuf1")
+ (set_attr "length_immediate" "*,*,1")
+ (set_attr "prefix_rep" "*,1,*")
+ (set_attr "prefix" "*,maybe_vex,orig")
(set_attr "mode" "DI,V4SF,V4SF")])
+(define_insn "*mmx_movss"
+ [(set (match_operand:V2SF 0 "register_operand" "=x,v")
+ (vec_merge:V2SF
+ (match_operand:V2SF 2 "register_operand" " x,v")
+ (match_operand:V2SF 1 "register_operand" " 0,v")
+ (const_int 1)))]
+ "TARGET_MMX_WITH_SSE"
+ "@
+ movss\t{%2, %0|%0, %2}
+ vmovss\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "isa" "noavx,avx")
+ (set_attr "type" "ssemov")
+ (set_attr "prefix" "orig,maybe_evex")
+ (set_attr "mode" "SF")])
+
(define_insn "*mmx_concatv2sf"
[(set (match_operand:V2SF 0 "register_operand" "=y,y")
(vec_concat:V2SF
@@ -2015,7 +2068,7 @@
(vec_select:V2SI
(match_operand:V2SI 1 "register_mmxmem_operand" "ym,Yv")
(parallel [(const_int 1) (const_int 0)])))]
- "TARGET_3DNOW_A || TARGET_MMX_WITH_SSE"
+ "TARGET_3DNOW_A"
"@
pswapd\t{%1, %0|%0, %1}
%vpshufd\t{$0xe1, %1, %0|%0, %1, 0xe1}";
diff --git a/gcc/testsuite/gcc.target/i386/vperm-v2sf.c b/gcc/testsuite/gcc.target/i386/vperm-v2sf.c
new file mode 100644
index 0000000..7bf6def
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vperm-v2sf.c
@@ -0,0 +1,41 @@
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O -msse2" } */
+/* { dg-require-effective-target sse2 } */
+
+#include "isa-check.h"
+#include "sse-os-support.h"
+
+typedef float S;
+typedef float V __attribute__((vector_size(8)));
+typedef int IV __attribute__((vector_size(8)));
+typedef union { S s[2]; V v; } U;
+
+static U i[2], b, c;
+
+extern int memcmp (const void *, const void *, __SIZE_TYPE__);
+#define assert(T) ((T) || (__builtin_trap (), 0))
+
+#define TEST(E0, E1) \
+ b.v = __builtin_shuffle (i[0].v, i[1].v, (IV){E0, E1}); \
+ c.s[0] = i[0].s[E0]; \
+ c.s[1] = i[0].s[E1]; \
+ __asm__("" : : : "memory"); \
+ assert (memcmp (&b, &c, sizeof(c)) == 0);
+
+#include "vperm-2-2.inc"
+
+int main()
+{
+ check_isa ();
+
+ if (!sse_os_support ())
+ exit (0);
+
+ i[0].s[0] = 0;
+ i[0].s[1] = 1;
+ i[0].s[2] = 2;
+ i[0].s[3] = 3;
+
+ check();
+ return 0;
+}