aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Uros Bizjak <ubizjak@gmail.com> 2021-06-14 20:56:18 +0200
committer Uros Bizjak <ubizjak@gmail.com> 2021-06-14 20:57:11 +0200
commit 4986946f3b761dd4c3e0d79ca735c90e33f4bb83 (patch)
tree 5eb905e4f2360921f7c74b695a422b82f86deade
parent 93bfadf3a1db7d73e9ca4a4a3d40f7f81ea16d39 (diff)
download gcc-4986946f3b761dd4c3e0d79ca735c90e33f4bb83.zip
gcc-4986946f3b761dd4c3e0d79ca735c90e33f4bb83.tar.gz
gcc-4986946f3b761dd4c3e0d79ca735c90e33f4bb83.tar.bz2
i386: Split V2HImode *punpckwd to SSE instruction [PR101058]
V2HImode *punpckwd should not be split to the insn that depends on
TARGET_MMX_WITH_SSE, since the latter is disabled on 32-bit targets.

Also return true early from ix86_vectorize_vec_perm_const when testing
with V2HImode. *punpckwd can be used to implement all permutations.

2021-06-14 Uroš Bizjak <ubizjak@gmail.com>

gcc/
PR target/101058
* config/i386/i386-expand.c (ix86_vectorize_vec_perm_const):
Return true early when testing with V2HImode.
* config/i386/mmx.md (*punpckwd): Split to sse2_pshuflw_1.

gcc/testsuite/
PR target/101058
* gcc.target/i386/pr101058.c: New test.
-rw-r--r-- gcc/config/i386/i386-expand.c 9
-rw-r--r-- gcc/config/i386/mmx.md 13
-rw-r--r-- gcc/testsuite/gcc.target/i386/pr101058.c 12
3 files changed, 25 insertions, 9 deletions
diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
index 6e33f6f..dee3df2 100644
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -20446,9 +20446,12 @@ ix86_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
return false;
break;
case E_V2HImode:
- if (!TARGET_SSE2)
- return false;
- break;
+ if (!TARGET_SSE2)
+ return false;
+ /* All implementable with *punpckwd. */
+ if (d.testing_p)
+ return true;
+ break;
case E_V2DImode:
case E_V2DFmode:
if (!TARGET_SSE)
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index f9e7d27..1a9e7b0 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -3368,16 +3368,18 @@
(vec_concat:V4HI
(match_operand:V2HI 1 "register_operand" "0,Yw")
(match_operand:V2HI 2 "register_operand" "x,Yw"))
- (parallel [(match_operand 3 "const_0_to_3_operand")
- (match_operand 4 "const_0_to_3_operand")])))]
+ (parallel [(match_operand 3 "const_0_to_3_operand")
+ (match_operand 4 "const_0_to_3_operand")])))]
"TARGET_SSE2"
"#"
"&& reload_completed"
[(set (match_dup 5)
- (vec_select:V4HI
+ (vec_select:V8HI
(match_dup 5)
(parallel [(match_dup 3) (match_dup 4)
- (const_int 0) (const_int 0)])))]
+ (const_int 2) (const_int 3)
+ (const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)])))]
{
rtx dest = lowpart_subreg (V8HImode, operands[0], V2HImode);
rtx op1 = lowpart_subreg (V8HImode, operands[1], V2HImode);
@@ -3395,8 +3397,7 @@
operands[3] = GEN_INT (sel0);
operands[4] = GEN_INT (sel1);
-
- operands[5] = lowpart_subreg (V4HImode, dest, V8HImode);
+ operands[5] = dest;
}
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sselog")
diff --git a/gcc/testsuite/gcc.target/i386/pr101058.c b/gcc/testsuite/gcc.target/i386/pr101058.c
new file mode 100644
index 0000000..a2b251c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr101058.c
@@ -0,0 +1,12 @@
+/* PR target/101058 */
+/* { dg-do compile } */
+/* { dg-options "-O3 -msse2 -mno-mmx" } */
+
+short add90Hybrid_a_1;
+short *add90Hybrid_b, *add90Hybrid_c, *add90Hybrid_d;
+void add90Hybrid() {
+ for (int i; i < 200; i += 2) {
+ add90Hybrid_c[i] = add90Hybrid_b[i];
+ add90Hybrid_d[i] = add90Hybrid_a_1 - add90Hybrid_b[i + 1];
+ }
+}