aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--gcc/ChangeLog15
-rw-r--r--gcc/config/i386/predicates.md99
-rw-r--r--gcc/config/i386/sse.md294
-rw-r--r--gcc/testsuite/ChangeLog8
-rw-r--r--gcc/testsuite/gcc.target/i386/pr66560-1.c35
-rw-r--r--gcc/testsuite/gcc.target/i386/pr66560-2.c35
-rw-r--r--gcc/testsuite/gcc.target/i386/pr66560-3.c35
-rw-r--r--gcc/testsuite/gcc.target/i386/pr66560-4.c35
8 files changed, 395 insertions, 161 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index cd7074e..b514755 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,18 @@
+2015-06-23 Uros Bizjak <ubizjak@gmail.com>
+
+ PR target/66560
+ * config/i386/predicates.md (addsub_vm_operator): New predicate.
+ (addsub_vs_operator): Ditto.
+ (addsub_vs_parallel): Ditto.
+ * config/i386/sse.md (ssedoublemode): Add V4SF and V2DF modes.
+ (avx_addsubv4df3, avx_addsubv8sf3, sse3_addsubv2df3, sse3_addsubv4sf3):
+ Put minus RTX before plus and adjust vec_merge selector.
+ (*avx_addsubv4df3_1, *avx_addsubv4df3_1s, *sse3_addsubv2df3_1)
+ (*sse3_addsubv2df3_1s, *avx_addsubv8sf3_1, *avx_addsubv8sf3_1s)
+ (*sse3_addsubv4sf3_1, *sse3_addsubv4sf3_1s): Remove insn patterns.
+ (addsub vec_merge splitters): New combiner splitters.
+ (addsub vec_select/vec_concat splitters): Ditto.
+
2015-06-23 Bin Cheng <bin.cheng@arm.com>
PR tree-optimization/66449
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index b7bb84f..4e45246 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -1426,8 +1426,105 @@
(and (match_code "unspec_volatile")
(match_test "XINT (op, 1) == UNSPECV_VZEROUPPER")))
-;; Return true if OP is a parallel for a vbroadcast permute.
+;; Return true if OP is a vec_merge of a MINUS and a PLUS whose selector
+;; alternates lanes the way an addsub operation does.
+(define_predicate "addsub_vm_operator"
+  (match_code "vec_merge")
+{
+  rtx arm0 = XEXP (op, 0);
+  rtx arm1 = XEXP (op, 1);
+  int plus_first;
+
+  /* The two arms must be one MINUS and one PLUS; record which one
+     comes first.  Anything else is treated as an internal error.  */
+  if (GET_CODE (arm0) == PLUS && GET_CODE (arm1) == MINUS)
+    plus_first = 1;
+  else if (GET_CODE (arm0) == MINUS && GET_CODE (arm1) == PLUS)
+    plus_first = 0;
+  else
+    gcc_unreachable ();
+
+  HOST_WIDE_INT sel = INTVAL (XEXP (op, 2));
+  int lanes = GET_MODE_NUNITS (mode);
+
+  for (int lane = 0; lane < lanes; lane++)
+    {
+      /* Nonzero when the selector bit for LANE is clear, i.e. when the
+         lane is taken from the second arm of the vec_merge.  */
+      int from_arm1 = (sel & (HOST_WIDE_INT_1U << lane)) == 0;
+
+      /* With MINUS first, even lanes must come from the first arm and
+         odd lanes from the second; with PLUS first it is the reverse.  */
+      if (from_arm1 != ((lane & 1) ^ plus_first))
+        return false;
+    }
+
+  return true;
+})
+
+;; Return true if OP is a vec_select from a vec_concat of a MINUS and a
+;; PLUS whose selection starts in the half that holds the MINUS.
+(define_predicate "addsub_vs_operator"
+  (and (match_code "vec_select")
+       (match_code "vec_concat" "0"))
+{
+  rtx concat = XEXP (op, 0);
+  rtx low_half = XEXP (concat, 0);
+  rtx high_half = XEXP (concat, 1);
+  bool plus_first;
+
+  /* The concatenated halves must be one MINUS and one PLUS; record
+     which one comes first.  Anything else is an internal error.  */
+  if (GET_CODE (low_half) == PLUS && GET_CODE (high_half) == MINUS)
+    plus_first = true;
+  else if (GET_CODE (low_half) == MINUS && GET_CODE (high_half) == PLUS)
+    plus_first = false;
+  else
+    gcc_unreachable ();
+
+  /* The selection must produce exactly one element per lane of MODE.  */
+  rtx selector = XEXP (op, 1);
+  int lanes = GET_MODE_NUNITS (mode);
+  if (XVECLEN (selector, 0) != lanes)
+    return false;
+
+  /* The shape of the permutation is validated separately (see
+     addsub_vs_parallel), so only its first index is inspected here:
+     lane 0 must be read from the MINUS half.  */
+  int first_index = INTVAL (XVECEXP (selector, 0, 0));
+  int wanted = plus_first ? lanes : 0;
+
+  return first_index == wanted;
+})
+
+;; Return true if OP is a parallel of const_ints forming an addsub-style
+;; vec_select permutation.
+(define_predicate "addsub_vs_parallel"
+  (and (match_code "parallel")
+       (match_code "const_int" "a"))
+{
+  int count = XVECLEN (op, 0);
+
+  if (count < 2)
+    return false;
+
+  /* Only two permutations are accepted, distinguished by their first
+     index: one starting at 0, e.g. { 0 9 2 11 4 13 6 15 }, and one
+     starting at COUNT, e.g. { 8 1 10 3 12 5 14 7 }.  */
+  int first = INTVAL (XVECEXP (op, 0, 0));
+  if (first != 0 && first != count)
+    return false;
+
+  for (int pos = 1; pos < count; ++pos)
+    {
+      /* Positions alternate between the two halves of the concatenated
+         input while keeping their own lane number.  */
+      int wanted = (first == 0
+                    ? pos + (pos & 1) * count
+                    : count + pos - (pos & 1) * count);
+
+      if (INTVAL (XVECEXP (op, 0, pos)) != wanted)
+        return false;
+    }
+
+  return true;
+})
+
+;; Return true if OP is a parallel for a vbroadcast permute.
(define_predicate "avx_vbroadcast_operand"
(and (match_code "parallel")
(match_code "const_int" "a"))
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index d1277ca..9c95816 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -487,10 +487,12 @@
(V4SI "v4di") (V8SI "v8di") (V16SI "v16di")])
(define_mode_attr ssedoublemode
- [(V16SF "V32SF") (V16SI "V32SI") (V8DI "V16DI") (V8DF "V16DF")
- (V8SF "V16SF") (V8SI "V16SI") (V4DI "V8DI") (V4DF "V8DF")
- (V16HI "V16SI") (V8HI "V8SI") (V4HI "V4SI") (V4SI "V4DI")
- (V32HI "V32SI") (V32QI "V32HI") (V16QI "V16HI") (V64QI "V64HI")])
+ [(V4SF "V8SF") (V8SF "V16SF") (V16SF "V32SF")
+ (V2DF "V4DF") (V4DF "V8DF") (V8DF "V16DF")
+ (V16QI "V16HI") (V32QI "V32HI") (V64QI "V64HI")
+ (V4HI "V4SI") (V8HI "V8SI") (V16HI "V16SI") (V32HI "V32SI")
+ (V4SI "V4DI") (V8SI "V16SI") (V16SI "V32SI")
+ (V4DI "V8DI") (V8DI "V16DI")])
(define_mode_attr ssebytemode
[(V8DI "V64QI") (V4DI "V32QI") (V2DI "V16QI")])
@@ -2021,43 +2023,11 @@
(define_insn "avx_addsubv4df3"
[(set (match_operand:V4DF 0 "register_operand" "=x")
(vec_merge:V4DF
- (plus:V4DF
+ (minus:V4DF
(match_operand:V4DF 1 "register_operand" "x")
(match_operand:V4DF 2 "nonimmediate_operand" "xm"))
- (minus:V4DF (match_dup 1) (match_dup 2))
- (const_int 10)))]
- "TARGET_AVX"
- "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "type" "sseadd")
- (set_attr "prefix" "vex")
- (set_attr "mode" "V4DF")])
-
-(define_insn "*avx_addsubv4df3_1"
- [(set (match_operand:V4DF 0 "register_operand" "=x")
- (vec_select:V4DF
- (vec_concat:V8DF
- (minus:V4DF
- (match_operand:V4DF 1 "register_operand" "x")
- (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
- (plus:V4DF (match_dup 1) (match_dup 2)))
- (parallel [(const_int 0) (const_int 5)
- (const_int 2) (const_int 7)])))]
- "TARGET_AVX"
- "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "type" "sseadd")
- (set_attr "prefix" "vex")
- (set_attr "mode" "V4DF")])
-
-(define_insn "*avx_addsubv4df3_1s"
- [(set (match_operand:V4DF 0 "register_operand" "=x")
- (vec_select:V4DF
- (vec_concat:V8DF
- (minus:V4DF
- (match_operand:V4DF 1 "register_operand" "x")
- (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
- (plus:V4DF (match_dup 2) (match_dup 1)))
- (parallel [(const_int 0) (const_int 5)
- (const_int 2) (const_int 7)])))]
+ (plus:V4DF (match_dup 1) (match_dup 2))
+ (const_int 5)))]
"TARGET_AVX"
"vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "sseadd")
@@ -2067,49 +2037,11 @@
(define_insn "sse3_addsubv2df3"
[(set (match_operand:V2DF 0 "register_operand" "=x,x")
(vec_merge:V2DF
- (plus:V2DF
+ (minus:V2DF
(match_operand:V2DF 1 "register_operand" "0,x")
(match_operand:V2DF 2 "nonimmediate_operand" "xm,xm"))
- (minus:V2DF (match_dup 1) (match_dup 2))
- (const_int 2)))]
- "TARGET_SSE3"
- "@
- addsubpd\t{%2, %0|%0, %2}
- vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "isa" "noavx,avx")
- (set_attr "type" "sseadd")
- (set_attr "atom_unit" "complex")
- (set_attr "prefix" "orig,vex")
- (set_attr "mode" "V2DF")])
-
-(define_insn "*sse3_addsubv2df3_1"
- [(set (match_operand:V2DF 0 "register_operand" "=x,x")
- (vec_select:V2DF
- (vec_concat:V4DF
- (minus:V2DF
- (match_operand:V2DF 1 "register_operand" "0,x")
- (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm"))
- (plus:V2DF (match_dup 1) (match_dup 2)))
- (parallel [(const_int 0) (const_int 3)])))]
- "TARGET_SSE3"
- "@
- addsubpd\t{%2, %0|%0, %2}
- vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "isa" "noavx,avx")
- (set_attr "type" "sseadd")
- (set_attr "atom_unit" "complex")
- (set_attr "prefix" "orig,vex")
- (set_attr "mode" "V2DF")])
-
-(define_insn "*sse3_addsubv2df3_1s"
- [(set (match_operand:V2DF 0 "register_operand" "=x,x")
- (vec_select:V2DF
- (vec_concat:V4DF
- (minus:V2DF
- (match_operand:V2DF 1 "register_operand" "0,x")
- (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm"))
- (plus:V2DF (match_dup 2) (match_dup 1)))
- (parallel [(const_int 0) (const_int 3)])))]
+ (plus:V2DF (match_dup 1) (match_dup 2))
+ (const_int 1)))]
"TARGET_SSE3"
"@
addsubpd\t{%2, %0|%0, %2}
@@ -2123,47 +2055,11 @@
(define_insn "avx_addsubv8sf3"
[(set (match_operand:V8SF 0 "register_operand" "=x")
(vec_merge:V8SF
- (plus:V8SF
+ (minus:V8SF
(match_operand:V8SF 1 "register_operand" "x")
(match_operand:V8SF 2 "nonimmediate_operand" "xm"))
- (minus:V8SF (match_dup 1) (match_dup 2))
- (const_int 170)))]
- "TARGET_AVX"
- "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "type" "sseadd")
- (set_attr "prefix" "vex")
- (set_attr "mode" "V8SF")])
-
-(define_insn "*avx_addsubv8sf3_1"
- [(set (match_operand:V8SF 0 "register_operand" "=x")
- (vec_select:V8SF
- (vec_concat:V16SF
- (minus:V8SF
- (match_operand:V8SF 1 "register_operand" "x")
- (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
- (plus:V8SF (match_dup 1) (match_dup 2)))
- (parallel [(const_int 0) (const_int 9)
- (const_int 2) (const_int 11)
- (const_int 4) (const_int 13)
- (const_int 6) (const_int 15)])))]
- "TARGET_AVX"
- "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "type" "sseadd")
- (set_attr "prefix" "vex")
- (set_attr "mode" "V8SF")])
-
-(define_insn "*avx_addsubv8sf3_1s"
- [(set (match_operand:V8SF 0 "register_operand" "=x")
- (vec_select:V8SF
- (vec_concat:V16SF
- (minus:V8SF
- (match_operand:V8SF 1 "register_operand" "x")
- (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
- (plus:V8SF (match_dup 2) (match_dup 1)))
- (parallel [(const_int 0) (const_int 9)
- (const_int 2) (const_int 11)
- (const_int 4) (const_int 13)
- (const_int 6) (const_int 15)])))]
+ (plus:V8SF (match_dup 1) (match_dup 2))
+ (const_int 85)))]
"TARGET_AVX"
"vaddsubps\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "sseadd")
@@ -2173,11 +2069,11 @@
(define_insn "sse3_addsubv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "=x,x")
(vec_merge:V4SF
- (plus:V4SF
+ (minus:V4SF
(match_operand:V4SF 1 "register_operand" "0,x")
(match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
- (minus:V4SF (match_dup 1) (match_dup 2))
- (const_int 10)))]
+ (plus:V4SF (match_dup 1) (match_dup 2))
+ (const_int 5)))]
"TARGET_SSE3"
"@
addsubps\t{%2, %0|%0, %2}
@@ -2188,45 +2084,123 @@
(set_attr "prefix_rep" "1,*")
(set_attr "mode" "V4SF")])
-(define_insn "*sse3_addsubv4sf3_1"
- [(set (match_operand:V4SF 0 "register_operand" "=x,x")
- (vec_select:V4SF
- (vec_concat:V8SF
- (minus:V4SF
- (match_operand:V4SF 1 "register_operand" "0,x")
- (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
- (plus:V4SF (match_dup 1) (match_dup 2)))
- (parallel [(const_int 0) (const_int 5)
- (const_int 2) (const_int 7)])))]
- "TARGET_SSE3"
- "@
- addsubps\t{%2, %0|%0, %2}
- vaddsubps\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "isa" "noavx,avx")
- (set_attr "type" "sseadd")
- (set_attr "prefix" "orig,vex")
- (set_attr "prefix_rep" "1,*")
- (set_attr "mode" "V4SF")])
+;; Combiner splitter for a vec_merge accepted by addsub_vm_operator whose
+;; first arm is a MINUS and whose second arm is a PLUS.  The PLUS must use
+;; the same two operands as the MINUS, in either order (see the
+;; rtx_equal_p tests in the condition).  The replacement rebuilds the
+;; vec_merge with both arms using operands 1 and 2 in the same order and
+;; keeps the original selector (operand 5) unchanged.
+(define_split
+ [(set (match_operand:VF_128_256 0 "register_operand")
+ (match_operator:VF_128_256 6 "addsub_vm_operator"
+ [(minus:VF_128_256
+ (match_operand:VF_128_256 1 "register_operand")
+ (match_operand:VF_128_256 2 "nonimmediate_operand"))
+ (plus:VF_128_256
+ (match_operand:VF_128_256 3 "nonimmediate_operand")
+ (match_operand:VF_128_256 4 "nonimmediate_operand"))
+ (match_operand 5 "const_int_operand")]))]
+ "TARGET_SSE3
+ && can_create_pseudo_p ()
+ && ((rtx_equal_p (operands[1], operands[3])
+ && rtx_equal_p (operands[2], operands[4]))
+ || (rtx_equal_p (operands[1], operands[4])
+ && rtx_equal_p (operands[2], operands[3])))"
+ [(set (match_dup 0)
+ (vec_merge:VF_128_256
+ (minus:VF_128_256 (match_dup 1) (match_dup 2))
+ (plus:VF_128_256 (match_dup 1) (match_dup 2))
+ (match_dup 5)))])
-(define_insn "*sse3_addsubv4sf3_1s"
- [(set (match_operand:V4SF 0 "register_operand" "=x,x")
- (vec_select:V4SF
- (vec_concat:V8SF
- (minus:V4SF
- (match_operand:V4SF 1 "register_operand" "0,x")
- (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
- (plus:V4SF (match_dup 2) (match_dup 1)))
- (parallel [(const_int 0) (const_int 5)
- (const_int 2) (const_int 7)])))]
- "TARGET_SSE3"
- "@
- addsubps\t{%2, %0|%0, %2}
- vaddsubps\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "isa" "noavx,avx")
- (set_attr "type" "sseadd")
- (set_attr "prefix" "orig,vex")
- (set_attr "prefix_rep" "1,*")
- (set_attr "mode" "V4SF")])
+;; Combiner splitter for a vec_merge accepted by addsub_vm_operator whose
+;; first arm is a PLUS and whose second arm is a MINUS.  The PLUS must use
+;; the same two operands as the MINUS, in either order.  The replacement
+;; puts the MINUS first, takes both arms' operands from the MINUS
+;; (operands 3 and 4), and inverts the selector to match the swapped arms.
+(define_split
+ [(set (match_operand:VF_128_256 0 "register_operand")
+ (match_operator:VF_128_256 6 "addsub_vm_operator"
+ [(plus:VF_128_256
+ (match_operand:VF_128_256 1 "nonimmediate_operand")
+ (match_operand:VF_128_256 2 "nonimmediate_operand"))
+ (minus:VF_128_256
+ (match_operand:VF_128_256 3 "register_operand")
+ (match_operand:VF_128_256 4 "nonimmediate_operand"))
+ (match_operand 5 "const_int_operand")]))]
+ "TARGET_SSE3
+ && can_create_pseudo_p ()
+ && ((rtx_equal_p (operands[1], operands[3])
+ && rtx_equal_p (operands[2], operands[4]))
+ || (rtx_equal_p (operands[1], operands[4])
+ && rtx_equal_p (operands[2], operands[3])))"
+ [(set (match_dup 0)
+ (vec_merge:VF_128_256
+ (minus:VF_128_256 (match_dup 3) (match_dup 4))
+ (plus:VF_128_256 (match_dup 3) (match_dup 4))
+ (match_dup 5)))]
+{
+ /* Invert the mask bits to compensate for swapped PLUS and MINUS RTXes,
+    keeping only one bit per vector element.  */
+ operands[5]
+ = GEN_INT (~INTVAL (operands[5])
+ & ((HOST_WIDE_INT_1U << GET_MODE_NUNITS (<MODE>mode)) - 1));
+})
+
+;; Combiner splitter for a vec_select (accepted by addsub_vs_operator) from
+;; a vec_concat whose first half is a MINUS and whose second half is a
+;; PLUS, with a permutation accepted by addsub_vs_parallel.  The PLUS must
+;; use the same two operands as the MINUS, in either order.  The
+;; replacement is a vec_merge of MINUS and PLUS on operands 1 and 2 with a
+;; selector computed from the permutation.
+(define_split
+ [(set (match_operand:VF_128_256 0 "register_operand")
+ (match_operator:VF_128_256 7 "addsub_vs_operator"
+ [(vec_concat:<ssedoublemode>
+ (minus:VF_128_256
+ (match_operand:VF_128_256 1 "register_operand")
+ (match_operand:VF_128_256 2 "nonimmediate_operand"))
+ (plus:VF_128_256
+ (match_operand:VF_128_256 3 "nonimmediate_operand")
+ (match_operand:VF_128_256 4 "nonimmediate_operand")))
+ (match_parallel 5 "addsub_vs_parallel"
+ [(match_operand 6 "const_int_operand")])]))]
+ "TARGET_SSE3
+ && can_create_pseudo_p ()
+ && ((rtx_equal_p (operands[1], operands[3])
+ && rtx_equal_p (operands[2], operands[4]))
+ || (rtx_equal_p (operands[1], operands[4])
+ && rtx_equal_p (operands[2], operands[3])))"
+ [(set (match_dup 0)
+ (vec_merge:VF_128_256
+ (minus:VF_128_256 (match_dup 1) (match_dup 2))
+ (plus:VF_128_256 (match_dup 1) (match_dup 2))
+ (match_dup 5)))]
+{
+ /* Turn the permutation into a vec_merge selector: set bit I when
+    element I is read from the first half of the concatenation, which
+    holds the MINUS.  Operand 5 is then replaced by that constant.  */
+ int i, nelt = XVECLEN (operands[5], 0);
+ HOST_WIDE_INT ival = 0;
+
+ for (i = 0; i < nelt; i++)
+ if (INTVAL (XVECEXP (operands[5], 0, i)) < GET_MODE_NUNITS (<MODE>mode))
+ ival |= HOST_WIDE_INT_1 << i;
+
+ operands[5] = GEN_INT (ival);
+})
+
+;; Combiner splitter for a vec_select (accepted by addsub_vs_operator) from
+;; a vec_concat whose first half is a PLUS and whose second half is a
+;; MINUS, with a permutation accepted by addsub_vs_parallel.  The PLUS must
+;; use the same two operands as the MINUS, in either order.  The
+;; replacement is a vec_merge with the MINUS first, both arms using the
+;; MINUS operands (3 and 4), and a selector computed from the permutation.
+(define_split
+ [(set (match_operand:VF_128_256 0 "register_operand")
+ (match_operator:VF_128_256 7 "addsub_vs_operator"
+ [(vec_concat:<ssedoublemode>
+ (plus:VF_128_256
+ (match_operand:VF_128_256 1 "nonimmediate_operand")
+ (match_operand:VF_128_256 2 "nonimmediate_operand"))
+ (minus:VF_128_256
+ (match_operand:VF_128_256 3 "register_operand")
+ (match_operand:VF_128_256 4 "nonimmediate_operand")))
+ (match_parallel 5 "addsub_vs_parallel"
+ [(match_operand 6 "const_int_operand")])]))]
+ "TARGET_SSE3
+ && can_create_pseudo_p ()
+ && ((rtx_equal_p (operands[1], operands[3])
+ && rtx_equal_p (operands[2], operands[4]))
+ || (rtx_equal_p (operands[1], operands[4])
+ && rtx_equal_p (operands[2], operands[3])))"
+ [(set (match_dup 0)
+ (vec_merge:VF_128_256
+ (minus:VF_128_256 (match_dup 3) (match_dup 4))
+ (plus:VF_128_256 (match_dup 3) (match_dup 4))
+ (match_dup 5)))]
+{
+ /* Turn the permutation into a vec_merge selector: set bit I when
+    element I is read from the second half of the concatenation, which
+    holds the MINUS.  Operand 5 is then replaced by that constant.  */
+ int i, nelt = XVECLEN (operands[5], 0);
+ HOST_WIDE_INT ival = 0;
+
+ for (i = 0; i < nelt; i++)
+ if (INTVAL (XVECEXP (operands[5], 0, i)) >= GET_MODE_NUNITS (<MODE>mode))
+ ival |= HOST_WIDE_INT_1 << i;
+
+ operands[5] = GEN_INT (ival);
+})
(define_insn "avx_h<plusminus_insn>v4df3"
[(set (match_operand:V4DF 0 "register_operand" "=x")
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index f17ae0d..8e2ab43 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,11 @@
+2015-06-23 Uros Bizjak <ubizjak@gmail.com>
+
+ PR target/66560
+ * gcc.target/i386/pr66560-1.c: New test.
+ * gcc.target/i386/pr66560-2.c: Ditto.
+ * gcc.target/i386/pr66560-3.c: Ditto.
+ * gcc.target/i386/pr66560-4.c: Ditto.
+
2015-06-23 Thomas Schwinge <thomas@codesourcery.com>
* gcc.target/nvptx/nvptx.exp: New file.
diff --git a/gcc/testsuite/gcc.target/i386/pr66560-1.c b/gcc/testsuite/gcc.target/i386/pr66560-1.c
new file mode 100644
index 0000000..b535ca7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr66560-1.c
@@ -0,0 +1,35 @@
+/* PR target/66560 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse4" } */
+
+typedef float v4sf __attribute__((vector_size(16)));
+typedef int v4si __attribute__((vector_size(16)));
+/* Difference is the first shuffle input, sum the second, both computed
+   as (x, y).  Even lanes are taken from tem0, odd lanes from tem1.  */
+v4sf foo1 (v4sf x, v4sf y)
+{
+ v4sf tem0 = x - y;
+ v4sf tem1 = x + y;
+ return __builtin_shuffle (tem0, tem1, (v4si) { 0, 5, 2, 7 });
+}
+
+/* Like the difference-first case above, but the sum is written with its
+   operands commuted (y + x).  */
+v4sf foo2 (v4sf x, v4sf y)
+{
+ v4sf tem0 = x - y;
+ v4sf tem1 = y + x;
+ return __builtin_shuffle (tem0, tem1, (v4si) { 0, 5, 2, 7 });
+}
+
+/* Sum is the first shuffle input, difference the second.  The selector
+   takes even lanes from tem1 (the difference) and odd lanes from tem0.  */
+v4sf foo3 (v4sf x, v4sf y)
+{
+ v4sf tem0 = x + y;
+ v4sf tem1 = x - y;
+ return __builtin_shuffle (tem0, tem1, (v4si) { 4, 1, 6, 3 });
+}
+
+/* Sum-first variant with the sum's operands commuted (y + x); even lanes
+   come from tem1 (the difference), odd lanes from tem0.  */
+v4sf foo4 (v4sf x, v4sf y)
+{
+ v4sf tem0 = y + x;
+ v4sf tem1 = x - y;
+ return __builtin_shuffle (tem0, tem1, (v4si) { 4, 1, 6, 3 });
+}
+
+/* { dg-final { scan-assembler-times "addsubps" 4 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr66560-2.c b/gcc/testsuite/gcc.target/i386/pr66560-2.c
new file mode 100644
index 0000000..c308f3d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr66560-2.c
@@ -0,0 +1,35 @@
+/* PR target/66560 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse4" } */
+
+typedef double v2df __attribute__((vector_size(16)));
+typedef long long v2di __attribute__((vector_size(16)));
+/* Difference is the first shuffle input, sum the second, both computed
+   as (x, y).  Lane 0 is taken from tem0, lane 1 from tem1.  */
+v2df foo1 (v2df x, v2df y)
+{
+ v2df tem0 = x - y;
+ v2df tem1 = x + y;
+ return __builtin_shuffle (tem0, tem1, (v2di) { 0, 3 });
+}
+
+/* Like the difference-first case above, but the sum is written with its
+   operands commuted (y + x).  */
+v2df foo2 (v2df x, v2df y)
+{
+ v2df tem0 = x - y;
+ v2df tem1 = y + x;
+ return __builtin_shuffle (tem0, tem1, (v2di) { 0, 3 });
+}
+
+/* Sum is the first shuffle input, difference the second.  Lane 0 is
+   taken from tem1 (the difference), lane 1 from tem0.  */
+v2df foo3 (v2df x, v2df y)
+{
+ v2df tem0 = x + y;
+ v2df tem1 = x - y;
+ return __builtin_shuffle (tem0, tem1, (v2di) { 2, 1 });
+}
+
+/* Sum-first variant with the sum's operands commuted (y + x); lane 0
+   comes from tem1 (the difference), lane 1 from tem0.  */
+v2df foo4 (v2df x, v2df y)
+{
+ v2df tem0 = y + x;
+ v2df tem1 = x - y;
+ return __builtin_shuffle (tem0, tem1, (v2di) { 2, 1 });
+}
+
+/* { dg-final { scan-assembler-times "addsubpd" 4 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr66560-3.c b/gcc/testsuite/gcc.target/i386/pr66560-3.c
new file mode 100644
index 0000000..22f19d5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr66560-3.c
@@ -0,0 +1,35 @@
+/* PR target/66560 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx" } */
+
+typedef float v8sf __attribute__((vector_size(32)));
+typedef int v8si __attribute__((vector_size(32)));
+/* Difference is the first shuffle input, sum the second, both computed
+   as (x, y).  Even lanes are taken from tem0, odd lanes from tem1.  */
+v8sf foo1 (v8sf x, v8sf y)
+{
+ v8sf tem0 = x - y;
+ v8sf tem1 = x + y;
+ return __builtin_shuffle (tem0, tem1, (v8si) { 0, 9, 2, 11, 4, 13, 6, 15 });
+}
+
+/* Like the difference-first case above, but the sum is written with its
+   operands commuted (y + x).  */
+v8sf foo2 (v8sf x, v8sf y)
+{
+ v8sf tem0 = x - y;
+ v8sf tem1 = y + x;
+ return __builtin_shuffle (tem0, tem1, (v8si) { 0, 9, 2, 11, 4, 13, 6, 15 });
+}
+
+/* Sum is the first shuffle input, difference the second.  The selector
+   takes even lanes from tem1 (the difference) and odd lanes from tem0.  */
+v8sf foo3 (v8sf x, v8sf y)
+{
+ v8sf tem0 = x + y;
+ v8sf tem1 = x - y;
+ return __builtin_shuffle (tem0, tem1, (v8si) { 8, 1, 10, 3, 12, 5, 14, 7 });
+}
+
+/* Sum-first variant with the sum's operands commuted (y + x); even lanes
+   come from tem1 (the difference), odd lanes from tem0.  */
+v8sf foo4 (v8sf x, v8sf y)
+{
+ v8sf tem0 = y + x;
+ v8sf tem1 = x - y;
+ return __builtin_shuffle (tem0, tem1, (v8si) { 8, 1, 10, 3, 12, 5, 14, 7 });
+}
+
+/* { dg-final { scan-assembler-times "vaddsubps" 4 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr66560-4.c b/gcc/testsuite/gcc.target/i386/pr66560-4.c
new file mode 100644
index 0000000..a8a6e90
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr66560-4.c
@@ -0,0 +1,35 @@
+/* PR target/66560 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx" } */
+
+typedef double v4df __attribute__((vector_size(32)));
+typedef long long v4di __attribute__((vector_size(32)));
+/* Difference is the first shuffle input, sum the second, both computed
+   as (x, y).  Even lanes are taken from tem0, odd lanes from tem1.  */
+v4df foo1 (v4df x, v4df y)
+{
+ v4df tem0 = x - y;
+ v4df tem1 = x + y;
+ return __builtin_shuffle (tem0, tem1, (v4di) { 0, 5, 2, 7 });
+}
+
+/* Like the difference-first case above, but the sum is written with its
+   operands commuted (y + x).  */
+v4df foo2 (v4df x, v4df y)
+{
+ v4df tem0 = x - y;
+ v4df tem1 = y + x;
+ return __builtin_shuffle (tem0, tem1, (v4di) { 0, 5, 2, 7 });
+}
+
+/* Sum is the first shuffle input, difference the second.  The selector
+   takes even lanes from tem1 (the difference) and odd lanes from tem0.  */
+v4df foo3 (v4df x, v4df y)
+{
+ v4df tem0 = x + y;
+ v4df tem1 = x - y;
+ return __builtin_shuffle (tem0, tem1, (v4di) { 4, 1, 6, 3 });
+}
+
+/* Sum-first variant with the sum's operands commuted (y + x); even lanes
+   come from tem1 (the difference), odd lanes from tem0.  */
+v4df foo4 (v4df x, v4df y)
+{
+ v4df tem0 = y + x;
+ v4df tem1 = x - y;
+ return __builtin_shuffle (tem0, tem1, (v4di) { 4, 1, 6, 3 });
+}
+
+/* { dg-final { scan-assembler-times "vaddsubpd" 4 } } */