about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- gcc/config/i386/i386-expand.c 6
-rw-r--r-- gcc/config/i386/mmx.md 176
-rw-r--r-- gcc/testsuite/gcc.target/i386/vperm-v2hi.c 41
-rw-r--r-- gcc/testsuite/gcc.target/i386/vperm-v4qi.c 47
4 files changed, 268 insertions, 2 deletions
diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
index 4185f58..eb7cdb0 100644
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -14968,6 +14968,7 @@ ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
return;
case E_V8HImode:
+ case E_V2HImode:
use_vec_merge = TARGET_SSE2;
break;
case E_V4HImode:
@@ -14975,6 +14976,7 @@ ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
break;
case E_V16QImode:
+ case E_V4QImode:
use_vec_merge = TARGET_SSE4_1;
break;
@@ -15274,6 +15276,7 @@ ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
break;
case E_V8HImode:
+ case E_V2HImode:
use_vec_extr = TARGET_SSE2;
break;
case E_V4HImode:
@@ -15294,6 +15297,9 @@ ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
return;
}
break;
+ case E_V4QImode:
+ use_vec_extr = TARGET_SSE4_1;
+ break;
case E_V8SFmode:
if (TARGET_AVX)
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index f39e062..914e5e9 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -3092,7 +3092,7 @@
[(match_operand:V8QI 0 "register_operand")
(match_operand:QI 1 "register_operand")
(match_operand 2 "const_int_operand")]
- "TARGET_MMX || TARGET_MMX_WITH_SSE"
+ "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE"
{
ix86_expand_vector_set (TARGET_MMX_WITH_SSE, operands[0], operands[1],
INTVAL (operands[2]));
@@ -3103,7 +3103,7 @@
[(match_operand:QI 0 "register_operand")
(match_operand:V8QI 1 "register_operand")
(match_operand 2 "const_int_operand")]
- "TARGET_MMX || TARGET_MMX_WITH_SSE"
+ "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE"
{
ix86_expand_vector_extract (TARGET_MMX_WITH_SSE, operands[0],
operands[1], INTVAL (operands[2]));
@@ -3120,6 +3120,178 @@
DONE;
})
+;; Insert the HImode value in operand 2 into one element of the V2HI
+;; vector in operand 1, writing the result to operand 0.  Operand 3 is
+;; the vec_merge selector; the insn condition requires TARGET_SSE2 and
+;; that exact_log2 of that selector, compared as unsigned, is below the
+;; number of V2HImode elements.  Alternative 0 ties operand 1 to
+;; operand 0 and is tagged "noavx"; alternative 1 is tagged "avx".
+(define_insn "*pinsrw"
+ [(set (match_operand:V2HI 0 "register_operand" "=x,YW")
+ (vec_merge:V2HI
+ (vec_duplicate:V2HI
+ (match_operand:HI 2 "nonimmediate_operand" "rm,rm"))
+ (match_operand:V2HI 1 "register_operand" "0,YW")
+ (match_operand:SI 3 "const_int_operand")))]
+ "TARGET_SSE2
+ && ((unsigned) exact_log2 (INTVAL (operands[3]))
+ < GET_MODE_NUNITS (V2HImode))"
+{
+ /* Replace the selector by its log2 so that %3 prints that value as the immediate.  */
+ operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
+ switch (which_alternative)
+ {
+ /* AVX alternative: vpinsrw with operand 1 as a separate source.  */
+ case 1:
+ if (MEM_P (operands[2]))
+ return "vpinsrw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
+ else
+ /* Register source is printed with the k modifier (presumably the 32-bit register name; confirm against the i386 operand printer).  */
+ return "vpinsrw\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
+ /* Non-AVX alternative: pinsrw, operand 1 is not printed because it is tied to operand 0.  */
+ case 0:
+ if (MEM_P (operands[2]))
+ return "pinsrw\t{%3, %2, %0|%0, %2, %3}";
+ else
+ return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "isa" "noavx,avx")
+ (set_attr "type" "sselog")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "TI")])
+
+;; Insert the QImode value in operand 2 into one element of the V4QI
+;; vector in operand 1, writing the result to operand 0.  Operand 3 is
+;; the vec_merge selector; the insn condition requires TARGET_SSE4_1
+;; and that exact_log2 of that selector, compared as unsigned, is below
+;; the number of V4QImode elements.  Alternative 0 ties operand 1 to
+;; operand 0 ("noavx", prefix "orig"); alternative 1 is the "avx"
+;; alternative (prefix "vex").
+(define_insn "*pinsrb"
+ [(set (match_operand:V4QI 0 "register_operand" "=x,YW")
+ (vec_merge:V4QI
+ (vec_duplicate:V4QI
+ (match_operand:QI 2 "nonimmediate_operand" "rm,rm"))
+ (match_operand:V4QI 1 "register_operand" "0,YW")
+ (match_operand:SI 3 "const_int_operand")))]
+ "TARGET_SSE4_1
+ && ((unsigned) exact_log2 (INTVAL (operands[3]))
+ < GET_MODE_NUNITS (V4QImode))"
+{
+ /* Replace the selector by its log2 so that %3 prints that value as the immediate.  */
+ operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
+ switch (which_alternative)
+ {
+ /* AVX alternative: vpinsrb with operand 1 as a separate source.  */
+ case 1:
+ if (MEM_P (operands[2]))
+ return "vpinsrb\t{%3, %2, %1, %0|%0, %1, %2, %3}";
+ else
+ /* Register source is printed with the k modifier (presumably the 32-bit register name; confirm against the i386 operand printer).  */
+ return "vpinsrb\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
+ /* Non-AVX alternative: pinsrb, operand 1 is not printed because it is tied to operand 0.  */
+ case 0:
+ if (MEM_P (operands[2]))
+ return "pinsrb\t{%3, %2, %0|%0, %2, %3}";
+ else
+ return "pinsrb\t{%3, %k2, %0|%0, %k2, %3}";
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "isa" "noavx,avx")
+ (set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "orig,vex")
+ (set_attr "mode" "TI")])
+
+;; Extract one HImode element of the V2HI register in operand 1 into
+;; operand 0.  Operand 2 is the element index and must satisfy
+;; const_0_to_1_operand.  Enabled for TARGET_SSE2.  Alternative 0
+;; targets a general register (printed with the k modifier);
+;; alternative 1 targets memory and is restricted by the "isa"
+;; attribute to "sse4".
+(define_insn "*pextrw"
+ [(set (match_operand:HI 0 "register_sse4nonimm_operand" "=r,m")
+ (vec_select:HI
+ (match_operand:V2HI 1 "register_operand" "YW,YW")
+ (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n,n")])))]
+ "TARGET_SSE2"
+ "@
+ %vpextrw\t{%2, %1, %k0|%k0, %1, %2}
+ %vpextrw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "isa" "*,sse4")
+ (set_attr "type" "sselog1")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "TI")])
+
+;; Extract one HImode element of the V2HI register in operand 1 and
+;; zero-extend it into the SWI48 general register in operand 0.
+;; Operand 2 is the element index (const_0_to_1_operand).  Enabled for
+;; TARGET_SSE2.  The destination is always printed with the k modifier,
+;; so the same pextrw template serves every SWI48 mode.
+(define_insn "*pextrw_zext"
+ [(set (match_operand:SWI48 0 "register_operand" "=r")
+ (zero_extend:SWI48
+ (vec_select:HI
+ (match_operand:V2HI 1 "register_operand" "YW")
+ (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")]))))]
+ "TARGET_SSE2"
+ "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}"
+ [(set_attr "type" "sselog1")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "TI")])
+
+;; Extract one QImode element of the V4QI register in operand 1 into
+;; operand 0.  Operand 2 is the element index and must satisfy
+;; const_0_to_3_operand.  Enabled for TARGET_SSE4_1.  Alternative 0
+;; targets a general register (printed with the k modifier);
+;; alternative 1 targets memory.
+(define_insn "*pextrb"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=r,m")
+ (vec_select:QI
+ (match_operand:V4QI 1 "register_operand" "YW,YW")
+ (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n")])))]
+ "TARGET_SSE4_1"
+ "@
+ %vpextrb\t{%2, %1, %k0|%k0, %1, %2}
+ %vpextrb\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "TI")])
+
+;; Extract one QImode element of the V4QI register in operand 1 and
+;; zero-extend it into the SWI248 general register in operand 0.
+;; Operand 2 is the element index (const_0_to_3_operand).  Enabled for
+;; TARGET_SSE4_1.  The destination is always printed with the k
+;; modifier, so the same pextrb template serves every SWI248 mode.
+(define_insn "*pextrb_zext"
+ [(set (match_operand:SWI248 0 "register_operand" "=r")
+ (zero_extend:SWI248
+ (vec_select:QI
+ (match_operand:V4QI 1 "register_operand" "YW")
+ (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")]))))]
+ "TARGET_SSE4_1"
+ "%vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "TI")])
+
+;; Expander for storing the HImode register in operand 1 into the
+;; element of the V2HI register in operand 0 selected by the constant
+;; in operand 2.  Enabled for TARGET_SSE2.  All work is delegated to
+;; ix86_expand_vector_set, called with its first argument (mmx_ok)
+;; false.
+(define_expand "vec_setv2hi"
+ [(match_operand:V2HI 0 "register_operand")
+ (match_operand:HI 1 "register_operand")
+ (match_operand 2 "const_int_operand")]
+ "TARGET_SSE2"
+{
+ ix86_expand_vector_set (false, operands[0], operands[1],
+ INTVAL (operands[2]));
+ DONE;
+})
+
+;; Expander for reading the element of the V2HI register in operand 1
+;; selected by the constant in operand 2 into the HImode register in
+;; operand 0.  Enabled for TARGET_SSE2.  All work is delegated to
+;; ix86_expand_vector_extract, called with its first argument (mmx_ok)
+;; false.
+(define_expand "vec_extractv2hihi"
+ [(match_operand:HI 0 "register_operand")
+ (match_operand:V2HI 1 "register_operand")
+ (match_operand 2 "const_int_operand")]
+ "TARGET_SSE2"
+{
+ ix86_expand_vector_extract (false, operands[0],
+ operands[1], INTVAL (operands[2]));
+ DONE;
+})
+
+;; Expander for storing the QImode register in operand 1 into the
+;; element of the V4QI register in operand 0 selected by the constant
+;; in operand 2.  Enabled for TARGET_SSE4_1.  All work is delegated to
+;; ix86_expand_vector_set, called with its first argument (mmx_ok)
+;; false.
+(define_expand "vec_setv4qi"
+ [(match_operand:V4QI 0 "register_operand")
+ (match_operand:QI 1 "register_operand")
+ (match_operand 2 "const_int_operand")]
+ "TARGET_SSE4_1"
+{
+ ix86_expand_vector_set (false, operands[0], operands[1],
+ INTVAL (operands[2]));
+ DONE;
+})
+
+;; Expander for reading the element of the V4QI register in operand 1
+;; selected by the constant in operand 2 into the QImode register in
+;; operand 0.  Enabled for TARGET_SSE4_1.  All work is delegated to
+;; ix86_expand_vector_extract, called with its first argument (mmx_ok)
+;; false.
+(define_expand "vec_extractv4qiqi"
+ [(match_operand:QI 0 "register_operand")
+ (match_operand:V4QI 1 "register_operand")
+ (match_operand 2 "const_int_operand")]
+ "TARGET_SSE4_1"
+{
+ ix86_expand_vector_extract (false, operands[0],
+ operands[1], INTVAL (operands[2]));
+ DONE;
+})
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Miscellaneous
diff --git a/gcc/testsuite/gcc.target/i386/vperm-v2hi.c b/gcc/testsuite/gcc.target/i386/vperm-v2hi.c
new file mode 100644
index 0000000..0af94f2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vperm-v2hi.c
@@ -0,0 +1,41 @@
+/* { dg-do run } */
+/* { dg-options "-O -msse2" } */
+/* { dg-require-effective-target sse2 } */
+
+#include "isa-check.h"
+#include "sse-os-support.h"
+
+/* S is the scalar element type; V is a 4-byte vector of short and IV
+   is the identically-shaped type used for the shuffle selector.  U
+   overlays a V with an array of two S so elements can be accessed
+   individually.  */
+typedef short S;
+typedef short V __attribute__((vector_size(4)));
+typedef short IV __attribute__((vector_size(4)));
+typedef union { S s[2]; V v; } U;
+
+/* i holds the two shuffle inputs; b receives the shuffle result and c
+   the element-by-element reference result.  */
+static U i[2], b, c;
+
+extern int memcmp (const void *, const void *, __SIZE_TYPE__);
+/* Local assert replacement: traps via __builtin_trap when T is false.  */
+#define assert(T) ((T) || (__builtin_trap (), 0))
+
+/* TEST (E0, E1): shuffle i[0].v and i[1].v with selector {E0, E1} into
+   b, build the reference in c by indexing i[0].s with the same selector
+   values, then trap unless b and c compare equal.  The empty asm with
+   a "memory" clobber sits between the stores and the comparison.
+   NOTE(review): selector values above 1 index past the two-element
+   i[0].s; presumably this is meant to read i[1] -- confirm.  */
+#define TEST(E0, E1) \
+ b.v = __builtin_shuffle (i[0].v, i[1].v, (IV){E0, E1}); \
+ c.s[0] = i[0].s[E0]; \
+ c.s[1] = i[0].s[E1]; \
+ __asm__("" : : : "memory"); \
+ assert (memcmp (&b, &c, sizeof(c)) == 0);
+
+#include "vperm-2-2.inc"
+
+/* Test driver: runs the ISA and OS-support checks, fills the input
+   data with 0..3, then calls check ().  check_isa, sse_os_support and
+   check are not defined in this file; presumably they come from the
+   included isa-check.h, sse-os-support.h and vperm-2-2.inc -- confirm.  */
+int main()
+{
+ check_isa ();
+
+ /* Exit with status 0 when sse_os_support () reports no support.  */
+ if (!sse_os_support ())
+ exit (0);
+
+ i[0].s[0] = 0;
+ i[0].s[1] = 1;
+ /* NOTE(review): s has only two elements, so indices 2 and 3 run past
+    i[0].s.  Presumably they are meant to land in i[1], mirroring how
+    TEST indexes i[0].s with the selector values -- confirm.  */
+ i[0].s[2] = 2;
+ i[0].s[3] = 3;
+
+ check();
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/i386/vperm-v4qi.c b/gcc/testsuite/gcc.target/i386/vperm-v4qi.c
new file mode 100644
index 0000000..57fa547
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vperm-v4qi.c
@@ -0,0 +1,47 @@
+/* { dg-do run } */
+/* { dg-options "-O -msse2" } */
+/* { dg-require-effective-target sse2 } */
+
+#include "isa-check.h"
+#include "sse-os-support.h"
+
+/* S is the scalar element type; V is a 4-byte vector of char and IV
+   is the identically-shaped type used for the shuffle selector.  U
+   overlays a V with an array of four S so elements can be accessed
+   individually.  */
+typedef char S;
+typedef char V __attribute__((vector_size(4)));
+typedef char IV __attribute__((vector_size(4)));
+typedef union { S s[4]; V v; } U;
+
+/* i holds the two shuffle inputs; b receives the shuffle result and c
+   the element-by-element reference result.  */
+static U i[2], b, c;
+
+extern int memcmp (const void *, const void *, __SIZE_TYPE__);
+/* Local assert replacement: traps via __builtin_trap when T is false.  */
+#define assert(T) ((T) || (__builtin_trap (), 0))
+
+/* TEST (E0, E1, E2, E3): shuffle i[0].v and i[1].v with selector
+   {E0, E1, E2, E3} into b, build the reference in c by indexing i[0].s
+   with the same selector values, then trap unless b and c compare
+   equal.  The empty asm with a "memory" clobber sits between the
+   stores and the comparison.
+   NOTE(review): selector values above 3 index past the four-element
+   i[0].s; presumably this is meant to read i[1] -- confirm.  */
+#define TEST(E0, E1, E2, E3) \
+ b.v = __builtin_shuffle (i[0].v, i[1].v, (IV){E0, E1, E2, E3}); \
+ c.s[0] = i[0].s[E0]; \
+ c.s[1] = i[0].s[E1]; \
+ c.s[2] = i[0].s[E2]; \
+ c.s[3] = i[0].s[E3]; \
+ __asm__("" : : : "memory"); \
+ assert (memcmp (&b, &c, sizeof(c)) == 0);
+
+#include "vperm-4-2.inc"
+
+/* Test driver: runs the ISA and OS-support checks, fills the input
+   data with 0..7, then calls check ().  check_isa, sse_os_support and
+   check are not defined in this file; presumably they come from the
+   included isa-check.h, sse-os-support.h and vperm-4-2.inc -- confirm.  */
+int main()
+{
+ check_isa ();
+
+ /* Exit with status 0 when sse_os_support () reports no support.  */
+ if (!sse_os_support ())
+ exit (0);
+
+ i[0].s[0] = 0;
+ i[0].s[1] = 1;
+ i[0].s[2] = 2;
+ i[0].s[3] = 3;
+ /* NOTE(review): s has only four elements, so indices 4..7 run past
+    i[0].s.  Presumably they are meant to land in i[1], mirroring how
+    TEST indexes i[0].s with the selector values -- confirm.  */
+ i[0].s[4] = 4;
+ i[0].s[5] = 5;
+ i[0].s[6] = 6;
+ i[0].s[7] = 7;
+
+ check();
+ return 0;
+}