diff options
-rw-r--r-- | gcc/config/i386/i386-expand.c | 6 | ||||
-rw-r--r-- | gcc/config/i386/mmx.md | 176 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/vperm-v2hi.c | 41 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/vperm-v4qi.c | 47 |
4 files changed, 268 insertions, 2 deletions
diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c index 4185f58..eb7cdb0 100644 --- a/gcc/config/i386/i386-expand.c +++ b/gcc/config/i386/i386-expand.c @@ -14968,6 +14968,7 @@ ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt) return; case E_V8HImode: + case E_V2HImode: use_vec_merge = TARGET_SSE2; break; case E_V4HImode: @@ -14975,6 +14976,7 @@ ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt) break; case E_V16QImode: + case E_V4QImode: use_vec_merge = TARGET_SSE4_1; break; @@ -15274,6 +15276,7 @@ ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt) break; case E_V8HImode: + case E_V2HImode: use_vec_extr = TARGET_SSE2; break; case E_V4HImode: @@ -15294,6 +15297,9 @@ ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt) return; } break; + case E_V4QImode: + use_vec_extr = TARGET_SSE4_1; + break; case E_V8SFmode: if (TARGET_AVX) diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index f39e062..914e5e9 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -3092,7 +3092,7 @@ [(match_operand:V8QI 0 "register_operand") (match_operand:QI 1 "register_operand") (match_operand 2 "const_int_operand")] - "TARGET_MMX || TARGET_MMX_WITH_SSE" + "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE" { ix86_expand_vector_set (TARGET_MMX_WITH_SSE, operands[0], operands[1], INTVAL (operands[2])); @@ -3103,7 +3103,7 @@ [(match_operand:QI 0 "register_operand") (match_operand:V8QI 1 "register_operand") (match_operand 2 "const_int_operand")] - "TARGET_MMX || TARGET_MMX_WITH_SSE" + "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE" { ix86_expand_vector_extract (TARGET_MMX_WITH_SSE, operands[0], operands[1], INTVAL (operands[2])); @@ -3120,6 +3120,178 @@ DONE; }) +(define_insn "*pinsrw" + [(set (match_operand:V2HI 0 "register_operand" "=x,YW") + (vec_merge:V2HI + (vec_duplicate:V2HI + (match_operand:HI 2 "nonimmediate_operand" "rm,rm")) + (match_operand:V2HI 1 "register_operand" "0,YW") + (match_operand:SI 3 "const_int_operand")))] + "TARGET_SSE2 + && ((unsigned) exact_log2 (INTVAL (operands[3])) + < GET_MODE_NUNITS (V2HImode))" +{ + operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3]))); + switch (which_alternative) + { + case 1: + if (MEM_P (operands[2])) + return "vpinsrw\t{%3, %2, %1, %0|%0, %1, %2, %3}"; + else + return "vpinsrw\t{%3, %k2, %1, %0|%0, %1, %k2, %3}"; + case 0: + if (MEM_P (operands[2])) + return "pinsrw\t{%3, %2, %0|%0, %2, %3}"; + else + return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}"; + default: + gcc_unreachable (); + } +} + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sselog") + (set_attr "length_immediate" "1") + (set_attr "mode" "TI")]) + +(define_insn "*pinsrb" + [(set (match_operand:V4QI 0 "register_operand" "=x,YW") + (vec_merge:V4QI + (vec_duplicate:V4QI + (match_operand:QI 2 "nonimmediate_operand" "rm,rm")) + (match_operand:V4QI 1 "register_operand" "0,YW") + (match_operand:SI 3 "const_int_operand")))] + "TARGET_SSE4_1 + && ((unsigned) exact_log2 (INTVAL (operands[3])) + < GET_MODE_NUNITS (V4QImode))" +{ + operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3]))); + switch (which_alternative) + { + case 1: + if (MEM_P (operands[2])) + return "vpinsrb\t{%3, %2, %1, %0|%0, %1, %2, %3}"; + else + return "vpinsrb\t{%3, %k2, %1, %0|%0, %1, %k2, %3}"; + case 0: + if (MEM_P (operands[2])) + return "pinsrb\t{%3, %2, %0|%0, %2, %3}"; + else + return "pinsrb\t{%3, %k2, %0|%0, %k2, %3}"; + default: + gcc_unreachable (); + } +} + [(set_attr "isa" "noavx,avx") + (set_attr "type" "sselog") + (set_attr "prefix_data16" "1") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") + (set_attr "prefix" "orig,vex") + (set_attr "mode" "TI")]) + +(define_insn "*pextrw" + [(set (match_operand:HI 0 "register_sse4nonimm_operand" "=r,m") + (vec_select:HI + (match_operand:V2HI 1 "register_operand" "YW,YW") + (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n,n")])))] + "TARGET_SSE2" + "@ + %vpextrw\t{%2, %1, %k0|%k0, %1, %2} + %vpextrw\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "*,sse4") + (set_attr "type" "sselog1") + (set_attr "length_immediate" "1") + (set_attr "prefix" "maybe_vex") + (set_attr "mode" "TI")]) + +(define_insn "*pextrw_zext" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (zero_extend:SWI48 + (vec_select:HI + (match_operand:V2HI 1 "register_operand" "YW") + (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")]))))] + "TARGET_SSE2" + "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}" + [(set_attr "type" "sselog1") + (set_attr "length_immediate" "1") + (set_attr "prefix" "maybe_vex") + (set_attr "mode" "TI")]) + +(define_insn "*pextrb" + [(set (match_operand:QI 0 "nonimmediate_operand" "=r,m") + (vec_select:QI + (match_operand:V4QI 1 "register_operand" "YW,YW") + (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n")])))] + "TARGET_SSE4_1" + "@ + %vpextrb\t{%2, %1, %k0|%k0, %1, %2} + %vpextrb\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sselog1") + (set_attr "prefix_data16" "1") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") + (set_attr "prefix" "maybe_vex") + (set_attr "mode" "TI")]) + +(define_insn "*pextrb_zext" + [(set (match_operand:SWI248 0 "register_operand" "=r") + (zero_extend:SWI248 + (vec_select:QI + (match_operand:V4QI 1 "register_operand" "YW") + (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")]))))] + "TARGET_SSE4_1" + "%vpextrb\t{%2, %1, %k0|%k0, %1, %2}" + [(set_attr "type" "sselog1") + (set_attr "prefix_data16" "1") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") + (set_attr "prefix" "maybe_vex") + (set_attr "mode" "TI")]) + +(define_expand "vec_setv2hi" + [(match_operand:V2HI 0 "register_operand") + (match_operand:HI 1 "register_operand") + (match_operand 2 "const_int_operand")] + "TARGET_SSE2" +{ + ix86_expand_vector_set (false, operands[0], operands[1], + INTVAL (operands[2])); + DONE; +}) + +(define_expand "vec_extractv2hihi" + [(match_operand:HI 0 "register_operand") + (match_operand:V2HI 1 "register_operand") + (match_operand 2 "const_int_operand")] + "TARGET_SSE2" +{ + ix86_expand_vector_extract (false, operands[0], + operands[1], INTVAL (operands[2])); + DONE; +}) + +(define_expand "vec_setv4qi" + [(match_operand:V4QI 0 "register_operand") + (match_operand:QI 1 "register_operand") + (match_operand 2 "const_int_operand")] + "TARGET_SSE4_1" +{ + ix86_expand_vector_set (false, operands[0], operands[1], + INTVAL (operands[2])); + DONE; +}) + +(define_expand "vec_extractv4qiqi" + [(match_operand:QI 0 "register_operand") + (match_operand:V4QI 1 "register_operand") + (match_operand 2 "const_int_operand")] + "TARGET_SSE4_1" +{ + ix86_expand_vector_extract (false, operands[0], + operands[1], INTVAL (operands[2])); + DONE; +}) + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; Miscellaneous diff --git a/gcc/testsuite/gcc.target/i386/vperm-v2hi.c b/gcc/testsuite/gcc.target/i386/vperm-v2hi.c new file mode 100644 index 0000000..0af94f2 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/vperm-v2hi.c @@ -0,0 +1,41 @@ +/* { dg-do run } */ +/* { dg-options "-O -msse2" } */ +/* { dg-require-effective-target sse2 } */ + +#include "isa-check.h" +#include "sse-os-support.h" + +typedef short S; +typedef short V __attribute__((vector_size(4))); +typedef short IV __attribute__((vector_size(4))); +typedef union { S s[2]; V v; } U; + +static U i[2], b, c; + +extern int memcmp (const void *, const void *, __SIZE_TYPE__); +#define assert(T) ((T) || (__builtin_trap (), 0)) + +#define TEST(E0, E1) \ + b.v = __builtin_shuffle (i[0].v, i[1].v, (IV){E0, E1}); \ + c.s[0] = i[0].s[E0]; \ + c.s[1] = i[0].s[E1]; \ + __asm__("" : : : "memory"); \ + assert (memcmp (&b, &c, sizeof(c)) == 0); + +#include "vperm-2-2.inc" + +int main() +{ + check_isa (); + + if (!sse_os_support ()) + exit (0); + + i[0].s[0] = 0; + i[0].s[1] = 1; + i[0].s[2] = 2; + i[0].s[3] = 3; + + check(); + return 0; +} diff --git a/gcc/testsuite/gcc.target/i386/vperm-v4qi.c b/gcc/testsuite/gcc.target/i386/vperm-v4qi.c new file mode 100644 index 0000000..57fa547 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/vperm-v4qi.c @@ -0,0 +1,47 @@ +/* { dg-do run } */ +/* { dg-options "-O -msse2" } */ +/* { dg-require-effective-target sse2 } */ + +#include "isa-check.h" +#include "sse-os-support.h" + +typedef char S; +typedef char V __attribute__((vector_size(4))); +typedef char IV __attribute__((vector_size(4))); +typedef union { S s[4]; V v; } U; + +static U i[2], b, c; + +extern int memcmp (const void *, const void *, __SIZE_TYPE__); +#define assert(T) ((T) || (__builtin_trap (), 0)) + +#define TEST(E0, E1, E2, E3) \ + b.v = __builtin_shuffle (i[0].v, i[1].v, (IV){E0, E1, E2, E3}); \ + c.s[0] = i[0].s[E0]; \ + c.s[1] = i[0].s[E1]; \ + c.s[2] = i[0].s[E2]; \ + c.s[3] = i[0].s[E3]; \ + __asm__("" : : : "memory"); \ + assert (memcmp (&b, &c, sizeof(c)) == 0); + +#include "vperm-4-2.inc" + +int main() +{ + check_isa (); + + if (!sse_os_support ()) + exit (0); + + i[0].s[0] = 0; + i[0].s[1] = 1; + i[0].s[2] = 2; + i[0].s[3] = 3; + i[0].s[4] = 4; + i[0].s[5] = 5; + i[0].s[6] = 6; + i[0].s[7] = 7; + + check(); + return 0; +} |