diff options
-rw-r--r-- | gcc/config/i386/i386.c | 4 | ||||
-rw-r--r-- | gcc/config/i386/i386.h | 5 | ||||
-rw-r--r-- | gcc/config/i386/i386.md | 43 | ||||
-rw-r--r-- | gcc/config/i386/mmx.md | 284 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/store_merging_18.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/store_merging_29.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr103861.c | 23 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr92658-avx512vl.c | 4 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr92658-sse4-2.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr92658-sse4.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/warn-vect-op-2.c | 4 |
11 files changed, 362 insertions, 13 deletions
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index ec15582..4e02b26 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -19306,7 +19306,7 @@ ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass, } /* Require movement to gpr, and then store to memory. */ - if ((mode == HFmode || mode == HImode) + if ((mode == HFmode || mode == HImode || mode == V2QImode) && !TARGET_SSE4_1 && SSE_CLASS_P (rclass) && !in_p && MEM_P (x)) @@ -22082,6 +22082,8 @@ ix86_vector_mode_supported_p (machine_mode mode) if ((TARGET_3DNOW || TARGET_MMX_WITH_SSE) && VALID_MMX_REG_MODE_3DNOW (mode)) return true; + if (mode == V2QImode) + return true; return false; } diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 398f751..3adb1cb 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -1039,7 +1039,8 @@ extern const char *host_detect_local_cpu (int argc, const char **argv); ((MODE) == V16QImode || (MODE) == V8HImode || (MODE) == V2DFmode \ || (MODE) == V8HFmode || (MODE) == V4HFmode || (MODE) == V2HFmode \ || (MODE) == V4QImode || (MODE) == V2HImode || (MODE) == V1SImode \ - || (MODE) == V2DImode || (MODE) == DFmode || (MODE) == HFmode) + || (MODE) == V2DImode || (MODE) == V2QImode || (MODE) == DFmode \ + || (MODE) == HFmode) #define VALID_SSE_REG_MODE(MODE) \ ((MODE) == V1TImode || (MODE) == TImode \ @@ -1072,7 +1073,7 @@ extern const char *host_detect_local_cpu (int argc, const char **argv); || (MODE) == SDmode || (MODE) == DDmode \ || (MODE) == HFmode || (MODE) == HCmode \ || (MODE) == V2HImode || (MODE) == V2HFmode \ - || (MODE) == V1SImode || (MODE) == V4QImode \ + || (MODE) == V1SImode || (MODE) == V4QImode || (MODE) == V2QImode \ || (TARGET_64BIT \ && ((MODE) == TImode || (MODE) == CTImode \ || (MODE) == TFmode || (MODE) == TCmode \ diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index e670e7d..cd95509 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -6931,6 +6931,30 @@ operands[4] = gen_rtx_SIGN_EXTEND (<DPWI>mode, operands[2]); }) +(define_insn "*subqi_ext<mode>_2" + [(set (zero_extract:SWI248 + (match_operand:SWI248 0 "register_operand" "+Q") + (const_int 8) + (const_int 8)) + (subreg:SWI248 + (minus:QI + (subreg:QI + (zero_extract:SWI248 + (match_operand:SWI248 1 "register_operand" "0") + (const_int 8) + (const_int 8)) 0) + (subreg:QI + (zero_extract:SWI248 + (match_operand:SWI248 2 "register_operand" "Q") + (const_int 8) + (const_int 8)) 0)) 0)) + (clobber (reg:CC FLAGS_REG))] + "/* FIXME: without this LRA can't reload this pattern, see PR82524. */ + rtx_equal_p (operands[0], operands[1])" + "sub{b}\t{%h2, %h0|%h0, %h2}" + [(set_attr "type" "alu") + (set_attr "mode" "QI")]) + (define_insn "*subv<mode>4" [(set (reg:CCO FLAGS_REG) (eq:CCO (minus:<DWI> @@ -10901,6 +10925,25 @@ [(set_attr "type" "negnot") (set_attr "mode" "<MODE>")]) +(define_insn "*negqi_ext<mode>_2" + [(set (zero_extract:SWI248 + (match_operand:SWI248 0 "register_operand" "+Q") + (const_int 8) + (const_int 8)) + (subreg:SWI248 + (neg:QI + (subreg:QI + (zero_extract:SWI248 + (match_operand:SWI248 1 "register_operand" "0") + (const_int 8) + (const_int 8)) 0)) 0)) + (clobber (reg:CC FLAGS_REG))] + "/* FIXME: without this LRA can't reload this pattern, see PR82524. */ + rtx_equal_p (operands[0], operands[1])" + "neg{b}\t%h0" + [(set_attr "type" "negnot") + (set_attr "mode" "QI")]) + ;; Negate with jump on overflow. (define_expand "negv<mode>3" [(parallel [(set (reg:CCO FLAGS_REG) diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index e394cba..c4e71c2 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -261,8 +261,8 @@ "=r ,m ,v,v,v,m,r,v") (match_operand:V_32 1 "general_operand" "rmC,rC,C,v,m,v,v,r"))] - "TARGET_SSE2 && - !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "TARGET_SSE2 + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" { switch (get_attr_type (insn)) { @@ -359,6 +359,174 @@ DONE; }) +(define_expand "movv2qi" + [(set (match_operand:V2QI 0 "nonimmediate_operand") + (match_operand:V2QI 1 "nonimmediate_operand"))] + "" +{ + ix86_expand_vector_move (V2QImode, operands); + DONE; +}) + +(define_insn "*movv2qi_internal" + [(set (match_operand:V2QI 0 "nonimmediate_operand" + "=r,r,r,m ,v,v,v,m,r,v") + (match_operand:V2QI 1 "general_operand" + "r ,C,m,rC,C,v,m,v,v,r"))] + "!(MEM_P (operands[0]) && MEM_P (operands[1]))" +{ + switch (get_attr_type (insn)) + { + case TYPE_IMOV: + if (get_attr_mode (insn) == MODE_SI) + return "mov{l}\t{%k1, %k0|%k0, %k1}"; + else + return "mov{w}\t{%1, %0|%0, %1}"; + + case TYPE_IMOVX: + /* movzwl is faster than movw on p2 due to partial word stalls, + though not as fast as an aligned movl. */ + return "movz{wl|x}\t{%1, %k0|%k0, %1}"; + + case TYPE_SSELOG1: + if (satisfies_constraint_C (operands[1])) + return standard_sse_constant_opcode (insn, operands); + + if (SSE_REG_P (operands[0])) + return MEM_P (operands[1]) + ? "%vpinsrw\t{$0, %1, %d0|%d0, %1, 0}" + : "%vpinsrw\t{$0, %k1, %d0|%d0, %k1, 0}"; + else + return MEM_P (operands[0]) + ? "%vpextrw\t{$0, %1, %0|%0, %1, 0}" + : "%vpextrw\t{$0, %1, %k0|%k0, %1, 0}"; + + case TYPE_SSEMOV: + return ix86_output_ssemov (insn, operands); + + default: + gcc_unreachable (); + } +} + [(set (attr "isa") + (cond [(eq_attr "alternative" "4,5,6,8,9") + (const_string "sse2") + (eq_attr "alternative" "7") + (const_string "sse4") + ] + (const_string "*"))) + (set (attr "type") + (cond [(eq_attr "alternative" "6,7,8,9") + (if_then_else (match_test "TARGET_AVX512FP16") + (const_string "ssemov") + (const_string "sselog1")) + (eq_attr "alternative" "4") + (const_string "sselog1") + (eq_attr "alternative" "5") + (const_string "ssemov") + (match_test "optimize_function_for_size_p (cfun)") + (const_string "imov") + (and (eq_attr "alternative" "0") + (ior (not (match_test "TARGET_PARTIAL_REG_STALL")) + (not (match_test "TARGET_HIMODE_MATH")))) + (const_string "imov") + (and (eq_attr "alternative" "1,2") + (match_operand:V2QI 1 "aligned_operand")) + (const_string "imov") + (and (match_test "TARGET_MOVX") + (eq_attr "alternative" "0,2")) + (const_string "imovx") + ] + (const_string "imov"))) + (set (attr "prefix") + (cond [(eq_attr "alternative" "4,5,6,7,8,9") + (const_string "maybe_evex") + ] + (const_string "orig"))) + (set (attr "mode") + (cond [(eq_attr "alternative" "6,7,8,9") + (if_then_else (match_test "TARGET_AVX512FP16") + (const_string "HI") + (const_string "TI")) + (eq_attr "alternative" "4") + (cond [(match_test "TARGET_AVX") + (const_string "TI") + (ior (not (match_test "TARGET_SSE2")) + (match_test "optimize_function_for_size_p (cfun)")) + (const_string "V4SF") + ] + (const_string "TI")) + (eq_attr "alternative" "5") + (cond [(match_test "TARGET_AVX512FP16") + (const_string "HI") + (match_test "TARGET_AVX") + (const_string "TI") + (ior (not (match_test "TARGET_SSE2")) + (match_test "optimize_function_for_size_p (cfun)")) + (const_string "V4SF") + ] + (const_string "TI")) + (eq_attr "type" "imovx") + (const_string "SI") + (and (eq_attr "alternative" "1,2") + (match_operand:V2QI 1 "aligned_operand")) + (const_string "SI") + (and (eq_attr "alternative" "0") + (ior (not (match_test "TARGET_PARTIAL_REG_STALL")) + (not (match_test "TARGET_HIMODE_MATH")))) + (const_string "SI") + ] + (const_string "HI"))) + (set (attr "preferred_for_speed") + (cond [(eq_attr "alternative" "8") + (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC") + (eq_attr "alternative" "9") + (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC") + ] + (symbol_ref "true")))]) + +;; We always round up to UNITS_PER_WORD bytes. +(define_insn "*pushv2qi2" + [(set (match_operand:V2QI 0 "push_operand" "=X,X") + (match_operand:V2QI 1 "nonmemory_no_elim_operand" "rC,v"))] + "" + "* return TARGET_64BIT ? \"push{q}\t%q1\" : \"push{l}\t%k1\"; + #" + [(set_attr "isa" "*,sse4") + (set_attr "type" "push,multi") + (set (attr "mode") + (cond [(eq_attr "alternative" "0") + (if_then_else (match_test "TARGET_64BIT") + (const_string "DI") + (const_string "SI")) + (eq_attr "alternative" "1") + (if_then_else (match_test "TARGET_AVX512FP16") + (const_string "HI") + (const_string "TI")) + ] + (const_string "HI")))]) + +(define_split + [(set (match_operand:V2QI 0 "push_operand") + (match_operand:V2QI 1 "sse_reg_operand"))] + "TARGET_SSE4_1 && reload_completed" + [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2))) + (set (match_dup 0) (match_dup 1))] +{ + operands[2] = GEN_INT (-PUSH_ROUNDING (GET_MODE_SIZE (V2QImode))); + /* Preserve memory attributes. */ + operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx); +}) + +(define_expand "movmisalignv2qi" + [(set (match_operand:V2QI 0 "nonimmediate_operand") + (match_operand:V2QI 1 "nonimmediate_operand"))] + "" +{ + ix86_expand_vector_move (V2QImode, operands); + DONE; +}) + (define_insn "sse_movntq" [(set (match_operand:DI 0 "memory_operand" "=m,m") (unspec:DI [(match_operand:DI 1 "register_operand" "y,r")] @@ -1461,6 +1629,58 @@ "TARGET_MMX_WITH_SSE" "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));") +(define_insn "negv2qi2" + [(set (match_operand:V2QI 0 "register_operand" "=Q,&Yw") + (neg:V2QI + (match_operand:V2QI 1 "register_operand" "0,Yw"))) + (clobber (reg:CC FLAGS_REG))] + "" + "#" + [(set_attr "isa" "*,sse2") + (set_attr "type" "multi") + (set_attr "mode" "QI,TI")]) + +(define_split + [(set (match_operand:V2QI 0 "general_reg_operand") + (neg:V2QI + (match_operand:V2QI 1 "general_reg_operand"))) + (clobber (reg:CC FLAGS_REG))] + "reload_completed" + [(parallel + [(set (strict_low_part (match_dup 0)) + (neg:QI (match_dup 1))) + (clobber (reg:CC FLAGS_REG))]) + (parallel + [(set (zero_extract:HI (match_dup 2) (const_int 8) (const_int 8)) + (subreg:HI + (neg:QI + (subreg:QI + (zero_extract:HI (match_dup 3) + (const_int 8) + (const_int 8)) 0)) 0)) + (clobber (reg:CC FLAGS_REG))])] +{ + operands[3] = gen_lowpart (HImode, operands[1]); + operands[2] = gen_lowpart (HImode, operands[0]); + operands[1] = gen_lowpart (QImode, operands[1]); + operands[0] = gen_lowpart (QImode, operands[0]); +}) + +(define_split + [(set (match_operand:V2QI 0 "sse_reg_operand") + (neg:V2QI + (match_operand:V2QI 1 "sse_reg_operand"))) + (clobber (reg:CC FLAGS_REG))] + "reload_completed" + [(set (match_dup 0) (match_dup 2)) + (set (match_dup 0) + (minus:V4QI (match_dup 0) (match_dup 1)))] +{ + operands[2] = CONST0_RTX (V4QImode); + operands[1] = gen_lowpart (V4QImode, operands[1]); + operands[0] = gen_lowpart (V4QImode, operands[0]); +}) + (define_expand "mmx_<insn><mode>3" [(set (match_operand:MMXMODEI8 0 "register_operand") (plusminus:MMXMODEI8 @@ -1515,6 +1735,66 @@ (set_attr "type" "sseadd") (set_attr "mode" "TI")]) +(define_insn "<insn>v2qi3" + [(set (match_operand:V2QI 0 "register_operand" "=Q,x,Yw") + (plusminus:V2QI + (match_operand:V2QI 1 "register_operand" "<comm>0,0,Yw") + (match_operand:V2QI 2 "register_operand" "Q,x,Yw"))) + (clobber (reg:CC FLAGS_REG))] + "" + "#" + [(set_attr "isa" "*,sse2_noavx,avx") + (set_attr "type" "multi,sseadd,sseadd") + (set_attr "mode" "QI,TI,TI")]) + +(define_split + [(set (match_operand:V2QI 0 "general_reg_operand") + (plusminus:V2QI + (match_operand:V2QI 1 "general_reg_operand") + (match_operand:V2QI 2 "general_reg_operand"))) + (clobber (reg:CC FLAGS_REG))] + "reload_completed" + [(parallel + [(set (strict_low_part (match_dup 0)) + (plusminus:QI (match_dup 1) (match_dup 2))) + (clobber (reg:CC FLAGS_REG))]) + (parallel + [(set (zero_extract:HI (match_dup 3) (const_int 8) (const_int 8)) + (subreg:HI + (plusminus:QI + (subreg:QI + (zero_extract:HI (match_dup 4) + (const_int 8) + (const_int 8)) 0) + (subreg:QI + (zero_extract:HI (match_dup 5) + (const_int 8) + (const_int 8)) 0)) 0)) + (clobber (reg:CC FLAGS_REG))])] +{ + operands[5] = gen_lowpart (HImode, operands[2]); + operands[4] = gen_lowpart (HImode, operands[1]); + operands[3] = gen_lowpart (HImode, operands[0]); + operands[2] = gen_lowpart (QImode, operands[2]); + operands[1] = gen_lowpart (QImode, operands[1]); + operands[0] = gen_lowpart (QImode, operands[0]); +}) + +(define_split + [(set (match_operand:V2QI 0 "sse_reg_operand") + (plusminus:V2QI + (match_operand:V2QI 1 "sse_reg_operand") + (match_operand:V2QI 2 "sse_reg_operand"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_SSE2 && reload_completed" + [(set (match_dup 0) + (plusminus:V4QI (match_dup 1) (match_dup 2)))] +{ + operands[2] = gen_lowpart (V4QImode, operands[2]); + operands[1] = gen_lowpart (V4QImode, operands[1]); + operands[0] = gen_lowpart (V4QImode, operands[0]); +}) + (define_expand "mmx_<insn><mode>3" [(set (match_operand:MMXMODE12 0 "register_operand") (sat_plusminus:MMXMODE12 diff --git a/gcc/testsuite/gcc.dg/store_merging_18.c b/gcc/testsuite/gcc.dg/store_merging_18.c index 66e157e..fdff6b4 100644 --- a/gcc/testsuite/gcc.dg/store_merging_18.c +++ b/gcc/testsuite/gcc.dg/store_merging_18.c @@ -1,6 +1,6 @@ /* PR tree-optimization/83843 */ /* { dg-do run } */ -/* { dg-options "-O2 -fdump-tree-store-merging" } */ +/* { dg-options "-O2 -fno-tree-vectorize -fdump-tree-store-merging" } */ /* { dg-final { scan-tree-dump-times "Merging successful" 3 "store-merging" { target { store_merge && { ! arm*-*-* } } } } } */ __attribute__((noipa)) void diff --git a/gcc/testsuite/gcc.dg/store_merging_29.c b/gcc/testsuite/gcc.dg/store_merging_29.c index 6b32aa9..e7afc9d 100644 --- a/gcc/testsuite/gcc.dg/store_merging_29.c +++ b/gcc/testsuite/gcc.dg/store_merging_29.c @@ -1,7 +1,7 @@ /* PR tree-optimization/88709 */ /* { dg-do run { target int32 } } */ /* { dg-require-effective-target store_merge } */ -/* { dg-options "-O2 -fdump-tree-store-merging-details" } */ +/* { dg-options "-O2 -fno-tree-vectorize -fdump-tree-store-merging-details" } */ /* { dg-final { scan-tree-dump "New sequence of 3 stores to replace old one of 6 stores" "store-merging" { target { le && { ! arm*-*-* } } } } } */ /* { dg-final { scan-tree-dump "New sequence of \[34] stores to replace old one of 6 stores" "store-merging" { target { be && { ! arm*-*-* } } } } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr103861.c b/gcc/testsuite/gcc.target/i386/pr103861.c new file mode 100644 index 0000000..1587176 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr103861.c @@ -0,0 +1,23 @@ +/* PR target/103861 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -dp" } */ + +typedef char __v2qi __attribute__ ((__vector_size__ (2))); + +__v2qi and (__v2qi a, __v2qi b) { return a & b; }; + +__v2qi andn (__v2qi a, __v2qi b) { return a & ~b; }; + +__v2qi or (__v2qi a, __v2qi b) { return a | b; }; + +__v2qi xor (__v2qi a, __v2qi b) { return a ^ b; }; + +__v2qi not (__v2qi a) { return ~a; }; + +__v2qi plus (__v2qi a, __v2qi b) { return a + b; }; + +__v2qi minus (__v2qi a, __v2qi b) { return a - b; }; + +__v2qi neg (__v2qi a) { return -a; }; + +/* { dg-final { scan-assembler-not "insvhi" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr92658-avx512vl.c b/gcc/testsuite/gcc.target/i386/pr92658-avx512vl.c index ae6959e..d712922 100644 --- a/gcc/testsuite/gcc.target/i386/pr92658-avx512vl.c +++ b/gcc/testsuite/gcc.target/i386/pr92658-avx512vl.c @@ -123,7 +123,7 @@ truncdb_128 (v16qi * dst, v4si * __restrict src) /* { dg-final { scan-assembler-times "vpmovqd" 2 } } */ /* { dg-final { scan-assembler-times "vpmovqw" 2 } } */ -/* { dg-final { scan-assembler-times "vpmovqb\[ \t]*%ymm" 1 } } */ -/* { dg-final { scan-assembler-times "vpmovqb\[ \t]*%xmm" 1 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times "vpmovqb\[ \t]*%ymm" 1 } } */ +/* { dg-final { scan-assembler-times "vpmovqb\[ \t]*%xmm" 1 } } */ /* { dg-final { scan-assembler-times "vpmovdw" 2 } } */ /* { dg-final { scan-assembler-times "vpmovdb" 2 } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr92658-sse4-2.c b/gcc/testsuite/gcc.target/i386/pr92658-sse4-2.c index a1cf9e7..4a76a7d 100644 --- a/gcc/testsuite/gcc.target/i386/pr92658-sse4-2.c +++ b/gcc/testsuite/gcc.target/i386/pr92658-sse4-2.c @@ -81,7 +81,7 @@ bar_s8_s64 (v2di * dst, v16qi src) dst[0] = *(v2di *) tem; } -/* { dg-final { scan-assembler-times "pmovsxbq" 2 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times "pmovsxbq" 2 } } */ void foo_s16_s32 (v4si * dst, v8hi * __restrict src) diff --git a/gcc/testsuite/gcc.target/i386/pr92658-sse4.c b/gcc/testsuite/gcc.target/i386/pr92658-sse4.c index 9fd2eee..4f655a3 100644 --- a/gcc/testsuite/gcc.target/i386/pr92658-sse4.c +++ b/gcc/testsuite/gcc.target/i386/pr92658-sse4.c @@ -81,7 +81,7 @@ bar_u8_u64 (v2di * dst, v16qi src) dst[0] = *(v2di *) tem; } -/* { dg-final { scan-assembler-times "pmovzxbq" 2 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times "pmovzxbq" 2 } } */ void foo_u16_u32 (v4si * dst, v8hi * __restrict src) diff --git a/gcc/testsuite/gcc.target/i386/warn-vect-op-2.c b/gcc/testsuite/gcc.target/i386/warn-vect-op-2.c index 15eb961..5e378b6 100644 --- a/gcc/testsuite/gcc.target/i386/warn-vect-op-2.c +++ b/gcc/testsuite/gcc.target/i386/warn-vect-op-2.c @@ -11,8 +11,8 @@ int main (int argc, char *argv[]) argc, 1, 15, 38, 12, -1, argc, 2}; vector (16, signed char) res[] = { - v0 + v1, /* { dg-warning "expanded in parallel" } */ - v0 - v1, /* { dg-warning "expanded in parallel" } */ + v0 + v1, /* { dg-warning "expanded piecewise" } */ + v0 - v1, /* { dg-warning "expanded piecewise" } */ v0 > v1, /* { dg-warning "expanded piecewise" } */ v0 & v1, /* { dg-warning "expanded in parallel" } */ __builtin_shuffle (v0, v1), /* { dg-warning "expanded piecewise" } */ |