diff options
author | H.J. Lu <hjl@gcc.gnu.org> | 2010-11-02 11:08:44 -0700 |
---|---|---|
committer | H.J. Lu <hjl@gcc.gnu.org> | 2010-11-02 11:08:44 -0700 |
commit | cfec3a7c5e9924bbc5c756f8ba94b137f180845d (patch) | |
tree | 59669319f991c638bee0da8835955fbfd3f86fde /gcc | |
parent | 12243af614b0bddb0119a4860fccc27f54e92ed7 (diff) | |
download | gcc-cfec3a7c5e9924bbc5c756f8ba94b137f180845d.zip gcc-cfec3a7c5e9924bbc5c756f8ba94b137f180845d.tar.gz gcc-cfec3a7c5e9924bbc5c756f8ba94b137f180845d.tar.bz2 |
Emit vzerouppers after reload.
gcc/
2010-11-02 Uros Bizjak <ubizjak@gmail.com>
H.J. Lu <hongjiu.lu@intel.com>
* config/i386/i386-protos.h (ix86_split_call_vzeroupper): New.
(ix86_split_call_pop_vzeroupper): Likewise.
* config/i386/i386.c (move_or_delete_vzeroupper_2): Rewrite
the loop.
(ix86_expand_call): Use UNSPEC_CALL_NEEDS_VZEROUPPER.
(ix86_split_call_vzeroupper): New.
(ix86_split_call_pop_vzeroupper): Likewise.
* config/i386/i386.md (UNSPEC_CALL_NEEDS_VZEROUPPER): New.
(*call_pop_0_vzeroupper): Likewise.
(*call_pop_1_vzeroupper): Likewise.
(*sibcall_pop_1_vzeroupper): Likewise.
(*call_0_vzeroupper): Likewise.
(*call_1_vzeroupper): Likewise.
(*sibcall_1_vzeroupper): Likewise.
(*call_1_rex64_vzeroupper): Likewise.
(*call_1_rex64_ms_sysv_vzeroupper): New.
(*call_1_rex64_large_vzeroupper): Likewise.
(*sibcall_1_rex64_vzeroupper): Likewise.
(*call_value_pop_0_vzeroupper): New.
(*call_value_pop_1_vzeroupper): Likewise.
(*sibcall_value_pop_1_vzeroupper): Likewise.
(*call_value_0_vzeroupper): New.
(*call_value_0_rex64_vzeroupper): Use
(*call_value_0_rex64_ms_sysv_vzeroupper): Likewise.
(*call_value_1_vzeroupper): Likewise.
(*sibcall_value_1_vzeroupper): Likewise.
(*call_value_1_rex64_vzeroupper): Likewise.
(*call_value_1_rex64_ms_sysv_vzeroupper): Likewise.
(*call_value_1_rex64_large_vzeroupper): Likewise.
(*sibcall_value_1_rex64_vzeroupper): Likewise.
gcc/testsuite/
2010-11-02 H.J. Lu <hongjiu.lu@intel.com>
* gcc.target/i386/avx-vzeroupper-15.c: New.
* gcc.target/i386/avx-vzeroupper-16.c: Likewise.
* gcc.target/i386/avx-vzeroupper-17.c: Likewise.
* gcc.target/i386/avx-vzeroupper-18.c: Likewise.
PR target/46253
* gcc.target/i386/pr46253.c: New.
From-SVN: r166208
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 36 | ||||
-rw-r--r-- | gcc/config/i386/i386-protos.h | 2 | ||||
-rw-r--r-- | gcc/config/i386/i386.c | 202 | ||||
-rw-r--r-- | gcc/config/i386/i386.md | 339 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 10 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx-vzeroupper-15.c | 16 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx-vzeroupper-16.c | 18 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx-vzeroupper-17.c | 18 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx-vzeroupper-18.c | 18 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr46253.c | 15 |
10 files changed, 563 insertions, 111 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 9c50ffe..a852af3 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,39 @@ +2010-11-02 Uros Bizjak <ubizjak@gmail.com> + H.J. Lu <hongjiu.lu@intel.com> + + * config/i386/i386-protos.h (ix86_split_call_vzeroupper): New. + (ix86_split_call_pop_vzeroupper): Likewise. + + * config/i386/i386.c (move_or_delete_vzeroupper_2): Rewrite + the loop. + (ix86_expand_call): Use UNSPEC_CALL_NEEDS_VZEROUPPER. + (ix86_split_call_vzeroupper): New. + (ix86_split_call_pop_vzeroupper): Likewise. + + * config/i386/i386.md (UNSPEC_CALL_NEEDS_VZEROUPPER): New. + (*call_pop_0_vzeroupper): Likewise. + (*call_pop_1_vzeroupper): Likewise. + (*sibcall_pop_1_vzeroupper): Likewise. + (*call_0_vzeroupper): Likewise. + (*call_1_vzeroupper): Likewise. + (*sibcall_1_vzeroupper): Likewise. + (*call_1_rex64_vzeroupper): Likewise. + (*call_1_rex64_ms_sysv_vzeroupper): New. + (*call_1_rex64_large_vzeroupper): Likewise. + (*sibcall_1_rex64_vzeroupper): Likewise. + (*call_value_pop_0_vzeroupper): New. + (*call_value_pop_1_vzeroupper): Likewise. + (*sibcall_value_pop_1_vzeroupper): Likewise. + (*call_value_0_vzeroupper): New. + (*call_value_0_rex64_vzeroupper): Use + (*call_value_0_rex64_ms_sysv_vzeroupper): Likewise. + (*call_value_1_vzeroupper): Likewise. + (*sibcall_value_1_vzeroupper): Likewise. + (*call_value_1_rex64_vzeroupper): Likewise. + (*call_value_1_rex64_ms_sysv_vzeroupper): Likewise. + (*call_value_1_rex64_large_vzeroupper): Likewise. + (*sibcall_value_1_rex64_vzeroupper): Likewise. + 2010-11-02 Ian Lance Taylor <iant@google.com> PR lto/46273 diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h index 13b1394..c64135b 100644 --- a/gcc/config/i386/i386-protos.h +++ b/gcc/config/i386/i386-protos.h @@ -119,6 +119,8 @@ extern void ix86_expand_sse_unpack (rtx[], bool, bool); extern void ix86_expand_sse4_unpack (rtx[], bool, bool); extern bool ix86_expand_int_addcc (rtx[]); extern rtx ix86_expand_call (rtx, rtx, rtx, rtx, rtx, int); +extern void ix86_split_call_vzeroupper (rtx, rtx); +extern void ix86_split_call_pop_vzeroupper (rtx, rtx); extern void x86_initialize_trampoline (rtx, rtx, rtx); extern rtx ix86_zero_extend_to_Pmode (rtx); extern void ix86_split_long_move (rtx[]); diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index bda8ed3..a5beb83 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -108,163 +108,119 @@ check_avx256_stores (rtx dest, const_rtx set, void *data) static void move_or_delete_vzeroupper_2 (basic_block bb, bool upper_128bits_set) { - rtx curr_insn, next_insn, prev_insn, insn; + rtx insn; + rtx vzeroupper_insn = NULL_RTX; + rtx pat; + int avx256; if (dump_file) fprintf (dump_file, " BB [%i] entry: upper 128bits: %d\n", bb->index, upper_128bits_set); - for (curr_insn = BB_HEAD (bb); - curr_insn && curr_insn != NEXT_INSN (BB_END (bb)); - curr_insn = next_insn) + insn = BB_HEAD (bb); + while (insn != BB_END (bb)) { - int avx256; + insn = NEXT_INSN (insn); - next_insn = NEXT_INSN (curr_insn); - - if (!NONDEBUG_INSN_P (curr_insn)) + if (!NONDEBUG_INSN_P (insn)) continue; - /* Search for vzeroupper. */ - insn = PATTERN (curr_insn); - if (GET_CODE (insn) == UNSPEC_VOLATILE - && XINT (insn, 1) == UNSPECV_VZEROUPPER) + /* Move vzeroupper before jump/call. */ + if (JUMP_P (insn) || CALL_P (insn)) + { + if (!vzeroupper_insn) + continue; + + if (PREV_INSN (insn) != vzeroupper_insn) + { + if (dump_file) + { + fprintf (dump_file, "Move vzeroupper after:\n"); + print_rtl_single (dump_file, PREV_INSN (insn)); + fprintf (dump_file, "before:\n"); + print_rtl_single (dump_file, insn); + } + reorder_insns_nobb (vzeroupper_insn, vzeroupper_insn, + PREV_INSN (insn)); + } + vzeroupper_insn = NULL_RTX; + continue; + } + + pat = PATTERN (insn); + + /* Check insn for vzeroupper intrinsic. */ + if (GET_CODE (pat) == UNSPEC_VOLATILE + && XINT (pat, 1) == UNSPECV_VZEROUPPER) { - /* Found vzeroupper. */ if (dump_file) { + /* Found vzeroupper intrinsic. */ fprintf (dump_file, "Found vzeroupper:\n"); - print_rtl_single (dump_file, curr_insn); + print_rtl_single (dump_file, insn); } } else { - /* Check vzeroall intrinsic. */ - if (GET_CODE (insn) == PARALLEL - && GET_CODE (XVECEXP (insn, 0, 0)) == UNSPEC_VOLATILE - && XINT (XVECEXP (insn, 0, 0), 1) == UNSPECV_VZEROALL) - upper_128bits_set = false; - else if (!upper_128bits_set) + /* Check insn for vzeroall intrinsic. */ + if (GET_CODE (pat) == PARALLEL + && GET_CODE (XVECEXP (pat, 0, 0)) == UNSPEC_VOLATILE + && XINT (XVECEXP (pat, 0, 0), 1) == UNSPECV_VZEROALL) { - /* Check if upper 128bits of AVX registers are used. */ - note_stores (insn, check_avx256_stores, - &upper_128bits_set); + upper_128bits_set = false; + + /* Delete pending vzeroupper insertion. */ + if (vzeroupper_insn) + { + delete_insn (vzeroupper_insn); + vzeroupper_insn = NULL_RTX; + } } + else if (!upper_128bits_set) + note_stores (pat, check_avx256_stores, &upper_128bits_set); continue; } - avx256 = INTVAL (XVECEXP (insn, 0, 0)); + /* Process vzeroupper intrinsic. */ + avx256 = INTVAL (XVECEXP (pat, 0, 0)); if (!upper_128bits_set) { /* Since the upper 128bits are cleared, callee must not pass 256bit AVX register. We only need to check if callee returns 256bit AVX register. */ - upper_128bits_set = avx256 == callee_return_avx256; + upper_128bits_set = (avx256 == callee_return_avx256); - /* Remove unnecessary vzeroupper since upper 128bits are - cleared. */ + /* Remove unnecessary vzeroupper since + upper 128bits are cleared. */ if (dump_file) { fprintf (dump_file, "Delete redundant vzeroupper:\n"); - print_rtl_single (dump_file, curr_insn); + print_rtl_single (dump_file, insn); } - delete_insn (curr_insn); - continue; + delete_insn (insn); } else if (avx256 == callee_return_pass_avx256 || avx256 == callee_pass_avx256) { /* Callee passes 256bit AVX register. Check if callee returns 256bit AVX register. */ - upper_128bits_set = avx256 == callee_return_pass_avx256; + upper_128bits_set = (avx256 == callee_return_pass_avx256); - /* Must remove vzeroupper since callee passes 256bit AVX - register. */ + /* Must remove vzeroupper since + callee passes in 256bit AVX register. */ if (dump_file) { fprintf (dump_file, "Delete callee pass vzeroupper:\n"); - print_rtl_single (dump_file, curr_insn); - } - delete_insn (curr_insn); - continue; - } - - /* Find the jump after vzeroupper. */ - prev_insn = curr_insn; - if (avx256 == vzeroupper_intrinsic) - { - /* For vzeroupper intrinsic, check if there is another - vzeroupper. */ - insn = NEXT_INSN (curr_insn); - while (insn) - { - if (NONJUMP_INSN_P (insn) - && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE - && XINT (PATTERN (insn), 1) == UNSPECV_VZEROUPPER) - { - if (dump_file) - { - fprintf (dump_file, - "Delete redundant vzeroupper intrinsic:\n"); - print_rtl_single (dump_file, curr_insn); - } - delete_insn (curr_insn); - insn = NULL; - continue; - } - - if (JUMP_P (insn) || CALL_P (insn)) - break; - prev_insn = insn; - insn = NEXT_INSN (insn); - if (insn == NEXT_INSN (BB_END (bb))) - break; + print_rtl_single (dump_file, insn); } - - /* Continue if redundant vzeroupper intrinsic is deleted. */ - if (!insn) - continue; + delete_insn (insn); } else { - /* Find the next jump/call. */ - insn = NEXT_INSN (curr_insn); - while (insn) - { - if (JUMP_P (insn) || CALL_P (insn)) - break; - prev_insn = insn; - insn = NEXT_INSN (insn); - if (insn == NEXT_INSN (BB_END (bb))) - break; - } - - if (!insn) - gcc_unreachable(); + upper_128bits_set = false; + vzeroupper_insn = insn; } - - /* Keep vzeroupper. */ - upper_128bits_set = false; - - /* Also allow label as the next instruction. */ - if (insn == NEXT_INSN (BB_END (bb)) && !LABEL_P (insn)) - gcc_unreachable(); - - /* Move vzeroupper before jump/call if neeeded. */ - if (curr_insn != prev_insn) - { - reorder_insns_nobb (curr_insn, curr_insn, prev_insn); - if (dump_file) - { - fprintf (dump_file, "Move vzeroupper after:\n"); - print_rtl_single (dump_file, prev_insn); - fprintf (dump_file, "before:\n"); - print_rtl_single (dump_file, insn); - } - } - - next_insn = NEXT_INSN (insn); } BLOCK_INFO (bb)->upper_128bits_set = upper_128bits_set; @@ -21565,10 +21521,12 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1, + 2, vec)); } - /* Emit vzeroupper if needed. */ + /* Add UNSPEC_CALL_NEEDS_VZEROUPPER decoration. */ if (TARGET_VZEROUPPER && cfun->machine->use_avx256_p) { + rtx unspec; int avx256; + cfun->machine->use_vzeroupper_p = 1; if (cfun->machine->callee_pass_avx256_p) { @@ -21581,7 +21539,11 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1, avx256 = callee_return_avx256; else avx256 = call_no_avx256; - emit_insn (gen_avx_vzeroupper (GEN_INT (avx256))); + + unspec = gen_rtx_UNSPEC (VOIDmode, + gen_rtvec (1, GEN_INT (avx256)), + UNSPEC_CALL_NEEDS_VZEROUPPER); + call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, unspec)); } call = emit_call_insn (call); @@ -21591,6 +21553,24 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1, return call; } +void +ix86_split_call_vzeroupper (rtx insn, rtx vzeroupper) +{ + rtx call = XVECEXP (PATTERN (insn), 0, 0); + emit_insn (gen_avx_vzeroupper (vzeroupper)); + emit_call_insn (call); +} + +void +ix86_split_call_pop_vzeroupper (rtx insn, rtx vzeroupper) +{ + rtx call = XVECEXP (PATTERN (insn), 0, 0); + rtx pop = XVECEXP (PATTERN (insn), 0, 1); + emit_insn (gen_avx_vzeroupper (vzeroupper)); + emit_call_insn (gen_rtx_PARALLEL (VOIDmode, + gen_rtvec (2, call, pop))); +} + /* Output the assembly for a call instruction. */ const char * diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index feaf781..278bd77 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -105,6 +105,7 @@ UNSPEC_LD_MPIC ; load_macho_picbase UNSPEC_TRUNC_NOOP UNSPEC_DIV_ALREADY_SPLIT + UNSPEC_CALL_NEEDS_VZEROUPPER ;; For SSE/MMX support: UNSPEC_FIX_NOTRUNC @@ -11260,6 +11261,21 @@ DONE; }) +(define_insn_and_split "*call_pop_0_vzeroupper" + [(call (mem:QI (match_operand:SI 0 "constant_call_address_operand" "")) + (match_operand:SI 1 "" "")) + (set (reg:SI SP_REG) + (plus:SI (reg:SI SP_REG) + (match_operand:SI 2 "immediate_operand" ""))) + (unspec [(match_operand 3 "const_int_operand" "")] + UNSPEC_CALL_NEEDS_VZEROUPPER)] + "TARGET_VZEROUPPER && !TARGET_64BIT" + "#" + "&& reload_completed" + [(const_int 0)] + "ix86_split_call_pop_vzeroupper (curr_insn, operands[3]); DONE;" + [(set_attr "type" "call")]) + (define_insn "*call_pop_0" [(call (mem:QI (match_operand:SI 0 "constant_call_address_operand" "")) (match_operand:SI 1 "" "")) @@ -11275,6 +11291,21 @@ } [(set_attr "type" "call")]) +(define_insn_and_split "*call_pop_1_vzeroupper" + [(call (mem:QI (match_operand:SI 0 "call_insn_operand" "lsm")) + (match_operand:SI 1 "" "")) + (set (reg:SI SP_REG) + (plus:SI (reg:SI SP_REG) + (match_operand:SI 2 "immediate_operand" "i"))) + (unspec [(match_operand 3 "const_int_operand" "")] + UNSPEC_CALL_NEEDS_VZEROUPPER)] + "TARGET_VZEROUPPER && !TARGET_64BIT && !SIBLING_CALL_P (insn)" + "#" + "&& reload_completed" + [(const_int 0)] + "ix86_split_call_pop_vzeroupper (curr_insn, operands[3]); DONE;" + [(set_attr "type" "call")]) + (define_insn "*call_pop_1" [(call (mem:QI (match_operand:SI 0 "call_insn_operand" "lsm")) (match_operand:SI 1 "" "")) @@ -11289,6 +11320,21 @@ } [(set_attr "type" "call")]) +(define_insn_and_split "*sibcall_pop_1_vzeroupper" + [(call (mem:QI (match_operand:SI 0 "sibcall_insn_operand" "s,U")) + (match_operand:SI 1 "" "")) + (set (reg:SI SP_REG) + (plus:SI (reg:SI SP_REG) + (match_operand:SI 2 "immediate_operand" "i,i"))) + (unspec [(match_operand 3 "const_int_operand" "")] + UNSPEC_CALL_NEEDS_VZEROUPPER)] + "TARGET_VZEROUPPER && !TARGET_64BIT && SIBLING_CALL_P (insn)" + "#" + "&& reload_completed" + [(const_int 0)] + "ix86_split_call_pop_vzeroupper (curr_insn, operands[3]); DONE;" + [(set_attr "type" "call")]) + (define_insn "*sibcall_pop_1" [(call (mem:QI (match_operand:SI 0 "sibcall_insn_operand" "s,U")) (match_operand:SI 1 "" "")) @@ -11321,6 +11367,18 @@ DONE; }) +(define_insn_and_split "*call_0_vzeroupper" + [(call (mem:QI (match_operand 0 "constant_call_address_operand" "")) + (match_operand 1 "" "")) + (unspec [(match_operand 2 "const_int_operand" "")] + UNSPEC_CALL_NEEDS_VZEROUPPER)] + "TARGET_VZEROUPPER" + "#" + "&& reload_completed" + [(const_int 0)] + "ix86_split_call_vzeroupper (curr_insn, operands[2]); DONE;" + [(set_attr "type" "call")]) + (define_insn "*call_0" [(call (mem:QI (match_operand 0 "constant_call_address_operand" "")) (match_operand 1 "" ""))] @@ -11328,6 +11386,18 @@ { return ix86_output_call_insn (insn, operands[0], 0); } [(set_attr "type" "call")]) +(define_insn_and_split "*call_1_vzeroupper" + [(call (mem:QI (match_operand:SI 0 "call_insn_operand" "lsm")) + (match_operand 1 "" "")) + (unspec [(match_operand 2 "const_int_operand" "")] + UNSPEC_CALL_NEEDS_VZEROUPPER)] + "TARGET_VZEROUPPER && !TARGET_64BIT && !SIBLING_CALL_P (insn)" + "#" + "&& reload_completed" + [(const_int 0)] + "ix86_split_call_vzeroupper (curr_insn, operands[2]); DONE;" + [(set_attr "type" "call")]) + (define_insn "*call_1" [(call (mem:QI (match_operand:SI 0 "call_insn_operand" "lsm")) (match_operand 1 "" ""))] @@ -11335,6 +11405,18 @@ { return ix86_output_call_insn (insn, operands[0], 0); } [(set_attr "type" "call")]) +(define_insn_and_split "*sibcall_1_vzeroupper" + [(call (mem:QI (match_operand:SI 0 "sibcall_insn_operand" "s,U")) + (match_operand 1 "" "")) + (unspec [(match_operand 2 "const_int_operand" "")] + UNSPEC_CALL_NEEDS_VZEROUPPER)] + "TARGET_VZEROUPPER && !TARGET_64BIT && SIBLING_CALL_P (insn)" + "#" + "&& reload_completed" + [(const_int 0)] + "ix86_split_call_vzeroupper (curr_insn, operands[2]); DONE;" + [(set_attr "type" "call")]) + (define_insn "*sibcall_1" [(call (mem:QI (match_operand:SI 0 "sibcall_insn_operand" "s,U")) (match_operand 1 "" ""))] @@ -11342,6 +11424,19 @@ { return ix86_output_call_insn (insn, operands[0], 0); } [(set_attr "type" "call")]) +(define_insn_and_split "*call_1_rex64_vzeroupper" + [(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rsm")) + (match_operand 1 "" "")) + (unspec [(match_operand 2 "const_int_operand" "")] + UNSPEC_CALL_NEEDS_VZEROUPPER)] + "TARGET_VZEROUPPER && TARGET_64BIT && !SIBLING_CALL_P (insn) + && ix86_cmodel != CM_LARGE && ix86_cmodel != CM_LARGE_PIC" + "#" + "&& reload_completed" + [(const_int 0)] + "ix86_split_call_vzeroupper (curr_insn, operands[2]); DONE;" + [(set_attr "type" "call")]) + (define_insn "*call_1_rex64" [(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rsm")) (match_operand 1 "" ""))] @@ -11350,6 +11445,32 @@ { return ix86_output_call_insn (insn, operands[0], 0); } [(set_attr "type" "call")]) +(define_insn_and_split "*call_1_rex64_ms_sysv_vzeroupper" + [(parallel + [(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rsm")) + (match_operand 1 "" "")) + (unspec [(const_int 0)] UNSPEC_MS_TO_SYSV_CALL) + (clobber (reg:TI XMM6_REG)) + (clobber (reg:TI XMM7_REG)) + (clobber (reg:TI XMM8_REG)) + (clobber (reg:TI XMM9_REG)) + (clobber (reg:TI XMM10_REG)) + (clobber (reg:TI XMM11_REG)) + (clobber (reg:TI XMM12_REG)) + (clobber (reg:TI XMM13_REG)) + (clobber (reg:TI XMM14_REG)) + (clobber (reg:TI XMM15_REG)) + (clobber (reg:DI SI_REG)) + (clobber (reg:DI DI_REG))]) + (unspec [(match_operand 2 "const_int_operand" "")] + UNSPEC_CALL_NEEDS_VZEROUPPER)] + "TARGET_VZEROUPPER && TARGET_64BIT && !SIBLING_CALL_P (insn)" + "#" + "&& reload_completed" + [(const_int 0)] + "ix86_split_call_vzeroupper (curr_insn, operands[2]); DONE;" + [(set_attr "type" "call")]) + (define_insn "*call_1_rex64_ms_sysv" [(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rsm")) (match_operand 1 "" "")) @@ -11370,6 +11491,18 @@ { return ix86_output_call_insn (insn, operands[0], 0); } [(set_attr "type" "call")]) +(define_insn_and_split "*call_1_rex64_large_vzeroupper" + [(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rm")) + (match_operand 1 "" "")) + (unspec [(match_operand 2 "const_int_operand" "")] + UNSPEC_CALL_NEEDS_VZEROUPPER)] + "TARGET_VZEROUPPER && TARGET_64BIT && !SIBLING_CALL_P (insn)" + "#" + "&& reload_completed" + [(const_int 0)] + "ix86_split_call_vzeroupper (curr_insn, operands[2]); DONE;" + [(set_attr "type" "call")]) + (define_insn "*call_1_rex64_large" [(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rm")) (match_operand 1 "" ""))] @@ -11377,6 +11510,18 @@ { return ix86_output_call_insn (insn, operands[0], 0); } [(set_attr "type" "call")]) +(define_insn_and_split "*sibcall_1_rex64_vzeroupper" + [(call (mem:QI (match_operand:DI 0 "sibcall_insn_operand" "s,U")) + (match_operand 1 "" "")) + (unspec [(match_operand 2 "const_int_operand" "")] + UNSPEC_CALL_NEEDS_VZEROUPPER)] + "TARGET_VZEROUPPER && TARGET_64BIT && !SIBLING_CALL_P (insn)" + "#" + "&& reload_completed" + [(const_int 0)] + "ix86_split_call_vzeroupper (curr_insn, operands[2]); DONE;" + [(set_attr "type" "call")]) + (define_insn "*sibcall_1_rex64" [(call (mem:QI (match_operand:DI 0 "sibcall_insn_operand" "s,U")) (match_operand 1 "" ""))] @@ -17123,6 +17268,22 @@ ;; Call-value patterns last so that the wildcard operand does not ;; disrupt insn-recog's switch tables. +(define_insn_and_split "*call_value_pop_0_vzeroupper" + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:SI 1 "constant_call_address_operand" "")) + (match_operand:SI 2 "" ""))) + (set (reg:SI SP_REG) + (plus:SI (reg:SI SP_REG) + (match_operand:SI 3 "immediate_operand" ""))) + (unspec [(match_operand 4 "const_int_operand" "")] + UNSPEC_CALL_NEEDS_VZEROUPPER)] + "TARGET_VZEROUPPER && !TARGET_64BIT" + "#" + "&& reload_completed" + [(const_int 0)] + "ix86_split_call_pop_vzeroupper (curr_insn, operands[4]); DONE;" + [(set_attr "type" "callv")]) + (define_insn "*call_value_pop_0" [(set (match_operand 0 "" "") (call (mem:QI (match_operand:SI 1 "constant_call_address_operand" "")) @@ -17134,6 +17295,22 @@ { return ix86_output_call_insn (insn, operands[1], 1); } [(set_attr "type" "callv")]) +(define_insn_and_split "*call_value_pop_1_vzeroupper" + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:SI 1 "call_insn_operand" "lsm")) + (match_operand:SI 2 "" ""))) + (set (reg:SI SP_REG) + (plus:SI (reg:SI SP_REG) + (match_operand:SI 3 "immediate_operand" "i"))) + (unspec [(match_operand 4 "const_int_operand" "")] + UNSPEC_CALL_NEEDS_VZEROUPPER)] + "TARGET_VZEROUPPER && !TARGET_64BIT && !SIBLING_CALL_P (insn)" + "#" + "&& reload_completed" + [(const_int 0)] + "ix86_split_call_pop_vzeroupper (curr_insn, operands[4]); DONE;" + [(set_attr "type" "callv")]) + (define_insn "*call_value_pop_1" [(set (match_operand 0 "" "") (call (mem:QI (match_operand:SI 1 "call_insn_operand" "lsm")) @@ -17145,6 +17322,22 @@ { return ix86_output_call_insn (insn, operands[1], 1); } [(set_attr "type" "callv")]) +(define_insn_and_split "*sibcall_value_pop_1_vzeroupper" + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:SI 1 "sibcall_insn_operand" "s,U")) + (match_operand:SI 2 "" ""))) + (set (reg:SI SP_REG) + (plus:SI (reg:SI SP_REG) + (match_operand:SI 3 "immediate_operand" "i,i"))) + (unspec [(match_operand 4 "const_int_operand" "")] + UNSPEC_CALL_NEEDS_VZEROUPPER)] + "TARGET_VZEROUPPER && !TARGET_64BIT && SIBLING_CALL_P (insn)" + "#" + "&& reload_completed" + [(const_int 0)] + "ix86_split_call_pop_vzeroupper (curr_insn, operands[4]); DONE;" + [(set_attr "type" "callv")]) + (define_insn "*sibcall_value_pop_1" [(set (match_operand 0 "" "") (call (mem:QI (match_operand:SI 1 "sibcall_insn_operand" "s,U")) @@ -17156,6 +17349,19 @@ { return ix86_output_call_insn (insn, operands[1], 1); } [(set_attr "type" "callv")]) +(define_insn_and_split "*call_value_0_vzeroupper" + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:SI 1 "constant_call_address_operand" "")) + (match_operand:SI 2 "" ""))) + (unspec [(match_operand 3 "const_int_operand" "")] + UNSPEC_CALL_NEEDS_VZEROUPPER)] + "TARGET_VZEROUPPER && !TARGET_64BIT" + "#" + "&& reload_completed" + [(const_int 0)] + "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;" + [(set_attr "type" "callv")]) + (define_insn "*call_value_0" [(set (match_operand 0 "" "") (call (mem:QI (match_operand:SI 1 "constant_call_address_operand" "")) @@ -17164,6 +17370,19 @@ { return ix86_output_call_insn (insn, operands[1], 1); } [(set_attr "type" "callv")]) +(define_insn_and_split "*call_value_0_rex64_vzeroupper" + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:DI 1 "constant_call_address_operand" "")) + (match_operand:DI 2 "const_int_operand" ""))) + (unspec [(match_operand 3 "const_int_operand" "")] + UNSPEC_CALL_NEEDS_VZEROUPPER)] + "TARGET_VZEROUPPER && TARGET_64BIT" + "#" + "&& reload_completed" + [(const_int 0)] + "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;" + [(set_attr "type" "callv")]) + (define_insn "*call_value_0_rex64" [(set (match_operand 0 "" "") (call (mem:QI (match_operand:DI 1 "constant_call_address_operand" "")) @@ -17172,6 +17391,33 @@ { return ix86_output_call_insn (insn, operands[1], 1); } [(set_attr "type" "callv")]) +(define_insn_and_split "*call_value_0_rex64_ms_sysv_vzeroupper" + [(parallel + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:DI 1 "constant_call_address_operand" "")) + (match_operand:DI 2 "const_int_operand" ""))) + (unspec [(const_int 0)] UNSPEC_MS_TO_SYSV_CALL) + (clobber (reg:TI XMM6_REG)) + (clobber (reg:TI XMM7_REG)) + (clobber (reg:TI XMM8_REG)) + (clobber (reg:TI XMM9_REG)) + (clobber (reg:TI XMM10_REG)) + (clobber (reg:TI XMM11_REG)) + (clobber (reg:TI XMM12_REG)) + (clobber (reg:TI XMM13_REG)) + (clobber (reg:TI XMM14_REG)) + (clobber (reg:TI XMM15_REG)) + (clobber (reg:DI SI_REG)) + (clobber (reg:DI DI_REG))]) + (unspec [(match_operand 3 "const_int_operand" "")] + UNSPEC_CALL_NEEDS_VZEROUPPER)] + "TARGET_VZEROUPPER && TARGET_64BIT && !SIBLING_CALL_P (insn)" + "#" + "&& reload_completed" + [(const_int 0)] + "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;" + [(set_attr "type" "callv")]) + (define_insn "*call_value_0_rex64_ms_sysv" [(set (match_operand 0 "" "") (call (mem:QI (match_operand:DI 1 "constant_call_address_operand" "")) @@ -17193,6 +17439,19 @@ { return ix86_output_call_insn (insn, operands[1], 1); } [(set_attr "type" "callv")]) +(define_insn_and_split "*call_value_1_vzeroupper" + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:SI 1 "call_insn_operand" "lsm")) + (match_operand:SI 2 "" ""))) + (unspec [(match_operand 3 "const_int_operand" "")] + UNSPEC_CALL_NEEDS_VZEROUPPER)] + "TARGET_VZEROUPPER && !TARGET_64BIT && !SIBLING_CALL_P (insn)" + "#" + "&& reload_completed" + [(const_int 0)] + "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;" + [(set_attr "type" "callv")]) + (define_insn "*call_value_1" [(set (match_operand 0 "" "") (call (mem:QI (match_operand:SI 1 "call_insn_operand" "lsm")) @@ -17201,6 +17460,19 @@ { return ix86_output_call_insn (insn, operands[1], 1); } [(set_attr "type" "callv")]) +(define_insn_and_split "*sibcall_value_1_vzeroupper" + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:SI 1 "sibcall_insn_operand" "s,U")) + (match_operand:SI 2 "" ""))) + (unspec [(match_operand 3 "const_int_operand" "")] + UNSPEC_CALL_NEEDS_VZEROUPPER)] + "TARGET_VZEROUPPER && !TARGET_64BIT && SIBLING_CALL_P (insn)" + "#" + "&& reload_completed" + [(const_int 0)] + "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;" + [(set_attr "type" "callv")]) + (define_insn "*sibcall_value_1" [(set (match_operand 0 "" "") (call (mem:QI (match_operand:SI 1 "sibcall_insn_operand" "s,U")) @@ -17209,6 +17481,20 @@ { return ix86_output_call_insn (insn, operands[1], 1); } [(set_attr "type" "callv")]) +(define_insn_and_split "*call_value_1_rex64_vzeroupper" + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:DI 1 "call_insn_operand" "rsm")) + (match_operand:DI 2 "" ""))) + (unspec [(match_operand 3 "const_int_operand" "")] + UNSPEC_CALL_NEEDS_VZEROUPPER)] + "TARGET_VZEROUPPER && TARGET_64BIT && !SIBLING_CALL_P (insn) + && ix86_cmodel != CM_LARGE && ix86_cmodel != CM_LARGE_PIC" + "#" + "&& reload_completed" + [(const_int 0)] + "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;" + [(set_attr "type" "callv")]) + (define_insn "*call_value_1_rex64" [(set (match_operand 0 "" "") (call (mem:QI (match_operand:DI 1 "call_insn_operand" "rsm")) @@ -17218,6 +17504,33 @@ { return ix86_output_call_insn (insn, operands[1], 1); } [(set_attr "type" "callv")]) +(define_insn_and_split "*call_value_1_rex64_ms_sysv_vzeroupper" + [(parallel + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:DI 1 "call_insn_operand" "rsm")) + (match_operand:DI 2 "" ""))) + (unspec [(const_int 0)] UNSPEC_MS_TO_SYSV_CALL) + (clobber (reg:TI XMM6_REG)) + (clobber (reg:TI XMM7_REG)) + (clobber (reg:TI XMM8_REG)) + (clobber (reg:TI XMM9_REG)) + (clobber (reg:TI XMM10_REG)) + (clobber (reg:TI XMM11_REG)) + (clobber (reg:TI XMM12_REG)) + (clobber (reg:TI XMM13_REG)) + (clobber (reg:TI XMM14_REG)) + (clobber (reg:TI XMM15_REG)) + (clobber (reg:DI SI_REG)) + (clobber (reg:DI DI_REG))]) + (unspec [(match_operand 3 "const_int_operand" "")] + UNSPEC_CALL_NEEDS_VZEROUPPER)] + "TARGET_VZEROUPPER && TARGET_64BIT && !SIBLING_CALL_P (insn)" + "#" + "&& reload_completed" + [(const_int 0)] + "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;" + [(set_attr "type" "callv")]) + (define_insn "*call_value_1_rex64_ms_sysv" [(set (match_operand 0 "" "") (call (mem:QI (match_operand:DI 1 "call_insn_operand" "rsm")) @@ -17239,6 +17552,19 @@ { return ix86_output_call_insn (insn, operands[1], 1); } [(set_attr "type" "callv")]) +(define_insn_and_split "*call_value_1_rex64_large_vzeroupper" + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:DI 1 "call_insn_operand" "rm")) + (match_operand:DI 2 "" ""))) + (unspec [(match_operand 3 "const_int_operand" "")] + UNSPEC_CALL_NEEDS_VZEROUPPER)] + "TARGET_VZEROUPPER && TARGET_64BIT && !SIBLING_CALL_P (insn)" + "#" + "&& reload_completed" + [(const_int 0)] + "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;" + [(set_attr "type" "callv")]) + (define_insn "*call_value_1_rex64_large" [(set (match_operand 0 "" "") (call (mem:QI (match_operand:DI 1 "call_insn_operand" "rm")) @@ -17247,6 +17573,19 @@ { return ix86_output_call_insn (insn, operands[1], 1); } [(set_attr "type" "callv")]) +(define_insn_and_split "*sibcall_value_1_rex64_vzeroupper" + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:DI 1 "sibcall_insn_operand" "s,U")) + (match_operand:DI 2 "" ""))) + (unspec [(match_operand 3 "const_int_operand" "")] + UNSPEC_CALL_NEEDS_VZEROUPPER)] + "TARGET_VZEROUPPER && TARGET_64BIT && SIBLING_CALL_P (insn)" + "#" + "&& reload_completed" + [(const_int 0)] + "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;" + [(set_attr "type" "callv")]) + (define_insn "*sibcall_value_1_rex64" [(set (match_operand 0 "" "") (call (mem:QI (match_operand:DI 1 "sibcall_insn_operand" "s,U")) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 2fcf2b2..c484117 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,13 @@ +2010-11-02 H.J. Lu <hongjiu.lu@intel.com> + + * gcc.target/i386/avx-vzeroupper-15.c: New. + * gcc.target/i386/avx-vzeroupper-16.c: Likewise. + * gcc.target/i386/avx-vzeroupper-17.c: Likewise. + * gcc.target/i386/avx-vzeroupper-18.c: Likewise. + + PR target/46253 + * gcc.target/i386/pr46253.c: New. + 2010-11-02 Steven G. Kargl < kargl@gcc.gnu.org> Tobias Burnus <burnus@net-b.de> diff --git a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-15.c b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-15.c new file mode 100644 index 0000000..134a3dd --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-15.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-O0 -mavx -mtune=generic -dp" } */ + +#include <immintrin.h> + +extern __m256 x, y; +extern void (*bar) (void); + +void +foo () +{ + x = y; + bar (); +} + +/* { dg-final { scan-assembler-times "avx_vzeroupper" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-16.c b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-16.c new file mode 100644 index 0000000..3fb099d --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-16.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-options "-O0 -mavx -mabi=ms -mtune=generic -dp" } */ + +typedef float __m256 __attribute__ ((__vector_size__ (32), __may_alias__)); + +extern __m256 x; + +extern __m256 __attribute__ ((sysv_abi)) bar (__m256); + +void +foo (void) +{ + bar (x); +} + +/* { dg-final { scan-assembler-times "avx_vzeroupper" 1 } } */ +/* { dg-final { scan-assembler-times "\\*call_value_0_rex64_ms_sysv" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-17.c b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-17.c new file mode 100644 index 0000000..2f3cfd2 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-17.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-options "-O0 -mavx -mabi=ms -mtune=generic -dp" } */ + +typedef float __m256 __attribute__ ((__vector_size__ (32), __may_alias__)); + +extern __m256 x; + +extern __m256 __attribute__ ((sysv_abi)) (*bar) (__m256); + +void +foo (void) +{ + bar (x); +} + +/* { dg-final { scan-assembler-times "avx_vzeroupper" 1 } } */ +/* { dg-final { scan-assembler-times "\\*call_value_1_rex64_ms_sysv" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-18.c b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-18.c new file mode 100644 index 0000000..541f77d --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-18.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-options "-O0 -mavx -mabi=ms -mtune=generic -dp" } */ + +typedef float __m256 __attribute__ ((__vector_size__ (32), __may_alias__)); + +extern __m256 x; + +extern void __attribute__ ((sysv_abi)) bar (__m256); + +void +foo (void) +{ + bar (x); +} + +/* { dg-final { scan-assembler-not "avx_vzeroupper" } } */ +/* { dg-final { scan-assembler-times "\\*call_1_rex64_ms_sysv" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr46253.c b/gcc/testsuite/gcc.target/i386/pr46253.c new file mode 100644 index 0000000..406790a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr46253.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-O -g -mf16c -mtune=generic -dp" } */ + +typedef __m256i __attribute__ ((__vector_size__ (32))); + +__m256i bar (void); +void foo (void) +{ + int i = 0; + bar (); + __builtin_ia32_vzeroupper (); + while (++i); +} + +/* { dg-final { scan-assembler-times "avx_vzeroupper" 1 } } */ |