aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorH.J. Lu <hjl@gcc.gnu.org>2010-11-02 11:08:44 -0700
committerH.J. Lu <hjl@gcc.gnu.org>2010-11-02 11:08:44 -0700
commitcfec3a7c5e9924bbc5c756f8ba94b137f180845d (patch)
tree59669319f991c638bee0da8835955fbfd3f86fde
parent12243af614b0bddb0119a4860fccc27f54e92ed7 (diff)
downloadgcc-cfec3a7c5e9924bbc5c756f8ba94b137f180845d.zip
gcc-cfec3a7c5e9924bbc5c756f8ba94b137f180845d.tar.gz
gcc-cfec3a7c5e9924bbc5c756f8ba94b137f180845d.tar.bz2
Emit vzerouppers after reload.
gcc/ 2010-11-02 Uros Bizjak <ubizjak@gmail.com> H.J. Lu <hongjiu.lu@intel.com> * config/i386/i386-protos.h (ix86_split_call_vzeroupper): New. (ix86_split_call_pop_vzeroupper): Likewise. * config/i386/i386.c (move_or_delete_vzeroupper_2): Rewrite the loop. (ix86_expand_call): Use UNSPEC_CALL_NEEDS_VZEROUPPER. (ix86_split_call_vzeroupper): New. (ix86_split_call_pop_vzeroupper): Likewise. * config/i386/i386.md (UNSPEC_CALL_NEEDS_VZEROUPPER): New. (*call_pop_0_vzeroupper): Likewise. (*call_pop_1_vzeroupper): Likewise. (*sibcall_pop_1_vzeroupper): Likewise. (*call_0_vzeroupper): Likewise. (*call_1_vzeroupper): Likewise. (*sibcall_1_vzeroupper): Likewise. (*call_1_rex64_vzeroupper): Likewise. (*call_1_rex64_ms_sysv_vzeroupper): New. (*call_1_rex64_large_vzeroupper): Likewise. (*sibcall_1_rex64_vzeroupper): Likewise. (*call_value_pop_0_vzeroupper): New. (*call_value_pop_1_vzeroupper): Likewise. (*sibcall_value_pop_1_vzeroupper): Likewise. (*call_value_0_vzeroupper): New. (*call_value_0_rex64_vzeroupper): Use (*call_value_0_rex64_ms_sysv_vzeroupper): Likewise. (*call_value_1_vzeroupper): Likewise. (*sibcall_value_1_vzeroupper): Likewise. (*call_value_1_rex64_vzeroupper): Likewise. (*call_value_1_rex64_ms_sysv_vzeroupper): Likewise. (*call_value_1_rex64_large_vzeroupper): Likewise. (*sibcall_value_1_rex64_vzeroupper): Likewise. gcc/testsuite/ 2010-11-02 H.J. Lu <hongjiu.lu@intel.com> * gcc.target/i386/avx-vzeroupper-15.c: New. * gcc.target/i386/avx-vzeroupper-16.c: Likewise. * gcc.target/i386/avx-vzeroupper-17.c: Likewise. * gcc.target/i386/avx-vzeroupper-18.c: Likewise. PR target/46253 * gcc.target/i386/pr46253.c: New. From-SVN: r166208
-rw-r--r--gcc/ChangeLog36
-rw-r--r--gcc/config/i386/i386-protos.h2
-rw-r--r--gcc/config/i386/i386.c202
-rw-r--r--gcc/config/i386/i386.md339
-rw-r--r--gcc/testsuite/ChangeLog10
-rw-r--r--gcc/testsuite/gcc.target/i386/avx-vzeroupper-15.c16
-rw-r--r--gcc/testsuite/gcc.target/i386/avx-vzeroupper-16.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx-vzeroupper-17.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx-vzeroupper-18.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/pr46253.c15
10 files changed, 563 insertions, 111 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 9c50ffe..a852af3 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,39 @@
+2010-11-02 Uros Bizjak <ubizjak@gmail.com>
+ H.J. Lu <hongjiu.lu@intel.com>
+
+ * config/i386/i386-protos.h (ix86_split_call_vzeroupper): New.
+ (ix86_split_call_pop_vzeroupper): Likewise.
+
+ * config/i386/i386.c (move_or_delete_vzeroupper_2): Rewrite
+ the loop.
+ (ix86_expand_call): Use UNSPEC_CALL_NEEDS_VZEROUPPER.
+ (ix86_split_call_vzeroupper): New.
+ (ix86_split_call_pop_vzeroupper): Likewise.
+
+ * config/i386/i386.md (UNSPEC_CALL_NEEDS_VZEROUPPER): New.
+ (*call_pop_0_vzeroupper): Likewise.
+ (*call_pop_1_vzeroupper): Likewise.
+ (*sibcall_pop_1_vzeroupper): Likewise.
+ (*call_0_vzeroupper): Likewise.
+ (*call_1_vzeroupper): Likewise.
+ (*sibcall_1_vzeroupper): Likewise.
+ (*call_1_rex64_vzeroupper): Likewise.
+ (*call_1_rex64_ms_sysv_vzeroupper): New.
+ (*call_1_rex64_large_vzeroupper): Likewise.
+ (*sibcall_1_rex64_vzeroupper): Likewise.
+ (*call_value_pop_0_vzeroupper): New.
+ (*call_value_pop_1_vzeroupper): Likewise.
+ (*sibcall_value_pop_1_vzeroupper): Likewise.
+ (*call_value_0_vzeroupper): New.
+ (*call_value_0_rex64_vzeroupper): Use
+ (*call_value_0_rex64_ms_sysv_vzeroupper): Likewise.
+ (*call_value_1_vzeroupper): Likewise.
+ (*sibcall_value_1_vzeroupper): Likewise.
+ (*call_value_1_rex64_vzeroupper): Likewise.
+ (*call_value_1_rex64_ms_sysv_vzeroupper): Likewise.
+ (*call_value_1_rex64_large_vzeroupper): Likewise.
+ (*sibcall_value_1_rex64_vzeroupper): Likewise.
+
2010-11-02 Ian Lance Taylor <iant@google.com>
PR lto/46273
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 13b1394..c64135b 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -119,6 +119,8 @@ extern void ix86_expand_sse_unpack (rtx[], bool, bool);
extern void ix86_expand_sse4_unpack (rtx[], bool, bool);
extern bool ix86_expand_int_addcc (rtx[]);
extern rtx ix86_expand_call (rtx, rtx, rtx, rtx, rtx, int);
+extern void ix86_split_call_vzeroupper (rtx, rtx);
+extern void ix86_split_call_pop_vzeroupper (rtx, rtx);
extern void x86_initialize_trampoline (rtx, rtx, rtx);
extern rtx ix86_zero_extend_to_Pmode (rtx);
extern void ix86_split_long_move (rtx[]);
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index bda8ed3..a5beb83 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -108,163 +108,119 @@ check_avx256_stores (rtx dest, const_rtx set, void *data)
static void
move_or_delete_vzeroupper_2 (basic_block bb, bool upper_128bits_set)
{
- rtx curr_insn, next_insn, prev_insn, insn;
+ rtx insn;
+ rtx vzeroupper_insn = NULL_RTX;
+ rtx pat;
+ int avx256;
if (dump_file)
fprintf (dump_file, " BB [%i] entry: upper 128bits: %d\n",
bb->index, upper_128bits_set);
- for (curr_insn = BB_HEAD (bb);
- curr_insn && curr_insn != NEXT_INSN (BB_END (bb));
- curr_insn = next_insn)
+ insn = BB_HEAD (bb);
+ while (insn != BB_END (bb))
{
- int avx256;
+ insn = NEXT_INSN (insn);
- next_insn = NEXT_INSN (curr_insn);
-
- if (!NONDEBUG_INSN_P (curr_insn))
+ if (!NONDEBUG_INSN_P (insn))
continue;
- /* Search for vzeroupper. */
- insn = PATTERN (curr_insn);
- if (GET_CODE (insn) == UNSPEC_VOLATILE
- && XINT (insn, 1) == UNSPECV_VZEROUPPER)
+ /* Move vzeroupper before jump/call. */
+ if (JUMP_P (insn) || CALL_P (insn))
+ {
+ if (!vzeroupper_insn)
+ continue;
+
+ if (PREV_INSN (insn) != vzeroupper_insn)
+ {
+ if (dump_file)
+ {
+ fprintf (dump_file, "Move vzeroupper after:\n");
+ print_rtl_single (dump_file, PREV_INSN (insn));
+ fprintf (dump_file, "before:\n");
+ print_rtl_single (dump_file, insn);
+ }
+ reorder_insns_nobb (vzeroupper_insn, vzeroupper_insn,
+ PREV_INSN (insn));
+ }
+ vzeroupper_insn = NULL_RTX;
+ continue;
+ }
+
+ pat = PATTERN (insn);
+
+ /* Check insn for vzeroupper intrinsic. */
+ if (GET_CODE (pat) == UNSPEC_VOLATILE
+ && XINT (pat, 1) == UNSPECV_VZEROUPPER)
{
- /* Found vzeroupper. */
if (dump_file)
{
+ /* Found vzeroupper intrinsic. */
fprintf (dump_file, "Found vzeroupper:\n");
- print_rtl_single (dump_file, curr_insn);
+ print_rtl_single (dump_file, insn);
}
}
else
{
- /* Check vzeroall intrinsic. */
- if (GET_CODE (insn) == PARALLEL
- && GET_CODE (XVECEXP (insn, 0, 0)) == UNSPEC_VOLATILE
- && XINT (XVECEXP (insn, 0, 0), 1) == UNSPECV_VZEROALL)
- upper_128bits_set = false;
- else if (!upper_128bits_set)
+ /* Check insn for vzeroall intrinsic. */
+ if (GET_CODE (pat) == PARALLEL
+ && GET_CODE (XVECEXP (pat, 0, 0)) == UNSPEC_VOLATILE
+ && XINT (XVECEXP (pat, 0, 0), 1) == UNSPECV_VZEROALL)
{
- /* Check if upper 128bits of AVX registers are used. */
- note_stores (insn, check_avx256_stores,
- &upper_128bits_set);
+ upper_128bits_set = false;
+
+ /* Delete pending vzeroupper insertion. */
+ if (vzeroupper_insn)
+ {
+ delete_insn (vzeroupper_insn);
+ vzeroupper_insn = NULL_RTX;
+ }
}
+ else if (!upper_128bits_set)
+ note_stores (pat, check_avx256_stores, &upper_128bits_set);
continue;
}
- avx256 = INTVAL (XVECEXP (insn, 0, 0));
+ /* Process vzeroupper intrinsic. */
+ avx256 = INTVAL (XVECEXP (pat, 0, 0));
if (!upper_128bits_set)
{
/* Since the upper 128bits are cleared, callee must not pass
256bit AVX register. We only need to check if callee
returns 256bit AVX register. */
- upper_128bits_set = avx256 == callee_return_avx256;
+ upper_128bits_set = (avx256 == callee_return_avx256);
- /* Remove unnecessary vzeroupper since upper 128bits are
- cleared. */
+ /* Remove unnecessary vzeroupper since
+ upper 128bits are cleared. */
if (dump_file)
{
fprintf (dump_file, "Delete redundant vzeroupper:\n");
- print_rtl_single (dump_file, curr_insn);
+ print_rtl_single (dump_file, insn);
}
- delete_insn (curr_insn);
- continue;
+ delete_insn (insn);
}
else if (avx256 == callee_return_pass_avx256
|| avx256 == callee_pass_avx256)
{
/* Callee passes 256bit AVX register. Check if callee
returns 256bit AVX register. */
- upper_128bits_set = avx256 == callee_return_pass_avx256;
+ upper_128bits_set = (avx256 == callee_return_pass_avx256);
- /* Must remove vzeroupper since callee passes 256bit AVX
- register. */
+ /* Must remove vzeroupper since
+ callee passes in 256bit AVX register. */
if (dump_file)
{
fprintf (dump_file, "Delete callee pass vzeroupper:\n");
- print_rtl_single (dump_file, curr_insn);
- }
- delete_insn (curr_insn);
- continue;
- }
-
- /* Find the jump after vzeroupper. */
- prev_insn = curr_insn;
- if (avx256 == vzeroupper_intrinsic)
- {
- /* For vzeroupper intrinsic, check if there is another
- vzeroupper. */
- insn = NEXT_INSN (curr_insn);
- while (insn)
- {
- if (NONJUMP_INSN_P (insn)
- && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
- && XINT (PATTERN (insn), 1) == UNSPECV_VZEROUPPER)
- {
- if (dump_file)
- {
- fprintf (dump_file,
- "Delete redundant vzeroupper intrinsic:\n");
- print_rtl_single (dump_file, curr_insn);
- }
- delete_insn (curr_insn);
- insn = NULL;
- continue;
- }
-
- if (JUMP_P (insn) || CALL_P (insn))
- break;
- prev_insn = insn;
- insn = NEXT_INSN (insn);
- if (insn == NEXT_INSN (BB_END (bb)))
- break;
+ print_rtl_single (dump_file, insn);
}
-
- /* Continue if redundant vzeroupper intrinsic is deleted. */
- if (!insn)
- continue;
+ delete_insn (insn);
}
else
{
- /* Find the next jump/call. */
- insn = NEXT_INSN (curr_insn);
- while (insn)
- {
- if (JUMP_P (insn) || CALL_P (insn))
- break;
- prev_insn = insn;
- insn = NEXT_INSN (insn);
- if (insn == NEXT_INSN (BB_END (bb)))
- break;
- }
-
- if (!insn)
- gcc_unreachable();
+ upper_128bits_set = false;
+ vzeroupper_insn = insn;
}
-
- /* Keep vzeroupper. */
- upper_128bits_set = false;
-
- /* Also allow label as the next instruction. */
- if (insn == NEXT_INSN (BB_END (bb)) && !LABEL_P (insn))
- gcc_unreachable();
-
- /* Move vzeroupper before jump/call if neeeded. */
- if (curr_insn != prev_insn)
- {
- reorder_insns_nobb (curr_insn, curr_insn, prev_insn);
- if (dump_file)
- {
- fprintf (dump_file, "Move vzeroupper after:\n");
- print_rtl_single (dump_file, prev_insn);
- fprintf (dump_file, "before:\n");
- print_rtl_single (dump_file, insn);
- }
- }
-
- next_insn = NEXT_INSN (insn);
}
BLOCK_INFO (bb)->upper_128bits_set = upper_128bits_set;
@@ -21565,10 +21521,12 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
+ 2, vec));
}
- /* Emit vzeroupper if needed. */
+ /* Add UNSPEC_CALL_NEEDS_VZEROUPPER decoration. */
if (TARGET_VZEROUPPER && cfun->machine->use_avx256_p)
{
+ rtx unspec;
int avx256;
+
cfun->machine->use_vzeroupper_p = 1;
if (cfun->machine->callee_pass_avx256_p)
{
@@ -21581,7 +21539,11 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
avx256 = callee_return_avx256;
else
avx256 = call_no_avx256;
- emit_insn (gen_avx_vzeroupper (GEN_INT (avx256)));
+
+ unspec = gen_rtx_UNSPEC (VOIDmode,
+ gen_rtvec (1, GEN_INT (avx256)),
+ UNSPEC_CALL_NEEDS_VZEROUPPER);
+ call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, unspec));
}
call = emit_call_insn (call);
@@ -21591,6 +21553,24 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
return call;
}
+void
+ix86_split_call_vzeroupper (rtx insn, rtx vzeroupper)
+{
+ rtx call = XVECEXP (PATTERN (insn), 0, 0);
+ emit_insn (gen_avx_vzeroupper (vzeroupper));
+ emit_call_insn (call);
+}
+
+void
+ix86_split_call_pop_vzeroupper (rtx insn, rtx vzeroupper)
+{
+ rtx call = XVECEXP (PATTERN (insn), 0, 0);
+ rtx pop = XVECEXP (PATTERN (insn), 0, 1);
+ emit_insn (gen_avx_vzeroupper (vzeroupper));
+ emit_call_insn (gen_rtx_PARALLEL (VOIDmode,
+ gen_rtvec (2, call, pop)));
+}
+
/* Output the assembly for a call instruction. */
const char *
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index feaf781..278bd77 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -105,6 +105,7 @@
UNSPEC_LD_MPIC ; load_macho_picbase
UNSPEC_TRUNC_NOOP
UNSPEC_DIV_ALREADY_SPLIT
+ UNSPEC_CALL_NEEDS_VZEROUPPER
;; For SSE/MMX support:
UNSPEC_FIX_NOTRUNC
@@ -11260,6 +11261,21 @@
DONE;
})
+(define_insn_and_split "*call_pop_0_vzeroupper"
+ [(call (mem:QI (match_operand:SI 0 "constant_call_address_operand" ""))
+ (match_operand:SI 1 "" ""))
+ (set (reg:SI SP_REG)
+ (plus:SI (reg:SI SP_REG)
+ (match_operand:SI 2 "immediate_operand" "")))
+ (unspec [(match_operand 3 "const_int_operand" "")]
+ UNSPEC_CALL_NEEDS_VZEROUPPER)]
+ "TARGET_VZEROUPPER && !TARGET_64BIT"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+ "ix86_split_call_pop_vzeroupper (curr_insn, operands[3]); DONE;"
+ [(set_attr "type" "call")])
+
(define_insn "*call_pop_0"
[(call (mem:QI (match_operand:SI 0 "constant_call_address_operand" ""))
(match_operand:SI 1 "" ""))
@@ -11275,6 +11291,21 @@
}
[(set_attr "type" "call")])
+(define_insn_and_split "*call_pop_1_vzeroupper"
+ [(call (mem:QI (match_operand:SI 0 "call_insn_operand" "lsm"))
+ (match_operand:SI 1 "" ""))
+ (set (reg:SI SP_REG)
+ (plus:SI (reg:SI SP_REG)
+ (match_operand:SI 2 "immediate_operand" "i")))
+ (unspec [(match_operand 3 "const_int_operand" "")]
+ UNSPEC_CALL_NEEDS_VZEROUPPER)]
+ "TARGET_VZEROUPPER && !TARGET_64BIT && !SIBLING_CALL_P (insn)"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+ "ix86_split_call_pop_vzeroupper (curr_insn, operands[3]); DONE;"
+ [(set_attr "type" "call")])
+
(define_insn "*call_pop_1"
[(call (mem:QI (match_operand:SI 0 "call_insn_operand" "lsm"))
(match_operand:SI 1 "" ""))
@@ -11289,6 +11320,21 @@
}
[(set_attr "type" "call")])
+(define_insn_and_split "*sibcall_pop_1_vzeroupper"
+ [(call (mem:QI (match_operand:SI 0 "sibcall_insn_operand" "s,U"))
+ (match_operand:SI 1 "" ""))
+ (set (reg:SI SP_REG)
+ (plus:SI (reg:SI SP_REG)
+ (match_operand:SI 2 "immediate_operand" "i,i")))
+ (unspec [(match_operand 3 "const_int_operand" "")]
+ UNSPEC_CALL_NEEDS_VZEROUPPER)]
+ "TARGET_VZEROUPPER && !TARGET_64BIT && SIBLING_CALL_P (insn)"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+ "ix86_split_call_pop_vzeroupper (curr_insn, operands[3]); DONE;"
+ [(set_attr "type" "call")])
+
(define_insn "*sibcall_pop_1"
[(call (mem:QI (match_operand:SI 0 "sibcall_insn_operand" "s,U"))
(match_operand:SI 1 "" ""))
@@ -11321,6 +11367,18 @@
DONE;
})
+(define_insn_and_split "*call_0_vzeroupper"
+ [(call (mem:QI (match_operand 0 "constant_call_address_operand" ""))
+ (match_operand 1 "" ""))
+ (unspec [(match_operand 2 "const_int_operand" "")]
+ UNSPEC_CALL_NEEDS_VZEROUPPER)]
+ "TARGET_VZEROUPPER"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+ "ix86_split_call_vzeroupper (curr_insn, operands[2]); DONE;"
+ [(set_attr "type" "call")])
+
(define_insn "*call_0"
[(call (mem:QI (match_operand 0 "constant_call_address_operand" ""))
(match_operand 1 "" ""))]
@@ -11328,6 +11386,18 @@
{ return ix86_output_call_insn (insn, operands[0], 0); }
[(set_attr "type" "call")])
+(define_insn_and_split "*call_1_vzeroupper"
+ [(call (mem:QI (match_operand:SI 0 "call_insn_operand" "lsm"))
+ (match_operand 1 "" ""))
+ (unspec [(match_operand 2 "const_int_operand" "")]
+ UNSPEC_CALL_NEEDS_VZEROUPPER)]
+ "TARGET_VZEROUPPER && !TARGET_64BIT && !SIBLING_CALL_P (insn)"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+ "ix86_split_call_vzeroupper (curr_insn, operands[2]); DONE;"
+ [(set_attr "type" "call")])
+
(define_insn "*call_1"
[(call (mem:QI (match_operand:SI 0 "call_insn_operand" "lsm"))
(match_operand 1 "" ""))]
@@ -11335,6 +11405,18 @@
{ return ix86_output_call_insn (insn, operands[0], 0); }
[(set_attr "type" "call")])
+(define_insn_and_split "*sibcall_1_vzeroupper"
+ [(call (mem:QI (match_operand:SI 0 "sibcall_insn_operand" "s,U"))
+ (match_operand 1 "" ""))
+ (unspec [(match_operand 2 "const_int_operand" "")]
+ UNSPEC_CALL_NEEDS_VZEROUPPER)]
+ "TARGET_VZEROUPPER && !TARGET_64BIT && SIBLING_CALL_P (insn)"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+ "ix86_split_call_vzeroupper (curr_insn, operands[2]); DONE;"
+ [(set_attr "type" "call")])
+
(define_insn "*sibcall_1"
[(call (mem:QI (match_operand:SI 0 "sibcall_insn_operand" "s,U"))
(match_operand 1 "" ""))]
@@ -11342,6 +11424,19 @@
{ return ix86_output_call_insn (insn, operands[0], 0); }
[(set_attr "type" "call")])
+(define_insn_and_split "*call_1_rex64_vzeroupper"
+ [(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rsm"))
+ (match_operand 1 "" ""))
+ (unspec [(match_operand 2 "const_int_operand" "")]
+ UNSPEC_CALL_NEEDS_VZEROUPPER)]
+ "TARGET_VZEROUPPER && TARGET_64BIT && !SIBLING_CALL_P (insn)
+ && ix86_cmodel != CM_LARGE && ix86_cmodel != CM_LARGE_PIC"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+ "ix86_split_call_vzeroupper (curr_insn, operands[2]); DONE;"
+ [(set_attr "type" "call")])
+
(define_insn "*call_1_rex64"
[(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rsm"))
(match_operand 1 "" ""))]
@@ -11350,6 +11445,32 @@
{ return ix86_output_call_insn (insn, operands[0], 0); }
[(set_attr "type" "call")])
+(define_insn_and_split "*call_1_rex64_ms_sysv_vzeroupper"
+ [(parallel
+ [(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rsm"))
+ (match_operand 1 "" ""))
+ (unspec [(const_int 0)] UNSPEC_MS_TO_SYSV_CALL)
+ (clobber (reg:TI XMM6_REG))
+ (clobber (reg:TI XMM7_REG))
+ (clobber (reg:TI XMM8_REG))
+ (clobber (reg:TI XMM9_REG))
+ (clobber (reg:TI XMM10_REG))
+ (clobber (reg:TI XMM11_REG))
+ (clobber (reg:TI XMM12_REG))
+ (clobber (reg:TI XMM13_REG))
+ (clobber (reg:TI XMM14_REG))
+ (clobber (reg:TI XMM15_REG))
+ (clobber (reg:DI SI_REG))
+ (clobber (reg:DI DI_REG))])
+ (unspec [(match_operand 2 "const_int_operand" "")]
+ UNSPEC_CALL_NEEDS_VZEROUPPER)]
+ "TARGET_VZEROUPPER && TARGET_64BIT && !SIBLING_CALL_P (insn)"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+ "ix86_split_call_vzeroupper (curr_insn, operands[2]); DONE;"
+ [(set_attr "type" "call")])
+
(define_insn "*call_1_rex64_ms_sysv"
[(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rsm"))
(match_operand 1 "" ""))
@@ -11370,6 +11491,18 @@
{ return ix86_output_call_insn (insn, operands[0], 0); }
[(set_attr "type" "call")])
+(define_insn_and_split "*call_1_rex64_large_vzeroupper"
+ [(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rm"))
+ (match_operand 1 "" ""))
+ (unspec [(match_operand 2 "const_int_operand" "")]
+ UNSPEC_CALL_NEEDS_VZEROUPPER)]
+ "TARGET_VZEROUPPER && TARGET_64BIT && !SIBLING_CALL_P (insn)"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+ "ix86_split_call_vzeroupper (curr_insn, operands[2]); DONE;"
+ [(set_attr "type" "call")])
+
(define_insn "*call_1_rex64_large"
[(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rm"))
(match_operand 1 "" ""))]
@@ -11377,6 +11510,18 @@
{ return ix86_output_call_insn (insn, operands[0], 0); }
[(set_attr "type" "call")])
+(define_insn_and_split "*sibcall_1_rex64_vzeroupper"
+ [(call (mem:QI (match_operand:DI 0 "sibcall_insn_operand" "s,U"))
+ (match_operand 1 "" ""))
+ (unspec [(match_operand 2 "const_int_operand" "")]
+ UNSPEC_CALL_NEEDS_VZEROUPPER)]
+ "TARGET_VZEROUPPER && TARGET_64BIT && !SIBLING_CALL_P (insn)"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+ "ix86_split_call_vzeroupper (curr_insn, operands[2]); DONE;"
+ [(set_attr "type" "call")])
+
(define_insn "*sibcall_1_rex64"
[(call (mem:QI (match_operand:DI 0 "sibcall_insn_operand" "s,U"))
(match_operand 1 "" ""))]
@@ -17123,6 +17268,22 @@
;; Call-value patterns last so that the wildcard operand does not
;; disrupt insn-recog's switch tables.
+(define_insn_and_split "*call_value_pop_0_vzeroupper"
+ [(set (match_operand 0 "" "")
+ (call (mem:QI (match_operand:SI 1 "constant_call_address_operand" ""))
+ (match_operand:SI 2 "" "")))
+ (set (reg:SI SP_REG)
+ (plus:SI (reg:SI SP_REG)
+ (match_operand:SI 3 "immediate_operand" "")))
+ (unspec [(match_operand 4 "const_int_operand" "")]
+ UNSPEC_CALL_NEEDS_VZEROUPPER)]
+ "TARGET_VZEROUPPER && !TARGET_64BIT"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+ "ix86_split_call_pop_vzeroupper (curr_insn, operands[4]); DONE;"
+ [(set_attr "type" "callv")])
+
(define_insn "*call_value_pop_0"
[(set (match_operand 0 "" "")
(call (mem:QI (match_operand:SI 1 "constant_call_address_operand" ""))
@@ -17134,6 +17295,22 @@
{ return ix86_output_call_insn (insn, operands[1], 1); }
[(set_attr "type" "callv")])
+(define_insn_and_split "*call_value_pop_1_vzeroupper"
+ [(set (match_operand 0 "" "")
+ (call (mem:QI (match_operand:SI 1 "call_insn_operand" "lsm"))
+ (match_operand:SI 2 "" "")))
+ (set (reg:SI SP_REG)
+ (plus:SI (reg:SI SP_REG)
+ (match_operand:SI 3 "immediate_operand" "i")))
+ (unspec [(match_operand 4 "const_int_operand" "")]
+ UNSPEC_CALL_NEEDS_VZEROUPPER)]
+ "TARGET_VZEROUPPER && !TARGET_64BIT && !SIBLING_CALL_P (insn)"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+ "ix86_split_call_pop_vzeroupper (curr_insn, operands[4]); DONE;"
+ [(set_attr "type" "callv")])
+
(define_insn "*call_value_pop_1"
[(set (match_operand 0 "" "")
(call (mem:QI (match_operand:SI 1 "call_insn_operand" "lsm"))
@@ -17145,6 +17322,22 @@
{ return ix86_output_call_insn (insn, operands[1], 1); }
[(set_attr "type" "callv")])
+(define_insn_and_split "*sibcall_value_pop_1_vzeroupper"
+ [(set (match_operand 0 "" "")
+ (call (mem:QI (match_operand:SI 1 "sibcall_insn_operand" "s,U"))
+ (match_operand:SI 2 "" "")))
+ (set (reg:SI SP_REG)
+ (plus:SI (reg:SI SP_REG)
+ (match_operand:SI 3 "immediate_operand" "i,i")))
+ (unspec [(match_operand 4 "const_int_operand" "")]
+ UNSPEC_CALL_NEEDS_VZEROUPPER)]
+ "TARGET_VZEROUPPER && !TARGET_64BIT && SIBLING_CALL_P (insn)"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+ "ix86_split_call_pop_vzeroupper (curr_insn, operands[4]); DONE;"
+ [(set_attr "type" "callv")])
+
(define_insn "*sibcall_value_pop_1"
[(set (match_operand 0 "" "")
(call (mem:QI (match_operand:SI 1 "sibcall_insn_operand" "s,U"))
@@ -17156,6 +17349,19 @@
{ return ix86_output_call_insn (insn, operands[1], 1); }
[(set_attr "type" "callv")])
+(define_insn_and_split "*call_value_0_vzeroupper"
+ [(set (match_operand 0 "" "")
+ (call (mem:QI (match_operand:SI 1 "constant_call_address_operand" ""))
+ (match_operand:SI 2 "" "")))
+ (unspec [(match_operand 3 "const_int_operand" "")]
+ UNSPEC_CALL_NEEDS_VZEROUPPER)]
+ "TARGET_VZEROUPPER && !TARGET_64BIT"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+ "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;"
+ [(set_attr "type" "callv")])
+
(define_insn "*call_value_0"
[(set (match_operand 0 "" "")
(call (mem:QI (match_operand:SI 1 "constant_call_address_operand" ""))
@@ -17164,6 +17370,19 @@
{ return ix86_output_call_insn (insn, operands[1], 1); }
[(set_attr "type" "callv")])
+(define_insn_and_split "*call_value_0_rex64_vzeroupper"
+ [(set (match_operand 0 "" "")
+ (call (mem:QI (match_operand:DI 1 "constant_call_address_operand" ""))
+ (match_operand:DI 2 "const_int_operand" "")))
+ (unspec [(match_operand 3 "const_int_operand" "")]
+ UNSPEC_CALL_NEEDS_VZEROUPPER)]
+ "TARGET_VZEROUPPER && TARGET_64BIT"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+ "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;"
+ [(set_attr "type" "callv")])
+
(define_insn "*call_value_0_rex64"
[(set (match_operand 0 "" "")
(call (mem:QI (match_operand:DI 1 "constant_call_address_operand" ""))
@@ -17172,6 +17391,33 @@
{ return ix86_output_call_insn (insn, operands[1], 1); }
[(set_attr "type" "callv")])
+(define_insn_and_split "*call_value_0_rex64_ms_sysv_vzeroupper"
+ [(parallel
+ [(set (match_operand 0 "" "")
+ (call (mem:QI (match_operand:DI 1 "constant_call_address_operand" ""))
+ (match_operand:DI 2 "const_int_operand" "")))
+ (unspec [(const_int 0)] UNSPEC_MS_TO_SYSV_CALL)
+ (clobber (reg:TI XMM6_REG))
+ (clobber (reg:TI XMM7_REG))
+ (clobber (reg:TI XMM8_REG))
+ (clobber (reg:TI XMM9_REG))
+ (clobber (reg:TI XMM10_REG))
+ (clobber (reg:TI XMM11_REG))
+ (clobber (reg:TI XMM12_REG))
+ (clobber (reg:TI XMM13_REG))
+ (clobber (reg:TI XMM14_REG))
+ (clobber (reg:TI XMM15_REG))
+ (clobber (reg:DI SI_REG))
+ (clobber (reg:DI DI_REG))])
+ (unspec [(match_operand 3 "const_int_operand" "")]
+ UNSPEC_CALL_NEEDS_VZEROUPPER)]
+ "TARGET_VZEROUPPER && TARGET_64BIT && !SIBLING_CALL_P (insn)"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+ "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;"
+ [(set_attr "type" "callv")])
+
(define_insn "*call_value_0_rex64_ms_sysv"
[(set (match_operand 0 "" "")
(call (mem:QI (match_operand:DI 1 "constant_call_address_operand" ""))
@@ -17193,6 +17439,19 @@
{ return ix86_output_call_insn (insn, operands[1], 1); }
[(set_attr "type" "callv")])
+(define_insn_and_split "*call_value_1_vzeroupper"
+ [(set (match_operand 0 "" "")
+ (call (mem:QI (match_operand:SI 1 "call_insn_operand" "lsm"))
+ (match_operand:SI 2 "" "")))
+ (unspec [(match_operand 3 "const_int_operand" "")]
+ UNSPEC_CALL_NEEDS_VZEROUPPER)]
+ "TARGET_VZEROUPPER && !TARGET_64BIT && !SIBLING_CALL_P (insn)"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+ "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;"
+ [(set_attr "type" "callv")])
+
(define_insn "*call_value_1"
[(set (match_operand 0 "" "")
(call (mem:QI (match_operand:SI 1 "call_insn_operand" "lsm"))
@@ -17201,6 +17460,19 @@
{ return ix86_output_call_insn (insn, operands[1], 1); }
[(set_attr "type" "callv")])
+(define_insn_and_split "*sibcall_value_1_vzeroupper"
+ [(set (match_operand 0 "" "")
+ (call (mem:QI (match_operand:SI 1 "sibcall_insn_operand" "s,U"))
+ (match_operand:SI 2 "" "")))
+ (unspec [(match_operand 3 "const_int_operand" "")]
+ UNSPEC_CALL_NEEDS_VZEROUPPER)]
+ "TARGET_VZEROUPPER && !TARGET_64BIT && SIBLING_CALL_P (insn)"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+ "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;"
+ [(set_attr "type" "callv")])
+
(define_insn "*sibcall_value_1"
[(set (match_operand 0 "" "")
(call (mem:QI (match_operand:SI 1 "sibcall_insn_operand" "s,U"))
@@ -17209,6 +17481,20 @@
{ return ix86_output_call_insn (insn, operands[1], 1); }
[(set_attr "type" "callv")])
+(define_insn_and_split "*call_value_1_rex64_vzeroupper"
+ [(set (match_operand 0 "" "")
+ (call (mem:QI (match_operand:DI 1 "call_insn_operand" "rsm"))
+ (match_operand:DI 2 "" "")))
+ (unspec [(match_operand 3 "const_int_operand" "")]
+ UNSPEC_CALL_NEEDS_VZEROUPPER)]
+ "TARGET_VZEROUPPER && TARGET_64BIT && !SIBLING_CALL_P (insn)
+ && ix86_cmodel != CM_LARGE && ix86_cmodel != CM_LARGE_PIC"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+ "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;"
+ [(set_attr "type" "callv")])
+
(define_insn "*call_value_1_rex64"
[(set (match_operand 0 "" "")
(call (mem:QI (match_operand:DI 1 "call_insn_operand" "rsm"))
@@ -17218,6 +17504,33 @@
{ return ix86_output_call_insn (insn, operands[1], 1); }
[(set_attr "type" "callv")])
+(define_insn_and_split "*call_value_1_rex64_ms_sysv_vzeroupper"
+ [(parallel
+ [(set (match_operand 0 "" "")
+ (call (mem:QI (match_operand:DI 1 "call_insn_operand" "rsm"))
+ (match_operand:DI 2 "" "")))
+ (unspec [(const_int 0)] UNSPEC_MS_TO_SYSV_CALL)
+ (clobber (reg:TI XMM6_REG))
+ (clobber (reg:TI XMM7_REG))
+ (clobber (reg:TI XMM8_REG))
+ (clobber (reg:TI XMM9_REG))
+ (clobber (reg:TI XMM10_REG))
+ (clobber (reg:TI XMM11_REG))
+ (clobber (reg:TI XMM12_REG))
+ (clobber (reg:TI XMM13_REG))
+ (clobber (reg:TI XMM14_REG))
+ (clobber (reg:TI XMM15_REG))
+ (clobber (reg:DI SI_REG))
+ (clobber (reg:DI DI_REG))])
+ (unspec [(match_operand 3 "const_int_operand" "")]
+ UNSPEC_CALL_NEEDS_VZEROUPPER)]
+ "TARGET_VZEROUPPER && TARGET_64BIT && !SIBLING_CALL_P (insn)"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+ "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;"
+ [(set_attr "type" "callv")])
+
(define_insn "*call_value_1_rex64_ms_sysv"
[(set (match_operand 0 "" "")
(call (mem:QI (match_operand:DI 1 "call_insn_operand" "rsm"))
@@ -17239,6 +17552,19 @@
{ return ix86_output_call_insn (insn, operands[1], 1); }
[(set_attr "type" "callv")])
+(define_insn_and_split "*call_value_1_rex64_large_vzeroupper"
+ [(set (match_operand 0 "" "")
+ (call (mem:QI (match_operand:DI 1 "call_insn_operand" "rm"))
+ (match_operand:DI 2 "" "")))
+ (unspec [(match_operand 3 "const_int_operand" "")]
+ UNSPEC_CALL_NEEDS_VZEROUPPER)]
+ "TARGET_VZEROUPPER && TARGET_64BIT && !SIBLING_CALL_P (insn)"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+ "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;"
+ [(set_attr "type" "callv")])
+
(define_insn "*call_value_1_rex64_large"
[(set (match_operand 0 "" "")
(call (mem:QI (match_operand:DI 1 "call_insn_operand" "rm"))
@@ -17247,6 +17573,19 @@
{ return ix86_output_call_insn (insn, operands[1], 1); }
[(set_attr "type" "callv")])
+(define_insn_and_split "*sibcall_value_1_rex64_vzeroupper"
+ [(set (match_operand 0 "" "")
+ (call (mem:QI (match_operand:DI 1 "sibcall_insn_operand" "s,U"))
+ (match_operand:DI 2 "" "")))
+ (unspec [(match_operand 3 "const_int_operand" "")]
+ UNSPEC_CALL_NEEDS_VZEROUPPER)]
+ "TARGET_VZEROUPPER && TARGET_64BIT && SIBLING_CALL_P (insn)"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+ "ix86_split_call_vzeroupper (curr_insn, operands[3]); DONE;"
+ [(set_attr "type" "callv")])
+
(define_insn "*sibcall_value_1_rex64"
[(set (match_operand 0 "" "")
(call (mem:QI (match_operand:DI 1 "sibcall_insn_operand" "s,U"))
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 2fcf2b2..c484117 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,13 @@
+2010-11-02 H.J. Lu <hongjiu.lu@intel.com>
+
+ * gcc.target/i386/avx-vzeroupper-15.c: New.
+ * gcc.target/i386/avx-vzeroupper-16.c: Likewise.
+ * gcc.target/i386/avx-vzeroupper-17.c: Likewise.
+ * gcc.target/i386/avx-vzeroupper-18.c: Likewise.
+
+ PR target/46253
+ * gcc.target/i386/pr46253.c: New.
+
2010-11-02 Steven G. Kargl < kargl@gcc.gnu.org>
Tobias Burnus <burnus@net-b.de>
diff --git a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-15.c b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-15.c
new file mode 100644
index 0000000..134a3dd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-15.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O0 -mavx -mtune=generic -dp" } */
+
+#include <immintrin.h>
+
+extern __m256 x, y;
+extern void (*bar) (void);
+
+void
+foo ()
+{
+ x = y;
+ bar ();
+}
+
+/* { dg-final { scan-assembler-times "avx_vzeroupper" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-16.c b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-16.c
new file mode 100644
index 0000000..3fb099d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-16.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O0 -mavx -mabi=ms -mtune=generic -dp" } */
+
+typedef float __m256 __attribute__ ((__vector_size__ (32), __may_alias__));
+
+extern __m256 x;
+
+extern __m256 __attribute__ ((sysv_abi)) bar (__m256);
+
+void
+foo (void)
+{
+ bar (x);
+}
+
+/* { dg-final { scan-assembler-times "avx_vzeroupper" 1 } } */
+/* { dg-final { scan-assembler-times "\\*call_value_0_rex64_ms_sysv" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-17.c b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-17.c
new file mode 100644
index 0000000..2f3cfd2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-17.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O0 -mavx -mabi=ms -mtune=generic -dp" } */
+
+typedef float __m256 __attribute__ ((__vector_size__ (32), __may_alias__));
+
+extern __m256 x;
+
+extern __m256 __attribute__ ((sysv_abi)) (*bar) (__m256);
+
+void
+foo (void)
+{
+ bar (x);
+}
+
+/* { dg-final { scan-assembler-times "avx_vzeroupper" 1 } } */
+/* { dg-final { scan-assembler-times "\\*call_value_1_rex64_ms_sysv" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-18.c b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-18.c
new file mode 100644
index 0000000..541f77d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-18.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O0 -mavx -mabi=ms -mtune=generic -dp" } */
+
+typedef float __m256 __attribute__ ((__vector_size__ (32), __may_alias__));
+
+extern __m256 x;
+
+extern void __attribute__ ((sysv_abi)) bar (__m256);
+
+void
+foo (void)
+{
+ bar (x);
+}
+
+/* { dg-final { scan-assembler-not "avx_vzeroupper" } } */
+/* { dg-final { scan-assembler-times "\\*call_1_rex64_ms_sysv" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr46253.c b/gcc/testsuite/gcc.target/i386/pr46253.c
new file mode 100644
index 0000000..406790a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr46253.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O -g -mf16c -mtune=generic -dp" } */
+
+typedef __m256i __attribute__ ((__vector_size__ (32)));
+
+__m256i bar (void);
+void foo (void)
+{
+ int i = 0;
+ bar ();
+ __builtin_ia32_vzeroupper ();
+ while (++i);
+}
+
+/* { dg-final { scan-assembler-times "avx_vzeroupper" 1 } } */