diff options
author | Uros Bizjak <uros@gcc.gnu.org> | 2012-05-09 20:06:47 +0200 |
---|---|---|
committer | Uros Bizjak <uros@gcc.gnu.org> | 2012-05-09 20:06:47 +0200 |
commit | 20f9034bc38de755016d3858e23ee438abcfc80b (patch) | |
tree | 2fad70c416fc89d99cae46f4c14179415943c6c4 | |
parent | eac188c5bcb1ed4f3fd61de2de2590dece32be2b (diff) | |
download | gcc-20f9034bc38de755016d3858e23ee438abcfc80b.zip gcc-20f9034bc38de755016d3858e23ee438abcfc80b.tar.gz gcc-20f9034bc38de755016d3858e23ee438abcfc80b.tar.bz2 |
re PR target/44141 (Redundant loads and stores generated for AMD bdver1 target)
PR target/44141
* config/i386/i386.c (ix86_expand_vector_move_misalign): Do not handle
128 bit vectors specially for TARGET_AVX. Emit sse2_movupd and
sse_movups RTXes for TARGET_AVX, TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
or when optimizing for size.
* config/i386/sse.md (*mov<mode>_internal): Remove
TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL handling from asm output code.
Calculate "mode" attribute according to optimize_function_for_size_p
and TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL flag.
(*<sse>_movu<ssemodesuffix><avxsizesuffix>): Choose asm template
depending on the mode of the instruction. Calculate "mode" attribute
according to optimize_function_for_size_p, TARGET_SSE_TYPELESS_STORES
and TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL flags.
(*<sse2>_movdqu<avxsizesuffix>): Ditto.
From-SVN: r187347
-rw-r--r-- | gcc/ChangeLog | 26 | ||||
-rw-r--r-- | gcc/config/i386/i386.c | 113 | ||||
-rw-r--r-- | gcc/config/i386/sse.md | 74 |
3 files changed, 110 insertions, 103 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index f32e96f..0d17b2a 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,20 @@ +2012-05-09 Uros Bizjak <ubizjak@gmail.com> + + PR target/44141 + * config/i386/i386.c (ix86_expand_vector_move_misalign): Do not handle + 128 bit vectors specially for TARGET_AVX. Emit sse2_movupd and + sse_movupd RTXes for TARGET_AVX, TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL + or when optimizing for size. + * config/i386/sse.md (*mov<mode>_internal): Remove + TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL handling from asm output code. + Calculate "mode" attribute according to optimize_function_for_size_p + and TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL flag. + (*<sse>_movu<ssemodesuffix><avxsizesuffix>): Choose asm template + depending on the mode of the instruction. Calculate "mode" attribute + according to optimize_function_for_size_p, TARGET_SSE_TYPELESS_STORES + and TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL flags. + (*<sse2>_movdqu<avxsizesuffix>): Ditto. + 2012-05-09 Georg-Johann Lay <avr@gjlay.de> PR target/53256 @@ -161,7 +178,7 @@ PR target/51244 * config/sh/sh.md (*branch_true, *branch_false): New insns. -2012-05-08 Teresa Johnson <tejohnson@google.com> +2012-05-08 Teresa Johnson <tejohnson@google.com> * gcov-io.h (__gcov_reset, __gcov_dump): Declare. * doc/gcov.texi: Add note on using __gcov_reset and __gcov_dump. @@ -180,8 +197,7 @@ (clone_function_name): Likewise. (cgraph_create_virtual_clone): Likewise. (cgraph_remove_node_and_inline_clones): Likewise. - (cgraph_redirect_edge_call_stmt_to_callee): Move here from - cgraphunit.c + (cgraph_redirect_edge_call_stmt_to_callee): Move here from cgraphunit.c * cgraph.h: Reorder declarations so they match file of origin. (cgraph_create_empty_node): Declare. * cgraphunit.c (update_call_expr): Move to cgraphclones.c @@ -702,7 +718,7 @@ Enable -Wunused-local-typedefs when -Wall or -Wunused is on * opts.c (finish_options): Activate -Wunused-local-typedefs if - -Wunused is activated. 
+ -Wunused is activated. * doc/invoke.texi: Update blurb of -Wunused-local-typedefs. 2012-05-04 Andreas Krebbel <Andreas.Krebbel@de.ibm.com> @@ -1757,7 +1773,7 @@ * config/pa/pa.c (pa_legitimate_constant_p): Don't put function labels in constant pool. -2012-04-27 Ollie Wild <aaw@google.com> +2012-04-27 Ollie Wild <aaw@google.com> * doc/invoke.texi (Wliteral-suffix): Document new option. diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 6bb64e0..36370b2 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -15907,60 +15907,19 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[]) op0 = operands[0]; op1 = operands[1]; - if (TARGET_AVX) + if (TARGET_AVX + && GET_MODE_SIZE (mode) == 32) { switch (GET_MODE_CLASS (mode)) { case MODE_VECTOR_INT: case MODE_INT: - switch (GET_MODE_SIZE (mode)) - { - case 16: - if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL) - { - op0 = gen_lowpart (V4SFmode, op0); - op1 = gen_lowpart (V4SFmode, op1); - emit_insn (gen_sse_movups (op0, op1)); - } - else - { - op0 = gen_lowpart (V16QImode, op0); - op1 = gen_lowpart (V16QImode, op1); - emit_insn (gen_sse2_movdqu (op0, op1)); - } - break; - case 32: - op0 = gen_lowpart (V32QImode, op0); - op1 = gen_lowpart (V32QImode, op1); - ix86_avx256_split_vector_move_misalign (op0, op1); - break; - default: - gcc_unreachable (); - } - break; + op0 = gen_lowpart (V32QImode, op0); + op1 = gen_lowpart (V32QImode, op1); + /* FALLTHRU */ + case MODE_VECTOR_FLOAT: - switch (mode) - { - case V4SFmode: - emit_insn (gen_sse_movups (op0, op1)); - break; - case V2DFmode: - if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL) - { - op0 = gen_lowpart (V4SFmode, op0); - op1 = gen_lowpart (V4SFmode, op1); - emit_insn (gen_sse_movups (op0, op1)); - } - else - emit_insn (gen_sse2_movupd (op0, op1)); - break; - case V8SFmode: - case V4DFmode: - ix86_avx256_split_vector_move_misalign (op0, op1); - break; - default: - gcc_unreachable (); - } + ix86_avx256_split_vector_move_misalign (op0, 
op1); break; default: @@ -15972,16 +15931,6 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[]) if (MEM_P (op1)) { - /* If we're optimizing for size, movups is the smallest. */ - if (optimize_insn_for_size_p () - || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL) - { - op0 = gen_lowpart (V4SFmode, op0); - op1 = gen_lowpart (V4SFmode, op1); - emit_insn (gen_sse_movups (op0, op1)); - return; - } - /* ??? If we have typed data, then it would appear that using movdqu is the only way to get unaligned data loaded with integer type. */ @@ -15989,16 +15938,19 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[]) { op0 = gen_lowpart (V16QImode, op0); op1 = gen_lowpart (V16QImode, op1); + /* We will eventually emit movups based on insn attributes. */ emit_insn (gen_sse2_movdqu (op0, op1)); - return; } - - if (TARGET_SSE2 && mode == V2DFmode) + else if (TARGET_SSE2 && mode == V2DFmode) { rtx zero; - if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL) + if (TARGET_AVX + || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL + || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL + || optimize_function_for_size_p (cfun)) { + /* We will eventually emit movups based on insn attributes. 
*/ emit_insn (gen_sse2_movupd (op0, op1)); return; } @@ -16030,7 +15982,10 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[]) } else { - if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL) + if (TARGET_AVX + || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL + || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL + || optimize_function_for_size_p (cfun)) { op0 = gen_lowpart (V4SFmode, op0); op1 = gen_lowpart (V4SFmode, op1); @@ -16045,6 +16000,7 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[]) if (mode != V4SFmode) op0 = gen_lowpart (V4SFmode, op0); + m = adjust_address (op1, V2SFmode, 0); emit_insn (gen_sse_loadlps (op0, op0, m)); m = adjust_address (op1, V2SFmode, 8); @@ -16053,30 +16009,20 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[]) } else if (MEM_P (op0)) { - /* If we're optimizing for size, movups is the smallest. */ - if (optimize_insn_for_size_p () - || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL) - { - op0 = gen_lowpart (V4SFmode, op0); - op1 = gen_lowpart (V4SFmode, op1); - emit_insn (gen_sse_movups (op0, op1)); - return; - } - - /* ??? Similar to above, only less clear - because of typeless stores. */ - if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES - && GET_MODE_CLASS (mode) == MODE_VECTOR_INT) + if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT) { op0 = gen_lowpart (V16QImode, op0); op1 = gen_lowpart (V16QImode, op1); + /* We will eventually emit movups based on insn attributes. */ emit_insn (gen_sse2_movdqu (op0, op1)); - return; } - - if (TARGET_SSE2 && mode == V2DFmode) + else if (TARGET_SSE2 && mode == V2DFmode) { - if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL) + if (TARGET_AVX + || TARGET_SSE_UNALIGNED_STORE_OPTIMAL + || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL + || optimize_function_for_size_p (cfun)) + /* We will eventually emit movups based on insn attributes. 
*/ emit_insn (gen_sse2_movupd (op0, op1)); else { @@ -16091,7 +16037,10 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[]) if (mode != V4SFmode) op1 = gen_lowpart (V4SFmode, op1); - if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL) + if (TARGET_AVX + || TARGET_SSE_UNALIGNED_STORE_OPTIMAL + || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL + || optimize_function_for_size_p (cfun)) { op0 = gen_lowpart (V4SFmode, op0); emit_insn (gen_sse_movups (op0, op1)); diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index d270c63..86b2ed3 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -449,8 +449,6 @@ && (misaligned_operand (operands[0], <MODE>mode) || misaligned_operand (operands[1], <MODE>mode))) return "vmovupd\t{%1, %0|%0, %1}"; - else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL) - return "%vmovaps\t{%1, %0|%0, %1}"; else return "%vmovapd\t{%1, %0|%0, %1}"; @@ -460,8 +458,6 @@ && (misaligned_operand (operands[0], <MODE>mode) || misaligned_operand (operands[1], <MODE>mode))) return "vmovdqu\t{%1, %0|%0, %1}"; - else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL) - return "%vmovaps\t{%1, %0|%0, %1}"; else return "%vmovdqa\t{%1, %0|%0, %1}"; @@ -475,19 +471,21 @@ [(set_attr "type" "sselog1,ssemov,ssemov") (set_attr "prefix" "maybe_vex") (set (attr "mode") - (cond [(match_test "TARGET_AVX") + (cond [(and (eq_attr "alternative" "1,2") + (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")) + (if_then_else + (match_test "GET_MODE_SIZE (<MODE>mode) > 16") + (const_string "V8SF") + (const_string "V4SF")) + (match_test "TARGET_AVX") (const_string "<sseinsnmode>") - (ior (ior (match_test "optimize_function_for_size_p (cfun)") - (not (match_test "TARGET_SSE2"))) + (ior (and (eq_attr "alternative" "1,2") + (match_test "optimize_function_for_size_p (cfun)")) (and (eq_attr "alternative" "2") (match_test "TARGET_SSE_TYPELESS_STORES"))) (const_string "V4SF") - (eq (const_string "<MODE>mode") (const_string "V4SFmode")) - (const_string "V4SF") - (eq 
(const_string "<MODE>mode") (const_string "V2DFmode")) - (const_string "V2DF") ] - (const_string "TI")))]) + (const_string "<sseinsnmode>")))]) (define_insn "sse2_movq128" [(set (match_operand:V2DI 0 "register_operand" "=x") @@ -597,11 +595,33 @@ [(match_operand:VF 1 "nonimmediate_operand" "xm,x")] UNSPEC_MOVU))] "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))" - "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}" +{ + switch (get_attr_mode (insn)) + { + case MODE_V8SF: + case MODE_V4SF: + return "%vmovups\t{%1, %0|%0, %1}"; + default: + return "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}"; + } +} [(set_attr "type" "ssemov") (set_attr "movu" "1") (set_attr "prefix" "maybe_vex") - (set_attr "mode" "<MODE>")]) + (set (attr "mode") + (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL") + (if_then_else + (match_test "GET_MODE_SIZE (<MODE>mode) > 16") + (const_string "V8SF") + (const_string "V4SF")) + (match_test "TARGET_AVX") + (const_string "<MODE>") + (ior (match_test "optimize_function_for_size_p (cfun)") + (and (eq_attr "alternative" "1") + (match_test "TARGET_SSE_TYPELESS_STORES"))) + (const_string "V4SF") + ] + (const_string "<MODE>")))]) (define_expand "<sse2>_movdqu<avxsizesuffix>" [(set (match_operand:VI1 0 "nonimmediate_operand") @@ -618,7 +638,16 @@ (unspec:VI1 [(match_operand:VI1 1 "nonimmediate_operand" "xm,x")] UNSPEC_MOVU))] "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))" - "%vmovdqu\t{%1, %0|%0, %1}" +{ + switch (get_attr_mode (insn)) + { + case MODE_V8SF: + case MODE_V4SF: + return "%vmovups\t{%1, %0|%0, %1}"; + default: + return "%vmovdqu\t{%1, %0|%0, %1}"; + } +} [(set_attr "type" "ssemov") (set_attr "movu" "1") (set (attr "prefix_data16") @@ -627,7 +656,20 @@ (const_string "*") (const_string "1"))) (set_attr "prefix" "maybe_vex") - (set_attr "mode" "<sseinsnmode>")]) + (set (attr "mode") + (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL") + (if_then_else + (match_test "GET_MODE_SIZE (<MODE>mode) > 16") + (const_string 
"V8SF") + (const_string "V4SF")) + (match_test "TARGET_AVX") + (const_string "<sseinsnmode>") + (ior (match_test "optimize_function_for_size_p (cfun)") + (and (eq_attr "alternative" "1") + (match_test "TARGET_SSE_TYPELESS_STORES"))) + (const_string "V4SF") + ] + (const_string "<sseinsnmode>")))]) (define_insn "<sse3>_lddqu<avxsizesuffix>" [(set (match_operand:VI1 0 "register_operand" "=x") |