author     H.J. Lu <hongjiu.lu@intel.com>   2011-03-27 18:56:00 +0000
committer  H.J. Lu <hjl@gcc.gnu.org>        2011-03-27 11:56:00 -0700
commit     d253656a7be7461c2fc7bb638e93b9943a91de9b (patch)
tree       6bde920e9a82430f7109eaf06eaa946470377eb2 /gcc/config
parent     c570af00f629db0876fcdbc5fb7ab12440b11d16 (diff)
Split 32-byte AVX unaligned load/store.
gcc/
2011-03-27 H.J. Lu <hongjiu.lu@intel.com>
* config/i386/i386.c (flag_opts): Add -mavx256-split-unaligned-load
and -mavx256-split-unaligned-store.
(ix86_option_override_internal): Split 32-byte AVX unaligned
load/store by default.
(ix86_avx256_split_vector_move_misalign): New.
(ix86_expand_vector_move_misalign): Use it.
* config/i386/i386.opt: Add -mavx256-split-unaligned-load and
-mavx256-split-unaligned-store.
* config/i386/sse.md (*avx_mov<mode>_internal): Verify unaligned
256bit load/store. Generate unaligned store on misaligned memory
operand.
(*avx_movu<ssemodesuffix><avxmodesuffix>): Verify unaligned
256bit load/store.
(*avx_movdqu<avxmodesuffix>): Likewise.
* doc/invoke.texi: Document -mavx256-split-unaligned-load and
-mavx256-split-unaligned-store.
gcc/testsuite/
2011-03-27 H.J. Lu <hongjiu.lu@intel.com>
* gcc.target/i386/avx256-unaligned-load-1.c: New.
* gcc.target/i386/avx256-unaligned-load-2.c: Likewise.
* gcc.target/i386/avx256-unaligned-load-3.c: Likewise.
* gcc.target/i386/avx256-unaligned-load-4.c: Likewise.
* gcc.target/i386/avx256-unaligned-load-5.c: Likewise.
* gcc.target/i386/avx256-unaligned-load-6.c: Likewise.
* gcc.target/i386/avx256-unaligned-load-7.c: Likewise.
* gcc.target/i386/avx256-unaligned-store-1.c: Likewise.
* gcc.target/i386/avx256-unaligned-store-2.c: Likewise.
* gcc.target/i386/avx256-unaligned-store-3.c: Likewise.
* gcc.target/i386/avx256-unaligned-store-4.c: Likewise.
* gcc.target/i386/avx256-unaligned-store-5.c: Likewise.
* gcc.target/i386/avx256-unaligned-store-6.c: Likewise.
* gcc.target/i386/avx256-unaligned-store-7.c: Likewise.
From-SVN: r171578
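
For context, the kind of source this change targets looks like the loop below. This file is illustrative only and is not part of the commit; the file name, array shapes, and the instruction mix predicted in the comment are assumptions made in the spirit of the new avx256-unaligned-* tests, not output taken from the patch.

/* avx256-split-example.c: illustrative only, not one of the new tests.
   Compile with: gcc -O3 -mavx -S avx256-split-example.c
   With the new defaults (-mavx256-split-unaligned-load and
   -mavx256-split-unaligned-store enabled when not optimizing for size),
   the 32-byte misaligned accesses created by the vectorizer are expected
   to be emitted as two 16-byte halves (e.g. vmovups/vinsertf128 for the
   loads and vextractf128 for the store) instead of one 32-byte vmovups.  */

#define N 1024

float b[N + 3], c[N + 3], d[N + 3];

void
avx_add (void)
{
  int i;

  /* Once this loop is vectorized with 256-bit vectors, the accesses at
     offset i + 3 cannot be 32-byte aligned, so both the loads and the
     store are unaligned 256-bit moves.  */
  for (i = 0; i < N; i++)
    d[i + 3] = b[i + 3] + c[i + 3];
}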
Diffstat (limited to 'gcc/config')
-rw-r--r--  gcc/config/i386/i386.c   |  76
-rw-r--r--  gcc/config/i386/i386.opt |   8
-rw-r--r--  gcc/config/i386/sse.md   |  42
3 files changed, 113 insertions, 13 deletions
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 4e8ca69..a4ca762 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -3130,6 +3130,8 @@ ix86_target_string (int isa, int flags, const char *arch, const char *tune,
     { "-mvect8-ret-in-mem",		MASK_VECT8_RETURNS },
     { "-m8bit-idiv",			MASK_USE_8BIT_IDIV },
     { "-mvzeroupper",			MASK_VZEROUPPER },
+    { "-mavx256-split-unaligned-load",	MASK_AVX256_SPLIT_UNALIGNED_LOAD},
+    { "-mavx256-split-unaligned-stroe",	MASK_AVX256_SPLIT_UNALIGNED_STORE},
   };
 
   const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
@@ -4274,11 +4276,18 @@ ix86_option_override_internal (bool main_args_p)
   if (TARGET_AVX)
     {
       /* When not optimize for size, enable vzeroupper optimization for
-	 TARGET_AVX with -fexpensive-optimizations.  */
-      if (!optimize_size
-	  && flag_expensive_optimizations
-	  && !(target_flags_explicit & MASK_VZEROUPPER))
-	target_flags |= MASK_VZEROUPPER;
+	 TARGET_AVX with -fexpensive-optimizations and split 32-byte
+	 AVX unaligned load/store.  */
+      if (!optimize_size)
+	{
+	  if (flag_expensive_optimizations
+	      && !(target_flags_explicit & MASK_VZEROUPPER))
+	    target_flags |= MASK_VZEROUPPER;
+	  if (!(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
+	    target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
+	  if (!(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_STORE))
+	    target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
+	}
     }
   else
     {
@@ -15588,6 +15597,57 @@ ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
   emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
 }
 
+/* Split 32-byte AVX unaligned load and store if needed.  */
+
+static void
+ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
+{
+  rtx m;
+  rtx (*extract) (rtx, rtx, rtx);
+  rtx (*move_unaligned) (rtx, rtx);
+  enum machine_mode mode;
+
+  switch (GET_MODE (op0))
+    {
+    default:
+      gcc_unreachable ();
+    case V32QImode:
+      extract = gen_avx_vextractf128v32qi;
+      move_unaligned = gen_avx_movdqu256;
+      mode = V16QImode;
+      break;
+    case V8SFmode:
+      extract = gen_avx_vextractf128v8sf;
+      move_unaligned = gen_avx_movups256;
+      mode = V4SFmode;
+      break;
+    case V4DFmode:
+      extract = gen_avx_vextractf128v4df;
+      move_unaligned = gen_avx_movupd256;
+      mode = V2DFmode;
+      break;
+    }
+
+  if (MEM_P (op1) && TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
+    {
+      rtx r = gen_reg_rtx (mode);
+      m = adjust_address (op1, mode, 0);
+      emit_move_insn (r, m);
+      m = adjust_address (op1, mode, 16);
+      r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
+      emit_move_insn (op0, r);
+    }
+  else if (MEM_P (op0) && TARGET_AVX256_SPLIT_UNALIGNED_STORE)
+    {
+      m = adjust_address (op0, mode, 0);
+      emit_insn (extract (m, op1, const0_rtx));
+      m = adjust_address (op0, mode, 16);
+      emit_insn (extract (m, op1, const1_rtx));
+    }
+  else
+    emit_insn (move_unaligned (op0, op1));
+}
+
 /* Implement the movmisalign patterns for SSE.  Non-SSE modes go
    straight to ix86_expand_vector_move.  */
 /* Code generation for scalar reg-reg moves of single and double precision data:
@@ -15672,7 +15732,7 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
 	case 32:
 	  op0 = gen_lowpart (V32QImode, op0);
 	  op1 = gen_lowpart (V32QImode, op1);
-	  emit_insn (gen_avx_movdqu256 (op0, op1));
+	  ix86_avx256_split_vector_move_misalign (op0, op1);
 	  break;
 	default:
 	  gcc_unreachable ();
@@ -15688,7 +15748,7 @@
 	  emit_insn (gen_avx_movups (op0, op1));
 	  break;
 	case V8SFmode:
-	  emit_insn (gen_avx_movups256 (op0, op1));
+	  ix86_avx256_split_vector_move_misalign (op0, op1);
 	  break;
 	case V2DFmode:
 	  if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
@@ -15701,7 +15761,7 @@
 	  emit_insn (gen_avx_movupd (op0, op1));
 	  break;
 	case V4DFmode:
-	  emit_insn (gen_avx_movupd256 (op0, op1));
+	  ix86_avx256_split_vector_move_misalign (op0, op1);
 	  break;
 	default:
 	  gcc_unreachable ();
diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
index e02d098..f63a406 100644
--- a/gcc/config/i386/i386.opt
+++ b/gcc/config/i386/i386.opt
@@ -420,3 +420,11 @@ Emit profiling counter call at function entry before prologue.
 m8bit-idiv
 Target Report Mask(USE_8BIT_IDIV) Save
 Expand 32bit/64bit integer divide into 8bit unsigned integer divide with run-time check
+
+mavx256-split-unaligned-load
+Target Report Mask(AVX256_SPLIT_UNALIGNED_LOAD) Save
+Split 32-byte AVX unaligned load
+
+mavx256-split-unaligned-store
+Target Report Mask(AVX256_SPLIT_UNALIGNED_STORE) Save
+Split 32-byte AVX unaligned store
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 70a0b34..de11f73 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -203,19 +203,35 @@
       return standard_sse_constant_opcode (insn, operands[1]);
     case 1:
     case 2:
+      if (GET_MODE_ALIGNMENT (<MODE>mode) == 256
+	  && ((TARGET_AVX256_SPLIT_UNALIGNED_STORE
+	       && misaligned_operand (operands[0], <MODE>mode))
+	      || (TARGET_AVX256_SPLIT_UNALIGNED_LOAD
+		  && misaligned_operand (operands[1], <MODE>mode))))
+	gcc_unreachable ();
       switch (get_attr_mode (insn))
 	{
 	case MODE_V8SF:
 	case MODE_V4SF:
-	  return "vmovaps\t{%1, %0|%0, %1}";
+	  if (misaligned_operand (operands[0], <MODE>mode)
+	      || misaligned_operand (operands[1], <MODE>mode))
+	    return "vmovups\t{%1, %0|%0, %1}";
+	  else
+	    return "vmovaps\t{%1, %0|%0, %1}";
 	case MODE_V4DF:
 	case MODE_V2DF:
-	  if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
+	  if (misaligned_operand (operands[0], <MODE>mode)
+	      || misaligned_operand (operands[1], <MODE>mode))
+	    return "vmovupd\t{%1, %0|%0, %1}";
+	  else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
 	    return "vmovaps\t{%1, %0|%0, %1}";
 	  else
 	    return "vmovapd\t{%1, %0|%0, %1}";
 	default:
-	  if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
+	  if (misaligned_operand (operands[0], <MODE>mode)
+	      || misaligned_operand (operands[1], <MODE>mode))
+	    return "vmovdqu\t{%1, %0|%0, %1}";
+	  else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
 	    return "vmovaps\t{%1, %0|%0, %1}";
 	  else
 	    return "vmovdqa\t{%1, %0|%0, %1}";
@@ -400,7 +416,15 @@
 	  UNSPEC_MOVU))]
   "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
    && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
-  "vmovu<ssemodesuffix>\t{%1, %0|%0, %1}"
+{
+  if (GET_MODE_ALIGNMENT (<MODE>mode) == 256
+      && ((TARGET_AVX256_SPLIT_UNALIGNED_STORE
+	   && misaligned_operand (operands[0], <MODE>mode))
+	  || (TARGET_AVX256_SPLIT_UNALIGNED_LOAD
+	      && misaligned_operand (operands[1], <MODE>mode))))
+    gcc_unreachable ();
+  return "vmovu<ssemodesuffix>\t{%1, %0|%0, %1}";
+}
   [(set_attr "type" "ssemov")
    (set_attr "movu" "1")
    (set_attr "prefix" "vex")
@@ -459,7 +483,15 @@
 	[(match_operand:AVXMODEQI 1 "nonimmediate_operand" "xm,x")]
 	UNSPEC_MOVU))]
   "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
-  "vmovdqu\t{%1, %0|%0, %1}"
+{
+  if (GET_MODE_ALIGNMENT (<MODE>mode) == 256
+      && ((TARGET_AVX256_SPLIT_UNALIGNED_STORE
+	   && misaligned_operand (operands[0], <MODE>mode))
+	  || (TARGET_AVX256_SPLIT_UNALIGNED_LOAD
+	      && misaligned_operand (operands[1], <MODE>mode))))
+    gcc_unreachable ();
+  return "vmovdqu\t{%1, %0|%0, %1}";
+}
   [(set_attr "type" "ssemov")
    (set_attr "movu" "1")
    (set_attr "prefix" "vex")
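
At the intrinsics level, the split performed by ix86_avx256_split_vector_move_misalign for V8SFmode corresponds roughly to the sketch below. The helper names are hypothetical and the real transformation happens on RTL; in practice the high half of the load is typically folded into the memory operand of vinsertf128 rather than loaded separately. V32QImode and V4DFmode are handled the same way, using vmovdqu and vmovupd halves.

/* Illustrative model only; compile with -mavx.  */
#include <immintrin.h>

/* Unaligned 256-bit load, split into two 128-bit halves
   (vmovups xmm + vinsertf128).  */
static inline __m256
split_loadu_ps (const float *p)
{
  __m128 lo = _mm_loadu_ps (p);        /* low 16 bytes  */
  __m128 hi = _mm_loadu_ps (p + 4);    /* high 16 bytes */
  return _mm256_insertf128_ps (_mm256_castps128_ps256 (lo), hi, 1);
}

/* Unaligned 256-bit store, split into two 128-bit halves
   (vmovups xmm + vextractf128 to memory).  */
static inline void
split_storeu_ps (float *p, __m256 v)
{
  _mm_storeu_ps (p, _mm256_castps256_ps128 (v));         /* low half  */
  _mm_storeu_ps (p + 4, _mm256_extractf128_ps (v, 1));   /* high half */
}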