author     H.J. Lu <hongjiu.lu@intel.com>    2011-03-27 18:56:00 +0000
committer  H.J. Lu <hjl@gcc.gnu.org>         2011-03-27 11:56:00 -0700
commit     d253656a7be7461c2fc7bb638e93b9943a91de9b (patch)
tree       6bde920e9a82430f7109eaf06eaa946470377eb2 /gcc/config
parent     c570af00f629db0876fcdbc5fb7ab12440b11d16 (diff)
download   gcc-d253656a7be7461c2fc7bb638e93b9943a91de9b.zip
           gcc-d253656a7be7461c2fc7bb638e93b9943a91de9b.tar.gz
           gcc-d253656a7be7461c2fc7bb638e93b9943a91de9b.tar.bz2
Split 32-byte AVX unaligned load/store.
gcc/

2011-03-27  H.J. Lu  <hongjiu.lu@intel.com>

        * config/i386/i386.c (flag_opts): Add -mavx256-split-unaligned-load
        and -mavx256-split-unaligned-store.
        (ix86_option_override_internal): Split 32-byte AVX unaligned
        load/store by default.
        (ix86_avx256_split_vector_move_misalign): New.
        (ix86_expand_vector_move_misalign): Use it.
        * config/i386/i386.opt: Add -mavx256-split-unaligned-load and
        -mavx256-split-unaligned-store.
        * config/i386/sse.md (*avx_mov<mode>_internal): Verify unaligned
        256bit load/store.  Generate unaligned store on misaligned memory
        operand.
        (*avx_movu<ssemodesuffix><avxmodesuffix>): Verify unaligned
        256bit load/store.
        (*avx_movdqu<avxmodesuffix>): Likewise.
        * doc/invoke.texi: Document -mavx256-split-unaligned-load and
        -mavx256-split-unaligned-store.

gcc/testsuite/

2011-03-27  H.J. Lu  <hongjiu.lu@intel.com>

        * gcc.target/i386/avx256-unaligned-load-1.c: New.
        * gcc.target/i386/avx256-unaligned-load-2.c: Likewise.
        * gcc.target/i386/avx256-unaligned-load-3.c: Likewise.
        * gcc.target/i386/avx256-unaligned-load-4.c: Likewise.
        * gcc.target/i386/avx256-unaligned-load-5.c: Likewise.
        * gcc.target/i386/avx256-unaligned-load-6.c: Likewise.
        * gcc.target/i386/avx256-unaligned-load-7.c: Likewise.
        * gcc.target/i386/avx256-unaligned-store-1.c: Likewise.
        * gcc.target/i386/avx256-unaligned-store-2.c: Likewise.
        * gcc.target/i386/avx256-unaligned-store-3.c: Likewise.
        * gcc.target/i386/avx256-unaligned-store-4.c: Likewise.
        * gcc.target/i386/avx256-unaligned-store-5.c: Likewise.
        * gcc.target/i386/avx256-unaligned-store-6.c: Likewise.
        * gcc.target/i386/avx256-unaligned-store-7.c: Likewise.

From-SVN: r171578
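The new testsuite files live outside gcc/config and are therefore not part of the diff shown below.  As a rough illustration only (the file contents, names, and directives here are assumptions, not the committed avx256-unaligned-load-*.c tests), a load test in this style would exercise the new split path:

/* Sketch of an avx256-unaligned-load style test; illustrative only.  */
/* { dg-do compile } */
/* { dg-options "-O3 -mavx -mavx256-split-unaligned-load" } */

#define N 1024

float a[N], b[N + 3], c[N];

void
avx_test (void)
{
  int i;

  /* b[i + 3] is only 4-byte aligned, so the vectorized 256-bit load
     from b is misaligned and should be split into 128-bit pieces.  */
  for (i = 0; i < N; i++)
    c[i] = a[i] * b[i + 3];
}

/* A real test would add scan-assembler directives here, e.g. checking
   that a 128-bit vmovups plus vinsertf128 is used instead of a single
   256-bit vmovups load.  */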
Diffstat (limited to 'gcc/config')
-rw-r--r--  gcc/config/i386/i386.c    76
-rw-r--r--  gcc/config/i386/i386.opt   8
-rw-r--r--  gcc/config/i386/sse.md    42
3 files changed, 113 insertions(+), 13 deletions(-)
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 4e8ca69..a4ca762 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -3130,6 +3130,8 @@ ix86_target_string (int isa, int flags, const char *arch, const char *tune,
{ "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
{ "-m8bit-idiv", MASK_USE_8BIT_IDIV },
{ "-mvzeroupper", MASK_VZEROUPPER },
+ { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
+ { "-mavx256-split-unaligned-stroe", MASK_AVX256_SPLIT_UNALIGNED_STORE},
};
const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
@@ -4274,11 +4276,18 @@ ix86_option_override_internal (bool main_args_p)
if (TARGET_AVX)
{
/* When not optimize for size, enable vzeroupper optimization for
- TARGET_AVX with -fexpensive-optimizations. */
- if (!optimize_size
- && flag_expensive_optimizations
- && !(target_flags_explicit & MASK_VZEROUPPER))
- target_flags |= MASK_VZEROUPPER;
+ TARGET_AVX with -fexpensive-optimizations and split 32-byte
+ AVX unaligned load/store. */
+ if (!optimize_size)
+ {
+ if (flag_expensive_optimizations
+ && !(target_flags_explicit & MASK_VZEROUPPER))
+ target_flags |= MASK_VZEROUPPER;
+ if (!(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
+ target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
+ if (!(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_STORE))
+ target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
+ }
}
else
{
@@ -15588,6 +15597,57 @@ ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
}
+/* Split 32-byte AVX unaligned load and store if needed. */
+
+static void
+ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
+{
+ rtx m;
+ rtx (*extract) (rtx, rtx, rtx);
+ rtx (*move_unaligned) (rtx, rtx);
+ enum machine_mode mode;
+
+ switch (GET_MODE (op0))
+ {
+ default:
+ gcc_unreachable ();
+ case V32QImode:
+ extract = gen_avx_vextractf128v32qi;
+ move_unaligned = gen_avx_movdqu256;
+ mode = V16QImode;
+ break;
+ case V8SFmode:
+ extract = gen_avx_vextractf128v8sf;
+ move_unaligned = gen_avx_movups256;
+ mode = V4SFmode;
+ break;
+ case V4DFmode:
+ extract = gen_avx_vextractf128v4df;
+ move_unaligned = gen_avx_movupd256;
+ mode = V2DFmode;
+ break;
+ }
+
+ if (MEM_P (op1) && TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
+ {
+ rtx r = gen_reg_rtx (mode);
+ m = adjust_address (op1, mode, 0);
+ emit_move_insn (r, m);
+ m = adjust_address (op1, mode, 16);
+ r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
+ emit_move_insn (op0, r);
+ }
+ else if (MEM_P (op0) && TARGET_AVX256_SPLIT_UNALIGNED_STORE)
+ {
+ m = adjust_address (op0, mode, 0);
+ emit_insn (extract (m, op1, const0_rtx));
+ m = adjust_address (op0, mode, 16);
+ emit_insn (extract (m, op1, const1_rtx));
+ }
+ else
+ emit_insn (move_unaligned (op0, op1));
+}
+
/* Implement the movmisalign patterns for SSE. Non-SSE modes go
straight to ix86_expand_vector_move. */
/* Code generation for scalar reg-reg moves of single and double precision data:
@@ -15672,7 +15732,7 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
case 32:
op0 = gen_lowpart (V32QImode, op0);
op1 = gen_lowpart (V32QImode, op1);
- emit_insn (gen_avx_movdqu256 (op0, op1));
+ ix86_avx256_split_vector_move_misalign (op0, op1);
break;
default:
gcc_unreachable ();
@@ -15688,7 +15748,7 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
emit_insn (gen_avx_movups (op0, op1));
break;
case V8SFmode:
- emit_insn (gen_avx_movups256 (op0, op1));
+ ix86_avx256_split_vector_move_misalign (op0, op1);
break;
case V2DFmode:
if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
@@ -15701,7 +15761,7 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
emit_insn (gen_avx_movupd (op0, op1));
break;
case V4DFmode:
- emit_insn (gen_avx_movupd256 (op0, op1));
+ ix86_avx256_split_vector_move_misalign (op0, op1);
break;
default:
gcc_unreachable ();
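For the i386.c changes above: the new ix86_avx256_split_vector_move_misalign helper performs the split at expand time.  A misaligned 256-bit load is rebuilt from a 128-bit load of the low half and a VEC_CONCAT with the high half, which comes out as vinsertf128, while a misaligned 256-bit store is emitted as two vextractf128 stores.  The fragment below sketches source code expected to reach the store path when vectorized with -O3 -mavx; the function name and the codegen notes in the comments are illustrative assumptions, not taken from the commit.

/* Sketch only: a misaligned 256-bit vector store under -O3 -mavx.  */
#define N 1024

double a[N], c[N], d[N + 1];

void
avx_test (void)
{
  int i;

  /* The store to d[i + 1] is misaligned for 32-byte vectors.  With
     -mavx256-split-unaligned-store it is expected to be emitted as two
     vextractf128 stores of the low and high 128-bit halves instead of
     one 256-bit vmovupd store.  */
  for (i = 0; i < N; i++)
    d[i + 1] = a[i] * c[i];
}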
diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
index e02d098..f63a406 100644
--- a/gcc/config/i386/i386.opt
+++ b/gcc/config/i386/i386.opt
@@ -420,3 +420,11 @@ Emit profiling counter call at function entry before prologue.
m8bit-idiv
Target Report Mask(USE_8BIT_IDIV) Save
Expand 32bit/64bit integer divide into 8bit unsigned integer divide with run-time check
+
+mavx256-split-unaligned-load
+Target Report Mask(AVX256_SPLIT_UNALIGNED_LOAD) Save
+Split 32-byte AVX unaligned load
+
+mavx256-split-unaligned-store
+Target Report Mask(AVX256_SPLIT_UNALIGNED_STORE) Save
+Split 32-byte AVX unaligned store
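Both options have the usual -mno- negative forms, so the splitting can be switched back off explicitly, for example on targets where full 256-bit unaligned moves are cheap.  The file below is an assumed, testsuite-style illustration of the negative form, not one of the committed tests:

/* Sketch only: with the split disabled, a misaligned 256-bit store
   should remain a single unaligned 256-bit move.  */
/* { dg-do compile } */
/* { dg-options "-O3 -mavx -mno-avx256-split-unaligned-store" } */

#define N 1024

float a[N], b[N + 3], c[N];

void
avx_test (void)
{
  int i;

  for (i = 0; i < N; i++)
    b[i + 3] = a[i] * c[i];
}

/* A real test would scan the assembler output for a 256-bit vmovups
   store and the absence of vextractf128.  */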
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 70a0b34..de11f73 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -203,19 +203,35 @@
return standard_sse_constant_opcode (insn, operands[1]);
case 1:
case 2:
+ if (GET_MODE_ALIGNMENT (<MODE>mode) == 256
+ && ((TARGET_AVX256_SPLIT_UNALIGNED_STORE
+ && misaligned_operand (operands[0], <MODE>mode))
+ || (TARGET_AVX256_SPLIT_UNALIGNED_LOAD
+ && misaligned_operand (operands[1], <MODE>mode))))
+ gcc_unreachable ();
switch (get_attr_mode (insn))
{
case MODE_V8SF:
case MODE_V4SF:
- return "vmovaps\t{%1, %0|%0, %1}";
+ if (misaligned_operand (operands[0], <MODE>mode)
+ || misaligned_operand (operands[1], <MODE>mode))
+ return "vmovups\t{%1, %0|%0, %1}";
+ else
+ return "vmovaps\t{%1, %0|%0, %1}";
case MODE_V4DF:
case MODE_V2DF:
- if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
+ if (misaligned_operand (operands[0], <MODE>mode)
+ || misaligned_operand (operands[1], <MODE>mode))
+ return "vmovupd\t{%1, %0|%0, %1}";
+ else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
return "vmovaps\t{%1, %0|%0, %1}";
else
return "vmovapd\t{%1, %0|%0, %1}";
default:
- if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
+ if (misaligned_operand (operands[0], <MODE>mode)
+ || misaligned_operand (operands[1], <MODE>mode))
+ return "vmovdqu\t{%1, %0|%0, %1}";
+ else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
return "vmovaps\t{%1, %0|%0, %1}";
else
return "vmovdqa\t{%1, %0|%0, %1}";
@@ -400,7 +416,15 @@
UNSPEC_MOVU))]
"AVX_VEC_FLOAT_MODE_P (<MODE>mode)
&& !(MEM_P (operands[0]) && MEM_P (operands[1]))"
- "vmovu<ssemodesuffix>\t{%1, %0|%0, %1}"
+{
+ if (GET_MODE_ALIGNMENT (<MODE>mode) == 256
+ && ((TARGET_AVX256_SPLIT_UNALIGNED_STORE
+ && misaligned_operand (operands[0], <MODE>mode))
+ || (TARGET_AVX256_SPLIT_UNALIGNED_LOAD
+ && misaligned_operand (operands[1], <MODE>mode))))
+ gcc_unreachable ();
+ return "vmovu<ssemodesuffix>\t{%1, %0|%0, %1}";
+}
[(set_attr "type" "ssemov")
(set_attr "movu" "1")
(set_attr "prefix" "vex")
@@ -459,7 +483,15 @@
[(match_operand:AVXMODEQI 1 "nonimmediate_operand" "xm,x")]
UNSPEC_MOVU))]
"TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
- "vmovdqu\t{%1, %0|%0, %1}"
+{
+ if (GET_MODE_ALIGNMENT (<MODE>mode) == 256
+ && ((TARGET_AVX256_SPLIT_UNALIGNED_STORE
+ && misaligned_operand (operands[0], <MODE>mode))
+ || (TARGET_AVX256_SPLIT_UNALIGNED_LOAD
+ && misaligned_operand (operands[1], <MODE>mode))))
+ gcc_unreachable ();
+ return "vmovdqu\t{%1, %0|%0, %1}";
+}
[(set_attr "type" "ssemov")
(set_attr "movu" "1")
(set_attr "prefix" "vex")