aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorUros Bizjak <ubizjak@gmail.com>2012-10-15 20:42:03 +0200
committerUros Bizjak <uros@gcc.gnu.org>2012-10-15 20:42:03 +0200
commit860f5e77b7244a305026eb257d575a4856edfbe4 (patch)
tree4c405e48e40b6eb6f9cdca5bf11f51769cb73d2a /gcc
parent81833173943fec6dec7287b475303e9d22387386 (diff)
downloadgcc-860f5e77b7244a305026eb257d575a4856edfbe4.zip
gcc-860f5e77b7244a305026eb257d575a4856edfbe4.tar.gz
gcc-860f5e77b7244a305026eb257d575a4856edfbe4.tar.bz2
sse.md (UNSPEC_MOVU): Remove.
* config/i386/sse.md (UNSPEC_MOVU): Remove. (UNSPEC_LOADU): New. (UNSPEC_STOREU): Ditto. (<sse>_movu<ssemodesuffix><avxsizesuffix>): Split to ... (<sse>_loadu<ssemodesuffix><avxsizesuffix>): ... this and ... (<sse>_storeu<ssemodesuffix><avxsizesuffix>) ... this. (<sse2>_movdqu<avxsizesuffix>): Split to ... (<sse2>_loaddqu<avxsizesuffix>): ... this and ... (<sse2>_storedqu<avxsizesuffix>): ... this. (*sse4_2_pcmpestr_unaligned): Update. (*sse4_2_pcmpistr_unaligned): Ditto. * config/i386/i386.c (ix86_avx256_split_vector_move_misalign): Use gen_avx_load{dqu,ups,upd}256 to load from unaligned memory and gen_avx_store{dqu,ups,upd}256 to store to unaligned memory. (ix86_expand_vector_move_misalign): Use gen_sse_loadups or gen_sse2_load{dqu,upd} to load from unaligned memory and gen_sse_loadups or gen_sse2_store{dqu,upd}256 to store to unaligned memory. (struct builtin_description bdesc_spec) <IX86_BUILTIN_LOADUPS>: Use CODE_FOR_sse_loadups. <IX86_BUILTIN_LOADUPD>: Use CODE_FOR_sse2_loadupd. <IX86_BUILTIN_LOADDQU>: Use CODE_FOR_sse2_loaddqu. <IX86_BUILTIN_STOREUPS>: Use CODE_FOR_sse_storeups. <IX86_BUILTIN_STOREUPD>: Use CODE_FOR_sse2_storeupd. <IX86_BUILTIN_STOREDQU>: Use CODE_FOR_sse2_storedqu. <IX86_BUILTIN_LOADUPS256>: Use CODE_FOR_avx_loadups256. <IX86_BUILTIN_LOADUPD256>: Use CODE_FOR_avx_loadupd256. <IX86_BUILTIN_LOADDQU256>: Use CODE_FOR_avx_loaddqu256. <IX86_BUILTIN_STOREUPS256>: Use CODE_FOR_avx_storeups256. <IX86_BUILTIN_STOREUPD256>: Use CODE_FOR_avx_storeupd256. <IX86_BUILTIN_STOREDQU256>: Use CODE_FOR_avx_storedqu256. testsuite/ChangeLog: * gcc.target/i386/avx256-unaligned-load-1.c: Update asm scan patterns. * gcc.target/i386/avx256-unaligned-load-2.c: Ditto. * gcc.target/i386/avx256-unaligned-load-3.c: Ditto. * gcc.target/i386/avx256-unaligned-load-4.c: Ditto. * gcc.target/i386/avx256-unaligned-store-1.c: Ditto. * gcc.target/i386/avx256-unaligned-store-2.c: Ditto. * gcc.target/i386/avx256-unaligned-store-3.c: Ditto. * gcc.target/i386/avx256-unaligned-store-4.c: Ditto. From-SVN: r192468
Diffstat (limited to 'gcc')
-rw-r--r--gcc/ChangeLog35
-rw-r--r--gcc/config/i386/i386.c84
-rw-r--r--gcc/config/i386/sse.md103
-rw-r--r--gcc/testsuite/gcc.target/i386/avx256-unaligned-load-1.c4
-rw-r--r--gcc/testsuite/gcc.target/i386/avx256-unaligned-load-2.c4
-rw-r--r--gcc/testsuite/gcc.target/i386/avx256-unaligned-load-3.c4
-rw-r--r--gcc/testsuite/gcc.target/i386/avx256-unaligned-load-4.c4
-rw-r--r--gcc/testsuite/gcc.target/i386/avx256-unaligned-store-1.c2
-rw-r--r--gcc/testsuite/gcc.target/i386/avx256-unaligned-store-2.c2
-rw-r--r--gcc/testsuite/gcc.target/i386/avx256-unaligned-store-3.c2
-rw-r--r--gcc/testsuite/gcc.target/i386/avx256-unaligned-store-4.c4
11 files changed, 183 insertions, 65 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 20ed440..f646294 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,38 @@
+2012-10-13 Uros Bizjak <ubizjak@gmail.com>
+
+ * config/i386/sse.md (UNSPEC_MOVU): Remove.
+ (UNSPEC_LOADU): New.
+ (UNSPEC_STOREU): Ditto.
+ (<sse>_movu<ssemodesuffix><avxsizesuffix>): Split to ...
+ (<sse>_loadu<ssemodesuffix><avxsizesuffix>): ... this and ...
+ (<sse>_storeu<ssemodesuffix><avxsizesuffix>) ... this.
+ (<sse2>_movdqu<avxsizesuffix>): Split to ...
+ (<sse2>_loaddqu<avxsizesuffix>): ... this and ...
+ (<sse2>_storedqu<avxsizesuffix>): ... this.
+ (*sse4_2_pcmpestr_unaligned): Update.
+ (*sse4_2_pcmpistr_unaligned): Ditto.
+
+ * config/i386/i386.c (ix86_avx256_split_vector_move_misalign): Use
+ gen_avx_load{dqu,ups,upd}256 to load from unaligned memory and
+ gen_avx_store{dqu,ups,upd}256 to store to unaligned memory.
+ (ix86_expand_vector_move_misalign): Use gen_sse_loadups or
+ gen_sse2_load{dqu,upd} to load from unaligned memory and
+ gen_sse_loadups or gen_sse2_store{dqu,upd}256 to store to
+ unaligned memory.
+ (struct builtin_description bdesc_spec) <IX86_BUILTIN_LOADUPS>:
+ Use CODE_FOR_sse_loadups.
+ <IX86_BUILTIN_LOADUPD>: Use CODE_FOR_sse2_loadupd.
+ <IX86_BUILTIN_LOADDQU>: Use CODE_FOR_sse2_loaddqu.
+ <IX86_BUILTIN_STOREUPS>: Use CODE_FOR_sse_storeups.
+ <IX86_BUILTIN_STOREUPD>: Use CODE_FOR_sse2_storeupd.
+ <IX86_BUILTIN_STOREDQU>: Use CODE_FOR_sse2_storedqu.
+ <IX86_BUILTIN_LOADUPS256>: Use CODE_FOR_avx_loadups256.
+ <IX86_BUILTIN_LOADUPD256>: Use CODE_FOR_avx_loadupd256.
+ <IX86_BUILTIN_LOADDQU256>: Use CODE_FOR_avx_loaddqu256.
+ <IX86_BUILTIN_STOREUPS256>: Use CODE_FOR_avx_storeups256.
+ <IX86_BUILTIN_STOREUPD256>: Use CODE_FOR_avx_storeupd256.
+ <IX86_BUILTIN_STOREDQU256>: Use CODE_FOR_avx_storedqu256.
+
2012-10-15 Dodji Seketeli <dodji@redhat.com>
* alias.c: Cleanup comments.
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index c10e49458..c345c20 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -16059,7 +16059,8 @@ ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
{
rtx m;
rtx (*extract) (rtx, rtx, rtx);
- rtx (*move_unaligned) (rtx, rtx);
+ rtx (*load_unaligned) (rtx, rtx);
+ rtx (*store_unaligned) (rtx, rtx);
enum machine_mode mode;
switch (GET_MODE (op0))
@@ -16068,39 +16069,52 @@ ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
gcc_unreachable ();
case V32QImode:
extract = gen_avx_vextractf128v32qi;
- move_unaligned = gen_avx_movdqu256;
+ load_unaligned = gen_avx_loaddqu256;
+ store_unaligned = gen_avx_storedqu256;
mode = V16QImode;
break;
case V8SFmode:
extract = gen_avx_vextractf128v8sf;
- move_unaligned = gen_avx_movups256;
+ load_unaligned = gen_avx_loadups256;
+ store_unaligned = gen_avx_storeups256;
mode = V4SFmode;
break;
case V4DFmode:
extract = gen_avx_vextractf128v4df;
- move_unaligned = gen_avx_movupd256;
+ load_unaligned = gen_avx_loadupd256;
+ store_unaligned = gen_avx_storeupd256;
mode = V2DFmode;
break;
}
- if (MEM_P (op1) && TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
+ if (MEM_P (op1))
{
- rtx r = gen_reg_rtx (mode);
- m = adjust_address (op1, mode, 0);
- emit_move_insn (r, m);
- m = adjust_address (op1, mode, 16);
- r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
- emit_move_insn (op0, r);
+ if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
+ {
+ rtx r = gen_reg_rtx (mode);
+ m = adjust_address (op1, mode, 0);
+ emit_move_insn (r, m);
+ m = adjust_address (op1, mode, 16);
+ r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
+ emit_move_insn (op0, r);
+ }
+ else
+ emit_insn (load_unaligned (op0, op1));
}
- else if (MEM_P (op0) && TARGET_AVX256_SPLIT_UNALIGNED_STORE)
+ else if (MEM_P (op0))
{
- m = adjust_address (op0, mode, 0);
- emit_insn (extract (m, op1, const0_rtx));
- m = adjust_address (op0, mode, 16);
- emit_insn (extract (m, op1, const1_rtx));
+ if (TARGET_AVX256_SPLIT_UNALIGNED_STORE)
+ {
+ m = adjust_address (op0, mode, 0);
+ emit_insn (extract (m, op1, const0_rtx));
+ m = adjust_address (op0, mode, 16);
+ emit_insn (extract (m, op1, const1_rtx));
+ }
+ else
+ emit_insn (store_unaligned (op0, op1));
}
else
- emit_insn (move_unaligned (op0, op1));
+ gcc_unreachable ();
}
/* Implement the movmisalign patterns for SSE. Non-SSE modes go
@@ -16195,7 +16209,7 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
op0 = gen_lowpart (V16QImode, op0);
op1 = gen_lowpart (V16QImode, op1);
/* We will eventually emit movups based on insn attributes. */
- emit_insn (gen_sse2_movdqu (op0, op1));
+ emit_insn (gen_sse2_loaddqu (op0, op1));
}
else if (TARGET_SSE2 && mode == V2DFmode)
{
@@ -16207,7 +16221,7 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
|| optimize_function_for_size_p (cfun))
{
/* We will eventually emit movups based on insn attributes. */
- emit_insn (gen_sse2_movupd (op0, op1));
+ emit_insn (gen_sse2_loadupd (op0, op1));
return;
}
@@ -16245,7 +16259,7 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
{
op0 = gen_lowpart (V4SFmode, op0);
op1 = gen_lowpart (V4SFmode, op1);
- emit_insn (gen_sse_movups (op0, op1));
+ emit_insn (gen_sse_loadups (op0, op1));
return;
}
@@ -16270,7 +16284,7 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
op0 = gen_lowpart (V16QImode, op0);
op1 = gen_lowpart (V16QImode, op1);
/* We will eventually emit movups based on insn attributes. */
- emit_insn (gen_sse2_movdqu (op0, op1));
+ emit_insn (gen_sse2_storedqu (op0, op1));
}
else if (TARGET_SSE2 && mode == V2DFmode)
{
@@ -16279,7 +16293,7 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
|| TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
|| optimize_function_for_size_p (cfun))
/* We will eventually emit movups based on insn attributes. */
- emit_insn (gen_sse2_movupd (op0, op1));
+ emit_insn (gen_sse2_storeupd (op0, op1));
else
{
m = adjust_address (op0, DFmode, 0);
@@ -16299,7 +16313,7 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
|| optimize_function_for_size_p (cfun))
{
op0 = gen_lowpart (V4SFmode, op0);
- emit_insn (gen_sse_movups (op0, op1));
+ emit_insn (gen_sse_storeups (op0, op1));
}
else
{
@@ -26765,9 +26779,9 @@ static const struct builtin_description bdesc_special_args[] =
{ OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
/* SSE */
- { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
- { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
@@ -26781,14 +26795,14 @@ static const struct builtin_description bdesc_special_args[] =
/* SSE2 */
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
- { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
- { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
{ OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
- { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
- { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
@@ -26813,12 +26827,12 @@ static const struct builtin_description bdesc_special_args[] =
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
- { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
- { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
- { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
- { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
- { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
- { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index a73c815..299b0d9 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -21,7 +21,8 @@
(define_c_enum "unspec" [
;; SSE
UNSPEC_MOVNT
- UNSPEC_MOVU
+ UNSPEC_LOADU
+ UNSPEC_STOREU
;; SSE3
UNSPEC_LDDQU
@@ -586,12 +587,12 @@
DONE;
})
-(define_insn "<sse>_movu<ssemodesuffix><avxsizesuffix>"
- [(set (match_operand:VF 0 "nonimmediate_operand" "=x,m")
+(define_insn "<sse>_loadu<ssemodesuffix><avxsizesuffix>"
+ [(set (match_operand:VF 0 "register_operand" "=x")
(unspec:VF
- [(match_operand:VF 1 "nonimmediate_operand" "xm,x")]
- UNSPEC_MOVU))]
- "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ [(match_operand:VF 1 "memory_operand" "m")]
+ UNSPEC_LOADU))]
+ "TARGET_SSE"
{
switch (get_attr_mode (insn))
{
@@ -618,11 +619,79 @@
]
(const_string "<MODE>")))])
-(define_insn "<sse2>_movdqu<avxsizesuffix>"
- [(set (match_operand:VI1 0 "nonimmediate_operand" "=x,m")
- (unspec:VI1 [(match_operand:VI1 1 "nonimmediate_operand" "xm,x")]
- UNSPEC_MOVU))]
- "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+(define_insn "<sse>_storeu<ssemodesuffix><avxsizesuffix>"
+ [(set (match_operand:VF 0 "memory_operand" "=m")
+ (unspec:VF
+ [(match_operand:VF 1 "register_operand" "x")]
+ UNSPEC_STOREU))]
+ "TARGET_SSE"
+{
+ switch (get_attr_mode (insn))
+ {
+ case MODE_V8SF:
+ case MODE_V4SF:
+ return "%vmovups\t{%1, %0|%0, %1}";
+ default:
+ return "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}";
+ }
+}
+ [(set_attr "type" "ssemov")
+ (set_attr "movu" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set (attr "mode")
+ (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
+ (const_string "<ssePSmode>")
+ (and (eq_attr "alternative" "1")
+ (match_test "TARGET_SSE_TYPELESS_STORES"))
+ (const_string "<ssePSmode>")
+ (match_test "TARGET_AVX")
+ (const_string "<MODE>")
+ (match_test "optimize_function_for_size_p (cfun)")
+ (const_string "V4SF")
+ ]
+ (const_string "<MODE>")))])
+
+(define_insn "<sse2>_loaddqu<avxsizesuffix>"
+ [(set (match_operand:VI1 0 "register_operand" "=x")
+ (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
+ UNSPEC_LOADU))]
+ "TARGET_SSE2"
+{
+ switch (get_attr_mode (insn))
+ {
+ case MODE_V8SF:
+ case MODE_V4SF:
+ return "%vmovups\t{%1, %0|%0, %1}";
+ default:
+ return "%vmovdqu\t{%1, %0|%0, %1}";
+ }
+}
+ [(set_attr "type" "ssemov")
+ (set_attr "movu" "1")
+ (set (attr "prefix_data16")
+ (if_then_else
+ (match_test "TARGET_AVX")
+ (const_string "*")
+ (const_string "1")))
+ (set_attr "prefix" "maybe_vex")
+ (set (attr "mode")
+ (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
+ (const_string "<ssePSmode>")
+ (and (eq_attr "alternative" "1")
+ (match_test "TARGET_SSE_TYPELESS_STORES"))
+ (const_string "<ssePSmode>")
+ (match_test "TARGET_AVX")
+ (const_string "<sseinsnmode>")
+ (match_test "optimize_function_for_size_p (cfun)")
+ (const_string "V4SF")
+ ]
+ (const_string "<sseinsnmode>")))])
+
+(define_insn "<sse2>_storedqu<avxsizesuffix>"
+ [(set (match_operand:VI1 0 "memory_operand" "=m")
+ (unspec:VI1 [(match_operand:VI1 1 "register_operand" "x")]
+ UNSPEC_STOREU))]
+ "TARGET_SSE2"
{
switch (get_attr_mode (insn))
{
@@ -9307,7 +9376,7 @@
(match_operand:SI 3 "register_operand" "a")
(unspec:V16QI
[(match_operand:V16QI 4 "memory_operand" "m")]
- UNSPEC_MOVU)
+ UNSPEC_LOADU)
(match_operand:SI 5 "register_operand" "d")
(match_operand:SI 6 "const_0_to_255_operand" "n")]
UNSPEC_PCMPESTR))
@@ -9315,7 +9384,7 @@
(unspec:V16QI
[(match_dup 2)
(match_dup 3)
- (unspec:V16QI [(match_dup 4)] UNSPEC_MOVU)
+ (unspec:V16QI [(match_dup 4)] UNSPEC_LOADU)
(match_dup 5)
(match_dup 6)]
UNSPEC_PCMPESTR))
@@ -9323,7 +9392,7 @@
(unspec:CC
[(match_dup 2)
(match_dup 3)
- (unspec:V16QI [(match_dup 4)] UNSPEC_MOVU)
+ (unspec:V16QI [(match_dup 4)] UNSPEC_LOADU)
(match_dup 5)
(match_dup 6)]
UNSPEC_PCMPESTR))]
@@ -9498,19 +9567,19 @@
[(match_operand:V16QI 2 "register_operand" "x")
(unspec:V16QI
[(match_operand:V16QI 3 "memory_operand" "m")]
- UNSPEC_MOVU)
+ UNSPEC_LOADU)
(match_operand:SI 4 "const_0_to_255_operand" "n")]
UNSPEC_PCMPISTR))
(set (match_operand:V16QI 1 "register_operand" "=Yz")
(unspec:V16QI
[(match_dup 2)
- (unspec:V16QI [(match_dup 3)] UNSPEC_MOVU)
+ (unspec:V16QI [(match_dup 3)] UNSPEC_LOADU)
(match_dup 4)]
UNSPEC_PCMPISTR))
(set (reg:CC FLAGS_REG)
(unspec:CC
[(match_dup 2)
- (unspec:V16QI [(match_dup 3)] UNSPEC_MOVU)
+ (unspec:V16QI [(match_dup 3)] UNSPEC_LOADU)
(match_dup 4)]
UNSPEC_PCMPISTR))]
"TARGET_SSE4_2
diff --git a/gcc/testsuite/gcc.target/i386/avx256-unaligned-load-1.c b/gcc/testsuite/gcc.target/i386/avx256-unaligned-load-1.c
index c2511c6..e7eef6d 100644
--- a/gcc/testsuite/gcc.target/i386/avx256-unaligned-load-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx256-unaligned-load-1.c
@@ -14,6 +14,6 @@ avx_test (void)
c[i] = a[i] * b[i+3];
}
-/* { dg-final { scan-assembler-not "avx_movups256/1" } } */
-/* { dg-final { scan-assembler "sse_movups/1" } } */
+/* { dg-final { scan-assembler-not "avx_loadups256" } } */
+/* { dg-final { scan-assembler "sse_loadups" } } */
/* { dg-final { scan-assembler "vinsertf128" } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx256-unaligned-load-2.c b/gcc/testsuite/gcc.target/i386/avx256-unaligned-load-2.c
index 9d71673..3f4fbf7 100644
--- a/gcc/testsuite/gcc.target/i386/avx256-unaligned-load-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx256-unaligned-load-2.c
@@ -24,6 +24,6 @@ avx_test (void)
}
}
-/* { dg-final { scan-assembler-not "avx_movdqu256/1" } } */
-/* { dg-final { scan-assembler "sse2_movdqu/1" } } */
+/* { dg-final { scan-assembler-not "avx_loaddqu256" } } */
+/* { dg-final { scan-assembler "sse2_loaddqu" } } */
/* { dg-final { scan-assembler "vinsert.128" } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx256-unaligned-load-3.c b/gcc/testsuite/gcc.target/i386/avx256-unaligned-load-3.c
index efb5f57..b0e0e79 100644
--- a/gcc/testsuite/gcc.target/i386/avx256-unaligned-load-3.c
+++ b/gcc/testsuite/gcc.target/i386/avx256-unaligned-load-3.c
@@ -14,6 +14,6 @@ avx_test (void)
c[i] = a[i] * b[i+3];
}
-/* { dg-final { scan-assembler-not "avx_movupd256/1" } } */
-/* { dg-final { scan-assembler "sse2_movupd/1" } } */
+/* { dg-final { scan-assembler-not "avx_loadupd256" } } */
+/* { dg-final { scan-assembler "sse2_loadupd" } } */
/* { dg-final { scan-assembler "vinsertf128" } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx256-unaligned-load-4.c b/gcc/testsuite/gcc.target/i386/avx256-unaligned-load-4.c
index e527b38..e0eb92b 100644
--- a/gcc/testsuite/gcc.target/i386/avx256-unaligned-load-4.c
+++ b/gcc/testsuite/gcc.target/i386/avx256-unaligned-load-4.c
@@ -14,6 +14,6 @@ avx_test (void)
b[i] = a[i+3] * 2;
}
-/* { dg-final { scan-assembler "avx_movups256/1" } } */
-/* { dg-final { scan-assembler-not "avx_movups/1" } } */
+/* { dg-final { scan-assembler "avx_loadups256" } } */
+/* { dg-final { scan-assembler-not "sse_loadups" } } */
/* { dg-final { scan-assembler-not "vinsertf128" } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-1.c b/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-1.c
index 0b58396..1a53ba1 100644
--- a/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-1.c
@@ -17,6 +17,6 @@ avx_test (void)
d[i] = c[i] * 20.0;
}
-/* { dg-final { scan-assembler-not "avx_movups256/2" } } */
+/* { dg-final { scan-assembler-not "avx_storeups256" } } */
/* { dg-final { scan-assembler "vmovups.*\\*movv4sf_internal/3" } } */
/* { dg-final { scan-assembler "vextractf128" } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-2.c b/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-2.c
index eac460f..e98d1b6 100644
--- a/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-2.c
@@ -24,6 +24,6 @@ avx_test (void)
}
}
-/* { dg-final { scan-assembler-not "avx_movdqu256/2" } } */
+/* { dg-final { scan-assembler-not "avx_storedqu256" } } */
/* { dg-final { scan-assembler "vmovdqu.*\\*movv16qi_internal/3" } } */
/* { dg-final { scan-assembler "vextract.128" } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-3.c b/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-3.c
index 7536258..26c993b 100644
--- a/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-3.c
+++ b/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-3.c
@@ -17,6 +17,6 @@ avx_test (void)
d[i] = c[i] * 20.0;
}
-/* { dg-final { scan-assembler-not "avx_movupd256/2" } } */
+/* { dg-final { scan-assembler-not "avx_storeupd256" } } */
/* { dg-final { scan-assembler "vmovupd.*\\*movv2df_internal/3" } } */
/* { dg-final { scan-assembler "vextractf128" } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-4.c b/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-4.c
index 39b6f3b..6d734fa 100644
--- a/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-4.c
+++ b/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-4.c
@@ -14,7 +14,7 @@ avx_test (void)
b[i+3] = a[i] * c[i];
}
-/* { dg-final { scan-assembler "avx_movups256/2" } } */
-/* { dg-final { scan-assembler-not "avx_movups/2" } } */
+/* { dg-final { scan-assembler "avx_storeups256" } } */
+/* { dg-final { scan-assembler-not "sse_storeups" } } */
/* { dg-final { scan-assembler-not "\\*avx_movv4sf_internal/3" } } */
/* { dg-final { scan-assembler-not "vextractf128" } } */