diff options
author | Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net> | 2015-10-14 10:03:13 +0000 |
---|---|---|
committer | Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net> | 2015-10-14 10:03:13 +0000 |
commit | c47edbef4cd3cf97a047db1296cb2dcea9485d2a (patch) | |
tree | babf2cb2f69aa552b9b3c55a89e567825c4566e5 | |
parent | 2df36481b6e20664b6a0206165e71cffda0569ba (diff) | |
download | llvm-c47edbef4cd3cf97a047db1296cb2dcea9485d2a.zip llvm-c47edbef4cd3cf97a047db1296cb2dcea9485d2a.tar.gz llvm-c47edbef4cd3cf97a047db1296cb2dcea9485d2a.tar.bz2 |
[x86][FastISel] Teach how to select nontemporal stores.
This patch teaches x86 fast-isel how to select nontemporal stores.
On x86, we can use MOVNTI for nontemporal stores of doublewords/quadwords.
Instructions (V)MOVNTPS/PD/DQ can be used for SSE2/AVX aligned nontemporal
vector stores.
Before this patch, fast-isel always selected 'movd/movq' instead of 'movnti'
for doubleword/quadword nontemporal stores. In the case of nontemporal stores
of aligned vectors, fast-isel always selected movaps/movapd/movdqa instead of
movntps/movntpd/movntdq.
With this patch, if we use SSE2/AVX intrinsics for nontemporal stores, we now
always get the expected (V)MOVNT instructions.
The lack of fast-isel support for nontemporal stores was spotted while analyzing
the -O0 codegen of code containing nontemporal stores.
Differential Revision: http://reviews.llvm.org/D13698
llvm-svn: 250285
-rw-r--r-- | llvm/lib/Target/X86/X86FastISel.cpp | 50 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/fast-isel-nontemporal.ll | 69 |
2 files changed, 103 insertions, 16 deletions
diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp index 17704da..0771cbeb4 100644 --- a/llvm/lib/Target/X86/X86FastISel.cpp +++ b/llvm/lib/Target/X86/X86FastISel.cpp @@ -433,6 +433,10 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM, bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill, X86AddressMode &AM, MachineMemOperand *MMO, bool Aligned) { + bool HasSSE2 = Subtarget->hasSSE2(); + bool HasAVX = Subtarget->hasAVX(); + bool IsNonTemporal = MMO && MMO->isNonTemporal(); + // Get opcode and regclass of the output for the given store instruction. unsigned Opc = 0; switch (VT.getSimpleVT().SimpleTy) { @@ -449,35 +453,49 @@ bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill, // FALLTHROUGH, handling i1 as i8. case MVT::i8: Opc = X86::MOV8mr; break; case MVT::i16: Opc = X86::MOV16mr; break; - case MVT::i32: Opc = X86::MOV32mr; break; - case MVT::i64: Opc = X86::MOV64mr; break; // Must be in x86-64 mode. + case MVT::i32: + Opc = (IsNonTemporal && HasSSE2) ? X86::MOVNTImr : X86::MOV32mr; + break; + case MVT::i64: + // Must be in x86-64 mode. + Opc = (IsNonTemporal && HasSSE2) ? X86::MOVNTI_64mr : X86::MOV64mr; + break; case MVT::f32: - Opc = X86ScalarSSEf32 ? - (Subtarget->hasAVX() ? X86::VMOVSSmr : X86::MOVSSmr) : X86::ST_Fp32m; + Opc = X86ScalarSSEf32 ? + (HasAVX ? X86::VMOVSSmr : X86::MOVSSmr) : X86::ST_Fp32m; break; case MVT::f64: Opc = X86ScalarSSEf64 ? - (Subtarget->hasAVX() ? X86::VMOVSDmr : X86::MOVSDmr) : X86::ST_Fp64m; + (HasAVX ? X86::VMOVSDmr : X86::MOVSDmr) : X86::ST_Fp64m; break; case MVT::v4f32: - if (Aligned) - Opc = Subtarget->hasAVX() ? X86::VMOVAPSmr : X86::MOVAPSmr; - else - Opc = Subtarget->hasAVX() ? X86::VMOVUPSmr : X86::MOVUPSmr; + if (Aligned) { + if (IsNonTemporal) + Opc = HasAVX ? X86::VMOVNTPSmr : X86::MOVNTPSmr; + else + Opc = HasAVX ? X86::VMOVAPSmr : X86::MOVAPSmr; + } else + Opc = HasAVX ? 
X86::VMOVUPSmr : X86::MOVUPSmr; break; case MVT::v2f64: - if (Aligned) - Opc = Subtarget->hasAVX() ? X86::VMOVAPDmr : X86::MOVAPDmr; - else - Opc = Subtarget->hasAVX() ? X86::VMOVUPDmr : X86::MOVUPDmr; + if (Aligned) { + if (IsNonTemporal) + Opc = HasAVX ? X86::VMOVNTPDmr : X86::MOVNTPDmr; + else + Opc = HasAVX ? X86::VMOVAPDmr : X86::MOVAPDmr; + } else + Opc = HasAVX ? X86::VMOVUPDmr : X86::MOVUPDmr; break; case MVT::v4i32: case MVT::v2i64: case MVT::v8i16: case MVT::v16i8: - if (Aligned) - Opc = Subtarget->hasAVX() ? X86::VMOVDQAmr : X86::MOVDQAmr; - else + if (Aligned) { + if (IsNonTemporal) + Opc = HasAVX ? X86::VMOVNTDQmr : X86::MOVNTDQmr; + else + Opc = HasAVX ? X86::VMOVDQAmr : X86::MOVDQAmr; + } else Opc = Subtarget->hasAVX() ? X86::VMOVDQUmr : X86::MOVDQUmr; break; } diff --git a/llvm/test/CodeGen/X86/fast-isel-nontemporal.ll b/llvm/test/CodeGen/X86/fast-isel-nontemporal.ll new file mode 100644 index 0000000..824d8c3 --- /dev/null +++ b/llvm/test/CodeGen/X86/fast-isel-nontemporal.ll @@ -0,0 +1,69 @@ +; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+sse2 -fast-isel -O0 < %s | FileCheck %s --check-prefix=ALL --check-prefix=SSE2 +; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx -fast-isel -O0 < %s | FileCheck %s --check-prefix=ALL --check-prefix=AVX + +define void @test_nti32(i32* nocapture %ptr, i32 %X) { +; ALL-LABEL: test_nti32: +; ALL: # BB#0: # %entry +; ALL-NEXT: movntil %esi, (%rdi) +; ALL-NEXT: retq +entry: + store i32 %X, i32* %ptr, align 4, !nontemporal !1 + ret void +} + +define void @test_nti64(i64* nocapture %ptr, i64 %X) { +; ALL-LABEL: test_nti64: +; ALL: # BB#0: # %entry +; ALL-NEXT: movntiq %rsi, (%rdi) +; ALL-NEXT: retq +entry: + store i64 %X, i64* %ptr, align 8, !nontemporal !1 + ret void +} + +define void @test_nt4xfloat(<4 x float>* nocapture %ptr, <4 x float> %X) { +; SSE2-LABEL: test_nt4xfloat: +; SSE2: # BB#0: # %entry +; SSE2-NEXT: movntps %xmm0, (%rdi) +; SSE2-NEXT: retq +; +; AVX-LABEL: test_nt4xfloat: +; AVX: # BB#0: # 
%entry +; AVX-NEXT: vmovntps %xmm0, (%rdi) +; AVX-NEXT: retq +entry: + store <4 x float> %X, <4 x float>* %ptr, align 16, !nontemporal !1 + ret void +} + +define void @test_nt2xdouble(<2 x double>* nocapture %ptr, <2 x double> %X) { +; SSE2-LABEL: test_nt2xdouble: +; SSE2: # BB#0: # %entry +; SSE2-NEXT: movntpd %xmm0, (%rdi) +; SSE2-NEXT: retq +; +; AVX-LABEL: test_nt2xdouble: +; AVX: # BB#0: # %entry +; AVX-NEXT: vmovntpd %xmm0, (%rdi) +; AVX-NEXT: retq +entry: + store <2 x double> %X, <2 x double>* %ptr, align 16, !nontemporal !1 + ret void +} + +define void @test_nt2xi64(<2 x i64>* nocapture %ptr, <2 x i64> %X) { +; SSE2-LABEL: test_nt2xi64: +; SSE2: # BB#0: # %entry +; SSE2-NEXT: movntdq %xmm0, (%rdi) +; SSE2-NEXT: retq +; +; AVX-LABEL: test_nt2xi64: +; AVX: # BB#0: # %entry +; AVX-NEXT: vmovntdq %xmm0, (%rdi) +; AVX-NEXT: retq +entry: + store <2 x i64> %X, <2 x i64>* %ptr, align 16, !nontemporal !1 + ret void +} + +!1 = !{i32 1} |