diff options
author | Tom Stellard <tstellar@redhat.com> | 2019-05-15 04:49:11 +0000 |
---|---|---|
committer | Tom Stellard <tstellar@redhat.com> | 2019-05-15 04:49:11 +0000 |
commit | 0203f70b7685e137abff634de5dec00bb54cb55d (patch) | |
tree | f95a1883d9a50065aeb1bae0e1b2410dab276417 | |
parent | 7c1f15e355fbbf8e238ffc4e8ffd7b80db4d9f93 (diff) | |
download | llvm-0203f70b7685e137abff634de5dec00bb54cb55d.zip llvm-0203f70b7685e137abff634de5dec00bb54cb55d.tar.gz llvm-0203f70b7685e137abff634de5dec00bb54cb55d.tar.bz2 |
Merging r360512:
------------------------------------------------------------------------
r360512 | ctopper | 2019-05-10 21:19:33 -0700 (Fri, 10 May 2019) | 5 lines
[X86] Don't emit MOVNTDQA loads from fast-isel without SSE4.1.
We were checking for SSE4.1 for FP types, but not for 128-bit integer types.
Fixes PR41837.
------------------------------------------------------------------------
llvm-svn: 360749
-rw-r--r-- | llvm/lib/Target/X86/X86FastISel.cpp | 2 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/fast-isel-nontemporal.ll | 72 |
2 files changed, 57 insertions, 17 deletions
diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp index 9dd3f26..12cd613 100644 --- a/llvm/lib/Target/X86/X86FastISel.cpp +++ b/llvm/lib/Target/X86/X86FastISel.cpp @@ -399,7 +399,7 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM, case MVT::v2i64: case MVT::v8i16: case MVT::v16i8: - if (IsNonTemporal && Alignment >= 16) + if (IsNonTemporal && Alignment >= 16 && HasSSE41) Opc = HasVLX ? X86::VMOVNTDQAZ128rm : HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm; else if (Alignment >= 16) diff --git a/llvm/test/CodeGen/X86/fast-isel-nontemporal.ll b/llvm/test/CodeGen/X86/fast-isel-nontemporal.ll index db1ebfe..37e380b 100644 --- a/llvm/test/CodeGen/X86/fast-isel-nontemporal.ll +++ b/llvm/test/CodeGen/X86/fast-isel-nontemporal.ll @@ -300,10 +300,20 @@ entry: } define <16 x i8> @test_load_nt16xi8(<16 x i8>* nocapture %ptr) { -; SSE-LABEL: test_load_nt16xi8: -; SSE: # %bb.0: # %entry -; SSE-NEXT: movntdqa (%rdi), %xmm0 -; SSE-NEXT: retq +; SSE2-LABEL: test_load_nt16xi8: +; SSE2: # %bb.0: # %entry +; SSE2-NEXT: movdqa (%rdi), %xmm0 +; SSE2-NEXT: retq +; +; SSE4A-LABEL: test_load_nt16xi8: +; SSE4A: # %bb.0: # %entry +; SSE4A-NEXT: movdqa (%rdi), %xmm0 +; SSE4A-NEXT: retq +; +; SSE41-LABEL: test_load_nt16xi8: +; SSE41: # %bb.0: # %entry +; SSE41-NEXT: movntdqa (%rdi), %xmm0 +; SSE41-NEXT: retq ; ; AVX-LABEL: test_load_nt16xi8: ; AVX: # %bb.0: # %entry @@ -320,10 +330,20 @@ entry: } define <8 x i16> @test_load_nt8xi16(<8 x i16>* nocapture %ptr) { -; SSE-LABEL: test_load_nt8xi16: -; SSE: # %bb.0: # %entry -; SSE-NEXT: movntdqa (%rdi), %xmm0 -; SSE-NEXT: retq +; SSE2-LABEL: test_load_nt8xi16: +; SSE2: # %bb.0: # %entry +; SSE2-NEXT: movdqa (%rdi), %xmm0 +; SSE2-NEXT: retq +; +; SSE4A-LABEL: test_load_nt8xi16: +; SSE4A: # %bb.0: # %entry +; SSE4A-NEXT: movdqa (%rdi), %xmm0 +; SSE4A-NEXT: retq +; +; SSE41-LABEL: test_load_nt8xi16: +; SSE41: # %bb.0: # %entry +; SSE41-NEXT: movntdqa (%rdi), %xmm0 +; SSE41-NEXT: retq ; ; AVX-LABEL: 
test_load_nt8xi16: ; AVX: # %bb.0: # %entry @@ -340,10 +360,20 @@ entry: } define <4 x i32> @test_load_nt4xi32(<4 x i32>* nocapture %ptr) { -; SSE-LABEL: test_load_nt4xi32: -; SSE: # %bb.0: # %entry -; SSE-NEXT: movntdqa (%rdi), %xmm0 -; SSE-NEXT: retq +; SSE2-LABEL: test_load_nt4xi32: +; SSE2: # %bb.0: # %entry +; SSE2-NEXT: movdqa (%rdi), %xmm0 +; SSE2-NEXT: retq +; +; SSE4A-LABEL: test_load_nt4xi32: +; SSE4A: # %bb.0: # %entry +; SSE4A-NEXT: movdqa (%rdi), %xmm0 +; SSE4A-NEXT: retq +; +; SSE41-LABEL: test_load_nt4xi32: +; SSE41: # %bb.0: # %entry +; SSE41-NEXT: movntdqa (%rdi), %xmm0 +; SSE41-NEXT: retq ; ; AVX-LABEL: test_load_nt4xi32: ; AVX: # %bb.0: # %entry @@ -360,10 +390,20 @@ entry: } define <2 x i64> @test_load_nt2xi64(<2 x i64>* nocapture %ptr) { -; SSE-LABEL: test_load_nt2xi64: -; SSE: # %bb.0: # %entry -; SSE-NEXT: movntdqa (%rdi), %xmm0 -; SSE-NEXT: retq +; SSE2-LABEL: test_load_nt2xi64: +; SSE2: # %bb.0: # %entry +; SSE2-NEXT: movdqa (%rdi), %xmm0 +; SSE2-NEXT: retq +; +; SSE4A-LABEL: test_load_nt2xi64: +; SSE4A: # %bb.0: # %entry +; SSE4A-NEXT: movdqa (%rdi), %xmm0 +; SSE4A-NEXT: retq +; +; SSE41-LABEL: test_load_nt2xi64: +; SSE41: # %bb.0: # %entry +; SSE41-NEXT: movntdqa (%rdi), %xmm0 +; SSE41-NEXT: retq ; ; AVX-LABEL: test_load_nt2xi64: ; AVX: # %bb.0: # %entry |