aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Tom Stellard <tstellar@redhat.com> 2019-05-15 04:49:11 +0000
committer: Tom Stellard <tstellar@redhat.com> 2019-05-15 04:49:11 +0000
commit: 0203f70b7685e137abff634de5dec00bb54cb55d (patch)
tree: f95a1883d9a50065aeb1bae0e1b2410dab276417
parent: 7c1f15e355fbbf8e238ffc4e8ffd7b80db4d9f93 (diff)
download: llvm-0203f70b7685e137abff634de5dec00bb54cb55d.zip
llvm-0203f70b7685e137abff634de5dec00bb54cb55d.tar.gz
llvm-0203f70b7685e137abff634de5dec00bb54cb55d.tar.bz2
Merging r360512:
------------------------------------------------------------------------
r360512 | ctopper | 2019-05-10 21:19:33 -0700 (Fri, 10 May 2019) | 5 lines

[X86] Don't emit MOVNTDQA loads from fast-isel without SSE4.1.

We were checking for SSE4.1 for FP types, but not integer 128-bit types.

Fixes PR41837.
------------------------------------------------------------------------

llvm-svn: 360749
-rw-r--r--  llvm/lib/Target/X86/X86FastISel.cpp  2
-rw-r--r--  llvm/test/CodeGen/X86/fast-isel-nontemporal.ll  72
2 files changed, 57 insertions, 17 deletions
diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp
index 9dd3f26..12cd613 100644
--- a/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/llvm/lib/Target/X86/X86FastISel.cpp
@@ -399,7 +399,7 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
case MVT::v2i64:
case MVT::v8i16:
case MVT::v16i8:
- if (IsNonTemporal && Alignment >= 16)
+ if (IsNonTemporal && Alignment >= 16 && HasSSE41)
Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
else if (Alignment >= 16)
diff --git a/llvm/test/CodeGen/X86/fast-isel-nontemporal.ll b/llvm/test/CodeGen/X86/fast-isel-nontemporal.ll
index db1ebfe..37e380b 100644
--- a/llvm/test/CodeGen/X86/fast-isel-nontemporal.ll
+++ b/llvm/test/CodeGen/X86/fast-isel-nontemporal.ll
@@ -300,10 +300,20 @@ entry:
}
define <16 x i8> @test_load_nt16xi8(<16 x i8>* nocapture %ptr) {
-; SSE-LABEL: test_load_nt16xi8:
-; SSE: # %bb.0: # %entry
-; SSE-NEXT: movntdqa (%rdi), %xmm0
-; SSE-NEXT: retq
+; SSE2-LABEL: test_load_nt16xi8:
+; SSE2: # %bb.0: # %entry
+; SSE2-NEXT: movdqa (%rdi), %xmm0
+; SSE2-NEXT: retq
+;
+; SSE4A-LABEL: test_load_nt16xi8:
+; SSE4A: # %bb.0: # %entry
+; SSE4A-NEXT: movdqa (%rdi), %xmm0
+; SSE4A-NEXT: retq
+;
+; SSE41-LABEL: test_load_nt16xi8:
+; SSE41: # %bb.0: # %entry
+; SSE41-NEXT: movntdqa (%rdi), %xmm0
+; SSE41-NEXT: retq
;
; AVX-LABEL: test_load_nt16xi8:
; AVX: # %bb.0: # %entry
@@ -320,10 +330,20 @@ entry:
}
define <8 x i16> @test_load_nt8xi16(<8 x i16>* nocapture %ptr) {
-; SSE-LABEL: test_load_nt8xi16:
-; SSE: # %bb.0: # %entry
-; SSE-NEXT: movntdqa (%rdi), %xmm0
-; SSE-NEXT: retq
+; SSE2-LABEL: test_load_nt8xi16:
+; SSE2: # %bb.0: # %entry
+; SSE2-NEXT: movdqa (%rdi), %xmm0
+; SSE2-NEXT: retq
+;
+; SSE4A-LABEL: test_load_nt8xi16:
+; SSE4A: # %bb.0: # %entry
+; SSE4A-NEXT: movdqa (%rdi), %xmm0
+; SSE4A-NEXT: retq
+;
+; SSE41-LABEL: test_load_nt8xi16:
+; SSE41: # %bb.0: # %entry
+; SSE41-NEXT: movntdqa (%rdi), %xmm0
+; SSE41-NEXT: retq
;
; AVX-LABEL: test_load_nt8xi16:
; AVX: # %bb.0: # %entry
@@ -340,10 +360,20 @@ entry:
}
define <4 x i32> @test_load_nt4xi32(<4 x i32>* nocapture %ptr) {
-; SSE-LABEL: test_load_nt4xi32:
-; SSE: # %bb.0: # %entry
-; SSE-NEXT: movntdqa (%rdi), %xmm0
-; SSE-NEXT: retq
+; SSE2-LABEL: test_load_nt4xi32:
+; SSE2: # %bb.0: # %entry
+; SSE2-NEXT: movdqa (%rdi), %xmm0
+; SSE2-NEXT: retq
+;
+; SSE4A-LABEL: test_load_nt4xi32:
+; SSE4A: # %bb.0: # %entry
+; SSE4A-NEXT: movdqa (%rdi), %xmm0
+; SSE4A-NEXT: retq
+;
+; SSE41-LABEL: test_load_nt4xi32:
+; SSE41: # %bb.0: # %entry
+; SSE41-NEXT: movntdqa (%rdi), %xmm0
+; SSE41-NEXT: retq
;
; AVX-LABEL: test_load_nt4xi32:
; AVX: # %bb.0: # %entry
@@ -360,10 +390,20 @@ entry:
}
define <2 x i64> @test_load_nt2xi64(<2 x i64>* nocapture %ptr) {
-; SSE-LABEL: test_load_nt2xi64:
-; SSE: # %bb.0: # %entry
-; SSE-NEXT: movntdqa (%rdi), %xmm0
-; SSE-NEXT: retq
+; SSE2-LABEL: test_load_nt2xi64:
+; SSE2: # %bb.0: # %entry
+; SSE2-NEXT: movdqa (%rdi), %xmm0
+; SSE2-NEXT: retq
+;
+; SSE4A-LABEL: test_load_nt2xi64:
+; SSE4A: # %bb.0: # %entry
+; SSE4A-NEXT: movdqa (%rdi), %xmm0
+; SSE4A-NEXT: retq
+;
+; SSE41-LABEL: test_load_nt2xi64:
+; SSE41: # %bb.0: # %entry
+; SSE41-NEXT: movntdqa (%rdi), %xmm0
+; SSE41-NEXT: retq
;
; AVX-LABEL: test_load_nt2xi64:
; AVX: # %bb.0: # %entry