aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: David Green <david.green@arm.com> 2021-06-17 09:53:33 +0100
committer: David Green <david.green@arm.com> 2021-06-17 09:53:33 +0100
commit: fda8b4714e05c68deee469970cb6f7f7ea8b32b7 (patch)
tree: 1867f81175f8f451c815c6751ad7d75b46a543c1
parent: 9b1085604ecf3d2d1a416ee658bca981af133128 (diff)
downloadllvm-fda8b4714e05c68deee469970cb6f7f7ea8b32b7.zip
llvm-fda8b4714e05c68deee469970cb6f7f7ea8b32b7.tar.gz
llvm-fda8b4714e05c68deee469970cb6f7f7ea8b32b7.tar.bz2
[InterleaveAccess] Copy fast math flags when adjusting binary operators in interleave access pass
The Interleave Access pass converts shuffle(binop(load, load)) into binop(shuffle(load), shuffle(load)) in order to create more interleaving load patterns (VLD2/3/4) that may have been broken up by instcombine. As shown in D104247, however, the IR flags were not being copied to the new instruction; they should simply be kept the same as on the original instruction. Differential Revision: https://reviews.llvm.org/D104255
-rw-r--r-- llvm/include/llvm/IR/InstrTypes.h 10
-rw-r--r-- llvm/lib/CodeGen/InterleavedAccessPass.cpp 4
-rw-r--r-- llvm/test/Transforms/InterleavedAccess/AArch64/binopshuffles-inseltpoison.ll 24
-rw-r--r-- llvm/test/Transforms/InterleavedAccess/AArch64/binopshuffles.ll 24
4 files changed, 31 insertions, 31 deletions
diff --git a/llvm/include/llvm/IR/InstrTypes.h b/llvm/include/llvm/IR/InstrTypes.h
index 7fc4602..0070c87 100644
--- a/llvm/include/llvm/IR/InstrTypes.h
+++ b/llvm/include/llvm/IR/InstrTypes.h
@@ -248,11 +248,11 @@ public:
}
#include "llvm/IR/Instruction.def"
- static BinaryOperator *CreateWithCopiedFlags(BinaryOps Opc,
- Value *V1, Value *V2,
- Instruction *CopyO,
- const Twine &Name = "") {
- BinaryOperator *BO = Create(Opc, V1, V2, Name);
+ static BinaryOperator *
+ CreateWithCopiedFlags(BinaryOps Opc, Value *V1, Value *V2, Instruction *CopyO,
+ const Twine &Name = "",
+ Instruction *InsertBefore = nullptr) {
+ BinaryOperator *BO = Create(Opc, V1, V2, Name, InsertBefore);
BO->copyIRFlags(CopyO);
return BO;
}
diff --git a/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
index 1007874..24a57cc 100644
--- a/llvm/lib/CodeGen/InterleavedAccessPass.cpp
+++ b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
@@ -408,8 +408,8 @@ bool InterleavedAccess::replaceBinOpShuffles(
auto *NewSVI2 = new ShuffleVectorInst(
BI->getOperand(1), PoisonValue::get(BI->getOperand(1)->getType()), Mask,
SVI->getName(), SVI);
- Value *NewBI = BinaryOperator::Create(BI->getOpcode(), NewSVI1, NewSVI2,
- BI->getName(), SVI);
+ BinaryOperator *NewBI = BinaryOperator::CreateWithCopiedFlags(
+ BI->getOpcode(), NewSVI1, NewSVI2, BI, BI->getName(), SVI);
SVI->replaceAllUsesWith(NewBI);
LLVM_DEBUG(dbgs() << " Replaced: " << *BI << "\n And : " << *SVI
<< "\n With : " << *NewSVI1 << "\n And : "
diff --git a/llvm/test/Transforms/InterleavedAccess/AArch64/binopshuffles-inseltpoison.ll b/llvm/test/Transforms/InterleavedAccess/AArch64/binopshuffles-inseltpoison.ll
index 47327b8..f29fffc 100644
--- a/llvm/test/Transforms/InterleavedAccess/AArch64/binopshuffles-inseltpoison.ll
+++ b/llvm/test/Transforms/InterleavedAccess/AArch64/binopshuffles-inseltpoison.ll
@@ -13,8 +13,8 @@ define <4 x float> @vld2(<8 x float>* %pSrc) {
; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN]], 1
; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN]], 0
; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN]], 0
-; CHECK-NEXT: [[L26:%.*]] = fmul <4 x float> [[TMP3]], [[TMP4]]
-; CHECK-NEXT: [[L43:%.*]] = fmul <4 x float> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[L26:%.*]] = fmul fast <4 x float> [[TMP3]], [[TMP4]]
+; CHECK-NEXT: [[L43:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[L6:%.*]] = fadd fast <4 x float> [[L43]], [[L26]]
; CHECK-NEXT: ret <4 x float> [[L6]]
;
@@ -39,10 +39,10 @@ define <4 x float> @vld3(<12 x float>* %pSrc) {
; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float> } [[LDN]], 1
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float> } [[LDN]], 0
; CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float> } [[LDN]], 0
-; CHECK-NEXT: [[L29:%.*]] = fmul <4 x float> [[TMP5]], [[TMP6]]
-; CHECK-NEXT: [[L46:%.*]] = fmul <4 x float> [[TMP3]], [[TMP4]]
+; CHECK-NEXT: [[L29:%.*]] = fmul fast <4 x float> [[TMP5]], [[TMP6]]
+; CHECK-NEXT: [[L46:%.*]] = fmul fast <4 x float> [[TMP3]], [[TMP4]]
; CHECK-NEXT: [[L6:%.*]] = fadd fast <4 x float> [[L46]], [[L29]]
-; CHECK-NEXT: [[L73:%.*]] = fmul <4 x float> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[L73:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[L9:%.*]] = fadd fast <4 x float> [[L6]], [[L73]]
; CHECK-NEXT: ret <4 x float> [[L9]]
;
@@ -72,11 +72,11 @@ define <4 x float> @vld4(<16 x float>* %pSrc) {
; CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[LDN]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[LDN]], 0
; CHECK-NEXT: [[TMP8:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[LDN]], 0
-; CHECK-NEXT: [[L312:%.*]] = fmul <4 x float> [[TMP7]], [[TMP8]]
-; CHECK-NEXT: [[L59:%.*]] = fmul <4 x float> [[TMP5]], [[TMP6]]
+; CHECK-NEXT: [[L312:%.*]] = fmul fast <4 x float> [[TMP7]], [[TMP8]]
+; CHECK-NEXT: [[L59:%.*]] = fmul fast <4 x float> [[TMP5]], [[TMP6]]
; CHECK-NEXT: [[L7:%.*]] = fadd fast <4 x float> [[L59]], [[L312]]
-; CHECK-NEXT: [[L86:%.*]] = fmul <4 x float> [[TMP3]], [[TMP4]]
-; CHECK-NEXT: [[L103:%.*]] = fmul <4 x float> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[L86:%.*]] = fmul fast <4 x float> [[TMP3]], [[TMP4]]
+; CHECK-NEXT: [[L103:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[L12:%.*]] = fadd fast <4 x float> [[L103]], [[L86]]
; CHECK-NEXT: ret <4 x float> [[L12]]
;
@@ -106,8 +106,8 @@ define <4 x float> @twosrc(<8 x float>* %pSrc1, <8 x float>* %pSrc2) {
; CHECK-NEXT: [[LDN7:%.*]] = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0v4f32(<4 x float>* [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN7]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN7]], 1
-; CHECK-NEXT: [[L46:%.*]] = fmul <4 x float> [[TMP4]], [[TMP2]]
-; CHECK-NEXT: [[L63:%.*]] = fmul <4 x float> [[TMP5]], [[TMP1]]
+; CHECK-NEXT: [[L46:%.*]] = fmul fast <4 x float> [[TMP4]], [[TMP2]]
+; CHECK-NEXT: [[L63:%.*]] = fmul fast <4 x float> [[TMP5]], [[TMP1]]
; CHECK-NEXT: [[L8:%.*]] = fadd fast <4 x float> [[L63]], [[L46]]
; CHECK-NEXT: ret <4 x float> [[L8]]
;
@@ -133,7 +133,7 @@ define <4 x float> @twosrc2(<8 x float>* %pSrc1, <8 x float>* %pSrc2) {
; CHECK-NEXT: [[LDN4:%.*]] = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0v4f32(<4 x float>* [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN4]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN4]], 1
-; CHECK-NEXT: [[L43:%.*]] = fmul <4 x float> [[TMP4]], [[TMP2]]
+; CHECK-NEXT: [[L43:%.*]] = fmul fast <4 x float> [[TMP4]], [[TMP2]]
; CHECK-NEXT: [[L6:%.*]] = fmul fast <4 x float> [[TMP5]], [[TMP1]]
; CHECK-NEXT: [[L8:%.*]] = fadd fast <4 x float> [[L6]], [[L43]]
; CHECK-NEXT: ret <4 x float> [[L8]]
diff --git a/llvm/test/Transforms/InterleavedAccess/AArch64/binopshuffles.ll b/llvm/test/Transforms/InterleavedAccess/AArch64/binopshuffles.ll
index 4711409..5befe69 100644
--- a/llvm/test/Transforms/InterleavedAccess/AArch64/binopshuffles.ll
+++ b/llvm/test/Transforms/InterleavedAccess/AArch64/binopshuffles.ll
@@ -13,8 +13,8 @@ define <4 x float> @vld2(<8 x float>* %pSrc) {
; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN]], 1
; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN]], 0
; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN]], 0
-; CHECK-NEXT: [[L26:%.*]] = fmul <4 x float> [[TMP3]], [[TMP4]]
-; CHECK-NEXT: [[L43:%.*]] = fmul <4 x float> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[L26:%.*]] = fmul fast <4 x float> [[TMP3]], [[TMP4]]
+; CHECK-NEXT: [[L43:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[L6:%.*]] = fadd fast <4 x float> [[L43]], [[L26]]
; CHECK-NEXT: ret <4 x float> [[L6]]
;
@@ -39,10 +39,10 @@ define <4 x float> @vld3(<12 x float>* %pSrc) {
; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float> } [[LDN]], 1
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float> } [[LDN]], 0
; CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float> } [[LDN]], 0
-; CHECK-NEXT: [[L29:%.*]] = fmul <4 x float> [[TMP5]], [[TMP6]]
-; CHECK-NEXT: [[L46:%.*]] = fmul <4 x float> [[TMP3]], [[TMP4]]
+; CHECK-NEXT: [[L29:%.*]] = fmul fast <4 x float> [[TMP5]], [[TMP6]]
+; CHECK-NEXT: [[L46:%.*]] = fmul fast <4 x float> [[TMP3]], [[TMP4]]
; CHECK-NEXT: [[L6:%.*]] = fadd fast <4 x float> [[L46]], [[L29]]
-; CHECK-NEXT: [[L73:%.*]] = fmul <4 x float> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[L73:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[L9:%.*]] = fadd fast <4 x float> [[L6]], [[L73]]
; CHECK-NEXT: ret <4 x float> [[L9]]
;
@@ -72,11 +72,11 @@ define <4 x float> @vld4(<16 x float>* %pSrc) {
; CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[LDN]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[LDN]], 0
; CHECK-NEXT: [[TMP8:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[LDN]], 0
-; CHECK-NEXT: [[L312:%.*]] = fmul <4 x float> [[TMP7]], [[TMP8]]
-; CHECK-NEXT: [[L59:%.*]] = fmul <4 x float> [[TMP5]], [[TMP6]]
+; CHECK-NEXT: [[L312:%.*]] = fmul fast <4 x float> [[TMP7]], [[TMP8]]
+; CHECK-NEXT: [[L59:%.*]] = fmul fast <4 x float> [[TMP5]], [[TMP6]]
; CHECK-NEXT: [[L7:%.*]] = fadd fast <4 x float> [[L59]], [[L312]]
-; CHECK-NEXT: [[L86:%.*]] = fmul <4 x float> [[TMP3]], [[TMP4]]
-; CHECK-NEXT: [[L103:%.*]] = fmul <4 x float> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[L86:%.*]] = fmul fast <4 x float> [[TMP3]], [[TMP4]]
+; CHECK-NEXT: [[L103:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[L12:%.*]] = fadd fast <4 x float> [[L103]], [[L86]]
; CHECK-NEXT: ret <4 x float> [[L12]]
;
@@ -106,8 +106,8 @@ define <4 x float> @twosrc(<8 x float>* %pSrc1, <8 x float>* %pSrc2) {
; CHECK-NEXT: [[LDN7:%.*]] = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0v4f32(<4 x float>* [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN7]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN7]], 1
-; CHECK-NEXT: [[L46:%.*]] = fmul <4 x float> [[TMP4]], [[TMP2]]
-; CHECK-NEXT: [[L63:%.*]] = fmul <4 x float> [[TMP5]], [[TMP1]]
+; CHECK-NEXT: [[L46:%.*]] = fmul fast <4 x float> [[TMP4]], [[TMP2]]
+; CHECK-NEXT: [[L63:%.*]] = fmul fast <4 x float> [[TMP5]], [[TMP1]]
; CHECK-NEXT: [[L8:%.*]] = fadd fast <4 x float> [[L63]], [[L46]]
; CHECK-NEXT: ret <4 x float> [[L8]]
;
@@ -133,7 +133,7 @@ define <4 x float> @twosrc2(<8 x float>* %pSrc1, <8 x float>* %pSrc2) {
; CHECK-NEXT: [[LDN4:%.*]] = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0v4f32(<4 x float>* [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN4]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN4]], 1
-; CHECK-NEXT: [[L43:%.*]] = fmul <4 x float> [[TMP4]], [[TMP2]]
+; CHECK-NEXT: [[L43:%.*]] = fmul fast <4 x float> [[TMP4]], [[TMP2]]
; CHECK-NEXT: [[L6:%.*]] = fmul fast <4 x float> [[TMP5]], [[TMP1]]
; CHECK-NEXT: [[L8:%.*]] = fadd fast <4 x float> [[L6]], [[L43]]
; CHECK-NEXT: ret <4 x float> [[L8]]