diff options
author | David Green <david.green@arm.com> | 2021-06-17 09:53:33 +0100 |
---|---|---|
committer | David Green <david.green@arm.com> | 2021-06-17 09:53:33 +0100 |
commit | fda8b4714e05c68deee469970cb6f7f7ea8b32b7 (patch) | |
tree | 1867f81175f8f451c815c6751ad7d75b46a543c1 | |
parent | 9b1085604ecf3d2d1a416ee658bca981af133128 (diff) | |
download | llvm-fda8b4714e05c68deee469970cb6f7f7ea8b32b7.zip llvm-fda8b4714e05c68deee469970cb6f7f7ea8b32b7.tar.gz llvm-fda8b4714e05c68deee469970cb6f7f7ea8b32b7.tar.bz2 |
[InterleaveAccess] Copy fast math flags when adjusting binary operators in interleave access pass
The Interleaved Access pass converts shuffle(binop(load, load)) to
binop(shuffle(load), shuffle(load)), in order to create more
interleaving load patterns (VLD2/3/4) that might have been messed up by
instcombine. As shown in D104247, we were not copying the IR flags to the
new instruction; they should simply be kept the same as on the original
instruction.
Differential Revision: https://reviews.llvm.org/D104255
4 files changed, 31 insertions, 31 deletions
diff --git a/llvm/include/llvm/IR/InstrTypes.h b/llvm/include/llvm/IR/InstrTypes.h index 7fc4602..0070c87 100644 --- a/llvm/include/llvm/IR/InstrTypes.h +++ b/llvm/include/llvm/IR/InstrTypes.h @@ -248,11 +248,11 @@ public: } #include "llvm/IR/Instruction.def" - static BinaryOperator *CreateWithCopiedFlags(BinaryOps Opc, - Value *V1, Value *V2, - Instruction *CopyO, - const Twine &Name = "") { - BinaryOperator *BO = Create(Opc, V1, V2, Name); + static BinaryOperator * + CreateWithCopiedFlags(BinaryOps Opc, Value *V1, Value *V2, Instruction *CopyO, + const Twine &Name = "", + Instruction *InsertBefore = nullptr) { + BinaryOperator *BO = Create(Opc, V1, V2, Name, InsertBefore); BO->copyIRFlags(CopyO); return BO; } diff --git a/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/llvm/lib/CodeGen/InterleavedAccessPass.cpp index 1007874..24a57cc 100644 --- a/llvm/lib/CodeGen/InterleavedAccessPass.cpp +++ b/llvm/lib/CodeGen/InterleavedAccessPass.cpp @@ -408,8 +408,8 @@ bool InterleavedAccess::replaceBinOpShuffles( auto *NewSVI2 = new ShuffleVectorInst( BI->getOperand(1), PoisonValue::get(BI->getOperand(1)->getType()), Mask, SVI->getName(), SVI); - Value *NewBI = BinaryOperator::Create(BI->getOpcode(), NewSVI1, NewSVI2, - BI->getName(), SVI); + BinaryOperator *NewBI = BinaryOperator::CreateWithCopiedFlags( + BI->getOpcode(), NewSVI1, NewSVI2, BI, BI->getName(), SVI); SVI->replaceAllUsesWith(NewBI); LLVM_DEBUG(dbgs() << " Replaced: " << *BI << "\n And : " << *SVI << "\n With : " << *NewSVI1 << "\n And : " diff --git a/llvm/test/Transforms/InterleavedAccess/AArch64/binopshuffles-inseltpoison.ll b/llvm/test/Transforms/InterleavedAccess/AArch64/binopshuffles-inseltpoison.ll index 47327b8..f29fffc 100644 --- a/llvm/test/Transforms/InterleavedAccess/AArch64/binopshuffles-inseltpoison.ll +++ b/llvm/test/Transforms/InterleavedAccess/AArch64/binopshuffles-inseltpoison.ll @@ -13,8 +13,8 @@ define <4 x float> @vld2(<8 x float>* %pSrc) { ; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <4 x 
float>, <4 x float> } [[LDN]], 1 ; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN]], 0 ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN]], 0 -; CHECK-NEXT: [[L26:%.*]] = fmul <4 x float> [[TMP3]], [[TMP4]] -; CHECK-NEXT: [[L43:%.*]] = fmul <4 x float> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[L26:%.*]] = fmul fast <4 x float> [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[L43:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP2]] ; CHECK-NEXT: [[L6:%.*]] = fadd fast <4 x float> [[L43]], [[L26]] ; CHECK-NEXT: ret <4 x float> [[L6]] ; @@ -39,10 +39,10 @@ define <4 x float> @vld3(<12 x float>* %pSrc) { ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float> } [[LDN]], 1 ; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float> } [[LDN]], 0 ; CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float> } [[LDN]], 0 -; CHECK-NEXT: [[L29:%.*]] = fmul <4 x float> [[TMP5]], [[TMP6]] -; CHECK-NEXT: [[L46:%.*]] = fmul <4 x float> [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[L29:%.*]] = fmul fast <4 x float> [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[L46:%.*]] = fmul fast <4 x float> [[TMP3]], [[TMP4]] ; CHECK-NEXT: [[L6:%.*]] = fadd fast <4 x float> [[L46]], [[L29]] -; CHECK-NEXT: [[L73:%.*]] = fmul <4 x float> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[L73:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP2]] ; CHECK-NEXT: [[L9:%.*]] = fadd fast <4 x float> [[L6]], [[L73]] ; CHECK-NEXT: ret <4 x float> [[L9]] ; @@ -72,11 +72,11 @@ define <4 x float> @vld4(<16 x float>* %pSrc) { ; CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[LDN]], 1 ; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[LDN]], 0 ; CHECK-NEXT: [[TMP8:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[LDN]], 0 -; CHECK-NEXT: [[L312:%.*]] = fmul <4 x float> [[TMP7]], [[TMP8]] -; CHECK-NEXT: [[L59:%.*]] = fmul <4 x 
float> [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[L312:%.*]] = fmul fast <4 x float> [[TMP7]], [[TMP8]] +; CHECK-NEXT: [[L59:%.*]] = fmul fast <4 x float> [[TMP5]], [[TMP6]] ; CHECK-NEXT: [[L7:%.*]] = fadd fast <4 x float> [[L59]], [[L312]] -; CHECK-NEXT: [[L86:%.*]] = fmul <4 x float> [[TMP3]], [[TMP4]] -; CHECK-NEXT: [[L103:%.*]] = fmul <4 x float> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[L86:%.*]] = fmul fast <4 x float> [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[L103:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP2]] ; CHECK-NEXT: [[L12:%.*]] = fadd fast <4 x float> [[L103]], [[L86]] ; CHECK-NEXT: ret <4 x float> [[L12]] ; @@ -106,8 +106,8 @@ define <4 x float> @twosrc(<8 x float>* %pSrc1, <8 x float>* %pSrc2) { ; CHECK-NEXT: [[LDN7:%.*]] = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0v4f32(<4 x float>* [[TMP3]]) ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN7]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN7]], 1 -; CHECK-NEXT: [[L46:%.*]] = fmul <4 x float> [[TMP4]], [[TMP2]] -; CHECK-NEXT: [[L63:%.*]] = fmul <4 x float> [[TMP5]], [[TMP1]] +; CHECK-NEXT: [[L46:%.*]] = fmul fast <4 x float> [[TMP4]], [[TMP2]] +; CHECK-NEXT: [[L63:%.*]] = fmul fast <4 x float> [[TMP5]], [[TMP1]] ; CHECK-NEXT: [[L8:%.*]] = fadd fast <4 x float> [[L63]], [[L46]] ; CHECK-NEXT: ret <4 x float> [[L8]] ; @@ -133,7 +133,7 @@ define <4 x float> @twosrc2(<8 x float>* %pSrc1, <8 x float>* %pSrc2) { ; CHECK-NEXT: [[LDN4:%.*]] = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0v4f32(<4 x float>* [[TMP3]]) ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN4]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN4]], 1 -; CHECK-NEXT: [[L43:%.*]] = fmul <4 x float> [[TMP4]], [[TMP2]] +; CHECK-NEXT: [[L43:%.*]] = fmul fast <4 x float> [[TMP4]], [[TMP2]] ; CHECK-NEXT: [[L6:%.*]] = fmul fast <4 x float> [[TMP5]], [[TMP1]] ; CHECK-NEXT: [[L8:%.*]] = fadd fast <4 x 
float> [[L6]], [[L43]] ; CHECK-NEXT: ret <4 x float> [[L8]] diff --git a/llvm/test/Transforms/InterleavedAccess/AArch64/binopshuffles.ll b/llvm/test/Transforms/InterleavedAccess/AArch64/binopshuffles.ll index 4711409..5befe69 100644 --- a/llvm/test/Transforms/InterleavedAccess/AArch64/binopshuffles.ll +++ b/llvm/test/Transforms/InterleavedAccess/AArch64/binopshuffles.ll @@ -13,8 +13,8 @@ define <4 x float> @vld2(<8 x float>* %pSrc) { ; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN]], 1 ; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN]], 0 ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN]], 0 -; CHECK-NEXT: [[L26:%.*]] = fmul <4 x float> [[TMP3]], [[TMP4]] -; CHECK-NEXT: [[L43:%.*]] = fmul <4 x float> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[L26:%.*]] = fmul fast <4 x float> [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[L43:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP2]] ; CHECK-NEXT: [[L6:%.*]] = fadd fast <4 x float> [[L43]], [[L26]] ; CHECK-NEXT: ret <4 x float> [[L6]] ; @@ -39,10 +39,10 @@ define <4 x float> @vld3(<12 x float>* %pSrc) { ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float> } [[LDN]], 1 ; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float> } [[LDN]], 0 ; CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float> } [[LDN]], 0 -; CHECK-NEXT: [[L29:%.*]] = fmul <4 x float> [[TMP5]], [[TMP6]] -; CHECK-NEXT: [[L46:%.*]] = fmul <4 x float> [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[L29:%.*]] = fmul fast <4 x float> [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[L46:%.*]] = fmul fast <4 x float> [[TMP3]], [[TMP4]] ; CHECK-NEXT: [[L6:%.*]] = fadd fast <4 x float> [[L46]], [[L29]] -; CHECK-NEXT: [[L73:%.*]] = fmul <4 x float> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[L73:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP2]] ; CHECK-NEXT: [[L9:%.*]] = fadd fast <4 x float> [[L6]], [[L73]] ; CHECK-NEXT: ret <4 x float> [[L9]] ; @@ -72,11 
+72,11 @@ define <4 x float> @vld4(<16 x float>* %pSrc) { ; CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[LDN]], 1 ; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[LDN]], 0 ; CHECK-NEXT: [[TMP8:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[LDN]], 0 -; CHECK-NEXT: [[L312:%.*]] = fmul <4 x float> [[TMP7]], [[TMP8]] -; CHECK-NEXT: [[L59:%.*]] = fmul <4 x float> [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[L312:%.*]] = fmul fast <4 x float> [[TMP7]], [[TMP8]] +; CHECK-NEXT: [[L59:%.*]] = fmul fast <4 x float> [[TMP5]], [[TMP6]] ; CHECK-NEXT: [[L7:%.*]] = fadd fast <4 x float> [[L59]], [[L312]] -; CHECK-NEXT: [[L86:%.*]] = fmul <4 x float> [[TMP3]], [[TMP4]] -; CHECK-NEXT: [[L103:%.*]] = fmul <4 x float> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[L86:%.*]] = fmul fast <4 x float> [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[L103:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP2]] ; CHECK-NEXT: [[L12:%.*]] = fadd fast <4 x float> [[L103]], [[L86]] ; CHECK-NEXT: ret <4 x float> [[L12]] ; @@ -106,8 +106,8 @@ define <4 x float> @twosrc(<8 x float>* %pSrc1, <8 x float>* %pSrc2) { ; CHECK-NEXT: [[LDN7:%.*]] = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0v4f32(<4 x float>* [[TMP3]]) ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN7]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN7]], 1 -; CHECK-NEXT: [[L46:%.*]] = fmul <4 x float> [[TMP4]], [[TMP2]] -; CHECK-NEXT: [[L63:%.*]] = fmul <4 x float> [[TMP5]], [[TMP1]] +; CHECK-NEXT: [[L46:%.*]] = fmul fast <4 x float> [[TMP4]], [[TMP2]] +; CHECK-NEXT: [[L63:%.*]] = fmul fast <4 x float> [[TMP5]], [[TMP1]] ; CHECK-NEXT: [[L8:%.*]] = fadd fast <4 x float> [[L63]], [[L46]] ; CHECK-NEXT: ret <4 x float> [[L8]] ; @@ -133,7 +133,7 @@ define <4 x float> @twosrc2(<8 x float>* %pSrc1, <8 x float>* %pSrc2) { ; CHECK-NEXT: [[LDN4:%.*]] = call { <4 x float>, <4 
x float> } @llvm.aarch64.neon.ld2.v4f32.p0v4f32(<4 x float>* [[TMP3]]) ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN4]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN4]], 1 -; CHECK-NEXT: [[L43:%.*]] = fmul <4 x float> [[TMP4]], [[TMP2]] +; CHECK-NEXT: [[L43:%.*]] = fmul fast <4 x float> [[TMP4]], [[TMP2]] ; CHECK-NEXT: [[L6:%.*]] = fmul fast <4 x float> [[TMP5]], [[TMP1]] ; CHECK-NEXT: [[L8:%.*]] = fadd fast <4 x float> [[L6]], [[L43]] ; CHECK-NEXT: ret <4 x float> [[L8]] |