[InterleaveAccess] Copy fast math flags when adjusting binary operators in interleave access pass

The Interleave Access pass will convert shuffle(binop(load, load)) to binop(shuffle(load), shuffle(load)), in order to create more interleaving load patterns (VLD2/3/4) that might have been messed up by instcombine. As shown in D104247 we were missing copying IR flags to the new instruction though, which should just be kept the same as the original instruction. Differential Revision: https://reviews.llvm.org/D104255
author: David Green <david.green@arm.com> 2021-06-17 09:53:33 +0100
committer: David Green <david.green@arm.com> 2021-06-17 09:53:33 +0100
commit: fda8b4714e05c68deee469970cb6f7f7ea8b32b7 (patch)
tree: 1867f81175f8f451c815c6751ad7d75b46a543c1
parent: 9b1085604ecf3d2d1a416ee658bca981af133128 (diff)
download: llvm-fda8b4714e05c68deee469970cb6f7f7ea8b32b7.zip
llvm-fda8b4714e05c68deee469970cb6f7f7ea8b32b7.tar.gz
llvm-fda8b4714e05c68deee469970cb6f7f7ea8b32b7.tar.bz2
4 files changed, 31 insertions, 31 deletions
diff --git a/llvm/include/llvm/IR/InstrTypes.h b/llvm/include/llvm/IR/InstrTypes.h
index 7fc4602..0070c87 100644
--- a/llvm/include/llvm/IR/InstrTypes.h
+++ b/llvm/include/llvm/IR/InstrTypes.h
@@ -248,11 +248,11 @@ public:
   }
 #include "llvm/IR/Instruction.def"
 
-  static BinaryOperator *CreateWithCopiedFlags(BinaryOps Opc,
-                                               Value *V1, Value *V2,
-                                               Instruction *CopyO,
-                                               const Twine &Name = "") {
-    BinaryOperator *BO = Create(Opc, V1, V2, Name);
+  static BinaryOperator *
+  CreateWithCopiedFlags(BinaryOps Opc, Value *V1, Value *V2, Instruction *CopyO,
+                        const Twine &Name = "",
+                        Instruction *InsertBefore = nullptr) {
+    BinaryOperator *BO = Create(Opc, V1, V2, Name, InsertBefore);
     BO->copyIRFlags(CopyO);
     return BO;
   }
diff --git a/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
index 1007874..24a57cc 100644
--- a/llvm/lib/CodeGen/InterleavedAccessPass.cpp
+++ b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
@@ -408,8 +408,8 @@ bool InterleavedAccess::replaceBinOpShuffles(
     auto *NewSVI2 = new ShuffleVectorInst(
         BI->getOperand(1), PoisonValue::get(BI->getOperand(1)->getType()), Mask,
         SVI->getName(), SVI);
-    Value *NewBI = BinaryOperator::Create(BI->getOpcode(), NewSVI1, NewSVI2,
-                                          BI->getName(), SVI);
+    BinaryOperator *NewBI = BinaryOperator::CreateWithCopiedFlags(
+        BI->getOpcode(), NewSVI1, NewSVI2, BI, BI->getName(), SVI);
     SVI->replaceAllUsesWith(NewBI);
     LLVM_DEBUG(dbgs() << "  Replaced: " << *BI << "\n    And   : " << *SVI
                       << "\n  With    : " << *NewSVI1 << "\n    And   : "
diff --git a/llvm/test/Transforms/InterleavedAccess/AArch64/binopshuffles-inseltpoison.ll b/llvm/test/Transforms/InterleavedAccess/AArch64/binopshuffles-inseltpoison.ll
index 47327b8..f29fffc 100644
--- a/llvm/test/Transforms/InterleavedAccess/AArch64/binopshuffles-inseltpoison.ll
+++ b/llvm/test/Transforms/InterleavedAccess/AArch64/binopshuffles-inseltpoison.ll
@@ -13,8 +13,8 @@ define <4 x float> @vld2(<8 x float>* %pSrc) {
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN]], 1
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN]], 0
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN]], 0
-; CHECK-NEXT:    [[L26:%.*]] = fmul <4 x float> [[TMP3]], [[TMP4]]
-; CHECK-NEXT:    [[L43:%.*]] = fmul <4 x float> [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[L26:%.*]] = fmul fast <4 x float> [[TMP3]], [[TMP4]]
+; CHECK-NEXT:    [[L43:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP2]]
 ; CHECK-NEXT:    [[L6:%.*]] = fadd fast <4 x float> [[L43]], [[L26]]
 ; CHECK-NEXT:    ret <4 x float> [[L6]]
 ;
@@ -39,10 +39,10 @@ define <4 x float> @vld3(<12 x float>* %pSrc) {
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float> } [[LDN]], 1
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float> } [[LDN]], 0
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float> } [[LDN]], 0
-; CHECK-NEXT:    [[L29:%.*]] = fmul <4 x float> [[TMP5]], [[TMP6]]
-; CHECK-NEXT:    [[L46:%.*]] = fmul <4 x float> [[TMP3]], [[TMP4]]
+; CHECK-NEXT:    [[L29:%.*]] = fmul fast <4 x float> [[TMP5]], [[TMP6]]
+; CHECK-NEXT:    [[L46:%.*]] = fmul fast <4 x float> [[TMP3]], [[TMP4]]
 ; CHECK-NEXT:    [[L6:%.*]] = fadd fast <4 x float> [[L46]], [[L29]]
-; CHECK-NEXT:    [[L73:%.*]] = fmul <4 x float> [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[L73:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP2]]
 ; CHECK-NEXT:    [[L9:%.*]] = fadd fast <4 x float> [[L6]], [[L73]]
 ; CHECK-NEXT:    ret <4 x float> [[L9]]
 ;
@@ -72,11 +72,11 @@ define <4 x float> @vld4(<16 x float>* %pSrc) {
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[LDN]], 1
 ; CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[LDN]], 0
 ; CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[LDN]], 0
-; CHECK-NEXT:    [[L312:%.*]] = fmul <4 x float> [[TMP7]], [[TMP8]]
-; CHECK-NEXT:    [[L59:%.*]] = fmul <4 x float> [[TMP5]], [[TMP6]]
+; CHECK-NEXT:    [[L312:%.*]] = fmul fast <4 x float> [[TMP7]], [[TMP8]]
+; CHECK-NEXT:    [[L59:%.*]] = fmul fast <4 x float> [[TMP5]], [[TMP6]]
 ; CHECK-NEXT:    [[L7:%.*]] = fadd fast <4 x float> [[L59]], [[L312]]
-; CHECK-NEXT:    [[L86:%.*]] = fmul <4 x float> [[TMP3]], [[TMP4]]
-; CHECK-NEXT:    [[L103:%.*]] = fmul <4 x float> [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[L86:%.*]] = fmul fast <4 x float> [[TMP3]], [[TMP4]]
+; CHECK-NEXT:    [[L103:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP2]]
 ; CHECK-NEXT:    [[L12:%.*]] = fadd fast <4 x float> [[L103]], [[L86]]
 ; CHECK-NEXT:    ret <4 x float> [[L12]]
 ;
@@ -106,8 +106,8 @@ define <4 x float> @twosrc(<8 x float>* %pSrc1, <8 x float>* %pSrc2) {
 ; CHECK-NEXT:    [[LDN7:%.*]] = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0v4f32(<4 x float>* [[TMP3]])
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN7]], 0
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN7]], 1
-; CHECK-NEXT:    [[L46:%.*]] = fmul <4 x float> [[TMP4]], [[TMP2]]
-; CHECK-NEXT:    [[L63:%.*]] = fmul <4 x float> [[TMP5]], [[TMP1]]
+; CHECK-NEXT:    [[L46:%.*]] = fmul fast <4 x float> [[TMP4]], [[TMP2]]
+; CHECK-NEXT:    [[L63:%.*]] = fmul fast <4 x float> [[TMP5]], [[TMP1]]
 ; CHECK-NEXT:    [[L8:%.*]] = fadd fast <4 x float> [[L63]], [[L46]]
 ; CHECK-NEXT:    ret <4 x float> [[L8]]
 ;
@@ -133,7 +133,7 @@ define <4 x float> @twosrc2(<8 x float>* %pSrc1, <8 x float>* %pSrc2) {
 ; CHECK-NEXT:    [[LDN4:%.*]] = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0v4f32(<4 x float>* [[TMP3]])
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN4]], 0
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN4]], 1
-; CHECK-NEXT:    [[L43:%.*]] = fmul <4 x float> [[TMP4]], [[TMP2]]
+; CHECK-NEXT:    [[L43:%.*]] = fmul fast <4 x float> [[TMP4]], [[TMP2]]
 ; CHECK-NEXT:    [[L6:%.*]] = fmul fast <4 x float> [[TMP5]], [[TMP1]]
 ; CHECK-NEXT:    [[L8:%.*]] = fadd fast <4 x float> [[L6]], [[L43]]
 ; CHECK-NEXT:    ret <4 x float> [[L8]]
diff --git a/llvm/test/Transforms/InterleavedAccess/AArch64/binopshuffles.ll b/llvm/test/Transforms/InterleavedAccess/AArch64/binopshuffles.ll
index 4711409..5befe69 100644
--- a/llvm/test/Transforms/InterleavedAccess/AArch64/binopshuffles.ll
+++ b/llvm/test/Transforms/InterleavedAccess/AArch64/binopshuffles.ll
@@ -13,8 +13,8 @@ define <4 x float> @vld2(<8 x float>* %pSrc) {
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN]], 1
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN]], 0
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN]], 0
-; CHECK-NEXT:    [[L26:%.*]] = fmul <4 x float> [[TMP3]], [[TMP4]]
-; CHECK-NEXT:    [[L43:%.*]] = fmul <4 x float> [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[L26:%.*]] = fmul fast <4 x float> [[TMP3]], [[TMP4]]
+; CHECK-NEXT:    [[L43:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP2]]
 ; CHECK-NEXT:    [[L6:%.*]] = fadd fast <4 x float> [[L43]], [[L26]]
 ; CHECK-NEXT:    ret <4 x float> [[L6]]
 ;
@@ -39,10 +39,10 @@ define <4 x float> @vld3(<12 x float>* %pSrc) {
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float> } [[LDN]], 1
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float> } [[LDN]], 0
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float> } [[LDN]], 0
-; CHECK-NEXT:    [[L29:%.*]] = fmul <4 x float> [[TMP5]], [[TMP6]]
-; CHECK-NEXT:    [[L46:%.*]] = fmul <4 x float> [[TMP3]], [[TMP4]]
+; CHECK-NEXT:    [[L29:%.*]] = fmul fast <4 x float> [[TMP5]], [[TMP6]]
+; CHECK-NEXT:    [[L46:%.*]] = fmul fast <4 x float> [[TMP3]], [[TMP4]]
 ; CHECK-NEXT:    [[L6:%.*]] = fadd fast <4 x float> [[L46]], [[L29]]
-; CHECK-NEXT:    [[L73:%.*]] = fmul <4 x float> [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[L73:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP2]]
 ; CHECK-NEXT:    [[L9:%.*]] = fadd fast <4 x float> [[L6]], [[L73]]
 ; CHECK-NEXT:    ret <4 x float> [[L9]]
 ;
@@ -72,11 +72,11 @@ define <4 x float> @vld4(<16 x float>* %pSrc) {
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[LDN]], 1
 ; CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[LDN]], 0
 ; CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[LDN]], 0
-; CHECK-NEXT:    [[L312:%.*]] = fmul <4 x float> [[TMP7]], [[TMP8]]
-; CHECK-NEXT:    [[L59:%.*]] = fmul <4 x float> [[TMP5]], [[TMP6]]
+; CHECK-NEXT:    [[L312:%.*]] = fmul fast <4 x float> [[TMP7]], [[TMP8]]
+; CHECK-NEXT:    [[L59:%.*]] = fmul fast <4 x float> [[TMP5]], [[TMP6]]
 ; CHECK-NEXT:    [[L7:%.*]] = fadd fast <4 x float> [[L59]], [[L312]]
-; CHECK-NEXT:    [[L86:%.*]] = fmul <4 x float> [[TMP3]], [[TMP4]]
-; CHECK-NEXT:    [[L103:%.*]] = fmul <4 x float> [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[L86:%.*]] = fmul fast <4 x float> [[TMP3]], [[TMP4]]
+; CHECK-NEXT:    [[L103:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP2]]
 ; CHECK-NEXT:    [[L12:%.*]] = fadd fast <4 x float> [[L103]], [[L86]]
 ; CHECK-NEXT:    ret <4 x float> [[L12]]
 ;
@@ -106,8 +106,8 @@ define <4 x float> @twosrc(<8 x float>* %pSrc1, <8 x float>* %pSrc2) {
 ; CHECK-NEXT:    [[LDN7:%.*]] = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0v4f32(<4 x float>* [[TMP3]])
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN7]], 0
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN7]], 1
-; CHECK-NEXT:    [[L46:%.*]] = fmul <4 x float> [[TMP4]], [[TMP2]]
-; CHECK-NEXT:    [[L63:%.*]] = fmul <4 x float> [[TMP5]], [[TMP1]]
+; CHECK-NEXT:    [[L46:%.*]] = fmul fast <4 x float> [[TMP4]], [[TMP2]]
+; CHECK-NEXT:    [[L63:%.*]] = fmul fast <4 x float> [[TMP5]], [[TMP1]]
 ; CHECK-NEXT:    [[L8:%.*]] = fadd fast <4 x float> [[L63]], [[L46]]
 ; CHECK-NEXT:    ret <4 x float> [[L8]]
 ;
@@ -133,7 +133,7 @@ define <4 x float> @twosrc2(<8 x float>* %pSrc1, <8 x float>* %pSrc2) {
 ; CHECK-NEXT:    [[LDN4:%.*]] = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0v4f32(<4 x float>* [[TMP3]])
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN4]], 0
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN4]], 1
-; CHECK-NEXT:    [[L43:%.*]] = fmul <4 x float> [[TMP4]], [[TMP2]]
+; CHECK-NEXT:    [[L43:%.*]] = fmul fast <4 x float> [[TMP4]], [[TMP2]]
 ; CHECK-NEXT:    [[L6:%.*]] = fmul fast <4 x float> [[TMP5]], [[TMP1]]
 ; CHECK-NEXT:    [[L8:%.*]] = fadd fast <4 x float> [[L6]], [[L43]]
 ; CHECK-NEXT:    ret <4 x float> [[L8]]
author	David Green <david.green@arm.com>	2021-06-17 09:53:33 +0100
committer	David Green <david.green@arm.com>	2021-06-17 09:53:33 +0100
commit	fda8b4714e05c68deee469970cb6f7f7ea8b32b7 (patch)
tree	1867f81175f8f451c815c6751ad7d75b46a543c1
parent	9b1085604ecf3d2d1a416ee658bca981af133128 (diff)
download	llvm-fda8b4714e05c68deee469970cb6f7f7ea8b32b7.zip llvm-fda8b4714e05c68deee469970cb6f7f7ea8b32b7.tar.gz llvm-fda8b4714e05c68deee469970cb6f7f7ea8b32b7.tar.bz2