diff options
author | Min-Yih Hsu <min.hsu@sifive.com> | 2024-04-08 09:10:23 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-04-08 09:10:23 -0700 |
commit | f6315a957232b2f5c27476138e3f4e552861f956 (patch) | |
tree | 6cd0ff81e138e3806f01f1a3cd627791e9a5c547 | |
parent | 4d6e67f677bdf40360e6aaba171bcdec93990b01 (diff) | |
download | llvm-f6315a957232b2f5c27476138e3f4e552861f956.zip llvm-f6315a957232b2f5c27476138e3f4e552861f956.tar.gz llvm-f6315a957232b2f5c27476138e3f4e552861f956.tar.bz2 |
[AArch64][LoopIdiom] Disable LoopIdiomTransform when NoImplicitFloat is present (#87677)
This behavior is aligned with both LoopVectorizer and SLPVectorizer.
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64LoopIdiomTransform.cpp | 14 | ||||
-rw-r--r-- | llvm/test/Transforms/LoopIdiom/AArch64/byte-compare-index.ll | 96 |
2 files changed, 106 insertions, 4 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64LoopIdiomTransform.cpp b/llvm/lib/Target/AArch64/AArch64LoopIdiomTransform.cpp index 6dfb2b9..a9bd8d8 100644 --- a/llvm/lib/Target/AArch64/AArch64LoopIdiomTransform.cpp +++ b/llvm/lib/Target/AArch64/AArch64LoopIdiomTransform.cpp @@ -190,17 +190,23 @@ AArch64LoopIdiomTransformPass::run(Loop &L, LoopAnalysisManager &AM, bool AArch64LoopIdiomTransform::run(Loop *L) { CurLoop = L; - if (DisableAll || L->getHeader()->getParent()->hasOptSize()) + Function &F = *L->getHeader()->getParent(); + if (DisableAll || F.hasOptSize()) return false; + if (F.hasFnAttribute(Attribute::NoImplicitFloat)) { + LLVM_DEBUG(dbgs() << DEBUG_TYPE << " is disabled on " << F.getName() + << " due to its NoImplicitFloat attribute"); + return false; + } + // If the loop could not be converted to canonical form, it must have an // indirectbr in it, just give up. if (!L->getLoopPreheader()) return false; - LLVM_DEBUG(dbgs() << DEBUG_TYPE " Scanning: F[" - << CurLoop->getHeader()->getParent()->getName() - << "] Loop %" << CurLoop->getHeader()->getName() << "\n"); + LLVM_DEBUG(dbgs() << DEBUG_TYPE " Scanning: F[" << F.getName() << "] Loop %" + << CurLoop->getHeader()->getName() << "\n"); return recognizeByteCompare(); } diff --git a/llvm/test/Transforms/LoopIdiom/AArch64/byte-compare-index.ll b/llvm/test/Transforms/LoopIdiom/AArch64/byte-compare-index.ll index daa64f2..27ab114 100644 --- a/llvm/test/Transforms/LoopIdiom/AArch64/byte-compare-index.ll +++ b/llvm/test/Transforms/LoopIdiom/AArch64/byte-compare-index.ll @@ -2090,3 +2090,99 @@ while.end: ret i32 %res } +; The optimization should be disabled when noimplicitfloat is present. +define i32 @no_implicit_float(ptr %a, ptr %b, i32 %len, i32 %extra, i32 %n) noimplicitfloat { +; CHECK-LABEL: define i32 @no_implicit_float( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[LEN:%.*]], i32 [[EXTRA:%.*]], i32 [[N:%.*]]) #[[ATTR2:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[WHILE_COND:%.*]] +; CHECK: while.cond: +; CHECK-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ] +; CHECK-NEXT: [[INC]] = add i32 [[LEN_ADDR]], 1 +; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], [[N]] +; CHECK-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]] +; CHECK: while.body: +; CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[INC]] to i64 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]] +; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]] +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1 +; CHECK-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP0]], [[TMP1]] +; CHECK-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]] +; CHECK: while.end: +; CHECK-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ] +; CHECK-NEXT: [[EXTRA_PHI:%.*]] = phi i32 [ [[EXTRA]], [[WHILE_BODY]] ], [ [[EXTRA]], [[WHILE_COND]] ] +; CHECK-NEXT: [[RES:%.*]] = add i32 [[INC_LCSSA]], [[EXTRA_PHI]] +; CHECK-NEXT: ret i32 [[RES]] +; +; LOOP-DEL-LABEL: define i32 @no_implicit_float( +; LOOP-DEL-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[LEN:%.*]], i32 [[EXTRA:%.*]], i32 [[N:%.*]]) #[[ATTR2:[0-9]+]] { +; LOOP-DEL-NEXT: entry: +; LOOP-DEL-NEXT: br label [[WHILE_COND:%.*]] +; LOOP-DEL: while.cond: +; LOOP-DEL-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ] +; LOOP-DEL-NEXT: [[INC]] = add i32 [[LEN_ADDR]], 1 +; LOOP-DEL-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], [[N]] +; LOOP-DEL-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]] +; LOOP-DEL: while.body: +; LOOP-DEL-NEXT: [[IDXPROM:%.*]] = zext i32 [[INC]] to i64 +; LOOP-DEL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]] +; LOOP-DEL-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; LOOP-DEL-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]] +; LOOP-DEL-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1 +; LOOP-DEL-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP0]], [[TMP1]] +; LOOP-DEL-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]] +; LOOP-DEL: while.end: +; LOOP-DEL-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ] +; LOOP-DEL-NEXT: [[EXTRA_PHI:%.*]] = phi i32 [ [[EXTRA]], [[WHILE_BODY]] ], [ [[EXTRA]], [[WHILE_COND]] ] +; LOOP-DEL-NEXT: [[RES:%.*]] = add i32 [[INC_LCSSA]], [[EXTRA_PHI]] +; LOOP-DEL-NEXT: ret i32 [[RES]] +; +; NO-TRANSFORM-LABEL: define i32 @no_implicit_float( +; NO-TRANSFORM-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[LEN:%.*]], i32 [[EXTRA:%.*]], i32 [[N:%.*]]) #[[ATTR1:[0-9]+]] { +; NO-TRANSFORM-NEXT: entry: +; NO-TRANSFORM-NEXT: br label [[WHILE_COND:%.*]] +; NO-TRANSFORM: while.cond: +; NO-TRANSFORM-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ] +; NO-TRANSFORM-NEXT: [[INC]] = add i32 [[LEN_ADDR]], 1 +; NO-TRANSFORM-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], [[N]] +; NO-TRANSFORM-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]] +; NO-TRANSFORM: while.body: +; NO-TRANSFORM-NEXT: [[IDXPROM:%.*]] = zext i32 [[INC]] to i64 +; NO-TRANSFORM-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]] +; NO-TRANSFORM-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; NO-TRANSFORM-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]] +; NO-TRANSFORM-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1 +; NO-TRANSFORM-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP0]], [[TMP1]] +; NO-TRANSFORM-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]] +; NO-TRANSFORM: while.end: +; NO-TRANSFORM-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ] +; NO-TRANSFORM-NEXT: [[EXTRA_PHI:%.*]] = phi i32 [ [[EXTRA]], [[WHILE_BODY]] ], [ [[EXTRA]], [[WHILE_COND]] ] +; NO-TRANSFORM-NEXT: [[RES:%.*]] = add i32 [[INC_LCSSA]], [[EXTRA_PHI]] +; NO-TRANSFORM-NEXT: ret i32 [[RES]] +; +entry: + br label %while.cond + +while.cond: + %len.addr = phi i32 [ %len, %entry ], [ %inc, %while.body ] + %inc = add i32 %len.addr, 1 + %cmp.not = icmp eq i32 %inc, %n + br i1 %cmp.not, label %while.end, label %while.body + +while.body: + %idxprom = zext i32 %inc to i64 + %arrayidx = getelementptr inbounds i8, ptr %a, i64 %idxprom + %0 = load i8, ptr %arrayidx + %arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %idxprom + %1 = load i8, ptr %arrayidx2 + %cmp.not2 = icmp eq i8 %0, %1 + br i1 %cmp.not2, label %while.cond, label %while.end + +while.end: + %inc.lcssa = phi i32 [ %inc, %while.body ], [ %inc, %while.cond ] + %extra.phi = phi i32 [ %extra, %while.body ], [ %extra, %while.cond ] + %res = add i32 %inc.lcssa, %extra.phi + ret i32 %res +} + |