path: root/llvm/lib/CodeGen/CodeGenPrepare.cpp
author    Sanjay Patel <spatel@rotateright.com>  2017-07-31 18:08:24 +0000
committer Sanjay Patel <spatel@rotateright.com>  2017-07-31 18:08:24 +0000
commit    fea731a4aa6aabf270fbb9ba6401ca8826c55a9b (patch)
tree      93d0d7e6532ef8f3b297548da5097f879e878312 /llvm/lib/CodeGen/CodeGenPrepare.cpp
parent    70d35e102ef8dbba10e2db84ea2dcbe95bbbfd38 (diff)
[CGP] use subtract or subtract-of-cmps for result of memcmp expansion
As noted in the code comment, transforming this in the other direction might require a separate transform here in CGP given the block-at-a-time DAG constraint. Besides that theoretical motivation, there are 2 practical motivations for the subtract-of-cmps form:

1. The codegen for both x86 and PPC is better for this IR (though PPC could be better still). There is discussion about canonicalizing IR to the select form ( http://lists.llvm.org/pipermail/llvm-dev/2017-July/114885.html ), so we probably need to add DAG transforms for those patterns anyway, but this improves the memcmp output without waiting for that step.

2. If we allow vector-sized chunks for the load and compare, x86 is better prepared to convert that to optimal code when using subtract-of-cmps, so another prerequisite patch is avoided if we choose to enable that.

Differential Revision: https://reviews.llvm.org/D34904

llvm-svn: 309597
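For context (an illustrative C++ sketch, not part of the commit; the function names are hypothetical), the scalar semantics of the two expansion paths introduced below. This assumes the loaded chunks have already been byte-swapped on little-endian targets so that unsigned comparison matches memcmp's lexicographic order:

#include <cstdint>

// Size >= 4: subtract two zero-extended compare bits to get -1, 0, or 1.
// Mirrors the new IR: sub (zext (icmp ugt A, B)), (zext (icmp ult A, B)).
int memcmpChunk32(uint32_t A, uint32_t B) {
  return (int)(A > B) - (int)(A < B);
}

// Size < 4 (i8/i16 chunks): zero-extend and subtract directly; the
// difference is already a suitable negative, zero, or positive result.
int memcmpChunk8(uint8_t A, uint8_t B) {
  return (int)A - (int)B;
}

Both paths are plain integer arithmetic with no selects, which is what makes the IR easier for the x86 and PPC backends to lower well.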
Diffstat (limited to 'llvm/lib/CodeGen/CodeGenPrepare.cpp')
-rw-r--r--  llvm/lib/CodeGen/CodeGenPrepare.cpp | 24
1 file changed, 18 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index b5487b6..debfdc1 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -2117,13 +2117,25 @@ Value *MemCmpExpansion::getMemCmpOneBlock(unsigned Size) {
LoadSrc2 = Builder.CreateCall(Bswap, LoadSrc2);
}
- // TODO: Instead of comparing ULT, just subtract and return the difference?
- Value *CmpNE = Builder.CreateICmpNE(LoadSrc1, LoadSrc2);
+ if (Size < 4) {
+ // The i8 and i16 cases don't need compares. We zext the loaded values and
+ // subtract them to get the suitable negative, zero, or positive i32 result.
+ LoadSrc1 = Builder.CreateZExt(LoadSrc1, Builder.getInt32Ty());
+ LoadSrc2 = Builder.CreateZExt(LoadSrc2, Builder.getInt32Ty());
+ return Builder.CreateSub(LoadSrc1, LoadSrc2);
+ }
+
+ // The result of memcmp is negative, zero, or positive, so produce that by
+ // subtracting 2 extended compare bits: sub (ugt, ult).
+ // If a target prefers to use selects to get -1/0/1, they should be able
+ // to transform this later. The inverse transform (going from selects to math)
+ // may not be possible in the DAG because the selects got converted into
+ // branches before we got there.
+ Value *CmpUGT = Builder.CreateICmpUGT(LoadSrc1, LoadSrc2);
Value *CmpULT = Builder.CreateICmpULT(LoadSrc1, LoadSrc2);
- Type *I32 = Builder.getInt32Ty();
- Value *Sel1 = Builder.CreateSelect(CmpULT, ConstantInt::get(I32, -1),
- ConstantInt::get(I32, 1));
- return Builder.CreateSelect(CmpNE, Sel1, ConstantInt::get(I32, 0));
+ Value *ZextUGT = Builder.CreateZExt(CmpUGT, Builder.getInt32Ty());
+ Value *ZextULT = Builder.CreateZExt(CmpULT, Builder.getInt32Ty());
+ return Builder.CreateSub(ZextUGT, ZextULT);
}
// This function expands the memcmp call into an inline expansion and returns
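For comparison (again an illustrative sketch, not part of the commit; the name is hypothetical), the select-based form that the removed lines implemented:

#include <cstdint>

// The replaced pattern: selects materialize -1/0/1 from the compares.
// Mirrors the old IR: select (icmp ne A, B),
//                            (select (icmp ult A, B), -1, 1), 0.
int memcmpChunkSelects(uint32_t A, uint32_t B) {
  int Sel1 = (A < B) ? -1 : 1;
  return (A != B) ? Sel1 : 0;
}

As the new code comment notes, a target that prefers this select form can recover it later, while the reverse transform may not be possible in the DAG once the selects have been converted to branches.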