From 24c5184dcaf4ea554bd720b7e21c437f3183e662 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Mon, 17 Jan 2011 12:04:57 +0000 Subject: Add a DAGCombine to turn (ctpop x) u< 2 into (x & x-1) == 0. This shaves off 4 popcounts from the hacked 186.crafty source. This is enabled even when a native popcount instruction is available. The combined code is one operation longer but it should be faster nevertheless. llvm-svn: 123621 --- llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp') diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index f9e0992..1621d61 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -1870,6 +1870,30 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } } + SDValue CTPOP = N0; + // Look through truncs that don't change the value of a ctpop. + if (N0.hasOneUse() && N0.getOpcode() == ISD::TRUNCATE) + CTPOP = N0.getOperand(0); + + if (CTPOP.hasOneUse() && CTPOP.getOpcode() == ISD::CTPOP && + (N0 == CTPOP || N0.getValueType().getSizeInBits() >= + Log2_32_Ceil(CTPOP.getValueType().getSizeInBits()))) { + EVT CTVT = CTPOP.getValueType(); + SDValue CTOp = CTPOP.getOperand(0); + + // (ctpop x) u< 2 -> (x & x-1) == 0 + // (ctpop x) u> 1 -> (x & x-1) != 0 + if ((Cond == ISD::SETULT && C1 == 2) || (Cond == ISD::SETUGT && C1 == 1)){ + SDValue Sub = DAG.getNode(ISD::SUB, dl, CTVT, CTOp, + DAG.getConstant(1, CTVT)); + SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Sub); + ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE; + return DAG.getSetCC(dl, VT, And, DAG.getConstant(0, CTVT), CC); + } + + // TODO: (ctpop x) == 1 -> x && (x & x-1) == 0 iff ctpop is illegal. + } + // If the LHS is '(and load, const)', the RHS is 0, // the test is for equality or unsigned, and all 1 bits of the const are // in the same partial word, see if we can shorten the load. -- cgit v1.1