diff options
| author | Benjamin Kramer <benny.kra@googlemail.com> | 2011-01-02 19:44:58 +0000 | 
|---|---|---|
| committer | Benjamin Kramer <benny.kra@googlemail.com> | 2011-01-02 19:44:58 +0000 | 
| commit | 2fdea4c8f1e89f36548a92bb68f003a40c07372c (patch) | |
| tree | 8370a3b6024f8875ae1bf58af5087c581576db81 /llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | |
| parent | 68b7bb95d48d1b2dfabcb33447e6a1f1b3928f8e (diff) | |
| download | llvm-2fdea4c8f1e89f36548a92bb68f003a40c07372c.zip llvm-2fdea4c8f1e89f36548a92bb68f003a40c07372c.tar.gz llvm-2fdea4c8f1e89f36548a92bb68f003a40c07372c.tar.bz2  | |
Lower the i8 extension in memset to a multiply instead of a potentially long series of shifts and ors.
We could implement a DAGCombine to turn x * 0x0101 back into logic operations
on targets that don't support the multiply or where it is slow (P4), if someone
cares enough.
Example code:
  void test(char *s, int a) {
      __builtin_memset(s, a, 4);
  }
before:
  _test:                                  ## @test
    movzbl  8(%esp), %eax
    movl  %eax, %ecx
    shll  $8, %ecx
    orl %eax, %ecx
    movl  %ecx, %eax
    shll  $16, %eax
    orl %ecx, %eax
    movl  4(%esp), %ecx
    movl  %eax, 4(%ecx)
    movl  %eax, (%ecx)
    ret
after:
  _test:                                  ## @test
    movzbl  8(%esp), %eax
    imull $16843009, %eax, %eax   ## imm = 0x1010101
    movl  4(%esp), %ecx
    movl  %eax, 4(%ecx)
    movl  %eax, (%ecx)
    ret
llvm-svn: 122707
Diffstat (limited to 'llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp')
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 32 | 
1 files changed, 17 insertions, 15 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 9b2a601..b639b7c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -3132,6 +3132,17 @@ SDValue SelectionDAG::getStackArgumentTokenFactor(SDValue Chain) {
                  &ArgChains[0], ArgChains.size());
 }
 
+/// SplatByte - Distribute ByteVal over NumBits bits.
+static APInt SplatByte(unsigned NumBits, uint8_t ByteVal) {
+  APInt Val = APInt(NumBits, ByteVal);
+  unsigned Shift = 8;
+  for (unsigned i = NumBits; i > 8; i >>= 1) {
+    Val = (Val << Shift) | Val;
+    Shift <<= 1;
+  }
+  return Val;
+}
+
 /// getMemsetValue - Vectorized representation of the memset value
 /// operand.
 static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG,
@@ -3140,27 +3151,18 @@ static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG,
 
   unsigned NumBits = VT.getScalarType().getSizeInBits();
   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value)) {
-    APInt Val = APInt(NumBits, C->getZExtValue() & 255);
-    unsigned Shift = 8;
-    for (unsigned i = NumBits; i > 8; i >>= 1) {
-      Val = (Val << Shift) | Val;
-      Shift <<= 1;
-    }
+    APInt Val = SplatByte(NumBits, C->getZExtValue() & 255);
     if (VT.isInteger())
       return DAG.getConstant(Val, VT);
     return DAG.getConstantFP(APFloat(Val), VT);
   }
 
-  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   Value = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Value);
-  unsigned Shift = 8;
-  for (unsigned i = NumBits; i > 8; i >>= 1) {
-    Value = DAG.getNode(ISD::OR, dl, VT,
-                        DAG.getNode(ISD::SHL, dl, VT, Value,
-                                    DAG.getConstant(Shift,
-                                                    TLI.getShiftAmountTy())),
-                        Value);
-    Shift <<= 1;
+  if (NumBits > 8) {
+    // Use a multiplication with 0x010101... to extend the input to the
+    // required length.
+    APInt Magic = SplatByte(NumBits, 0x01);
+    Value = DAG.getNode(ISD::MUL, dl, VT, Value, DAG.getConstant(Magic, VT));
   }
 
   return Value;
