diff options
author | Jonas Paulsson <paulson1@linux.ibm.com> | 2023-12-08 17:19:17 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-12-08 17:19:17 +0100 |
commit | 435ba72afda756183a1ddc7a3a160152ad630951 (patch) | |
tree | 0654cd5618b8672cf5902011b6017786cf03a94f | |
parent | a87738f86b17f4a8dcde538c60826506e2a27ed1 (diff) | |
download | llvm-435ba72afda756183a1ddc7a3a160152ad630951.zip llvm-435ba72afda756183a1ddc7a3a160152ad630951.tar.gz llvm-435ba72afda756183a1ddc7a3a160152ad630951.tar.bz2 |
[SystemZ] Simplify handling of AtomicRMW instructions. (#74789)
Let the AtomicExpand pass do more of the job of expanding
AtomicRMWInst instructions in order to simplify the handling in the backend.
The only cases that the backend needs to handle itself are those of
subword size (8/16 bits) and those directly corresponding to a target
instruction.
-rw-r--r-- | llvm/lib/Target/SystemZ/SystemZISelLowering.cpp | 408 | ||||
-rw-r--r-- | llvm/lib/Target/SystemZ/SystemZISelLowering.h | 5 | ||||
-rw-r--r-- | llvm/lib/Target/SystemZ/SystemZInstrFormats.td | 24 | ||||
-rw-r--r-- | llvm/lib/Target/SystemZ/SystemZInstrInfo.td | 83 | ||||
-rw-r--r-- | llvm/test/CodeGen/SystemZ/atomicrmw-add-04.ll | 61 | ||||
-rw-r--r-- | llvm/test/CodeGen/SystemZ/atomicrmw-and-03.ll | 2 | ||||
-rw-r--r-- | llvm/test/CodeGen/SystemZ/atomicrmw-and-04.ll | 9 | ||||
-rw-r--r-- | llvm/test/CodeGen/SystemZ/atomicrmw-minmax-03.ll | 304 | ||||
-rw-r--r-- | llvm/test/CodeGen/SystemZ/atomicrmw-minmax-04.ll | 245 | ||||
-rw-r--r-- | llvm/test/CodeGen/SystemZ/atomicrmw-nand-03.ll | 22 | ||||
-rw-r--r-- | llvm/test/CodeGen/SystemZ/atomicrmw-nand-04.ll | 94 | ||||
-rw-r--r-- | llvm/test/CodeGen/SystemZ/atomicrmw-or-04.ll | 11 | ||||
-rw-r--r-- | llvm/test/CodeGen/SystemZ/atomicrmw-sub-04.ll | 61 | ||||
-rw-r--r-- | llvm/test/CodeGen/SystemZ/atomicrmw-xchg-03.ll | 2 | ||||
-rw-r--r-- | llvm/test/CodeGen/SystemZ/atomicrmw-xchg-04.ll | 2 | ||||
-rw-r--r-- | llvm/test/CodeGen/SystemZ/atomicrmw-xor-04.ll | 9 |
16 files changed, 646 insertions, 696 deletions
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index 873994c..f79787d 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -872,13 +872,21 @@ bool SystemZTargetLowering::hasInlineStackProbe(const MachineFunction &MF) const TargetLowering::AtomicExpansionKind SystemZTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const { - // TODO: expand them all here instead of in backend. - return (RMW->isFloatingPointOperation() || - RMW->getOperation() == AtomicRMWInst::UIncWrap || - RMW->getOperation() == AtomicRMWInst::UDecWrap || - RMW->getType()->isIntegerTy(128)) - ? AtomicExpansionKind::CmpXChg - : AtomicExpansionKind::None; + // Don't expand subword operations as they require special treatment. + if (RMW->getType()->isIntegerTy(8) || RMW->getType()->isIntegerTy(16)) + return AtomicExpansionKind::None; + + // Don't expand if there is a target instruction available. + if (Subtarget.hasInterlockedAccess1() && + (RMW->getType()->isIntegerTy(32) || RMW->getType()->isIntegerTy(64)) && + (RMW->getOperation() == AtomicRMWInst::BinOp::Add || + RMW->getOperation() == AtomicRMWInst::BinOp::Sub || + RMW->getOperation() == AtomicRMWInst::BinOp::And || + RMW->getOperation() == AtomicRMWInst::BinOp::Or || + RMW->getOperation() == AtomicRMWInst::BinOp::Xor)) + return AtomicExpansionKind::None; + + return AtomicExpansionKind::CmpXChg; } bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const { @@ -4350,6 +4358,31 @@ SDValue SystemZTargetLowering::lowerATOMIC_STORE(SDValue Op, return Chain; } +// Prepare for a Compare And Swap for a subword operation. This needs to be +// done in memory with 4 bytes at natural alignment. 
+static void getCSAddressAndShifts(SDValue Addr, SelectionDAG &DAG, SDLoc DL, + SDValue &AlignedAddr, SDValue &BitShift, + SDValue &NegBitShift) { + EVT PtrVT = Addr.getValueType(); + EVT WideVT = MVT::i32; + + // Get the address of the containing word. + AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr, + DAG.getConstant(-4, DL, PtrVT)); + + // Get the number of bits that the word must be rotated left in order + // to bring the field to the top bits of a GR32. + BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr, + DAG.getConstant(3, DL, PtrVT)); + BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift); + + // Get the complementing shift amount, for rotating a field in the top + // bits back to its proper position. + NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT, + DAG.getConstant(0, DL, WideVT), BitShift); + +} + // Op is an 8-, 16-bit or 32-bit ATOMIC_LOAD_* operation. Lower the first // two into the fullword ATOMIC_LOADW_* operation given by Opcode. SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op, @@ -4357,7 +4390,7 @@ SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op, unsigned Opcode) const { auto *Node = cast<AtomicSDNode>(Op.getNode()); - // 32-bit operations need no code outside the main loop. + // 32-bit operations need no special handling. EVT NarrowVT = Node->getMemoryVT(); EVT WideVT = MVT::i32; if (NarrowVT == WideVT) @@ -4369,7 +4402,6 @@ SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op, SDValue Src2 = Node->getVal(); MachineMemOperand *MMO = Node->getMemOperand(); SDLoc DL(Node); - EVT PtrVT = Addr.getValueType(); // Convert atomic subtracts of constants into additions. if (Opcode == SystemZISD::ATOMIC_LOADW_SUB) @@ -4378,20 +4410,8 @@ SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op, Src2 = DAG.getConstant(-Const->getSExtValue(), DL, Src2.getValueType()); } - // Get the address of the containing word. 
- SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr, - DAG.getConstant(-4, DL, PtrVT)); - - // Get the number of bits that the word must be rotated left in order - // to bring the field to the top bits of a GR32. - SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr, - DAG.getConstant(3, DL, PtrVT)); - BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift); - - // Get the complementing shift amount, for rotating a field in the top - // bits back to its proper position. - SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT, - DAG.getConstant(0, DL, WideVT), BitShift); + SDValue AlignedAddr, BitShift, NegBitShift; + getCSAddressAndShifts(Addr, DAG, DL, AlignedAddr, BitShift, NegBitShift); // Extend the source operand to 32 bits and prepare it for the inner loop. // ATOMIC_SWAPW uses RISBG to rotate the field left, but all other @@ -4423,38 +4443,24 @@ SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op, return DAG.getMergeValues(RetOps, DL); } -// Op is an ATOMIC_LOAD_SUB operation. Lower 8- and 16-bit operations -// into ATOMIC_LOADW_SUBs and decide whether to convert 32- and 64-bit -// operations into additions. +// Op is an ATOMIC_LOAD_SUB operation. Lower 8- and 16-bit operations into +// ATOMIC_LOADW_SUBs and convert 32- and 64-bit operations into additions. SDValue SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op, SelectionDAG &DAG) const { auto *Node = cast<AtomicSDNode>(Op.getNode()); EVT MemVT = Node->getMemoryVT(); if (MemVT == MVT::i32 || MemVT == MVT::i64) { - // A full-width operation. + // A full-width operation: negate and use LAA(G). 
assert(Op.getValueType() == MemVT && "Mismatched VTs"); + assert(Subtarget.hasInterlockedAccess1() && + "Should have been expanded by AtomicExpand pass."); SDValue Src2 = Node->getVal(); - SDValue NegSrc2; SDLoc DL(Src2); - - if (auto *Op2 = dyn_cast<ConstantSDNode>(Src2)) { - // Use an addition if the operand is constant and either LAA(G) is - // available or the negative value is in the range of A(G)FHI. - int64_t Value = (-Op2->getAPIntValue()).getSExtValue(); - if (isInt<32>(Value) || Subtarget.hasInterlockedAccess1()) - NegSrc2 = DAG.getConstant(Value, DL, MemVT); - } else if (Subtarget.hasInterlockedAccess1()) - // Use LAA(G) if available. - NegSrc2 = DAG.getNode(ISD::SUB, DL, MemVT, DAG.getConstant(0, DL, MemVT), - Src2); - - if (NegSrc2.getNode()) - return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, MemVT, - Node->getChain(), Node->getBasePtr(), NegSrc2, - Node->getMemOperand()); - - // Use the node as-is. - return Op; + SDValue NegSrc2 = + DAG.getNode(ISD::SUB, DL, MemVT, DAG.getConstant(0, DL, MemVT), Src2); + return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, MemVT, + Node->getChain(), Node->getBasePtr(), NegSrc2, + Node->getMemOperand()); } return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_SUB); @@ -4492,22 +4498,9 @@ SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op, // Convert 8-bit and 16-bit compare and swap to a loop, implemented // via a fullword ATOMIC_CMP_SWAPW operation. int64_t BitSize = NarrowVT.getSizeInBits(); - EVT PtrVT = Addr.getValueType(); - - // Get the address of the containing word. - SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr, - DAG.getConstant(-4, DL, PtrVT)); - // Get the number of bits that the word must be rotated left in order - // to bring the field to the top bits of a GR32. 
- SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr, - DAG.getConstant(3, DL, PtrVT)); - BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift); - - // Get the complementing shift amount, for rotating a field in the top - // bits back to its proper position. - SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT, - DAG.getConstant(0, DL, WideVT), BitShift); + SDValue AlignedAddr, BitShift, NegBitShift; + getCSAddressAndShifts(Addr, DAG, DL, AlignedAddr, BitShift, NegBitShift); // Construct the ATOMIC_CMP_SWAPW node. SDVTList VTList = DAG.getVTList(WideVT, MVT::i32, MVT::Other); @@ -7951,20 +7944,17 @@ MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI, return JoinMBB; } -// Implement EmitInstrWithCustomInserter for pseudo ATOMIC_LOAD{,W}_* -// or ATOMIC_SWAP{,W} instruction MI. BinOpcode is the instruction that -// performs the binary operation elided by "*", or 0 for ATOMIC_SWAP{,W}. -// BitSize is the width of the field in bits, or 0 if this is a partword -// ATOMIC_LOADW_* or ATOMIC_SWAPW instruction, in which case the bitsize -// is one of the operands. Invert says whether the field should be -// inverted after performing BinOpcode (e.g. for NAND). +// Implement EmitInstrWithCustomInserter for subword pseudo ATOMIC_LOADW_* or +// ATOMIC_SWAPW instruction MI. BinOpcode is the instruction that performs +// the binary operation elided by "*", or 0 for ATOMIC_SWAPW. Invert says +// whether the field should be inverted after performing BinOpcode (e.g. for +// NAND). MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary( MachineInstr &MI, MachineBasicBlock *MBB, unsigned BinOpcode, - unsigned BitSize, bool Invert) const { + bool Invert) const { MachineFunction &MF = *MBB->getParent(); const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); MachineRegisterInfo &MRI = MF.getRegInfo(); - bool IsSubWord = (BitSize < 32); // Extract the operands. Base can be a register or a frame index. // Src2 can be a register or immediate. 
@@ -7972,31 +7962,22 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary( MachineOperand Base = earlyUseOperand(MI.getOperand(1)); int64_t Disp = MI.getOperand(2).getImm(); MachineOperand Src2 = earlyUseOperand(MI.getOperand(3)); - Register BitShift = IsSubWord ? MI.getOperand(4).getReg() : Register(); - Register NegBitShift = IsSubWord ? MI.getOperand(5).getReg() : Register(); + Register BitShift = MI.getOperand(4).getReg(); + Register NegBitShift = MI.getOperand(5).getReg(); + unsigned BitSize = MI.getOperand(6).getImm(); DebugLoc DL = MI.getDebugLoc(); - if (IsSubWord) - BitSize = MI.getOperand(6).getImm(); - - // Subword operations use 32-bit registers. - const TargetRegisterClass *RC = (BitSize <= 32 ? - &SystemZ::GR32BitRegClass : - &SystemZ::GR64BitRegClass); - unsigned LOpcode = BitSize <= 32 ? SystemZ::L : SystemZ::LG; - unsigned CSOpcode = BitSize <= 32 ? SystemZ::CS : SystemZ::CSG; // Get the right opcodes for the displacement. - LOpcode = TII->getOpcodeForOffset(LOpcode, Disp); - CSOpcode = TII->getOpcodeForOffset(CSOpcode, Disp); + unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp); + unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp); assert(LOpcode && CSOpcode && "Displacement out of range"); // Create virtual registers for temporary results. - Register OrigVal = MRI.createVirtualRegister(RC); - Register OldVal = MRI.createVirtualRegister(RC); - Register NewVal = (BinOpcode || IsSubWord ? - MRI.createVirtualRegister(RC) : Src2.getReg()); - Register RotatedOldVal = (IsSubWord ? MRI.createVirtualRegister(RC) : OldVal); - Register RotatedNewVal = (IsSubWord ? 
MRI.createVirtualRegister(RC) : NewVal); + Register OrigVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass); + Register OldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass); + Register NewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass); + Register RotatedOldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass); + Register RotatedNewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass); // Insert a basic block for the main loop. MachineBasicBlock *StartMBB = MBB; @@ -8023,39 +8004,28 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary( BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal) .addReg(OrigVal).addMBB(StartMBB) .addReg(Dest).addMBB(LoopMBB); - if (IsSubWord) - BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal) - .addReg(OldVal).addReg(BitShift).addImm(0); + BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal) + .addReg(OldVal).addReg(BitShift).addImm(0); if (Invert) { // Perform the operation normally and then invert every bit of the field. - Register Tmp = MRI.createVirtualRegister(RC); + Register Tmp = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass); BuildMI(MBB, DL, TII->get(BinOpcode), Tmp).addReg(RotatedOldVal).add(Src2); - if (BitSize <= 32) - // XILF with the upper BitSize bits set. - BuildMI(MBB, DL, TII->get(SystemZ::XILF), RotatedNewVal) - .addReg(Tmp).addImm(-1U << (32 - BitSize)); - else { - // Use LCGR and add -1 to the result, which is more compact than - // an XILF, XILH pair. - Register Tmp2 = MRI.createVirtualRegister(RC); - BuildMI(MBB, DL, TII->get(SystemZ::LCGR), Tmp2).addReg(Tmp); - BuildMI(MBB, DL, TII->get(SystemZ::AGHI), RotatedNewVal) - .addReg(Tmp2).addImm(-1); - } + // XILF with the upper BitSize bits set. + BuildMI(MBB, DL, TII->get(SystemZ::XILF), RotatedNewVal) + .addReg(Tmp).addImm(-1U << (32 - BitSize)); } else if (BinOpcode) // A simply binary operation. 
BuildMI(MBB, DL, TII->get(BinOpcode), RotatedNewVal) .addReg(RotatedOldVal) .add(Src2); - else if (IsSubWord) + else // Use RISBG to rotate Src2 into position and use it to replace the // field in RotatedOldVal. BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedNewVal) .addReg(RotatedOldVal).addReg(Src2.getReg()) .addImm(32).addImm(31 + BitSize).addImm(32 - BitSize); - if (IsSubWord) - BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal) - .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0); + BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal) + .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0); BuildMI(MBB, DL, TII->get(CSOpcode), Dest) .addReg(OldVal) .addReg(NewVal) @@ -8070,50 +8040,40 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary( return DoneMBB; } -// Implement EmitInstrWithCustomInserter for pseudo -// ATOMIC_LOAD{,W}_{,U}{MIN,MAX} instruction MI. CompareOpcode is the +// Implement EmitInstrWithCustomInserter for subword pseudo +// ATOMIC_LOADW_{,U}{MIN,MAX} instruction MI. CompareOpcode is the // instruction that should be used to compare the current field with the // minimum or maximum value. KeepOldMask is the BRC condition-code mask -// for when the current field should be kept. BitSize is the width of -// the field in bits, or 0 if this is a partword ATOMIC_LOADW_* instruction. +// for when the current field should be kept. MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax( MachineInstr &MI, MachineBasicBlock *MBB, unsigned CompareOpcode, - unsigned KeepOldMask, unsigned BitSize) const { + unsigned KeepOldMask) const { MachineFunction &MF = *MBB->getParent(); const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); MachineRegisterInfo &MRI = MF.getRegInfo(); - bool IsSubWord = (BitSize < 32); // Extract the operands. Base can be a register or a frame index. 
Register Dest = MI.getOperand(0).getReg(); MachineOperand Base = earlyUseOperand(MI.getOperand(1)); int64_t Disp = MI.getOperand(2).getImm(); Register Src2 = MI.getOperand(3).getReg(); - Register BitShift = (IsSubWord ? MI.getOperand(4).getReg() : Register()); - Register NegBitShift = (IsSubWord ? MI.getOperand(5).getReg() : Register()); + Register BitShift = MI.getOperand(4).getReg(); + Register NegBitShift = MI.getOperand(5).getReg(); + unsigned BitSize = MI.getOperand(6).getImm(); DebugLoc DL = MI.getDebugLoc(); - if (IsSubWord) - BitSize = MI.getOperand(6).getImm(); - - // Subword operations use 32-bit registers. - const TargetRegisterClass *RC = (BitSize <= 32 ? - &SystemZ::GR32BitRegClass : - &SystemZ::GR64BitRegClass); - unsigned LOpcode = BitSize <= 32 ? SystemZ::L : SystemZ::LG; - unsigned CSOpcode = BitSize <= 32 ? SystemZ::CS : SystemZ::CSG; // Get the right opcodes for the displacement. - LOpcode = TII->getOpcodeForOffset(LOpcode, Disp); - CSOpcode = TII->getOpcodeForOffset(CSOpcode, Disp); + unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp); + unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp); assert(LOpcode && CSOpcode && "Displacement out of range"); // Create virtual registers for temporary results. - Register OrigVal = MRI.createVirtualRegister(RC); - Register OldVal = MRI.createVirtualRegister(RC); - Register NewVal = MRI.createVirtualRegister(RC); - Register RotatedOldVal = (IsSubWord ? MRI.createVirtualRegister(RC) : OldVal); - Register RotatedAltVal = (IsSubWord ? MRI.createVirtualRegister(RC) : Src2); - Register RotatedNewVal = (IsSubWord ? 
MRI.createVirtualRegister(RC) : NewVal); + Register OrigVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass); + Register OldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass); + Register NewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass); + Register RotatedOldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass); + Register RotatedAltVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass); + Register RotatedNewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass); // Insert 3 basic blocks for the loop. MachineBasicBlock *StartMBB = MBB; @@ -8139,9 +8099,8 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax( BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal) .addReg(OrigVal).addMBB(StartMBB) .addReg(Dest).addMBB(UpdateMBB); - if (IsSubWord) - BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal) - .addReg(OldVal).addReg(BitShift).addImm(0); + BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal) + .addReg(OldVal).addReg(BitShift).addImm(0); BuildMI(MBB, DL, TII->get(CompareOpcode)) .addReg(RotatedOldVal).addReg(Src2); BuildMI(MBB, DL, TII->get(SystemZ::BRC)) @@ -8153,10 +8112,9 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax( // %RotatedAltVal = RISBG %RotatedOldVal, %Src2, 32, 31 + BitSize, 0 // # fall through to UpdateMBB MBB = UseAltMBB; - if (IsSubWord) - BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedAltVal) - .addReg(RotatedOldVal).addReg(Src2) - .addImm(32).addImm(31 + BitSize).addImm(0); + BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedAltVal) + .addReg(RotatedOldVal).addReg(Src2) + .addImm(32).addImm(31 + BitSize).addImm(0); MBB->addSuccessor(UpdateMBB); // UpdateMBB: @@ -8170,9 +8128,8 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax( BuildMI(MBB, DL, TII->get(SystemZ::PHI), RotatedNewVal) .addReg(RotatedOldVal).addMBB(LoopMBB) .addReg(RotatedAltVal).addMBB(UseAltMBB); - if (IsSubWord) - BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal) - 
.addReg(RotatedNewVal).addReg(NegBitShift).addImm(0); + BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal) + .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0); BuildMI(MBB, DL, TII->get(CSOpcode), Dest) .addReg(OldVal) .addReg(NewVal) @@ -8187,7 +8144,7 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax( return DoneMBB; } -// Implement EmitInstrWithCustomInserter for pseudo ATOMIC_CMP_SWAPW +// Implement EmitInstrWithCustomInserter for subword pseudo ATOMIC_CMP_SWAPW // instruction MI. MachineBasicBlock * SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI, @@ -9004,171 +8961,44 @@ MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter( return emitExt128(MI, MBB, true); case SystemZ::ATOMIC_SWAPW: - return emitAtomicLoadBinary(MI, MBB, 0, 0); - case SystemZ::ATOMIC_SWAP_32: - return emitAtomicLoadBinary(MI, MBB, 0, 32); - case SystemZ::ATOMIC_SWAP_64: - return emitAtomicLoadBinary(MI, MBB, 0, 64); + return emitAtomicLoadBinary(MI, MBB, 0); case SystemZ::ATOMIC_LOADW_AR: - return emitAtomicLoadBinary(MI, MBB, SystemZ::AR, 0); + return emitAtomicLoadBinary(MI, MBB, SystemZ::AR); case SystemZ::ATOMIC_LOADW_AFI: - return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI, 0); - case SystemZ::ATOMIC_LOAD_AR: - return emitAtomicLoadBinary(MI, MBB, SystemZ::AR, 32); - case SystemZ::ATOMIC_LOAD_AHI: - return emitAtomicLoadBinary(MI, MBB, SystemZ::AHI, 32); - case SystemZ::ATOMIC_LOAD_AFI: - return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI, 32); - case SystemZ::ATOMIC_LOAD_AGR: - return emitAtomicLoadBinary(MI, MBB, SystemZ::AGR, 64); - case SystemZ::ATOMIC_LOAD_AGHI: - return emitAtomicLoadBinary(MI, MBB, SystemZ::AGHI, 64); - case SystemZ::ATOMIC_LOAD_AGFI: - return emitAtomicLoadBinary(MI, MBB, SystemZ::AGFI, 64); + return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI); case SystemZ::ATOMIC_LOADW_SR: - return emitAtomicLoadBinary(MI, MBB, SystemZ::SR, 0); - case SystemZ::ATOMIC_LOAD_SR: - return emitAtomicLoadBinary(MI, MBB, SystemZ::SR, 32); 
- case SystemZ::ATOMIC_LOAD_SGR: - return emitAtomicLoadBinary(MI, MBB, SystemZ::SGR, 64); + return emitAtomicLoadBinary(MI, MBB, SystemZ::SR); case SystemZ::ATOMIC_LOADW_NR: - return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 0); + return emitAtomicLoadBinary(MI, MBB, SystemZ::NR); case SystemZ::ATOMIC_LOADW_NILH: - return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 0); - case SystemZ::ATOMIC_LOAD_NR: - return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 32); - case SystemZ::ATOMIC_LOAD_NILL: - return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL, 32); - case SystemZ::ATOMIC_LOAD_NILH: - return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 32); - case SystemZ::ATOMIC_LOAD_NILF: - return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF, 32); - case SystemZ::ATOMIC_LOAD_NGR: - return emitAtomicLoadBinary(MI, MBB, SystemZ::NGR, 64); - case SystemZ::ATOMIC_LOAD_NILL64: - return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL64, 64); - case SystemZ::ATOMIC_LOAD_NILH64: - return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH64, 64); - case SystemZ::ATOMIC_LOAD_NIHL64: - return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHL64, 64); - case SystemZ::ATOMIC_LOAD_NIHH64: - return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHH64, 64); - case SystemZ::ATOMIC_LOAD_NILF64: - return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF64, 64); - case SystemZ::ATOMIC_LOAD_NIHF64: - return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHF64, 64); + return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH); case SystemZ::ATOMIC_LOADW_OR: - return emitAtomicLoadBinary(MI, MBB, SystemZ::OR, 0); + return emitAtomicLoadBinary(MI, MBB, SystemZ::OR); case SystemZ::ATOMIC_LOADW_OILH: - return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH, 0); - case SystemZ::ATOMIC_LOAD_OR: - return emitAtomicLoadBinary(MI, MBB, SystemZ::OR, 32); - case SystemZ::ATOMIC_LOAD_OILL: - return emitAtomicLoadBinary(MI, MBB, SystemZ::OILL, 32); - case SystemZ::ATOMIC_LOAD_OILH: - return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH, 32); - case 
SystemZ::ATOMIC_LOAD_OILF: - return emitAtomicLoadBinary(MI, MBB, SystemZ::OILF, 32); - case SystemZ::ATOMIC_LOAD_OGR: - return emitAtomicLoadBinary(MI, MBB, SystemZ::OGR, 64); - case SystemZ::ATOMIC_LOAD_OILL64: - return emitAtomicLoadBinary(MI, MBB, SystemZ::OILL64, 64); - case SystemZ::ATOMIC_LOAD_OILH64: - return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH64, 64); - case SystemZ::ATOMIC_LOAD_OIHL64: - return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHL64, 64); - case SystemZ::ATOMIC_LOAD_OIHH64: - return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHH64, 64); - case SystemZ::ATOMIC_LOAD_OILF64: - return emitAtomicLoadBinary(MI, MBB, SystemZ::OILF64, 64); - case SystemZ::ATOMIC_LOAD_OIHF64: - return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHF64, 64); + return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH); case SystemZ::ATOMIC_LOADW_XR: - return emitAtomicLoadBinary(MI, MBB, SystemZ::XR, 0); + return emitAtomicLoadBinary(MI, MBB, SystemZ::XR); case SystemZ::ATOMIC_LOADW_XILF: - return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF, 0); - case SystemZ::ATOMIC_LOAD_XR: - return emitAtomicLoadBinary(MI, MBB, SystemZ::XR, 32); - case SystemZ::ATOMIC_LOAD_XILF: - return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF, 32); - case SystemZ::ATOMIC_LOAD_XGR: - return emitAtomicLoadBinary(MI, MBB, SystemZ::XGR, 64); - case SystemZ::ATOMIC_LOAD_XILF64: - return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF64, 64); - case SystemZ::ATOMIC_LOAD_XIHF64: - return emitAtomicLoadBinary(MI, MBB, SystemZ::XIHF64, 64); + return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF); case SystemZ::ATOMIC_LOADW_NRi: - return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 0, true); + return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, true); case SystemZ::ATOMIC_LOADW_NILHi: - return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 0, true); - case SystemZ::ATOMIC_LOAD_NRi: - return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 32, true); - case SystemZ::ATOMIC_LOAD_NILLi: - return emitAtomicLoadBinary(MI, MBB, 
SystemZ::NILL, 32, true); - case SystemZ::ATOMIC_LOAD_NILHi: - return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 32, true); - case SystemZ::ATOMIC_LOAD_NILFi: - return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF, 32, true); - case SystemZ::ATOMIC_LOAD_NGRi: - return emitAtomicLoadBinary(MI, MBB, SystemZ::NGR, 64, true); - case SystemZ::ATOMIC_LOAD_NILL64i: - return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL64, 64, true); - case SystemZ::ATOMIC_LOAD_NILH64i: - return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH64, 64, true); - case SystemZ::ATOMIC_LOAD_NIHL64i: - return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHL64, 64, true); - case SystemZ::ATOMIC_LOAD_NIHH64i: - return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHH64, 64, true); - case SystemZ::ATOMIC_LOAD_NILF64i: - return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF64, 64, true); - case SystemZ::ATOMIC_LOAD_NIHF64i: - return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHF64, 64, true); + return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, true); case SystemZ::ATOMIC_LOADW_MIN: - return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, - SystemZ::CCMASK_CMP_LE, 0); - case SystemZ::ATOMIC_LOAD_MIN_32: - return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, - SystemZ::CCMASK_CMP_LE, 32); - case SystemZ::ATOMIC_LOAD_MIN_64: - return emitAtomicLoadMinMax(MI, MBB, SystemZ::CGR, - SystemZ::CCMASK_CMP_LE, 64); - + return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, SystemZ::CCMASK_CMP_LE); case SystemZ::ATOMIC_LOADW_MAX: - return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, - SystemZ::CCMASK_CMP_GE, 0); - case SystemZ::ATOMIC_LOAD_MAX_32: - return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, - SystemZ::CCMASK_CMP_GE, 32); - case SystemZ::ATOMIC_LOAD_MAX_64: - return emitAtomicLoadMinMax(MI, MBB, SystemZ::CGR, - SystemZ::CCMASK_CMP_GE, 64); - + return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, SystemZ::CCMASK_CMP_GE); case SystemZ::ATOMIC_LOADW_UMIN: - return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, - SystemZ::CCMASK_CMP_LE, 0); - case 
SystemZ::ATOMIC_LOAD_UMIN_32: - return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, - SystemZ::CCMASK_CMP_LE, 32); - case SystemZ::ATOMIC_LOAD_UMIN_64: - return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLGR, - SystemZ::CCMASK_CMP_LE, 64); - + return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, SystemZ::CCMASK_CMP_LE); case SystemZ::ATOMIC_LOADW_UMAX: - return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, - SystemZ::CCMASK_CMP_GE, 0); - case SystemZ::ATOMIC_LOAD_UMAX_32: - return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, - SystemZ::CCMASK_CMP_GE, 32); - case SystemZ::ATOMIC_LOAD_UMAX_64: - return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLGR, - SystemZ::CCMASK_CMP_GE, 64); + return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, SystemZ::CCMASK_CMP_GE); case SystemZ::ATOMIC_CMP_SWAPW: return emitAtomicCmpSwapW(MI, MBB); diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h index 40fe433..1e2887c 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h @@ -748,13 +748,12 @@ private: bool ClearEven) const; MachineBasicBlock *emitAtomicLoadBinary(MachineInstr &MI, MachineBasicBlock *BB, - unsigned BinOpcode, unsigned BitSize, + unsigned BinOpcode, bool Invert = false) const; MachineBasicBlock *emitAtomicLoadMinMax(MachineInstr &MI, MachineBasicBlock *MBB, unsigned CompareOpcode, - unsigned KeepOldMask, - unsigned BitSize) const; + unsigned KeepOldMask) const; MachineBasicBlock *emitAtomicCmpSwapW(MachineInstr &MI, MachineBasicBlock *BB) const; MachineBasicBlock *emitMemMemWrapper(MachineInstr &MI, MachineBasicBlock *BB, diff --git a/llvm/lib/Target/SystemZ/SystemZInstrFormats.td b/llvm/lib/Target/SystemZ/SystemZInstrFormats.td index a25719f..2e5ff4a 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrFormats.td +++ b/llvm/lib/Target/SystemZ/SystemZInstrFormats.td @@ -5327,30 +5327,6 @@ multiclass CondStores<RegisterOperand cls, SDPatternOperator store, } } -// OPERATOR is 
ATOMIC_SWAP or an ATOMIC_LOAD_* operation. PAT and OPERAND -// describe the second (non-memory) operand. -class AtomicLoadBinary<SDPatternOperator operator, RegisterOperand cls, - dag pat, DAGOperand operand> - : Pseudo<(outs cls:$dst), (ins bdaddr20only:$ptr, operand:$src2), - [(set cls:$dst, (operator bdaddr20only:$ptr, pat))]> { - let Defs = [CC]; - let Has20BitOffset = 1; - let mayLoad = 1; - let mayStore = 1; - let usesCustomInserter = 1; - let hasNoSchedulingInfo = 1; -} - -// Specializations of AtomicLoadWBinary. -class AtomicLoadBinaryReg32<SDPatternOperator operator> - : AtomicLoadBinary<operator, GR32, (i32 GR32:$src2), GR32>; -class AtomicLoadBinaryImm32<SDPatternOperator operator, ImmOpWithPattern imm> - : AtomicLoadBinary<operator, GR32, (i32 imm:$src2), imm>; -class AtomicLoadBinaryReg64<SDPatternOperator operator> - : AtomicLoadBinary<operator, GR64, (i64 GR64:$src2), GR64>; -class AtomicLoadBinaryImm64<SDPatternOperator operator, ImmOpWithPattern imm> - : AtomicLoadBinary<operator, GR64, (i64 imm:$src2), imm>; - // OPERATOR is ATOMIC_SWAPW or an ATOMIC_LOADW_* operation. PAT and OPERAND // describe the second (non-memory) operand. 
class AtomicLoadWBinary<SDPatternOperator operator, dag pat, diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td index 87eb383..210e6a5 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td @@ -1746,112 +1746,29 @@ let Predicates = [FeatureInterlockedAccess1], Defs = [CC] in { } def ATOMIC_SWAPW : AtomicLoadWBinaryReg<z_atomic_swapw>; -def ATOMIC_SWAP_32 : AtomicLoadBinaryReg32<atomic_swap_32>; -def ATOMIC_SWAP_64 : AtomicLoadBinaryReg64<atomic_swap_64>; def ATOMIC_LOADW_AR : AtomicLoadWBinaryReg<z_atomic_loadw_add>; def ATOMIC_LOADW_AFI : AtomicLoadWBinaryImm<z_atomic_loadw_add, simm32>; -let Predicates = [FeatureNoInterlockedAccess1] in { - def ATOMIC_LOAD_AR : AtomicLoadBinaryReg32<atomic_load_add_32>; - def ATOMIC_LOAD_AHI : AtomicLoadBinaryImm32<atomic_load_add_32, imm32sx16>; - def ATOMIC_LOAD_AFI : AtomicLoadBinaryImm32<atomic_load_add_32, simm32>; - def ATOMIC_LOAD_AGR : AtomicLoadBinaryReg64<atomic_load_add_64>; - def ATOMIC_LOAD_AGHI : AtomicLoadBinaryImm64<atomic_load_add_64, imm64sx16>; - def ATOMIC_LOAD_AGFI : AtomicLoadBinaryImm64<atomic_load_add_64, imm64sx32>; -} def ATOMIC_LOADW_SR : AtomicLoadWBinaryReg<z_atomic_loadw_sub>; -def ATOMIC_LOAD_SR : AtomicLoadBinaryReg32<atomic_load_sub_32>; -def ATOMIC_LOAD_SGR : AtomicLoadBinaryReg64<atomic_load_sub_64>; def ATOMIC_LOADW_NR : AtomicLoadWBinaryReg<z_atomic_loadw_and>; def ATOMIC_LOADW_NILH : AtomicLoadWBinaryImm<z_atomic_loadw_and, imm32lh16c>; -let Predicates = [FeatureNoInterlockedAccess1] in { - def ATOMIC_LOAD_NR : AtomicLoadBinaryReg32<atomic_load_and_32>; - def ATOMIC_LOAD_NILL : AtomicLoadBinaryImm32<atomic_load_and_32, - imm32ll16c>; - def ATOMIC_LOAD_NILH : AtomicLoadBinaryImm32<atomic_load_and_32, - imm32lh16c>; - def ATOMIC_LOAD_NILF : AtomicLoadBinaryImm32<atomic_load_and_32, uimm32>; - def ATOMIC_LOAD_NGR : AtomicLoadBinaryReg64<atomic_load_and_64>; - def ATOMIC_LOAD_NILL64 : 
AtomicLoadBinaryImm64<atomic_load_and_64, - imm64ll16c>; - def ATOMIC_LOAD_NILH64 : AtomicLoadBinaryImm64<atomic_load_and_64, - imm64lh16c>; - def ATOMIC_LOAD_NIHL64 : AtomicLoadBinaryImm64<atomic_load_and_64, - imm64hl16c>; - def ATOMIC_LOAD_NIHH64 : AtomicLoadBinaryImm64<atomic_load_and_64, - imm64hh16c>; - def ATOMIC_LOAD_NILF64 : AtomicLoadBinaryImm64<atomic_load_and_64, - imm64lf32c>; - def ATOMIC_LOAD_NIHF64 : AtomicLoadBinaryImm64<atomic_load_and_64, - imm64hf32c>; -} def ATOMIC_LOADW_OR : AtomicLoadWBinaryReg<z_atomic_loadw_or>; def ATOMIC_LOADW_OILH : AtomicLoadWBinaryImm<z_atomic_loadw_or, imm32lh16>; -let Predicates = [FeatureNoInterlockedAccess1] in { - def ATOMIC_LOAD_OR : AtomicLoadBinaryReg32<atomic_load_or_32>; - def ATOMIC_LOAD_OILL : AtomicLoadBinaryImm32<atomic_load_or_32, imm32ll16>; - def ATOMIC_LOAD_OILH : AtomicLoadBinaryImm32<atomic_load_or_32, imm32lh16>; - def ATOMIC_LOAD_OILF : AtomicLoadBinaryImm32<atomic_load_or_32, uimm32>; - def ATOMIC_LOAD_OGR : AtomicLoadBinaryReg64<atomic_load_or_64>; - def ATOMIC_LOAD_OILL64 : AtomicLoadBinaryImm64<atomic_load_or_64, imm64ll16>; - def ATOMIC_LOAD_OILH64 : AtomicLoadBinaryImm64<atomic_load_or_64, imm64lh16>; - def ATOMIC_LOAD_OIHL64 : AtomicLoadBinaryImm64<atomic_load_or_64, imm64hl16>; - def ATOMIC_LOAD_OIHH64 : AtomicLoadBinaryImm64<atomic_load_or_64, imm64hh16>; - def ATOMIC_LOAD_OILF64 : AtomicLoadBinaryImm64<atomic_load_or_64, imm64lf32>; - def ATOMIC_LOAD_OIHF64 : AtomicLoadBinaryImm64<atomic_load_or_64, imm64hf32>; -} def ATOMIC_LOADW_XR : AtomicLoadWBinaryReg<z_atomic_loadw_xor>; def ATOMIC_LOADW_XILF : AtomicLoadWBinaryImm<z_atomic_loadw_xor, uimm32>; -let Predicates = [FeatureNoInterlockedAccess1] in { - def ATOMIC_LOAD_XR : AtomicLoadBinaryReg32<atomic_load_xor_32>; - def ATOMIC_LOAD_XILF : AtomicLoadBinaryImm32<atomic_load_xor_32, uimm32>; - def ATOMIC_LOAD_XGR : AtomicLoadBinaryReg64<atomic_load_xor_64>; - def ATOMIC_LOAD_XILF64 : AtomicLoadBinaryImm64<atomic_load_xor_64, imm64lf32>; - 
def ATOMIC_LOAD_XIHF64 : AtomicLoadBinaryImm64<atomic_load_xor_64, imm64hf32>; -} def ATOMIC_LOADW_NRi : AtomicLoadWBinaryReg<z_atomic_loadw_nand>; def ATOMIC_LOADW_NILHi : AtomicLoadWBinaryImm<z_atomic_loadw_nand, imm32lh16c>; -def ATOMIC_LOAD_NRi : AtomicLoadBinaryReg32<atomic_load_nand_32>; -def ATOMIC_LOAD_NILLi : AtomicLoadBinaryImm32<atomic_load_nand_32, - imm32ll16c>; -def ATOMIC_LOAD_NILHi : AtomicLoadBinaryImm32<atomic_load_nand_32, - imm32lh16c>; -def ATOMIC_LOAD_NILFi : AtomicLoadBinaryImm32<atomic_load_nand_32, uimm32>; -def ATOMIC_LOAD_NGRi : AtomicLoadBinaryReg64<atomic_load_nand_64>; -def ATOMIC_LOAD_NILL64i : AtomicLoadBinaryImm64<atomic_load_nand_64, - imm64ll16c>; -def ATOMIC_LOAD_NILH64i : AtomicLoadBinaryImm64<atomic_load_nand_64, - imm64lh16c>; -def ATOMIC_LOAD_NIHL64i : AtomicLoadBinaryImm64<atomic_load_nand_64, - imm64hl16c>; -def ATOMIC_LOAD_NIHH64i : AtomicLoadBinaryImm64<atomic_load_nand_64, - imm64hh16c>; -def ATOMIC_LOAD_NILF64i : AtomicLoadBinaryImm64<atomic_load_nand_64, - imm64lf32c>; -def ATOMIC_LOAD_NIHF64i : AtomicLoadBinaryImm64<atomic_load_nand_64, - imm64hf32c>; def ATOMIC_LOADW_MIN : AtomicLoadWBinaryReg<z_atomic_loadw_min>; -def ATOMIC_LOAD_MIN_32 : AtomicLoadBinaryReg32<atomic_load_min_32>; -def ATOMIC_LOAD_MIN_64 : AtomicLoadBinaryReg64<atomic_load_min_64>; - def ATOMIC_LOADW_MAX : AtomicLoadWBinaryReg<z_atomic_loadw_max>; -def ATOMIC_LOAD_MAX_32 : AtomicLoadBinaryReg32<atomic_load_max_32>; -def ATOMIC_LOAD_MAX_64 : AtomicLoadBinaryReg64<atomic_load_max_64>; - def ATOMIC_LOADW_UMIN : AtomicLoadWBinaryReg<z_atomic_loadw_umin>; -def ATOMIC_LOAD_UMIN_32 : AtomicLoadBinaryReg32<atomic_load_umin_32>; -def ATOMIC_LOAD_UMIN_64 : AtomicLoadBinaryReg64<atomic_load_umin_64>; - def ATOMIC_LOADW_UMAX : AtomicLoadWBinaryReg<z_atomic_loadw_umax>; -def ATOMIC_LOAD_UMAX_32 : AtomicLoadBinaryReg32<atomic_load_umax_32>; -def ATOMIC_LOAD_UMAX_64 : AtomicLoadBinaryReg64<atomic_load_umax_64>; def ATOMIC_CMP_SWAPW : Pseudo<(outs GR32:$dst), (ins 
bdaddr20only:$addr, GR32:$cmp, GR32:$swap, diff --git a/llvm/test/CodeGen/SystemZ/atomicrmw-add-04.ll b/llvm/test/CodeGen/SystemZ/atomicrmw-add-04.ll index 50d3eec..4af3793 100644 --- a/llvm/test/CodeGen/SystemZ/atomicrmw-add-04.ll +++ b/llvm/test/CodeGen/SystemZ/atomicrmw-add-04.ll @@ -16,13 +16,12 @@ define i64 @f1(i64 %dummy, ptr %src, i64 %b) { ret i64 %res } -; Check addition of 1, which can use AGHI. +; Check addition of 1. define i64 @f2(i64 %dummy, ptr %src) { ; CHECK-LABEL: f2: ; CHECK: lg %r2, 0(%r3) ; CHECK: [[LABEL:\.[^:]*]]: -; CHECK: lgr %r0, %r2 -; CHECK: aghi %r0, 1 +; CHECK: la %r0, 1(%r2) ; CHECK: csg %r2, %r0, 0(%r3) ; CHECK: jl [[LABEL]] ; CHECK: br %r14 @@ -30,82 +29,64 @@ define i64 @f2(i64 %dummy, ptr %src) { ret i64 %res } -; Check the high end of the AGHI range. +; Check use of LAY. define i64 @f3(i64 %dummy, ptr %src) { ; CHECK-LABEL: f3: -; CHECK: aghi %r0, 32767 +; CHECK: lay %r0, 32767(%r2) ; CHECK: br %r14 %res = atomicrmw add ptr %src, i64 32767 seq_cst ret i64 %res } -; Check the next value up, which must use AGFI. +; Check the high end of the AGFI range. define i64 @f4(i64 %dummy, ptr %src) { ; CHECK-LABEL: f4: -; CHECK: agfi %r0, 32768 +; CHECK: agfi %r0, 2147483647 ; CHECK: br %r14 - %res = atomicrmw add ptr %src, i64 32768 seq_cst + %res = atomicrmw add ptr %src, i64 2147483647 seq_cst ret i64 %res } -; Check the high end of the AGFI range. +; Check the next value up, which uses an ALGFI. define i64 @f5(i64 %dummy, ptr %src) { ; CHECK-LABEL: f5: -; CHECK: agfi %r0, 2147483647 +; CHECK: algfi %r0, 2147483648 ; CHECK: br %r14 - %res = atomicrmw add ptr %src, i64 2147483647 seq_cst + %res = atomicrmw add ptr %src, i64 2147483648 seq_cst ret i64 %res } -; Check the next value up, which must use a register addition. +; Check addition of -1, which can use LAY. 
define i64 @f6(i64 %dummy, ptr %src) { ; CHECK-LABEL: f6: -; CHECK: agr +; CHECK: lay %r0, -1(%r2) ; CHECK: br %r14 - %res = atomicrmw add ptr %src, i64 2147483648 seq_cst + %res = atomicrmw add ptr %src, i64 -1 seq_cst ret i64 %res } -; Check addition of -1, which can use AGHI. +; LAY still OK. define i64 @f7(i64 %dummy, ptr %src) { ; CHECK-LABEL: f7: -; CHECK: aghi %r0, -1 -; CHECK: br %r14 - %res = atomicrmw add ptr %src, i64 -1 seq_cst - ret i64 %res -} - -; Check the low end of the AGHI range. -define i64 @f8(i64 %dummy, ptr %src) { -; CHECK-LABEL: f8: -; CHECK: aghi %r0, -32768 -; CHECK: br %r14 - %res = atomicrmw add ptr %src, i64 -32768 seq_cst - ret i64 %res -} - -; Check the next value down, which must use AGFI instead. -define i64 @f9(i64 %dummy, ptr %src) { -; CHECK-LABEL: f9: -; CHECK: agfi %r0, -32769 +; CHECK: lay %r0, -32769(%r2) ; CHECK: br %r14 %res = atomicrmw add ptr %src, i64 -32769 seq_cst ret i64 %res } ; Check the low end of the AGFI range. -define i64 @f10(i64 %dummy, ptr %src) { -; CHECK-LABEL: f10: +define i64 @f8(i64 %dummy, ptr %src) { +; CHECK-LABEL: f8: ; CHECK: agfi %r0, -2147483648 ; CHECK: br %r14 %res = atomicrmw add ptr %src, i64 -2147483648 seq_cst ret i64 %res } -; Check the next value down, which must use a register addition. -define i64 @f11(i64 %dummy, ptr %src) { -; CHECK-LABEL: f11: -; CHECK: agr +; Check the next value down, which uses an SLGFI. +define i64 @f9(i64 %dummy, ptr %src) { +; CHECK-LABEL: f9: +; CHECK: slgfi %r0, 2147483649 ; CHECK: br %r14 %res = atomicrmw add ptr %src, i64 -2147483649 seq_cst ret i64 %res diff --git a/llvm/test/CodeGen/SystemZ/atomicrmw-and-03.ll b/llvm/test/CodeGen/SystemZ/atomicrmw-and-03.ll index 03ed240..96c82e6 100644 --- a/llvm/test/CodeGen/SystemZ/atomicrmw-and-03.ll +++ b/llvm/test/CodeGen/SystemZ/atomicrmw-and-03.ll @@ -33,7 +33,7 @@ define i32 @f2(i32 %dummy, ptr %src) { ; Check ANDs of the low end of the NILH range. 
define i32 @f3(i32 %dummy, ptr %src) { ; CHECK-LABEL: f3: -; CHECK: nilh %r0, 0 +; CHECK: llhr %r0, %r2 ; CHECK: br %r14 %res = atomicrmw and ptr %src, i32 65535 seq_cst ret i32 %res diff --git a/llvm/test/CodeGen/SystemZ/atomicrmw-and-04.ll b/llvm/test/CodeGen/SystemZ/atomicrmw-and-04.ll index 00b6fd9..9647548 100644 --- a/llvm/test/CodeGen/SystemZ/atomicrmw-and-04.ll +++ b/llvm/test/CodeGen/SystemZ/atomicrmw-and-04.ll @@ -16,11 +16,10 @@ define i64 @f1(i64 %dummy, ptr %src, i64 %b) { ret i64 %res } -; Check ANDs of 1, which are done using a register. (We could use RISBG -; instead, but that isn't implemented yet.) +; Check ANDs of 1, which are done using a register. define i64 @f2(i64 %dummy, ptr %src) { ; CHECK-LABEL: f2: -; CHECK: ngr +; CHECK: risbg ; CHECK: br %r14 %res = atomicrmw and ptr %src, i64 1 seq_cst ret i64 %res @@ -56,7 +55,7 @@ define i64 @f4(i64 %dummy, ptr %src) { ; Check the next value up, which must use a register. define i64 @f5(i64 %dummy, ptr %src) { ; CHECK-LABEL: f5: -; CHECK: ngr +; CHECK: risbg ; CHECK: br %r14 %res = atomicrmw and ptr %src, i64 12884901888 seq_cst ret i64 %res @@ -74,7 +73,7 @@ define i64 @f6(i64 %dummy, ptr %src) { ; Check the next value up, which must use a register. define i64 @f7(i64 %dummy, ptr %src) { ; CHECK-LABEL: f7: -; CHECK: ngr +; CHECK: risbg ; CHECK: br %r14 %res = atomicrmw and ptr %src, i64 281474976710656 seq_cst ret i64 %res diff --git a/llvm/test/CodeGen/SystemZ/atomicrmw-minmax-03.ll b/llvm/test/CodeGen/SystemZ/atomicrmw-minmax-03.ll index d633c2d..d107e5d 100644 --- a/llvm/test/CodeGen/SystemZ/atomicrmw-minmax-03.ll +++ b/llvm/test/CodeGen/SystemZ/atomicrmw-minmax-03.ll @@ -1,21 +1,31 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ; Test 32-bit atomic minimum and maximum. Here we match the z10 versions, ; which can't use LOCR. 
; ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s +; Todo: If-converter no longer producing CondReturns (with AtomicExpand pass). + ; Check signed minimum. define i32 @f1(i32 %dummy, ptr %src, i32 %b) { ; CHECK-LABEL: f1: -; CHECK: l %r2, 0(%r3) -; CHECK: j [[LOOP:\.[^:]*]] -; CHECK: [[BB1:\.[^:]*]]: -; CHECK: cs %r2, [[NEW:%r[0-9]+]], 0(%r3) -; CHECK: ber %r14 -; CHECK: [[LOOP]]: -; CHECK: lr [[NEW]], %r2 -; CHECK: crjle %r2, %r4, [[KEEP:\..*]] -; CHECK: lr [[NEW]], %r4 -; CHECK: j [[BB1]] +; CHECK: # %bb.0: +; CHECK-NEXT: l %r2, 0(%r3) +; CHECK-NEXT: j .LBB0_2 +; CHECK-NEXT: .LBB0_1: # %atomicrmw.start +; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1 +; CHECK-NEXT: cs %r2, %r0, 0(%r3) +; CHECK-NEXT: je .LBB0_4 +; CHECK-NEXT: .LBB0_2: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: lr %r0, %r2 +; CHECK-NEXT: crjle %r2, %r4, .LBB0_1 +; CHECK-NEXT: # %bb.3: # %atomicrmw.start +; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1 +; CHECK-NEXT: lr %r0, %r4 +; CHECK-NEXT: j .LBB0_1 +; CHECK-NEXT: .LBB0_4: # %atomicrmw.end +; CHECK-NEXT: br %r14 %res = atomicrmw min ptr %src, i32 %b seq_cst ret i32 %res } @@ -23,16 +33,23 @@ define i32 @f1(i32 %dummy, ptr %src, i32 %b) { ; Check signed maximum. 
define i32 @f2(i32 %dummy, ptr %src, i32 %b) { ; CHECK-LABEL: f2: -; CHECK: l %r2, 0(%r3) -; CHECK: j [[LOOP:\.[^:]*]] -; CHECK: [[BB1:\.[^:]*]]: -; CHECK: cs %r2, [[NEW:%r[0-9]+]], 0(%r3) -; CHECK: ber %r14 -; CHECK: [[LOOP]]: -; CHECK: lr [[NEW]], %r2 -; CHECK: crjhe %r2, %r4, [[KEEP:\..*]] -; CHECK: lr [[NEW]], %r4 -; CHECK: j [[BB1]] +; CHECK: # %bb.0: +; CHECK-NEXT: l %r2, 0(%r3) +; CHECK-NEXT: j .LBB1_2 +; CHECK-NEXT: .LBB1_1: # %atomicrmw.start +; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1 +; CHECK-NEXT: cs %r2, %r0, 0(%r3) +; CHECK-NEXT: je .LBB1_4 +; CHECK-NEXT: .LBB1_2: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: lr %r0, %r2 +; CHECK-NEXT: crjh %r2, %r4, .LBB1_1 +; CHECK-NEXT: # %bb.3: # %atomicrmw.start +; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1 +; CHECK-NEXT: lr %r0, %r4 +; CHECK-NEXT: j .LBB1_1 +; CHECK-NEXT: .LBB1_4: # %atomicrmw.end +; CHECK-NEXT: br %r14 %res = atomicrmw max ptr %src, i32 %b seq_cst ret i32 %res } @@ -40,16 +57,23 @@ define i32 @f2(i32 %dummy, ptr %src, i32 %b) { ; Check unsigned minimum. 
define i32 @f3(i32 %dummy, ptr %src, i32 %b) { ; CHECK-LABEL: f3: -; CHECK: l %r2, 0(%r3) -; CHECK: j [[LOOP:\.[^:]*]] -; CHECK: [[BB1:\.[^:]*]]: -; CHECK: cs %r2, [[NEW:%r[0-9]+]], 0(%r3) -; CHECK: ber %r14 -; CHECK: [[LOOP]]: -; CHECK: lr [[NEW]], %r2 -; CHECK: clrjle %r2, %r4, [[KEEP:\..*]] -; CHECK: lr [[NEW]], %r4 -; CHECK: j [[BB1]] +; CHECK: # %bb.0: +; CHECK-NEXT: l %r2, 0(%r3) +; CHECK-NEXT: j .LBB2_2 +; CHECK-NEXT: .LBB2_1: # %atomicrmw.start +; CHECK-NEXT: # in Loop: Header=BB2_2 Depth=1 +; CHECK-NEXT: cs %r2, %r0, 0(%r3) +; CHECK-NEXT: je .LBB2_4 +; CHECK-NEXT: .LBB2_2: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: lr %r0, %r2 +; CHECK-NEXT: clrjle %r2, %r4, .LBB2_1 +; CHECK-NEXT: # %bb.3: # %atomicrmw.start +; CHECK-NEXT: # in Loop: Header=BB2_2 Depth=1 +; CHECK-NEXT: lr %r0, %r4 +; CHECK-NEXT: j .LBB2_1 +; CHECK-NEXT: .LBB2_4: # %atomicrmw.end +; CHECK-NEXT: br %r14 %res = atomicrmw umin ptr %src, i32 %b seq_cst ret i32 %res } @@ -57,16 +81,23 @@ define i32 @f3(i32 %dummy, ptr %src, i32 %b) { ; Check unsigned maximum. 
define i32 @f4(i32 %dummy, ptr %src, i32 %b) { ; CHECK-LABEL: f4: -; CHECK: l %r2, 0(%r3) -; CHECK: j [[LOOP:\.[^:]*]] -; CHECK: [[BB1:\.[^:]*]]: -; CHECK: cs %r2, [[NEW:%r[0-9]+]], 0(%r3) -; CHECK: ber %r14 -; CHECK: [[LOOP]]: -; CHECK: lr [[NEW]], %r2 -; CHECK: clrjhe %r2, %r4, [[KEEP:\..*]] -; CHECK: lr [[NEW]], %r4 -; CHECK: j [[BB1]] +; CHECK: # %bb.0: +; CHECK-NEXT: l %r2, 0(%r3) +; CHECK-NEXT: j .LBB3_2 +; CHECK-NEXT: .LBB3_1: # %atomicrmw.start +; CHECK-NEXT: # in Loop: Header=BB3_2 Depth=1 +; CHECK-NEXT: cs %r2, %r0, 0(%r3) +; CHECK-NEXT: je .LBB3_4 +; CHECK-NEXT: .LBB3_2: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: lr %r0, %r2 +; CHECK-NEXT: clrjh %r2, %r4, .LBB3_1 +; CHECK-NEXT: # %bb.3: # %atomicrmw.start +; CHECK-NEXT: # in Loop: Header=BB3_2 Depth=1 +; CHECK-NEXT: lr %r0, %r4 +; CHECK-NEXT: j .LBB3_1 +; CHECK-NEXT: .LBB3_4: # %atomicrmw.end +; CHECK-NEXT: br %r14 %res = atomicrmw umax ptr %src, i32 %b seq_cst ret i32 %res } @@ -74,9 +105,23 @@ define i32 @f4(i32 %dummy, ptr %src, i32 %b) { ; Check the high end of the aligned CS range. 
define i32 @f5(i32 %dummy, ptr %src, i32 %b) { ; CHECK-LABEL: f5: -; CHECK: l %r2, 4092(%r3) -; CHECK: cs %r2, {{%r[0-9]+}}, 4092(%r3) -; CHECK: ber %r14 +; CHECK: # %bb.0: +; CHECK-NEXT: l %r2, 4092(%r3) +; CHECK-NEXT: j .LBB4_2 +; CHECK-NEXT: .LBB4_1: # %atomicrmw.start +; CHECK-NEXT: # in Loop: Header=BB4_2 Depth=1 +; CHECK-NEXT: cs %r2, %r0, 4092(%r3) +; CHECK-NEXT: je .LBB4_4 +; CHECK-NEXT: .LBB4_2: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: lr %r0, %r2 +; CHECK-NEXT: crjle %r2, %r4, .LBB4_1 +; CHECK-NEXT: # %bb.3: # %atomicrmw.start +; CHECK-NEXT: # in Loop: Header=BB4_2 Depth=1 +; CHECK-NEXT: lr %r0, %r4 +; CHECK-NEXT: j .LBB4_1 +; CHECK-NEXT: .LBB4_4: # %atomicrmw.end +; CHECK-NEXT: br %r14 %ptr = getelementptr i32, ptr %src, i64 1023 %res = atomicrmw min ptr %ptr, i32 %b seq_cst ret i32 %res @@ -85,9 +130,23 @@ define i32 @f5(i32 %dummy, ptr %src, i32 %b) { ; Check the next word up, which requires CSY. define i32 @f6(i32 %dummy, ptr %src, i32 %b) { ; CHECK-LABEL: f6: -; CHECK: ly %r2, 4096(%r3) -; CHECK: csy %r2, {{%r[0-9]+}}, 4096(%r3) -; CHECK: ber %r14 +; CHECK: # %bb.0: +; CHECK-NEXT: ly %r2, 4096(%r3) +; CHECK-NEXT: j .LBB5_2 +; CHECK-NEXT: .LBB5_1: # %atomicrmw.start +; CHECK-NEXT: # in Loop: Header=BB5_2 Depth=1 +; CHECK-NEXT: csy %r2, %r0, 4096(%r3) +; CHECK-NEXT: je .LBB5_4 +; CHECK-NEXT: .LBB5_2: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: lr %r0, %r2 +; CHECK-NEXT: crjle %r2, %r4, .LBB5_1 +; CHECK-NEXT: # %bb.3: # %atomicrmw.start +; CHECK-NEXT: # in Loop: Header=BB5_2 Depth=1 +; CHECK-NEXT: lr %r0, %r4 +; CHECK-NEXT: j .LBB5_1 +; CHECK-NEXT: .LBB5_4: # %atomicrmw.end +; CHECK-NEXT: br %r14 %ptr = getelementptr i32, ptr %src, i64 1024 %res = atomicrmw min ptr %ptr, i32 %b seq_cst ret i32 %res @@ -96,9 +155,23 @@ define i32 @f6(i32 %dummy, ptr %src, i32 %b) { ; Check the high end of the aligned CSY range. 
define i32 @f7(i32 %dummy, ptr %src, i32 %b) { ; CHECK-LABEL: f7: -; CHECK: ly %r2, 524284(%r3) -; CHECK: csy %r2, {{%r[0-9]+}}, 524284(%r3) -; CHECK: ber %r14 +; CHECK: # %bb.0: +; CHECK-NEXT: ly %r2, 524284(%r3) +; CHECK-NEXT: j .LBB6_2 +; CHECK-NEXT: .LBB6_1: # %atomicrmw.start +; CHECK-NEXT: # in Loop: Header=BB6_2 Depth=1 +; CHECK-NEXT: csy %r2, %r0, 524284(%r3) +; CHECK-NEXT: je .LBB6_4 +; CHECK-NEXT: .LBB6_2: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: lr %r0, %r2 +; CHECK-NEXT: crjle %r2, %r4, .LBB6_1 +; CHECK-NEXT: # %bb.3: # %atomicrmw.start +; CHECK-NEXT: # in Loop: Header=BB6_2 Depth=1 +; CHECK-NEXT: lr %r0, %r4 +; CHECK-NEXT: j .LBB6_1 +; CHECK-NEXT: .LBB6_4: # %atomicrmw.end +; CHECK-NEXT: br %r14 %ptr = getelementptr i32, ptr %src, i64 131071 %res = atomicrmw min ptr %ptr, i32 %b seq_cst ret i32 %res @@ -107,10 +180,24 @@ define i32 @f7(i32 %dummy, ptr %src, i32 %b) { ; Check the next word up, which needs separate address logic. define i32 @f8(i32 %dummy, ptr %src, i32 %b) { ; CHECK-LABEL: f8: -; CHECK: agfi %r3, 524288 -; CHECK: l %r2, 0(%r3) -; CHECK: cs %r2, {{%r[0-9]+}}, 0(%r3) -; CHECK: ber %r14 +; CHECK: # %bb.0: +; CHECK-NEXT: agfi %r3, 524288 +; CHECK-NEXT: l %r2, 0(%r3) +; CHECK-NEXT: j .LBB7_2 +; CHECK-NEXT: .LBB7_1: # %atomicrmw.start +; CHECK-NEXT: # in Loop: Header=BB7_2 Depth=1 +; CHECK-NEXT: cs %r2, %r0, 0(%r3) +; CHECK-NEXT: je .LBB7_4 +; CHECK-NEXT: .LBB7_2: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: lr %r0, %r2 +; CHECK-NEXT: crjle %r2, %r4, .LBB7_1 +; CHECK-NEXT: # %bb.3: # %atomicrmw.start +; CHECK-NEXT: # in Loop: Header=BB7_2 Depth=1 +; CHECK-NEXT: lr %r0, %r4 +; CHECK-NEXT: j .LBB7_1 +; CHECK-NEXT: .LBB7_4: # %atomicrmw.end +; CHECK-NEXT: br %r14 %ptr = getelementptr i32, ptr %src, i64 131072 %res = atomicrmw min ptr %ptr, i32 %b seq_cst ret i32 %res @@ -119,9 +206,23 @@ define i32 @f8(i32 %dummy, ptr %src, i32 %b) { ; Check the high end of the 
negative aligned CSY range. define i32 @f9(i32 %dummy, ptr %src, i32 %b) { ; CHECK-LABEL: f9: -; CHECK: ly %r2, -4(%r3) -; CHECK: csy %r2, {{%r[0-9]+}}, -4(%r3) -; CHECK: ber %r14 +; CHECK: # %bb.0: +; CHECK-NEXT: ly %r2, -4(%r3) +; CHECK-NEXT: j .LBB8_2 +; CHECK-NEXT: .LBB8_1: # %atomicrmw.start +; CHECK-NEXT: # in Loop: Header=BB8_2 Depth=1 +; CHECK-NEXT: csy %r2, %r0, -4(%r3) +; CHECK-NEXT: je .LBB8_4 +; CHECK-NEXT: .LBB8_2: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: lr %r0, %r2 +; CHECK-NEXT: crjle %r2, %r4, .LBB8_1 +; CHECK-NEXT: # %bb.3: # %atomicrmw.start +; CHECK-NEXT: # in Loop: Header=BB8_2 Depth=1 +; CHECK-NEXT: lr %r0, %r4 +; CHECK-NEXT: j .LBB8_1 +; CHECK-NEXT: .LBB8_4: # %atomicrmw.end +; CHECK-NEXT: br %r14 %ptr = getelementptr i32, ptr %src, i64 -1 %res = atomicrmw min ptr %ptr, i32 %b seq_cst ret i32 %res @@ -130,9 +231,23 @@ define i32 @f9(i32 %dummy, ptr %src, i32 %b) { ; Check the low end of the CSY range. define i32 @f10(i32 %dummy, ptr %src, i32 %b) { ; CHECK-LABEL: f10: -; CHECK: ly %r2, -524288(%r3) -; CHECK: csy %r2, {{%r[0-9]+}}, -524288(%r3) -; CHECK: ber %r14 +; CHECK: # %bb.0: +; CHECK-NEXT: ly %r2, -524288(%r3) +; CHECK-NEXT: j .LBB9_2 +; CHECK-NEXT: .LBB9_1: # %atomicrmw.start +; CHECK-NEXT: # in Loop: Header=BB9_2 Depth=1 +; CHECK-NEXT: csy %r2, %r0, -524288(%r3) +; CHECK-NEXT: je .LBB9_4 +; CHECK-NEXT: .LBB9_2: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: lr %r0, %r2 +; CHECK-NEXT: crjle %r2, %r4, .LBB9_1 +; CHECK-NEXT: # %bb.3: # %atomicrmw.start +; CHECK-NEXT: # in Loop: Header=BB9_2 Depth=1 +; CHECK-NEXT: lr %r0, %r4 +; CHECK-NEXT: j .LBB9_1 +; CHECK-NEXT: .LBB9_4: # %atomicrmw.end +; CHECK-NEXT: br %r14 %ptr = getelementptr i32, ptr %src, i64 -131072 %res = atomicrmw min ptr %ptr, i32 %b seq_cst ret i32 %res @@ -141,10 +256,24 @@ define i32 @f10(i32 %dummy, ptr %src, i32 %b) { ; Check the next word down, which needs separate address logic. 
define i32 @f11(i32 %dummy, ptr %src, i32 %b) { ; CHECK-LABEL: f11: -; CHECK: agfi %r3, -524292 -; CHECK: l %r2, 0(%r3) -; CHECK: cs %r2, {{%r[0-9]+}}, 0(%r3) -; CHECK: ber %r14 +; CHECK: # %bb.0: +; CHECK-NEXT: agfi %r3, -524292 +; CHECK-NEXT: l %r2, 0(%r3) +; CHECK-NEXT: j .LBB10_2 +; CHECK-NEXT: .LBB10_1: # %atomicrmw.start +; CHECK-NEXT: # in Loop: Header=BB10_2 Depth=1 +; CHECK-NEXT: cs %r2, %r0, 0(%r3) +; CHECK-NEXT: je .LBB10_4 +; CHECK-NEXT: .LBB10_2: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: lr %r0, %r2 +; CHECK-NEXT: crjle %r2, %r4, .LBB10_1 +; CHECK-NEXT: # %bb.3: # %atomicrmw.start +; CHECK-NEXT: # in Loop: Header=BB10_2 Depth=1 +; CHECK-NEXT: lr %r0, %r4 +; CHECK-NEXT: j .LBB10_1 +; CHECK-NEXT: .LBB10_4: # %atomicrmw.end +; CHECK-NEXT: br %r14 %ptr = getelementptr i32, ptr %src, i64 -131073 %res = atomicrmw min ptr %ptr, i32 %b seq_cst ret i32 %res @@ -153,10 +282,24 @@ define i32 @f11(i32 %dummy, ptr %src, i32 %b) { ; Check that indexed addresses are not allowed. 
define i32 @f12(i32 %dummy, i64 %base, i64 %index, i32 %b) { ; CHECK-LABEL: f12: -; CHECK: agr %r3, %r4 -; CHECK: l %r2, 0(%r3) -; CHECK: cs %r2, {{%r[0-9]+}}, 0(%r3) -; CHECK: ber %r14 +; CHECK: # %bb.0: +; CHECK-NEXT: l %r2, 0(%r4,%r3) +; CHECK-NEXT: agr %r3, %r4 +; CHECK-NEXT: j .LBB11_2 +; CHECK-NEXT: .LBB11_1: # %atomicrmw.start +; CHECK-NEXT: # in Loop: Header=BB11_2 Depth=1 +; CHECK-NEXT: cs %r2, %r0, 0(%r3) +; CHECK-NEXT: je .LBB11_4 +; CHECK-NEXT: .LBB11_2: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: lr %r0, %r2 +; CHECK-NEXT: crjle %r2, %r5, .LBB11_1 +; CHECK-NEXT: # %bb.3: # %atomicrmw.start +; CHECK-NEXT: # in Loop: Header=BB11_2 Depth=1 +; CHECK-NEXT: lr %r0, %r5 +; CHECK-NEXT: j .LBB11_1 +; CHECK-NEXT: .LBB11_4: # %atomicrmw.end +; CHECK-NEXT: br %r14 %add = add i64 %base, %index %ptr = inttoptr i64 %add to ptr %res = atomicrmw min ptr %ptr, i32 %b seq_cst @@ -166,16 +309,23 @@ define i32 @f12(i32 %dummy, i64 %base, i64 %index, i32 %b) { ; Check that constants are handled. 
define i32 @f13(i32 %dummy, ptr %ptr) { ; CHECK-LABEL: f13: -; CHECK: lhi [[LIMIT:%r[0-9]+]], 42 -; CHECK: j [[LOOP:\.[^:]*]] -; CHECK: [[BB1:\.[^:]*]]: -; CHECK: cs %r2, [[NEW:%r[0-9]+]], 0(%r3) -; CHECK: ber %r14 -; CHECK: [[LOOP]]: -; CHECK: lr [[NEW]], %r2 -; CHECK: crjle %r2, [[LIMIT]], [[KEEP:\..*]] -; CHECK: lhi [[NEW]], 42 -; CHECK: j [[BB1]] +; CHECK: # %bb.0: +; CHECK-NEXT: l %r2, 0(%r3) +; CHECK-NEXT: j .LBB12_2 +; CHECK-NEXT: .LBB12_1: # %atomicrmw.start +; CHECK-NEXT: # in Loop: Header=BB12_2 Depth=1 +; CHECK-NEXT: cs %r2, %r0, 0(%r3) +; CHECK-NEXT: je .LBB12_4 +; CHECK-NEXT: .LBB12_2: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: lr %r0, %r2 +; CHECK-NEXT: cijl %r2, 43, .LBB12_1 +; CHECK-NEXT: # %bb.3: # %atomicrmw.start +; CHECK-NEXT: # in Loop: Header=BB12_2 Depth=1 +; CHECK-NEXT: lhi %r0, 42 +; CHECK-NEXT: j .LBB12_1 +; CHECK-NEXT: .LBB12_4: # %atomicrmw.end +; CHECK-NEXT: br %r14 %res = atomicrmw min ptr %ptr, i32 42 seq_cst ret i32 %res } diff --git a/llvm/test/CodeGen/SystemZ/atomicrmw-minmax-04.ll b/llvm/test/CodeGen/SystemZ/atomicrmw-minmax-04.ll index 64e76e0..9352118 100644 --- a/llvm/test/CodeGen/SystemZ/atomicrmw-minmax-04.ll +++ b/llvm/test/CodeGen/SystemZ/atomicrmw-minmax-04.ll @@ -1,21 +1,31 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ; Test 64-bit atomic minimum and maximum. Here we match the z10 versions, ; which can't use LOCGR. ; ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s +; Todo: If-converter no longer producing CondReturns (with AtomicExpand pass). + ; Check signed minimum. 
define i64 @f1(i64 %dummy, ptr %src, i64 %b) { ; CHECK-LABEL: f1: -; CHECK: lg %r2, 0(%r3) -; CHECK: j [[LOOP:\.[^:]*]] -; CHECK: [[BB1:\.[^:]*]]: -; CHECK: csg %r2, [[NEW:%r[0-9]+]], 0(%r3) -; CHECK: ber %r14 -; CHECK: [[LOOP:\.[^:]*]]: -; CHECK: lgr [[NEW:%r[0-9]+]], %r2 -; CHECK: cgrjle %r2, %r4, [[KEEP:\..*]] -; CHECK: lgr [[NEW]], %r4 -; CHECK: j [[BB1]] +; CHECK: # %bb.0: +; CHECK-NEXT: lg %r2, 0(%r3) +; CHECK-NEXT: j .LBB0_2 +; CHECK-NEXT: .LBB0_1: # %atomicrmw.start +; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1 +; CHECK-NEXT: csg %r2, %r0, 0(%r3) +; CHECK-NEXT: je .LBB0_4 +; CHECK-NEXT: .LBB0_2: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: lgr %r0, %r2 +; CHECK-NEXT: cgrjle %r2, %r4, .LBB0_1 +; CHECK-NEXT: # %bb.3: # %atomicrmw.start +; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1 +; CHECK-NEXT: lgr %r0, %r4 +; CHECK-NEXT: j .LBB0_1 +; CHECK-NEXT: .LBB0_4: # %atomicrmw.end +; CHECK-NEXT: br %r14 %res = atomicrmw min ptr %src, i64 %b seq_cst ret i64 %res } @@ -23,16 +33,23 @@ define i64 @f1(i64 %dummy, ptr %src, i64 %b) { ; Check signed maximum. 
define i64 @f2(i64 %dummy, ptr %src, i64 %b) { ; CHECK-LABEL: f2: -; CHECK: lg %r2, 0(%r3) -; CHECK: j [[LOOP:\.[^:]*]] -; CHECK: [[BB1:\.[^:]*]]: -; CHECK: csg %r2, [[NEW:%r[0-9]+]], 0(%r3) -; CHECK: ber %r14 -; CHECK: [[LOOP:\.[^:]*]]: -; CHECK: lgr [[NEW:%r[0-9]+]], %r2 -; CHECK: cgrjhe %r2, %r4, [[KEEP:\..*]] -; CHECK: lgr [[NEW]], %r4 -; CHECK: j [[BB1]] +; CHECK: # %bb.0: +; CHECK-NEXT: lg %r2, 0(%r3) +; CHECK-NEXT: j .LBB1_2 +; CHECK-NEXT: .LBB1_1: # %atomicrmw.start +; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1 +; CHECK-NEXT: csg %r2, %r0, 0(%r3) +; CHECK-NEXT: je .LBB1_4 +; CHECK-NEXT: .LBB1_2: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: lgr %r0, %r2 +; CHECK-NEXT: cgrjh %r2, %r4, .LBB1_1 +; CHECK-NEXT: # %bb.3: # %atomicrmw.start +; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1 +; CHECK-NEXT: lgr %r0, %r4 +; CHECK-NEXT: j .LBB1_1 +; CHECK-NEXT: .LBB1_4: # %atomicrmw.end +; CHECK-NEXT: br %r14 %res = atomicrmw max ptr %src, i64 %b seq_cst ret i64 %res } @@ -40,16 +57,23 @@ define i64 @f2(i64 %dummy, ptr %src, i64 %b) { ; Check unsigned minimum. 
define i64 @f3(i64 %dummy, ptr %src, i64 %b) { ; CHECK-LABEL: f3: -; CHECK: lg %r2, 0(%r3) -; CHECK: j [[LOOP:\.[^:]*]] -; CHECK: [[BB1:\.[^:]*]]: -; CHECK: csg %r2, [[NEW:%r[0-9]+]], 0(%r3) -; CHECK: ber %r14 -; CHECK: [[LOOP:\.[^:]*]]: -; CHECK: lgr [[NEW:%r[0-9]+]], %r2 -; CHECK: clgrjle %r2, %r4, [[KEEP:\..*]] -; CHECK: lgr [[NEW]], %r4 -; CHECK: j [[BB1]] +; CHECK: # %bb.0: +; CHECK-NEXT: lg %r2, 0(%r3) +; CHECK-NEXT: j .LBB2_2 +; CHECK-NEXT: .LBB2_1: # %atomicrmw.start +; CHECK-NEXT: # in Loop: Header=BB2_2 Depth=1 +; CHECK-NEXT: csg %r2, %r0, 0(%r3) +; CHECK-NEXT: je .LBB2_4 +; CHECK-NEXT: .LBB2_2: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: lgr %r0, %r2 +; CHECK-NEXT: clgrjle %r2, %r4, .LBB2_1 +; CHECK-NEXT: # %bb.3: # %atomicrmw.start +; CHECK-NEXT: # in Loop: Header=BB2_2 Depth=1 +; CHECK-NEXT: lgr %r0, %r4 +; CHECK-NEXT: j .LBB2_1 +; CHECK-NEXT: .LBB2_4: # %atomicrmw.end +; CHECK-NEXT: br %r14 %res = atomicrmw umin ptr %src, i64 %b seq_cst ret i64 %res } @@ -57,16 +81,23 @@ define i64 @f3(i64 %dummy, ptr %src, i64 %b) { ; Check unsigned maximum. 
define i64 @f4(i64 %dummy, ptr %src, i64 %b) { ; CHECK-LABEL: f4: -; CHECK: lg %r2, 0(%r3) -; CHECK: j [[LOOP:\.[^:]*]] -; CHECK: [[BB1:\.[^:]*]]: -; CHECK: csg %r2, [[NEW:%r[0-9]+]], 0(%r3) -; CHECK: ber %r14 -; CHECK: [[LOOP:\.[^:]*]]: -; CHECK: lgr [[NEW:%r[0-9]+]], %r2 -; CHECK: clgrjhe %r2, %r4, [[KEEP:\..*]] -; CHECK: lgr [[NEW]], %r4 -; CHECK: j [[BB1]] +; CHECK: # %bb.0: +; CHECK-NEXT: lg %r2, 0(%r3) +; CHECK-NEXT: j .LBB3_2 +; CHECK-NEXT: .LBB3_1: # %atomicrmw.start +; CHECK-NEXT: # in Loop: Header=BB3_2 Depth=1 +; CHECK-NEXT: csg %r2, %r0, 0(%r3) +; CHECK-NEXT: je .LBB3_4 +; CHECK-NEXT: .LBB3_2: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: lgr %r0, %r2 +; CHECK-NEXT: clgrjh %r2, %r4, .LBB3_1 +; CHECK-NEXT: # %bb.3: # %atomicrmw.start +; CHECK-NEXT: # in Loop: Header=BB3_2 Depth=1 +; CHECK-NEXT: lgr %r0, %r4 +; CHECK-NEXT: j .LBB3_1 +; CHECK-NEXT: .LBB3_4: # %atomicrmw.end +; CHECK-NEXT: br %r14 %res = atomicrmw umax ptr %src, i64 %b seq_cst ret i64 %res } @@ -74,9 +105,23 @@ define i64 @f4(i64 %dummy, ptr %src, i64 %b) { ; Check the high end of the aligned CSG range. 
define i64 @f5(i64 %dummy, ptr %src, i64 %b) { ; CHECK-LABEL: f5: -; CHECK: lg %r2, 524280(%r3) -; CHECK: csg %r2, {{%r[0-9]+}}, 524280(%r3) -; CHECK: ber %r14 +; CHECK: # %bb.0: +; CHECK-NEXT: lg %r2, 524280(%r3) +; CHECK-NEXT: j .LBB4_2 +; CHECK-NEXT: .LBB4_1: # %atomicrmw.start +; CHECK-NEXT: # in Loop: Header=BB4_2 Depth=1 +; CHECK-NEXT: csg %r2, %r0, 524280(%r3) +; CHECK-NEXT: je .LBB4_4 +; CHECK-NEXT: .LBB4_2: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: lgr %r0, %r2 +; CHECK-NEXT: cgrjle %r2, %r4, .LBB4_1 +; CHECK-NEXT: # %bb.3: # %atomicrmw.start +; CHECK-NEXT: # in Loop: Header=BB4_2 Depth=1 +; CHECK-NEXT: lgr %r0, %r4 +; CHECK-NEXT: j .LBB4_1 +; CHECK-NEXT: .LBB4_4: # %atomicrmw.end +; CHECK-NEXT: br %r14 %ptr = getelementptr i64, ptr %src, i64 65535 %res = atomicrmw min ptr %ptr, i64 %b seq_cst ret i64 %res @@ -85,10 +130,24 @@ define i64 @f5(i64 %dummy, ptr %src, i64 %b) { ; Check the next doubleword up, which requires separate address logic. 
define i64 @f6(i64 %dummy, ptr %src, i64 %b) { ; CHECK-LABEL: f6: -; CHECK: agfi %r3, 524288 -; CHECK: lg %r2, 0(%r3) -; CHECK: csg %r2, {{%r[0-9]+}}, 0(%r3) -; CHECK: ber %r14 +; CHECK: # %bb.0: +; CHECK-NEXT: agfi %r3, 524288 +; CHECK-NEXT: lg %r2, 0(%r3) +; CHECK-NEXT: j .LBB5_2 +; CHECK-NEXT: .LBB5_1: # %atomicrmw.start +; CHECK-NEXT: # in Loop: Header=BB5_2 Depth=1 +; CHECK-NEXT: csg %r2, %r0, 0(%r3) +; CHECK-NEXT: je .LBB5_4 +; CHECK-NEXT: .LBB5_2: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: lgr %r0, %r2 +; CHECK-NEXT: cgrjle %r2, %r4, .LBB5_1 +; CHECK-NEXT: # %bb.3: # %atomicrmw.start +; CHECK-NEXT: # in Loop: Header=BB5_2 Depth=1 +; CHECK-NEXT: lgr %r0, %r4 +; CHECK-NEXT: j .LBB5_1 +; CHECK-NEXT: .LBB5_4: # %atomicrmw.end +; CHECK-NEXT: br %r14 %ptr = getelementptr i64, ptr %src, i64 65536 %res = atomicrmw min ptr %ptr, i64 %b seq_cst ret i64 %res @@ -97,9 +156,23 @@ define i64 @f6(i64 %dummy, ptr %src, i64 %b) { ; Check the low end of the CSG range. 
define i64 @f7(i64 %dummy, ptr %src, i64 %b) { ; CHECK-LABEL: f7: -; CHECK: lg %r2, -524288(%r3) -; CHECK: csg %r2, {{%r[0-9]+}}, -524288(%r3) -; CHECK: ber %r14 +; CHECK: # %bb.0: +; CHECK-NEXT: lg %r2, -524288(%r3) +; CHECK-NEXT: j .LBB6_2 +; CHECK-NEXT: .LBB6_1: # %atomicrmw.start +; CHECK-NEXT: # in Loop: Header=BB6_2 Depth=1 +; CHECK-NEXT: csg %r2, %r0, -524288(%r3) +; CHECK-NEXT: je .LBB6_4 +; CHECK-NEXT: .LBB6_2: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: lgr %r0, %r2 +; CHECK-NEXT: cgrjle %r2, %r4, .LBB6_1 +; CHECK-NEXT: # %bb.3: # %atomicrmw.start +; CHECK-NEXT: # in Loop: Header=BB6_2 Depth=1 +; CHECK-NEXT: lgr %r0, %r4 +; CHECK-NEXT: j .LBB6_1 +; CHECK-NEXT: .LBB6_4: # %atomicrmw.end +; CHECK-NEXT: br %r14 %ptr = getelementptr i64, ptr %src, i64 -65536 %res = atomicrmw min ptr %ptr, i64 %b seq_cst ret i64 %res @@ -108,10 +181,24 @@ define i64 @f7(i64 %dummy, ptr %src, i64 %b) { ; Check the next doubleword down, which requires separate address logic. 
define i64 @f8(i64 %dummy, ptr %src, i64 %b) { ; CHECK-LABEL: f8: -; CHECK: agfi %r3, -524296 -; CHECK: lg %r2, 0(%r3) -; CHECK: csg %r2, {{%r[0-9]+}}, 0(%r3) -; CHECK: ber %r14 +; CHECK: # %bb.0: +; CHECK-NEXT: agfi %r3, -524296 +; CHECK-NEXT: lg %r2, 0(%r3) +; CHECK-NEXT: j .LBB7_2 +; CHECK-NEXT: .LBB7_1: # %atomicrmw.start +; CHECK-NEXT: # in Loop: Header=BB7_2 Depth=1 +; CHECK-NEXT: csg %r2, %r0, 0(%r3) +; CHECK-NEXT: je .LBB7_4 +; CHECK-NEXT: .LBB7_2: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: lgr %r0, %r2 +; CHECK-NEXT: cgrjle %r2, %r4, .LBB7_1 +; CHECK-NEXT: # %bb.3: # %atomicrmw.start +; CHECK-NEXT: # in Loop: Header=BB7_2 Depth=1 +; CHECK-NEXT: lgr %r0, %r4 +; CHECK-NEXT: j .LBB7_1 +; CHECK-NEXT: .LBB7_4: # %atomicrmw.end +; CHECK-NEXT: br %r14 %ptr = getelementptr i64, ptr %src, i64 -65537 %res = atomicrmw min ptr %ptr, i64 %b seq_cst ret i64 %res @@ -120,10 +207,24 @@ define i64 @f8(i64 %dummy, ptr %src, i64 %b) { ; Check that indexed addresses are not allowed. 
define i64 @f9(i64 %dummy, i64 %base, i64 %index, i64 %b) { ; CHECK-LABEL: f9: -; CHECK: agr %r3, %r4 -; CHECK: lg %r2, 0(%r3) -; CHECK: csg %r2, {{%r[0-9]+}}, 0(%r3) -; CHECK: ber %r14 +; CHECK: # %bb.0: +; CHECK-NEXT: lg %r2, 0(%r4,%r3) +; CHECK-NEXT: agr %r3, %r4 +; CHECK-NEXT: j .LBB8_2 +; CHECK-NEXT: .LBB8_1: # %atomicrmw.start +; CHECK-NEXT: # in Loop: Header=BB8_2 Depth=1 +; CHECK-NEXT: csg %r2, %r0, 0(%r3) +; CHECK-NEXT: je .LBB8_4 +; CHECK-NEXT: .LBB8_2: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: lgr %r0, %r2 +; CHECK-NEXT: cgrjle %r2, %r5, .LBB8_1 +; CHECK-NEXT: # %bb.3: # %atomicrmw.start +; CHECK-NEXT: # in Loop: Header=BB8_2 Depth=1 +; CHECK-NEXT: lgr %r0, %r5 +; CHECK-NEXT: j .LBB8_1 +; CHECK-NEXT: .LBB8_4: # %atomicrmw.end +; CHECK-NEXT: br %r14 %add = add i64 %base, %index %ptr = inttoptr i64 %add to ptr %res = atomicrmw min ptr %ptr, i64 %b seq_cst @@ -133,17 +234,23 @@ define i64 @f9(i64 %dummy, i64 %base, i64 %index, i64 %b) { ; Check that constants are handled. 
define i64 @f10(i64 %dummy, ptr %ptr) { ; CHECK-LABEL: f10: -; CHECK-DAG: lghi [[LIMIT:%r[0-9]+]], 42 -; CHECK-DAG: lg %r2, 0(%r3) -; CHECK: j [[LOOP:\.[^:]*]] -; CHECK: [[BB1:\.[^:]*]]: -; CHECK: csg %r2, [[NEW:%r[0-9]+]], 0(%r3) -; CHECK: ber %r14 -; CHECK: [[LOOP:\.[^:]*]]: -; CHECK: lgr [[NEW:%r[0-9]+]], %r2 -; CHECK: cgrjle %r2, [[LIMIT]], [[KEEP:\..*]] -; CHECK: lghi [[NEW]], 42 -; CHECK: j [[BB1]] +; CHECK: # %bb.0: +; CHECK-NEXT: lg %r2, 0(%r3) +; CHECK-NEXT: j .LBB9_2 +; CHECK-NEXT: .LBB9_1: # %atomicrmw.start +; CHECK-NEXT: # in Loop: Header=BB9_2 Depth=1 +; CHECK-NEXT: csg %r2, %r0, 0(%r3) +; CHECK-NEXT: je .LBB9_4 +; CHECK-NEXT: .LBB9_2: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: lgr %r0, %r2 +; CHECK-NEXT: cgijl %r2, 43, .LBB9_1 +; CHECK-NEXT: # %bb.3: # %atomicrmw.start +; CHECK-NEXT: # in Loop: Header=BB9_2 Depth=1 +; CHECK-NEXT: lghi %r0, 42 +; CHECK-NEXT: j .LBB9_1 +; CHECK-NEXT: .LBB9_4: # %atomicrmw.end +; CHECK-NEXT: br %r14 %res = atomicrmw min ptr %ptr, i64 42 seq_cst ret i64 %res } diff --git a/llvm/test/CodeGen/SystemZ/atomicrmw-nand-03.ll b/llvm/test/CodeGen/SystemZ/atomicrmw-nand-03.ll index 323eafb..8e9870f1 100644 --- a/llvm/test/CodeGen/SystemZ/atomicrmw-nand-03.ll +++ b/llvm/test/CodeGen/SystemZ/atomicrmw-nand-03.ll @@ -17,14 +17,14 @@ define i32 @f1(i32 %dummy, ptr %src, i32 %b) { ret i32 %res } -; Check NANDs of 1. +; Check NANDs with different constant operands. define i32 @f2(i32 %dummy, ptr %src) { ; CHECK-LABEL: f2: ; CHECK: l %r2, 0(%r3) ; CHECK: [[LABEL:\.[^ ]*]]: ; CHECK: lr %r0, %r2 -; CHECK: nilf %r0, 1 ; CHECK: xilf %r0, 4294967295 +; CHECK: oilf %r0, 4294967294 ; CHECK: cs %r2, %r0, 0(%r3) ; CHECK: jl [[LABEL]] ; CHECK: br %r14 @@ -32,61 +32,55 @@ define i32 @f2(i32 %dummy, ptr %src) { ret i32 %res } -; Check NANDs of the low end of the NILH range. 
define i32 @f3(i32 %dummy, ptr %src) { ; CHECK-LABEL: f3: -; CHECK: nilh %r0, 0 ; CHECK: xilf %r0, 4294967295 +; CHECK: oilh %r0, 65535 ; CHECK: br %r14 %res = atomicrmw nand ptr %src, i32 65535 seq_cst ret i32 %res } -; Check the next value up, which must use NILF. define i32 @f4(i32 %dummy, ptr %src) { ; CHECK-LABEL: f4: -; CHECK: nilf %r0, 65536 ; CHECK: xilf %r0, 4294967295 +; CHECK: oilf %r0, 4294901759 ; CHECK: br %r14 %res = atomicrmw nand ptr %src, i32 65536 seq_cst ret i32 %res } -; Check the largest useful NILL value. define i32 @f5(i32 %dummy, ptr %src) { ; CHECK-LABEL: f5: -; CHECK: nill %r0, 65534 ; CHECK: xilf %r0, 4294967295 +; CHECK: oill %r0, 1 ; CHECK: br %r14 %res = atomicrmw nand ptr %src, i32 -2 seq_cst ret i32 %res } -; Check the low end of the NILL range. define i32 @f6(i32 %dummy, ptr %src) { ; CHECK-LABEL: f6: -; CHECK: nill %r0, 0 ; CHECK: xilf %r0, 4294967295 +; CHECK: oill %r0, 65535 ; CHECK: br %r14 %res = atomicrmw nand ptr %src, i32 -65536 seq_cst ret i32 %res } -; Check the largest useful NILH value, which is one less than the above. define i32 @f7(i32 %dummy, ptr %src) { ; CHECK-LABEL: f7: -; CHECK: nilh %r0, 65534 ; CHECK: xilf %r0, 4294967295 +; CHECK: oilh %r0, 1 ; CHECK: br %r14 %res = atomicrmw nand ptr %src, i32 -65537 seq_cst ret i32 %res } -; Check the highest useful NILF value, which is one less than the above. 
define i32 @f8(i32 %dummy, ptr %src) { ; CHECK-LABEL: f8: -; CHECK: nilf %r0, 4294901758 ; CHECK: xilf %r0, 4294967295 +; CHECK: oilf %r0, 65537 ; CHECK: br %r14 %res = atomicrmw nand ptr %src, i32 -65538 seq_cst ret i32 %res diff --git a/llvm/test/CodeGen/SystemZ/atomicrmw-nand-04.ll b/llvm/test/CodeGen/SystemZ/atomicrmw-nand-04.ll index b370302..98056c9 100644 --- a/llvm/test/CodeGen/SystemZ/atomicrmw-nand-04.ll +++ b/llvm/test/CodeGen/SystemZ/atomicrmw-nand-04.ll @@ -9,8 +9,8 @@ define i64 @f1(i64 %dummy, ptr %src, i64 %b) { ; CHECK: [[LABEL:\.[^:]*]]: ; CHECK: lgr %r0, %r2 ; CHECK: ngr %r0, %r4 -; CHECK: lcgr %r0, %r0 -; CHECK: aghi %r0, -1 +; CHECK: xihf %r0, 4294967295 +; CHECK: xilf %r0, 4294967295 ; CHECK: csg %r2, %r0, 0(%r3) ; CHECK: jl [[LABEL]] ; CHECK: br %r14 @@ -18,40 +18,39 @@ define i64 @f1(i64 %dummy, ptr %src, i64 %b) { ret i64 %res } -; Check NANDs of 1, which are done using a register. (We could use RISBG -; instead, but that isn't implemented yet.) +; Check NANDs of 1, which are done using a register. define i64 @f2(i64 %dummy, ptr %src) { ; CHECK-LABEL: f2: -; CHECK: ngr +; CHECK: xihf %r0, 4294967295 +; CHECK: xilf %r0, 4294967295 +; CHECK: oihf %r0, 4294967295 +; CHECK: oilf %r0, 4294967294 ; CHECK: br %r14 %res = atomicrmw nand ptr %src, i64 1 seq_cst ret i64 %res } -; Check the equivalent of NIHF with 1, which can use RISBG instead. define i64 @f3(i64 %dummy, ptr %src) { ; CHECK-LABEL: f3: ; CHECK: lg %r2, 0(%r3) ; CHECK: [[LABEL:\.[^:]*]]: -; CHECK: risbg %r0, %r2, 31, 191, 0 -; CHECK: lcgr %r0, %r0 -; CHECK: aghi %r0, -1 -; CHECK: csg %r2, %r0, 0(%r3) +; CHECK: xihf %r0, 4294967295 +; CHECK: xilf %r0, 4294967295 +; CHECK: oihf %r0, 4294967294 ; CHECK: jl [[LABEL]] ; CHECK: br %r14 %res = atomicrmw nand ptr %src, i64 8589934591 seq_cst ret i64 %res } -; Check the lowest NIHF value outside the range of RISBG. 
define i64 @f4(i64 %dummy, ptr %src) { ; CHECK-LABEL: f4: ; CHECK: lg %r2, 0(%r3) ; CHECK: [[LABEL:\.[^:]*]]: ; CHECK: lgr %r0, %r2 -; CHECK: nihf %r0, 2 -; CHECK: lcgr %r0, %r0 -; CHECK: aghi %r0, -1 +; CHECK: xihf %r0, 4294967295 +; CHECK: xilf %r0, 4294967295 +; CHECK: oihf %r0, 4294967293 ; CHECK: csg %r2, %r0, 0(%r3) ; CHECK: jl [[LABEL]] ; CHECK: br %r14 @@ -59,118 +58,133 @@ define i64 @f4(i64 %dummy, ptr %src) { ret i64 %res } -; Check the next value up, which must use a register. define i64 @f5(i64 %dummy, ptr %src) { ; CHECK-LABEL: f5: -; CHECK: ngr +; CHECK: xihf %r0, 4294967295 +; CHECK: xilf %r0, 4294967295 +; CHECK: oihf %r0, 4294967292 +; CHECK: oilf %r0, 4294967295 ; CHECK: br %r14 %res = atomicrmw nand ptr %src, i64 12884901888 seq_cst ret i64 %res } -; Check the lowest NIHH value outside the range of RISBG. define i64 @f6(i64 %dummy, ptr %src) { ; CHECK-LABEL: f6: -; CHECK: nihh {{%r[0-5]}}, 2 +; CHECK: xihf %r0, 4294967295 +; CHECK: xilf %r0, 4294967295 +; CHECK: oihh %r0, 65533 ; CHECK: br %r14 %res = atomicrmw nand ptr %src, i64 844424930131967 seq_cst ret i64 %res } -; Check the next value up, which must use a register. define i64 @f7(i64 %dummy, ptr %src) { ; CHECK-LABEL: f7: -; CHECK: ngr +; CHECK: xihf %r0, 4294967295 +; CHECK: xilf %r0, 4294967295 +; CHECK: oihf %r0, 4294901759 +; CHECK: oilf %r0, 4294967295 ; CHECK: br %r14 %res = atomicrmw nand ptr %src, i64 281474976710656 seq_cst ret i64 %res } -; Check the highest NILL value outside the range of RISBG. define i64 @f8(i64 %dummy, ptr %src) { ; CHECK-LABEL: f8: -; CHECK: nill {{%r[0-5]}}, 65530 +; CHECK: xihf %r0, 4294967295 +; CHECK: xilf %r0, 4294967295 +; CHECK: oill %r0, 5 ; CHECK: br %r14 %res = atomicrmw nand ptr %src, i64 -6 seq_cst ret i64 %res } -; Check the lowest NILL value outside the range of RISBG. 
define i64 @f9(i64 %dummy, ptr %src) { ; CHECK-LABEL: f9: -; CHECK: nill {{%r[0-5]}}, 2 +; CHECK: xihf %r0, 4294967295 +; CHECK: xilf %r0, 4294967295 +; CHECK: oill %r0, 65533 ; CHECK: br %r14 %res = atomicrmw nand ptr %src, i64 -65534 seq_cst ret i64 %res } -; Check the highest useful NILF value. define i64 @f10(i64 %dummy, ptr %src) { ; CHECK-LABEL: f10: -; CHECK: nilf {{%r[0-5]}}, 4294901758 +; CHECK: xihf %r0, 4294967295 +; CHECK: xilf %r0, 4294967295 +; CHECK: oilf %r0, 65537 ; CHECK: br %r14 %res = atomicrmw nand ptr %src, i64 -65538 seq_cst ret i64 %res } -; Check the highest NILH value outside the range of RISBG. define i64 @f11(i64 %dummy, ptr %src) { ; CHECK-LABEL: f11: -; CHECK: nilh {{%r[0-5]}}, 65530 +; CHECK: xihf %r0, 4294967295 +; CHECK: xilf %r0, 4294967295 +; CHECK: oilh %r0, 5 ; CHECK: br %r14 %res = atomicrmw nand ptr %src, i64 -327681 seq_cst ret i64 %res } -; Check the lowest NILH value outside the range of RISBG. define i64 @f12(i64 %dummy, ptr %src) { ; CHECK-LABEL: f12: -; CHECK: nilh {{%r[0-5]}}, 2 +; CHECK: xihf %r0, 4294967295 +; CHECK: xilf %r0, 4294967295 +; CHECK: oilh %r0, 65533 ; CHECK: br %r14 %res = atomicrmw nand ptr %src, i64 -4294770689 seq_cst ret i64 %res } -; Check the lowest NILF value outside the range of RISBG. define i64 @f13(i64 %dummy, ptr %src) { ; CHECK-LABEL: f13: -; CHECK: nilf {{%r[0-5]}}, 2 +; CHECK: xihf %r0, 4294967295 +; CHECK: xilf %r0, 4294967295 +; CHECK: oilf %r0, 4294967293 ; CHECK: br %r14 %res = atomicrmw nand ptr %src, i64 -4294967294 seq_cst ret i64 %res } -; Check the highest NIHL value outside the range of RISBG. define i64 @f14(i64 %dummy, ptr %src) { ; CHECK-LABEL: f14: -; CHECK: nihl {{%r[0-5]}}, 65530 +; CHECK: xihf %r0, 4294967295 +; CHECK: xilf %r0, 4294967295 +; CHECK: oihl %r0, 5 ; CHECK: br %r14 %res = atomicrmw nand ptr %src, i64 -21474836481 seq_cst ret i64 %res } -; Check the lowest NIHL value outside the range of RISBG. 
define i64 @f15(i64 %dummy, ptr %src) { ; CHECK-LABEL: f15: -; CHECK: nihl {{%r[0-5]}}, 2 +; CHECK: xihf %r0, 4294967295 +; CHECK: xilf %r0, 4294967295 +; CHECK: oihl %r0, 65533 ; CHECK: br %r14 %res = atomicrmw nand ptr %src, i64 -281462091808769 seq_cst ret i64 %res } -; Check the highest NIHH value outside the range of RISBG. define i64 @f16(i64 %dummy, ptr %src) { ; CHECK-LABEL: f16: -; CHECK: nihh {{%r[0-5]}}, 65530 +; CHECK: xihf %r0, 4294967295 +; CHECK: xilf %r0, 4294967295 +; CHECK: oihh %r0, 5 ; CHECK: br %r14 %res = atomicrmw nand ptr %src, i64 -1407374883553281 seq_cst ret i64 %res } -; Check the highest useful NIHF value. define i64 @f17(i64 %dummy, ptr %src) { ; CHECK-LABEL: f17: -; CHECK: nihf {{%r[0-5]}}, 4294901758 +; CHECK: xihf %r0, 4294967295 +; CHECK: xilf %r0, 4294967295 +; CHECK: oihf %r0, 65537 ; CHECK: br %r14 %res = atomicrmw nand ptr %src, i64 -281479271677953 seq_cst ret i64 %res diff --git a/llvm/test/CodeGen/SystemZ/atomicrmw-or-04.ll b/llvm/test/CodeGen/SystemZ/atomicrmw-or-04.ll index a1322df..e29097b 100644 --- a/llvm/test/CodeGen/SystemZ/atomicrmw-or-04.ll +++ b/llvm/test/CodeGen/SystemZ/atomicrmw-or-04.ll @@ -93,11 +93,11 @@ define i64 @f9(i64 %dummy, ptr %src) { ret i64 %res } -; Check the next value up, which must use a register. (We could use -; combinations of OIH* and OIL* instead, but that isn't implemented.) +; Check the next value up, which must use a register. define i64 @f10(i64 %dummy, ptr %src) { ; CHECK-LABEL: f10: -; CHECK: ogr +; CHECK: oihl %r0, 1 +; CHECK: oill %r0, 1 ; CHECK: br %r14 %res = atomicrmw or ptr %src, i64 4294967297 seq_cst ret i64 %res @@ -139,10 +139,11 @@ define i64 @f14(i64 %dummy, ptr %src) { ret i64 %res } -; Check the next value up, which must use a register. +; Check the next value up. 
define i64 @f15(i64 %dummy, ptr %src) { ; CHECK-LABEL: f15: -; CHECK: ogr +; CHECK: oihh %r0, 65535 +; CHECK: oill %r0, 1 ; CHECK: br %r14 %res = atomicrmw or ptr %src, i64 18446462598732840961 seq_cst ret i64 %res diff --git a/llvm/test/CodeGen/SystemZ/atomicrmw-sub-04.ll b/llvm/test/CodeGen/SystemZ/atomicrmw-sub-04.ll index 5d23d4e..d18c72f 100644 --- a/llvm/test/CodeGen/SystemZ/atomicrmw-sub-04.ll +++ b/llvm/test/CodeGen/SystemZ/atomicrmw-sub-04.ll @@ -16,13 +16,12 @@ define i64 @f1(i64 %dummy, ptr %src, i64 %b) { ret i64 %res } -; Check subtraction of 1, which can use AGHI. +; Check subtraction of 1. define i64 @f2(i64 %dummy, ptr %src) { ; CHECK-LABEL: f2: ; CHECK: lg %r2, 0(%r3) ; CHECK: [[LABEL:\.[^:]*]]: -; CHECK: lgr %r0, %r2 -; CHECK: aghi %r0, -1 +; CHECK: lay %r0, -1(%r2) ; CHECK: csg %r2, %r0, 0(%r3) ; CHECK: jl [[LABEL]] ; CHECK: br %r14 @@ -30,82 +29,64 @@ define i64 @f2(i64 %dummy, ptr %src) { ret i64 %res } -; Check the low end of the AGHI range. +; Check use of LAY. define i64 @f3(i64 %dummy, ptr %src) { ; CHECK-LABEL: f3: -; CHECK: aghi %r0, -32768 +; CHECK: lay %r0, -32768(%r2) ; CHECK: br %r14 %res = atomicrmw sub ptr %src, i64 32768 seq_cst ret i64 %res } -; Check the next value up, which must use AGFI. +; Check the low end of the AGFI range. define i64 @f4(i64 %dummy, ptr %src) { ; CHECK-LABEL: f4: -; CHECK: agfi %r0, -32769 +; CHECK: agfi %r0, -2147483648 ; CHECK: br %r14 - %res = atomicrmw sub ptr %src, i64 32769 seq_cst + %res = atomicrmw sub ptr %src, i64 2147483648 seq_cst ret i64 %res } -; Check the low end of the AGFI range. +; Check the next value up, which uses an SLGFI. define i64 @f5(i64 %dummy, ptr %src) { ; CHECK-LABEL: f5: -; CHECK: agfi %r0, -2147483648 +; CHECK: slgfi ; CHECK: br %r14 - %res = atomicrmw sub ptr %src, i64 2147483648 seq_cst + %res = atomicrmw sub ptr %src, i64 2147483649 seq_cst ret i64 %res } -; Check the next value up, which must use a register operation. +; Check subtraction of -1, which can use LA. 
define i64 @f6(i64 %dummy, ptr %src) { ; CHECK-LABEL: f6: -; CHECK: sgr +; CHECK: la %r0, 1(%r2) ; CHECK: br %r14 - %res = atomicrmw sub ptr %src, i64 2147483649 seq_cst + %res = atomicrmw sub ptr %src, i64 -1 seq_cst ret i64 %res } -; Check subtraction of -1, which can use AGHI. +; Check use of LAY. define i64 @f7(i64 %dummy, ptr %src) { ; CHECK-LABEL: f7: -; CHECK: aghi %r0, 1 -; CHECK: br %r14 - %res = atomicrmw sub ptr %src, i64 -1 seq_cst - ret i64 %res -} - -; Check the high end of the AGHI range. -define i64 @f8(i64 %dummy, ptr %src) { -; CHECK-LABEL: f8: -; CHECK: aghi %r0, 32767 +; CHECK: lay %r0, 32767(%r2) ; CHECK: br %r14 %res = atomicrmw sub ptr %src, i64 -32767 seq_cst ret i64 %res } -; Check the next value down, which must use AGFI instead. -define i64 @f9(i64 %dummy, ptr %src) { -; CHECK-LABEL: f9: -; CHECK: agfi %r0, 32768 -; CHECK: br %r14 - %res = atomicrmw sub ptr %src, i64 -32768 seq_cst - ret i64 %res -} - ; Check the high end of the AGFI range. -define i64 @f10(i64 %dummy, ptr %src) { -; CHECK-LABEL: f10: +define i64 @f8(i64 %dummy, ptr %src) { +; CHECK-LABEL: f8: ; CHECK: agfi %r0, 2147483647 ; CHECK: br %r14 %res = atomicrmw sub ptr %src, i64 -2147483647 seq_cst ret i64 %res } -; Check the next value down, which must use a register operation. -define i64 @f11(i64 %dummy, ptr %src) { -; CHECK-LABEL: f11: -; CHECK: sgr +; Check the next value down, which must use an ALGFI. +define i64 @f9(i64 %dummy, ptr %src) { +; CHECK-LABEL: f9: +; CHECK: algfi ; CHECK: br %r14 %res = atomicrmw sub ptr %src, i64 -2147483648 seq_cst ret i64 %res diff --git a/llvm/test/CodeGen/SystemZ/atomicrmw-xchg-03.ll b/llvm/test/CodeGen/SystemZ/atomicrmw-xchg-03.ll index cf366b9..d991997 100644 --- a/llvm/test/CodeGen/SystemZ/atomicrmw-xchg-03.ll +++ b/llvm/test/CodeGen/SystemZ/atomicrmw-xchg-03.ll @@ -96,8 +96,8 @@ define i32 @f8(i32 %dummy, ptr %src, i32 %b) { ; Check that indexed addresses are not allowed. 
define i32 @f9(i32 %dummy, i64 %base, i64 %index, i32 %b) { ; CHECK-LABEL: f9: +; CHECK: l %r2, 0(%r4,%r3) ; CHECK: agr %r3, %r4 -; CHECK: l %r2, 0(%r3) ; CHECK: cs %r2, {{%r[0-9]+}}, 0(%r3) ; CHECK: br %r14 %add = add i64 %base, %index diff --git a/llvm/test/CodeGen/SystemZ/atomicrmw-xchg-04.ll b/llvm/test/CodeGen/SystemZ/atomicrmw-xchg-04.ll index 9a493cb..4797c5c 100644 --- a/llvm/test/CodeGen/SystemZ/atomicrmw-xchg-04.ll +++ b/llvm/test/CodeGen/SystemZ/atomicrmw-xchg-04.ll @@ -63,8 +63,8 @@ define i64 @f5(i64 %dummy, ptr %src, i64 %b) { ; Check that indexed addresses are not allowed. define i64 @f6(i64 %dummy, i64 %base, i64 %index, i64 %b) { ; CHECK-LABEL: f6: +; CHECK: lg %r2, 0(%r4,%r3) ; CHECK: agr %r3, %r4 -; CHECK: lg %r2, 0(%r3) ; CHECK: csg %r2, {{%r[0-9]+}}, 0(%r3) ; CHECK: br %r14 %add = add i64 %base, %index diff --git a/llvm/test/CodeGen/SystemZ/atomicrmw-xor-04.ll b/llvm/test/CodeGen/SystemZ/atomicrmw-xor-04.ll index 6cf1b80..ee5fc1c 100644 --- a/llvm/test/CodeGen/SystemZ/atomicrmw-xor-04.ll +++ b/llvm/test/CodeGen/SystemZ/atomicrmw-xor-04.ll @@ -48,11 +48,11 @@ define i64 @f4(i64 %dummy, ptr %src) { ret i64 %res } -; Check the next value up, which must use a register. (We could use -; combinations of XIH* and XIL* instead, but that isn't implemented.) +; Check the next value up. define i64 @f5(i64 %dummy, ptr %src) { ; CHECK-LABEL: f5: -; CHECK: xgr +; CHECK: xihf %r0, 1 +; CHECK: xilf %r0, 1 ; CHECK: br %r14 %res = atomicrmw xor ptr %src, i64 4294967297 seq_cst ret i64 %res @@ -70,7 +70,8 @@ define i64 @f6(i64 %dummy, ptr %src) { ; Check the next value up, which must use a register. define i64 @f7(i64 %dummy, ptr %src) { ; CHECK-LABEL: f7: -; CHECK: xgr +; CHECK: xihf %r0, 4294967295 +; CHECK: xilf %r0, 1 ; CHECK: br %r14 %res = atomicrmw xor ptr %src, i64 -4294967295 seq_cst ret i64 %res |