diff options
author | Tom Stellard <thomas.stellard@amd.com> | 2016-11-23 21:17:31 +0000 |
---|---|---|
committer | Tom Stellard <thomas.stellard@amd.com> | 2016-11-23 21:17:31 +0000 |
commit | c727a6e009002ab8a0a872f2d9a8b4fccdd03f2e (patch) | |
tree | 238aafe0cf6d523477bffa1e15ac58232d261b8d | |
parent | 3c3d6350cd0d339d5c6b61a444c86caeaf045649 (diff) | |
download | llvm-c727a6e009002ab8a0a872f2d9a8b4fccdd03f2e.zip llvm-c727a6e009002ab8a0a872f2d9a8b4fccdd03f2e.tar.gz llvm-c727a6e009002ab8a0a872f2d9a8b4fccdd03f2e.tar.bz2 |
Merging r279933:
------------------------------------------------------------------------
r279933 | hfinkel | 2016-08-28 09:17:58 -0700 (Sun, 28 Aug 2016) | 4 lines
[PowerPC] Implement lowering for atomicrmw min/max/umin/umax
Implement lowering for atomicrmw min/max/umin/umax. Fixes PR28818.
------------------------------------------------------------------------
llvm-svn: 287810
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 101 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCISelLowering.h | 8 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCInstr64Bit.td | 12 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCInstrInfo.td | 36 | ||||
-rw-r--r-- | llvm/test/CodeGen/PowerPC/atomic-minmax.ll | 435 |
5 files changed, 587 insertions, 5 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 34c9009..b33efb7 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -8385,7 +8385,9 @@ Instruction* PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder, MachineBasicBlock * PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB, unsigned AtomicSize, - unsigned BinOpcode) const { + unsigned BinOpcode, + unsigned CmpOpcode, + unsigned CmpPred) const { // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. const TargetInstrInfo *TII = Subtarget.getInstrInfo(); @@ -8425,8 +8427,12 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB, DebugLoc dl = MI.getDebugLoc(); MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *loop2MBB = + CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr; MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); F->insert(It, loopMBB); + if (CmpOpcode) + F->insert(It, loop2MBB); F->insert(It, exitMBB); exitMBB->splice(exitMBB->begin(), BB, std::next(MachineBasicBlock::iterator(MI)), BB->end()); @@ -8448,11 +8454,31 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB, // st[wd]cx. r0, ptr // bne- loopMBB // fallthrough --> exitMBB + + // For max/min... + // loopMBB: + // l[wd]arx dest, ptr + // cmpl?[wd] incr, dest + // bgt exitMBB + // loop2MBB: + // st[wd]cx. dest, ptr + // bne- loopMBB + // fallthrough --> exitMBB + BB = loopMBB; BuildMI(BB, dl, TII->get(LoadMnemonic), dest) .addReg(ptrA).addReg(ptrB); if (BinOpcode) BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest); + if (CmpOpcode) { + BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0) + .addReg(incr).addReg(dest); + BuildMI(BB, dl, TII->get(PPC::BCC)) + .addImm(CmpPred).addReg(PPC::CR0).addMBB(exitMBB); + BB->addSuccessor(loop2MBB); + BB->addSuccessor(exitMBB); + BB = loop2MBB; + } BuildMI(BB, dl, TII->get(StoreMnemonic)) .addReg(TmpReg).addReg(ptrA).addReg(ptrB); BuildMI(BB, dl, TII->get(PPC::BCC)) @@ -8470,10 +8496,13 @@ MachineBasicBlock * PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB, bool is8bit, // operation - unsigned BinOpcode) const { + unsigned BinOpcode, + unsigned CmpOpcode, + unsigned CmpPred) const { // If we support part-word atomic mnemonics, just use them if (Subtarget.hasPartwordAtomics()) - return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode); + return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, + CmpOpcode, CmpPred); // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. const TargetInstrInfo *TII = Subtarget.getInstrInfo(); @@ -8495,8 +8524,12 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI, DebugLoc dl = MI.getDebugLoc(); MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *loop2MBB = + CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr; MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); F->insert(It, loopMBB); + if (CmpOpcode) + F->insert(It, loop2MBB); F->insert(It, exitMBB); exitMBB->splice(exitMBB->begin(), BB, std::next(MachineBasicBlock::iterator(MI)), BB->end()); @@ -8581,6 +8614,32 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI, .addReg(TmpDestReg).addReg(MaskReg); BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), Tmp3Reg) .addReg(TmpReg).addReg(MaskReg); + if (CmpOpcode) { + // For unsigned comparisons, we can directly compare the shifted values. + // For signed comparisons we shift and sign extend. + unsigned SReg = RegInfo.createVirtualRegister(RC); + BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), SReg) + .addReg(TmpDestReg).addReg(MaskReg); + unsigned ValueReg = SReg; + unsigned CmpReg = Incr2Reg; + if (CmpOpcode == PPC::CMPW) { + ValueReg = RegInfo.createVirtualRegister(RC); + BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg) + .addReg(SReg).addReg(ShiftReg); + unsigned ValueSReg = RegInfo.createVirtualRegister(RC); + BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg) + .addReg(ValueReg); + ValueReg = ValueSReg; + CmpReg = incr; + } + BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0) + .addReg(CmpReg).addReg(ValueReg); + BuildMI(BB, dl, TII->get(PPC::BCC)) + .addImm(CmpPred).addReg(PPC::CR0).addMBB(exitMBB); + BB->addSuccessor(loop2MBB); + BB->addSuccessor(exitMBB); + BB = loop2MBB; + } BuildMI(BB, dl, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg) .addReg(Tmp3Reg).addReg(Tmp2Reg); BuildMI(BB, dl, TII->get(PPC::STWCX)) @@ -9087,6 +9146,42 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64) BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8); + else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8) + BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GE); + else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16) + BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GE); + else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32) + BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GE); + else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64) + BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GE); + + else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8) + BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LE); + else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16) + BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LE); + else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32) + BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LE); + else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64) + BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LE); + + else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8) + BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GE); + else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16) + BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GE); + else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32) + BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GE); + else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64) + BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GE); + + else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8) + BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LE); + else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16) + BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LE); + else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32) + BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LE); + else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64) + BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LE); + else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8) BB = EmitPartwordAtomicBinary(MI, BB, true, 0); else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16) diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index 433aca1..e605045 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -585,11 +585,15 @@ namespace llvm { MachineBasicBlock *EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, unsigned AtomicSize, - unsigned BinOpcode) const; + unsigned BinOpcode, + unsigned CmpOpcode = 0, + unsigned CmpPred = 0) const; MachineBasicBlock *EmitPartwordAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, bool is8bit, - unsigned Opcode) const; + unsigned Opcode, + unsigned CmpOpcode = 0, + unsigned CmpPred = 0) const; MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const; diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td index e7eb8a1..5e514c8 100644 --- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td @@ -224,6 +224,18 @@ let usesCustomInserter = 1 in { def ATOMIC_LOAD_NAND_I64 : Pseudo< (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_NAND_I64", [(set i64:$dst, (atomic_load_nand_64 xoaddr:$ptr, i64:$incr))]>; + def ATOMIC_LOAD_MIN_I64 : Pseudo< + (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_MIN_I64", + [(set i64:$dst, (atomic_load_min_64 xoaddr:$ptr, i64:$incr))]>; + def ATOMIC_LOAD_MAX_I64 : Pseudo< + (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_MAX_I64", + [(set i64:$dst, (atomic_load_max_64 xoaddr:$ptr, i64:$incr))]>; + def ATOMIC_LOAD_UMIN_I64 : Pseudo< + (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_UMIN_I64", + [(set i64:$dst, (atomic_load_umin_64 xoaddr:$ptr, i64:$incr))]>; + def ATOMIC_LOAD_UMAX_I64 : Pseudo< + (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_UMAX_I64", + [(set i64:$dst, (atomic_load_umax_64 xoaddr:$ptr, i64:$incr))]>; def ATOMIC_CMP_SWAP_I64 : Pseudo< (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$old, g8rc:$new), "#ATOMIC_CMP_SWAP_I64", diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td index 4a42a94..a40d4e1 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -1509,6 +1509,18 @@ let usesCustomInserter = 1 in { def ATOMIC_LOAD_NAND_I8 : Pseudo< (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_NAND_I8", [(set i32:$dst, (atomic_load_nand_8 xoaddr:$ptr, i32:$incr))]>; + def ATOMIC_LOAD_MIN_I8 : Pseudo< + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MIN_I8", + [(set i32:$dst, (atomic_load_min_8 xoaddr:$ptr, i32:$incr))]>; + def ATOMIC_LOAD_MAX_I8 : Pseudo< + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MAX_I8", + [(set i32:$dst, (atomic_load_max_8 xoaddr:$ptr, i32:$incr))]>; + def ATOMIC_LOAD_UMIN_I8 : Pseudo< + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMIN_I8", + [(set i32:$dst, (atomic_load_umin_8 xoaddr:$ptr, i32:$incr))]>; + def ATOMIC_LOAD_UMAX_I8 : Pseudo< + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMAX_I8", + [(set i32:$dst, (atomic_load_umax_8 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_ADD_I16 : Pseudo< (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_ADD_I16", [(set i32:$dst, (atomic_load_add_16 xoaddr:$ptr, i32:$incr))]>; @@ -1527,6 +1539,18 @@ let usesCustomInserter = 1 in { def ATOMIC_LOAD_NAND_I16 : Pseudo< (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_NAND_I16", [(set i32:$dst, (atomic_load_nand_16 xoaddr:$ptr, i32:$incr))]>; + def ATOMIC_LOAD_MIN_I16 : Pseudo< + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MIN_I16", + [(set i32:$dst, (atomic_load_min_16 xoaddr:$ptr, i32:$incr))]>; + def ATOMIC_LOAD_MAX_I16 : Pseudo< + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MAX_I16", + [(set i32:$dst, (atomic_load_max_16 xoaddr:$ptr, i32:$incr))]>; + def ATOMIC_LOAD_UMIN_I16 : Pseudo< + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMIN_I16", + [(set i32:$dst, (atomic_load_umin_16 xoaddr:$ptr, i32:$incr))]>; + def ATOMIC_LOAD_UMAX_I16 : Pseudo< + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMAX_I16", + [(set i32:$dst, (atomic_load_umax_16 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_ADD_I32 : Pseudo< (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_ADD_I32", [(set i32:$dst, (atomic_load_add_32 xoaddr:$ptr, i32:$incr))]>; @@ -1545,6 +1569,18 @@ let usesCustomInserter = 1 in { def ATOMIC_LOAD_NAND_I32 : Pseudo< (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_NAND_I32", [(set i32:$dst, (atomic_load_nand_32 xoaddr:$ptr, i32:$incr))]>; + def ATOMIC_LOAD_MIN_I32 : Pseudo< + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MIN_I32", + [(set i32:$dst, (atomic_load_min_32 xoaddr:$ptr, i32:$incr))]>; + def ATOMIC_LOAD_MAX_I32 : Pseudo< + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MAX_I32", + [(set i32:$dst, (atomic_load_max_32 xoaddr:$ptr, i32:$incr))]>; + def ATOMIC_LOAD_UMIN_I32 : Pseudo< + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMIN_I32", + [(set i32:$dst, (atomic_load_umin_32 xoaddr:$ptr, i32:$incr))]>; + def ATOMIC_LOAD_UMAX_I32 : Pseudo< + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMAX_I32", + [(set i32:$dst, (atomic_load_umax_32 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_CMP_SWAP_I8 : Pseudo< (outs gprc:$dst), (ins memrr:$ptr, gprc:$old, gprc:$new), "#ATOMIC_CMP_SWAP_I8", diff --git a/llvm/test/CodeGen/PowerPC/atomic-minmax.ll b/llvm/test/CodeGen/PowerPC/atomic-minmax.ll new file mode 100644 index 0000000..5b9a153 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/atomic-minmax.ll @@ -0,0 +1,435 @@ +; RUN: llc < %s | FileCheck %s +target datalayout = "E-m:e-i64:64-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +define void @a32min(i32* nocapture dereferenceable(4) %minimum, i32 %val) #0 { +entry: + %0 = atomicrmw min i32* %minimum, i32 %val monotonic + ret void + +; CHECK-LABEL: @a32min +; CHECK: lwarx [[OLDV:[0-9]+]], 0, 3 +; CHECK: cmpw 4, [[OLDV]] +; CHECK: bgelr 0 +; CHECK: stwcx. 4, 0, 3 +; CHECK: bne 0, +; CHECK: blr +} + +define void @a32max(i32* nocapture dereferenceable(4) %minimum, i32 %val) #0 { +entry: + %0 = atomicrmw max i32* %minimum, i32 %val monotonic + ret void + +; CHECK-LABEL: @a32max +; CHECK: lwarx [[OLDV:[0-9]+]], 0, 3 +; CHECK: cmpw 4, [[OLDV]] +; CHECK: blelr 0 +; CHECK: stwcx. 4, 0, 3 +; CHECK: bne 0, +; CHECK: blr +} + +define void @a32umin(i32* nocapture dereferenceable(4) %minimum, i32 %val) #0 { +entry: + %0 = atomicrmw umin i32* %minimum, i32 %val monotonic + ret void + +; CHECK-LABEL: @a32umin +; CHECK: lwarx [[OLDV:[0-9]+]], 0, 3 +; CHECK: cmplw 4, [[OLDV]] +; CHECK: bgelr 0 +; CHECK: stwcx. 4, 0, 3 +; CHECK: bne 0, +; CHECK: blr +} + +define void @a32umax(i32* nocapture dereferenceable(4) %minimum, i32 %val) #0 { +entry: + %0 = atomicrmw umax i32* %minimum, i32 %val monotonic + ret void + +; CHECK-LABEL: @a32umax +; CHECK: lwarx [[OLDV:[0-9]+]], 0, 3 +; CHECK: cmplw 4, [[OLDV]] +; CHECK: blelr 0 +; CHECK: stwcx. 4, 0, 3 +; CHECK: bne 0, +; CHECK: blr +} + +define void @a16min(i16* nocapture dereferenceable(4) %minimum, i16 %val) #1 { +entry: + %0 = atomicrmw min i16* %minimum, i16 %val monotonic + ret void + +; CHECK-LABEL: @a16min +; CHECK: lharx [[OLDV:[0-9]+]], 0, 3 +; CHECK: cmpw 4, [[OLDV]] +; CHECK: bgelr 0 +; CHECK: sthcx. 4, 0, 3 +; CHECK: bne 0, +; CHECK: blr +} + +define void @a16max(i16* nocapture dereferenceable(4) %minimum, i16 %val) #1 { +entry: + %0 = atomicrmw max i16* %minimum, i16 %val monotonic + ret void + +; CHECK-LABEL: @a16max +; CHECK: lharx [[OLDV:[0-9]+]], 0, 3 +; CHECK: cmpw 4, [[OLDV]] +; CHECK: blelr 0 +; CHECK: sthcx. 4, 0, 3 +; CHECK: bne 0, +; CHECK: blr +} + +define void @a16umin(i16* nocapture dereferenceable(4) %minimum, i16 %val) #1 { +entry: + %0 = atomicrmw umin i16* %minimum, i16 %val monotonic + ret void + +; CHECK-LABEL: @a16umin +; CHECK: lharx [[OLDV:[0-9]+]], 0, 3 +; CHECK: cmplw 4, [[OLDV]] +; CHECK: bgelr 0 +; CHECK: sthcx. 4, 0, 3 +; CHECK: bne 0, +; CHECK: blr +} + +define void @a16umax(i16* nocapture dereferenceable(4) %minimum, i16 %val) #1 { +entry: + %0 = atomicrmw umax i16* %minimum, i16 %val monotonic + ret void + +; CHECK-LABEL: @a16umax +; CHECK: lharx [[OLDV:[0-9]+]], 0, 3 +; CHECK: cmplw 4, [[OLDV]] +; CHECK: blelr 0 +; CHECK: sthcx. 4, 0, 3 +; CHECK: bne 0, +; CHECK: blr +} + +define void @a8min(i8* nocapture dereferenceable(4) %minimum, i8 %val) #1 { +entry: + %0 = atomicrmw min i8* %minimum, i8 %val monotonic + ret void + +; CHECK-LABEL: @a8min +; CHECK: lbarx [[OLDV:[0-9]+]], 0, 3 +; CHECK: cmpw 4, [[OLDV]] +; CHECK: bgelr 0 +; CHECK: stbcx. 4, 0, 3 +; CHECK: bne 0, +; CHECK: blr +} + +define void @a8max(i8* nocapture dereferenceable(4) %minimum, i8 %val) #1 { +entry: + %0 = atomicrmw max i8* %minimum, i8 %val monotonic + ret void + +; CHECK-LABEL: @a8max +; CHECK: lbarx [[OLDV:[0-9]+]], 0, 3 +; CHECK: cmpw 4, [[OLDV]] +; CHECK: blelr 0 +; CHECK: stbcx. 4, 0, 3 +; CHECK: bne 0, +; CHECK: blr +} + +define void @a8umin(i8* nocapture dereferenceable(4) %minimum, i8 %val) #1 { +entry: + %0 = atomicrmw umin i8* %minimum, i8 %val monotonic + ret void + +; CHECK-LABEL: @a8umin +; CHECK: lbarx [[OLDV:[0-9]+]], 0, 3 +; CHECK: cmplw 4, [[OLDV]] +; CHECK: bgelr 0 +; CHECK: stbcx. 4, 0, 3 +; CHECK: bne 0, +; CHECK: blr +} + +define void @a8umax(i8* nocapture dereferenceable(4) %minimum, i8 %val) #1 { +entry: + %0 = atomicrmw umax i8* %minimum, i8 %val monotonic + ret void + +; CHECK-LABEL: @a8umax +; CHECK: lbarx [[OLDV:[0-9]+]], 0, 3 +; CHECK: cmplw 4, [[OLDV]] +; CHECK: blelr 0 +; CHECK: stbcx. 4, 0, 3 +; CHECK: bne 0, +; CHECK: blr +} + +define void @a64min(i64* nocapture dereferenceable(4) %minimum, i64 %val) #0 { +entry: + %0 = atomicrmw min i64* %minimum, i64 %val monotonic + ret void + +; CHECK-LABEL: @a64min +; CHECK: ldarx [[OLDV:[0-9]+]], 0, 3 +; CHECK: cmpd 4, [[OLDV]] +; CHECK: bgelr 0 +; CHECK: stdcx. 4, 0, 3 +; CHECK: bne 0, +; CHECK: blr +} + +define void @a64max(i64* nocapture dereferenceable(4) %minimum, i64 %val) #0 { +entry: + %0 = atomicrmw max i64* %minimum, i64 %val monotonic + ret void + +; CHECK-LABEL: @a64max +; CHECK: ldarx [[OLDV:[0-9]+]], 0, 3 +; CHECK: cmpd 4, [[OLDV]] +; CHECK: blelr 0 +; CHECK: stdcx. 4, 0, 3 +; CHECK: bne 0, +; CHECK: blr +} + +define void @a64umin(i64* nocapture dereferenceable(4) %minimum, i64 %val) #0 { +entry: + %0 = atomicrmw umin i64* %minimum, i64 %val monotonic + ret void + +; CHECK-LABEL: @a64umin +; CHECK: ldarx [[OLDV:[0-9]+]], 0, 3 +; CHECK: cmpld 4, [[OLDV]] +; CHECK: bgelr 0 +; CHECK: stdcx. 4, 0, 3 +; CHECK: bne 0, +; CHECK: blr +} + +define void @a64umax(i64* nocapture dereferenceable(4) %minimum, i64 %val) #0 { +entry: + %0 = atomicrmw umax i64* %minimum, i64 %val monotonic + ret void + +; CHECK-LABEL: @a64umax +; CHECK: ldarx [[OLDV:[0-9]+]], 0, 3 +; CHECK: cmpld 4, [[OLDV]] +; CHECK: blelr 0 +; CHECK: stdcx. 4, 0, 3 +; CHECK: bne 0, +; CHECK: blr +} + +define void @ae16min(i16* nocapture dereferenceable(4) %minimum, i16 %val) #0 { +entry: + %0 = atomicrmw min i16* %minimum, i16 %val monotonic + ret void + +; CHECK-LABEL: @ae16min +; CHECK-DAG: rlwinm [[SA1:[0-9]+]], 3, 3, 27, 27 +; CHECK-DAG: li [[M1:[0-9]+]], 0 +; CHECK-DAG: rldicr 3, 3, 0, 61 +; CHECK-DAG: xori [[SA:[0-9]+]], [[SA1]], 16 +; CHECK-DAG: ori [[M2:[0-9]+]], [[M1]], 65535 +; CHECK-DAG: slw [[SV:[0-9]+]], 4, [[SA]] +; CHECK-DAG: slw [[M:[0-9]+]], [[M2]], [[SA]] +; CHECK-DAG: and [[SMV:[0-9]+]], [[SV]], [[M]] +; CHECK: lwarx [[OLDV:[0-9]+]], 0, 3 +; CHECK: and [[MOLDV:[0-9]+]], [[OLDV]], [[M]] +; CHECK: srw [[SMOLDV:[0-9]+]], [[MOLDV]], [[SA]] +; CHECK: extsh [[SESMOLDV:[0-9]+]], [[SMOLDV]] +; CHECK: cmpw 0, 4, [[SESMOLDV]] +; CHECK: bgelr 0 +; CHECK: andc [[NOLDV:[0-9]+]], [[OLDV]], [[M]] +; CHECK: or [[NEWV:[0-9]+]], [[SMV]], [[NOLDV]] +; CHECK: stwcx. [[NEWV]], 0, 3 +; CHECK: bne 0, +; CHECK: blr +} + +define void @ae16max(i16* nocapture dereferenceable(4) %minimum, i16 %val) #0 { +entry: + %0 = atomicrmw max i16* %minimum, i16 %val monotonic + ret void + +; CHECK-LABEL: @ae16max +; CHECK-DAG: rlwinm [[SA1:[0-9]+]], 3, 3, 27, 27 +; CHECK-DAG: li [[M1:[0-9]+]], 0 +; CHECK-DAG: rldicr 3, 3, 0, 61 +; CHECK-DAG: xori [[SA:[0-9]+]], [[SA1]], 16 +; CHECK-DAG: ori [[M2:[0-9]+]], [[M1]], 65535 +; CHECK-DAG: slw [[SV:[0-9]+]], 4, [[SA]] +; CHECK-DAG: slw [[M:[0-9]+]], [[M2]], [[SA]] +; CHECK-DAG: and [[SMV:[0-9]+]], [[SV]], [[M]] +; CHECK: lwarx [[OLDV:[0-9]+]], 0, 3 +; CHECK: and [[MOLDV:[0-9]+]], [[OLDV]], [[M]] +; CHECK: srw [[SMOLDV:[0-9]+]], [[MOLDV]], [[SA]] +; CHECK: extsh [[SESMOLDV:[0-9]+]], [[SMOLDV]] +; CHECK: cmpw 0, 4, [[SESMOLDV]] +; CHECK: blelr 0 +; CHECK: andc [[NOLDV:[0-9]+]], [[OLDV]], [[M]] +; CHECK: or [[NEWV:[0-9]+]], [[SMV]], [[NOLDV]] +; CHECK: stwcx. [[NEWV]], 0, 3 +; CHECK: bne 0, +; CHECK: blr +} + +define void @ae16umin(i16* nocapture dereferenceable(4) %minimum, i16 %val) #0 { +entry: + %0 = atomicrmw umin i16* %minimum, i16 %val monotonic + ret void + +; CHECK-LABEL: @ae16umin +; CHECK-DAG: rlwinm [[SA1:[0-9]+]], 3, 3, 27, 27 +; CHECK-DAG: li [[M1:[0-9]+]], 0 +; CHECK-DAG: rldicr 3, 3, 0, 61 +; CHECK-DAG: xori [[SA:[0-9]+]], [[SA1]], 16 +; CHECK-DAG: ori [[M2:[0-9]+]], [[M1]], 65535 +; CHECK-DAG: slw [[SV:[0-9]+]], 4, [[SA]] +; CHECK-DAG: slw [[M:[0-9]+]], [[M2]], [[SA]] +; CHECK-DAG: and [[SMV:[0-9]+]], [[SV]], [[M]] +; CHECK: lwarx [[OLDV:[0-9]+]], 0, 3 +; CHECK: and [[MOLDV:[0-9]+]], [[OLDV]], [[M]] +; CHECK: cmplw 0, 4, [[MOLDV]] +; CHECK: bgelr 0 +; CHECK: andc [[NOLDV:[0-9]+]], [[OLDV]], [[M]] +; CHECK: or [[NEWV:[0-9]+]], [[SMV]], [[NOLDV]] +; CHECK: stwcx. [[NEWV]], 0, 3 +; CHECK: bne 0, +; CHECK: blr +} + +define void @ae16umax(i16* nocapture dereferenceable(4) %minimum, i16 %val) #0 { +entry: + %0 = atomicrmw umax i16* %minimum, i16 %val monotonic + ret void + +; CHECK-LABEL: @ae16umax +; CHECK-DAG: rlwinm [[SA1:[0-9]+]], 3, 3, 27, 27 +; CHECK-DAG: li [[M1:[0-9]+]], 0 +; CHECK-DAG: rldicr 3, 3, 0, 61 +; CHECK-DAG: xori [[SA:[0-9]+]], [[SA1]], 16 +; CHECK-DAG: ori [[M2:[0-9]+]], [[M1]], 65535 +; CHECK-DAG: slw [[SV:[0-9]+]], 4, [[SA]] +; CHECK-DAG: slw [[M:[0-9]+]], [[M2]], [[SA]] +; CHECK-DAG: and [[SMV:[0-9]+]], [[SV]], [[M]] +; CHECK: lwarx [[OLDV:[0-9]+]], 0, 3 +; CHECK: and [[MOLDV:[0-9]+]], [[OLDV]], [[M]] +; CHECK: cmplw 0, 4, [[MOLDV]] +; CHECK: blelr 0 +; CHECK: andc [[NOLDV:[0-9]+]], [[OLDV]], [[M]] +; CHECK: or [[NEWV:[0-9]+]], [[SMV]], [[NOLDV]] +; CHECK: stwcx. [[NEWV]], 0, 3 +; CHECK: bne 0, +; CHECK: blr +} + +define void @ae8min(i8* nocapture dereferenceable(4) %minimum, i8 %val) #0 { +entry: + %0 = atomicrmw min i8* %minimum, i8 %val monotonic + ret void + +; CHECK-LABEL: @ae8min +; CHECK-DAG: rlwinm [[SA1:[0-9]+]], 3, 3, 27, 28 +; CHECK-DAG: li [[M1:[0-9]+]], 255 +; CHECK-DAG: rldicr 3, 3, 0, 61 +; CHECK-DAG: xori [[SA:[0-9]+]], [[SA1]], 24 +; CHECK-DAG: slw [[SV:[0-9]+]], 4, [[SA]] +; CHECK-DAG: slw [[M:[0-9]+]], [[M1]], [[SA]] +; CHECK-DAG: and [[SMV:[0-9]+]], [[SV]], [[M]] +; CHECK: lwarx [[OLDV:[0-9]+]], 0, 3 +; CHECK: and [[MOLDV:[0-9]+]], [[OLDV]], [[M]] +; CHECK: srw [[SMOLDV:[0-9]+]], [[MOLDV]], [[SA]] +; CHECK: extsb [[SESMOLDV:[0-9]+]], [[SMOLDV]] +; CHECK: cmpw 0, 4, [[SESMOLDV]] +; CHECK: bgelr 0 +; CHECK: andc [[NOLDV:[0-9]+]], [[OLDV]], [[M]] +; CHECK: or [[NEWV:[0-9]+]], [[SMV]], [[NOLDV]] +; CHECK: stwcx. [[NEWV]], 0, 3 +; CHECK: bne 0, +; CHECK: blr +} + +define void @ae8max(i8* nocapture dereferenceable(4) %minimum, i8 %val) #0 { +entry: + %0 = atomicrmw max i8* %minimum, i8 %val monotonic + ret void + +; CHECK-LABEL: @ae8max +; CHECK-DAG: rlwinm [[SA1:[0-9]+]], 3, 3, 27, 28 +; CHECK-DAG: li [[M1:[0-9]+]], 255 +; CHECK-DAG: rldicr 3, 3, 0, 61 +; CHECK-DAG: xori [[SA:[0-9]+]], [[SA1]], 24 +; CHECK-DAG: slw [[SV:[0-9]+]], 4, [[SA]] +; CHECK-DAG: slw [[M:[0-9]+]], [[M1]], [[SA]] +; CHECK-DAG: and [[SMV:[0-9]+]], [[SV]], [[M]] +; CHECK: lwarx [[OLDV:[0-9]+]], 0, 3 +; CHECK: and [[MOLDV:[0-9]+]], [[OLDV]], [[M]] +; CHECK: srw [[SMOLDV:[0-9]+]], [[MOLDV]], [[SA]] +; CHECK: extsb [[SESMOLDV:[0-9]+]], [[SMOLDV]] +; CHECK: cmpw 0, 4, [[SESMOLDV]] +; CHECK: blelr 0 +; CHECK: andc [[NOLDV:[0-9]+]], [[OLDV]], [[M]] +; CHECK: or [[NEWV:[0-9]+]], [[SMV]], [[NOLDV]] +; CHECK: stwcx. [[NEWV]], 0, 3 +; CHECK: bne 0, +; CHECK: blr +} + +define void @ae8umin(i8* nocapture dereferenceable(4) %minimum, i8 %val) #0 { +entry: + %0 = atomicrmw umin i8* %minimum, i8 %val monotonic + ret void + +; CHECK-LABEL: @ae8umin +; CHECK-DAG: rlwinm [[SA1:[0-9]+]], 3, 3, 27, 28 +; CHECK-DAG: li [[M1:[0-9]+]], 255 +; CHECK-DAG: rldicr 3, 3, 0, 61 +; CHECK-DAG: xori [[SA:[0-9]+]], [[SA1]], 24 +; CHECK-DAG: slw [[SV:[0-9]+]], 4, [[SA]] +; CHECK-DAG: slw [[M:[0-9]+]], [[M1]], [[SA]] +; CHECK-DAG: and [[SMV:[0-9]+]], [[SV]], [[M]] +; CHECK: lwarx [[OLDV:[0-9]+]], 0, 3 +; CHECK: and [[MOLDV:[0-9]+]], [[OLDV]], [[M]] +; CHECK: cmplw 0, 4, [[MOLDV]] +; CHECK: bgelr 0 +; CHECK: andc [[NOLDV:[0-9]+]], [[OLDV]], [[M]] +; CHECK: or [[NEWV:[0-9]+]], [[SMV]], [[NOLDV]] +; CHECK: stwcx. [[NEWV]], 0, 3 +; CHECK: bne 0, +; CHECK: blr +} + +define void @ae8umax(i8* nocapture dereferenceable(4) %minimum, i8 %val) #0 { +entry: + %0 = atomicrmw umax i8* %minimum, i8 %val monotonic + ret void + +; CHECK-LABEL: @ae8umax +; CHECK-DAG: rlwinm [[SA1:[0-9]+]], 3, 3, 27, 28 +; CHECK-DAG: li [[M1:[0-9]+]], 255 +; CHECK-DAG: rldicr 3, 3, 0, 61 +; CHECK-DAG: xori [[SA:[0-9]+]], [[SA1]], 24 +; CHECK-DAG: slw [[SV:[0-9]+]], 4, [[SA]] +; CHECK-DAG: slw [[M:[0-9]+]], [[M1]], [[SA]] +; CHECK-DAG: and [[SMV:[0-9]+]], [[SV]], [[M]] +; CHECK: lwarx [[OLDV:[0-9]+]], 0, 3 +; CHECK: and [[MOLDV:[0-9]+]], [[OLDV]], [[M]] +; CHECK: cmplw 0, 4, [[MOLDV]] +; CHECK: blelr 0 +; CHECK: andc [[NOLDV:[0-9]+]], [[OLDV]], [[M]] +; CHECK: or [[NEWV:[0-9]+]], [[SMV]], [[NOLDV]] +; CHECK: stwcx. [[NEWV]], 0, 3 +; CHECK: bne 0, +; CHECK: blr +} + +attributes #0 = { nounwind "target-cpu"="ppc64" } +attributes #1 = { nounwind "target-cpu"="pwr8" } + |