Diffstat (limited to 'llvm/lib/Target/ARM')
 llvm/lib/Target/ARM/ARMAsmPrinter.cpp        | 436
 llvm/lib/Target/ARM/ARMAsmPrinter.h          |  11
 llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp |   2
 llvm/lib/Target/ARM/ARMISelLowering.cpp      |  69
 llvm/lib/Target/ARM/ARMISelLowering.h        |   6
 llvm/lib/Target/ARM/ARMInstrInfo.td          |  30
 llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp  |  15
 llvm/lib/Target/ARM/ARMTargetMachine.cpp     |  12
 llvm/lib/Target/ARM/Thumb2InstrInfo.cpp      |  10
 llvm/lib/Target/ARM/Thumb2InstrInfo.h        |   3
10 files changed, 591 insertions, 3 deletions
diff --git a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
index 1f773e2..36b9908 100644
--- a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -820,7 +820,7 @@ void ARMAsmPrinter::emitAttributes() {
     auto *BTIValue = mdconst::extract_or_null<ConstantInt>(
         SourceModule->getModuleFlag("branch-target-enforcement"));
-    if (BTIValue && BTIValue->isOne()) {
+    if (BTIValue && !BTIValue->isZero()) {
       // If "+pacbti" is used as an architecture extension,
       // Tag_BTI_extension is emitted in
       // ARMTargetStreamer::emitTargetAttributes().
@@ -1471,6 +1471,435 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
 //   instructions) auto-generated.
 #include "ARMGenMCPseudoLowering.inc"
 
+// Helper function to check if a register is live (used as an implicit operand)
+// in the given call instruction.
+static bool isRegisterLiveInCall(const MachineInstr &Call, MCRegister Reg) {
+  for (const MachineOperand &MO : Call.implicit_operands()) {
+    if (MO.isReg() && MO.getReg() == Reg && MO.isUse()) {
+      return true;
+    }
+  }
+  return false;
+}
+
+void ARMAsmPrinter::EmitKCFI_CHECK_ARM32(Register AddrReg, int64_t Type,
+                                         const MachineInstr &Call,
+                                         int64_t PrefixNops) {
+  // Choose scratch register: r12 primary, r3 if target is r12.
+  unsigned ScratchReg = ARM::R12;
+  if (AddrReg == ARM::R12) {
+    ScratchReg = ARM::R3;
+  }
+
+  // Calculate ESR for ARM mode (16-bit): 0x8000 | (scratch_reg << 5) | addr_reg
+  // Note: scratch_reg is always 0x1F since the EOR sequence clobbers it.
+  const ARMBaseRegisterInfo *TRI = static_cast<const ARMBaseRegisterInfo *>(
+      MF->getSubtarget().getRegisterInfo());
+  unsigned AddrIndex = TRI->getEncodingValue(AddrReg);
+  unsigned ESR = 0x8000 | (31 << 5) | (AddrIndex & 31);
+
+  // Check if r3 is live and needs to be spilled.
+  bool NeedSpillR3 =
+      (ScratchReg == ARM::R3) && isRegisterLiveInCall(Call, ARM::R3);
+
+  // If we need to spill r3, push it first.
+  if (NeedSpillR3) {
+    // push {r3}
+    EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::STMDB_UPD)
+                                     .addReg(ARM::SP)
+                                     .addReg(ARM::SP)
+                                     .addImm(ARMCC::AL)
+                                     .addReg(0)
+                                     .addReg(ARM::R3));
+  }
+
+  // Clear bit 0 of target address to handle Thumb function pointers.
+  // In 32-bit ARM, function pointers may have the low bit set to indicate
+  // Thumb state when ARM/Thumb interworking is enabled (ARMv4T and later).
+  // We need to clear it to avoid an alignment fault when loading.
+  // bic scratch, target, #1
+  EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::BICri)
+                                   .addReg(ScratchReg)
+                                   .addReg(AddrReg)
+                                   .addImm(1)
+                                   .addImm(ARMCC::AL)
+                                   .addReg(0)
+                                   .addReg(0));
+
+  // ldr scratch, [scratch, #-(PrefixNops * 4 + 4)]
+  EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::LDRi12)
+                                   .addReg(ScratchReg)
+                                   .addReg(ScratchReg)
+                                   .addImm(-(PrefixNops * 4 + 4))
+                                   .addImm(ARMCC::AL)
+                                   .addReg(0));
+
+  // Each EOR instruction XORs one byte of the type, shifted to its position.
+  for (int i = 0; i < 4; i++) {
+    uint8_t byte = (Type >> (i * 8)) & 0xFF;
+    uint32_t imm = byte << (i * 8);
+    bool isLast = (i == 3);
+
+    // Encode as ARM modified immediate.
+    int SOImmVal = ARM_AM::getSOImmVal(imm);
+    assert(SOImmVal != -1 &&
+           "Cannot encode immediate as ARM modified immediate");
+
+    // eor[s] scratch, scratch, #imm (last one sets flags with CPSR)
+    EmitToStreamer(*OutStreamer,
+                   MCInstBuilder(ARM::EORri)
+                       .addReg(ScratchReg)
+                       .addReg(ScratchReg)
+                       .addImm(SOImmVal)
+                       .addImm(ARMCC::AL)
+                       .addReg(0)
+                       .addReg(isLast ? ARM::CPSR : ARM::NoRegister));
+  }
+
+  // If we spilled r3, restore it immediately after the comparison.
+  // This must happen before the branch so r3 is valid on both paths.
+  if (NeedSpillR3) {
+    // pop {r3}
+    EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::LDMIA_UPD)
+                                     .addReg(ARM::SP)
+                                     .addReg(ARM::SP)
+                                     .addImm(ARMCC::AL)
+                                     .addReg(0)
+                                     .addReg(ARM::R3));
+  }
+
+  // beq .Lpass (branch if types match, i.e., scratch is zero)
+  MCSymbol *Pass = OutContext.createTempSymbol();
+  EmitToStreamer(*OutStreamer,
+                 MCInstBuilder(ARM::Bcc)
+                     .addExpr(MCSymbolRefExpr::create(Pass, OutContext))
+                     .addImm(ARMCC::EQ)
+                     .addReg(ARM::CPSR));
+
+  // udf #ESR (trap with encoded diagnostic)
+  EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::UDF).addImm(ESR));
+
+  OutStreamer->emitLabel(Pass);
+}
+
+void ARMAsmPrinter::EmitKCFI_CHECK_Thumb2(Register AddrReg, int64_t Type,
+                                          const MachineInstr &Call,
+                                          int64_t PrefixNops) {
+  // Choose scratch register: r12 primary, r3 if target is r12.
+  unsigned ScratchReg = ARM::R12;
+  if (AddrReg == ARM::R12) {
+    ScratchReg = ARM::R3;
+  }
+
+  // Calculate ESR for Thumb mode (8-bit): 0x80 | addr_reg
+  // Bit 7: KCFI trap indicator
+  // Bits 6-5: Reserved
+  // Bits 4-0: Address register encoding
+  const ARMBaseRegisterInfo *TRI = static_cast<const ARMBaseRegisterInfo *>(
+      MF->getSubtarget().getRegisterInfo());
+  unsigned AddrIndex = TRI->getEncodingValue(AddrReg);
+  unsigned ESR = 0x80 | (AddrIndex & 0x1F);
+
+  // Check if r3 is live and needs to be spilled.
+  bool NeedSpillR3 =
+      (ScratchReg == ARM::R3) && isRegisterLiveInCall(Call, ARM::R3);
+
+  // If we need to spill r3, push it first.
+  if (NeedSpillR3) {
+    // push {r3}
+    EmitToStreamer(
+        *OutStreamer,
+        MCInstBuilder(ARM::tPUSH).addImm(ARMCC::AL).addReg(0).addReg(ARM::R3));
+  }
+
+  // Clear bit 0 of target address to handle Thumb function pointers.
+  // In 32-bit ARM, function pointers may have the low bit set to indicate
+  // Thumb state when ARM/Thumb interworking is enabled (ARMv4T and later).
+  // We need to clear it to avoid an alignment fault when loading.
+  // bic scratch, target, #1
+  EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::t2BICri)
+                                   .addReg(ScratchReg)
+                                   .addReg(AddrReg)
+                                   .addImm(1)
+                                   .addImm(ARMCC::AL)
+                                   .addReg(0)
+                                   .addReg(0));
+
+  // ldr scratch, [scratch, #-(PrefixNops * 4 + 4)]
+  EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::t2LDRi8)
+                                   .addReg(ScratchReg)
+                                   .addReg(ScratchReg)
+                                   .addImm(-(PrefixNops * 4 + 4))
+                                   .addImm(ARMCC::AL)
+                                   .addReg(0));
+
+  // Each EOR instruction XORs one byte of the type, shifted to its position.
+  for (int i = 0; i < 4; i++) {
+    uint8_t byte = (Type >> (i * 8)) & 0xFF;
+    uint32_t imm = byte << (i * 8);
+    bool isLast = (i == 3);
+
+    // Verify the immediate can be encoded as Thumb2 modified immediate.
+    assert(ARM_AM::getT2SOImmVal(imm) != -1 &&
+           "Cannot encode immediate as Thumb2 modified immediate");
+
+    // eor[s] scratch, scratch, #imm (last one sets flags with CPSR)
+    EmitToStreamer(*OutStreamer,
+                   MCInstBuilder(ARM::t2EORri)
+                       .addReg(ScratchReg)
+                       .addReg(ScratchReg)
+                       .addImm(imm)
+                       .addImm(ARMCC::AL)
+                       .addReg(0)
+                       .addReg(isLast ? ARM::CPSR : ARM::NoRegister));
+  }
+
+  // If we spilled r3, restore it immediately after the comparison.
+  // This must happen before the branch so r3 is valid on both paths.
+  if (NeedSpillR3) {
+    // pop {r3}
+    EmitToStreamer(
+        *OutStreamer,
+        MCInstBuilder(ARM::tPOP).addImm(ARMCC::AL).addReg(0).addReg(ARM::R3));
+  }
+
+  // beq .Lpass (branch if types match, i.e., scratch is zero)
+  MCSymbol *Pass = OutContext.createTempSymbol();
+  EmitToStreamer(*OutStreamer,
+                 MCInstBuilder(ARM::t2Bcc)
+                     .addExpr(MCSymbolRefExpr::create(Pass, OutContext))
+                     .addImm(ARMCC::EQ)
+                     .addReg(ARM::CPSR));
+
+  // udf #ESR (trap with encoded diagnostic)
+  EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tUDF).addImm(ESR));
+
+  OutStreamer->emitLabel(Pass);
+}
+
+void ARMAsmPrinter::EmitKCFI_CHECK_Thumb1(Register AddrReg, int64_t Type,
+                                          const MachineInstr &Call,
+                                          int64_t PrefixNops) {
+  // For Thumb1, use R2 unconditionally as scratch register (a low register
+  // required for tLDRi). R3 is used for building the type hash.
+  unsigned ScratchReg = ARM::R2;
+  unsigned TempReg = ARM::R3;
+
+  // Check if r3 is live and needs to be spilled.
+  bool NeedSpillR3 = isRegisterLiveInCall(Call, ARM::R3);
+
+  // Spill r3 if needed
+  if (NeedSpillR3) {
+    EmitToStreamer(
+        *OutStreamer,
+        MCInstBuilder(ARM::tPUSH).addImm(ARMCC::AL).addReg(0).addReg(ARM::R3));
+  }
+
+  // Check if r2 is live and needs to be spilled.
+  bool NeedSpillR2 = isRegisterLiveInCall(Call, ARM::R2);
+
+  // Push R2 if it's live
+  if (NeedSpillR2) {
+    EmitToStreamer(
+        *OutStreamer,
+        MCInstBuilder(ARM::tPUSH).addImm(ARMCC::AL).addReg(0).addReg(ARM::R2));
+  }
+
+  // Clear bit 0 from target address
+  // TempReg (R3) is used first as helper for BIC, then later for building type
+  // hash.
+
+  // movs temp, #1
+  EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tMOVi8)
+                                   .addReg(TempReg)
+                                   .addReg(ARM::CPSR)
+                                   .addImm(1)
+                                   .addImm(ARMCC::AL)
+                                   .addReg(0));
+
+  // mov scratch, target
+  EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tMOVr)
+                                   .addReg(ScratchReg)
+                                   .addReg(AddrReg)
+                                   .addImm(ARMCC::AL));
+
+  // bics scratch, temp (scratch = scratch & ~temp)
+  EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tBIC)
+                                   .addReg(ScratchReg)
+                                   .addReg(ARM::CPSR)
+                                   .addReg(ScratchReg)
+                                   .addReg(TempReg)
+                                   .addImm(ARMCC::AL)
+                                   .addReg(0));
+
+  // Load type hash. Thumb1 doesn't support negative offsets, so subtract.
+  int offset = PrefixNops * 4 + 4;
+
+  // subs scratch, #offset
+  EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tSUBi8)
+                                   .addReg(ScratchReg)
+                                   .addReg(ARM::CPSR)
+                                   .addReg(ScratchReg)
+                                   .addImm(offset)
+                                   .addImm(ARMCC::AL)
+                                   .addReg(0));
+
+  // ldr scratch, [scratch, #0]
+  EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tLDRi)
+                                   .addReg(ScratchReg)
+                                   .addReg(ScratchReg)
+                                   .addImm(0)
+                                   .addImm(ARMCC::AL)
+                                   .addReg(0));
+
+  // Load expected type inline (instead of EOR sequence)
+  //
+  // This creates the 32-bit value byte-by-byte in the temp register:
+  //   movs temp, #byte3 (high byte)
+  //   lsls temp, temp, #8
+  //   adds temp, #byte2
+  //   lsls temp, temp, #8
+  //   adds temp, #byte1
+  //   lsls temp, temp, #8
+  //   adds temp, #byte0 (low byte)
+
+  uint8_t byte0 = (Type >> 0) & 0xFF;
+  uint8_t byte1 = (Type >> 8) & 0xFF;
+  uint8_t byte2 = (Type >> 16) & 0xFF;
+  uint8_t byte3 = (Type >> 24) & 0xFF;
+
+  // movs temp, #byte3 (start with high byte)
+  EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tMOVi8)
+                                   .addReg(TempReg)
+                                   .addReg(ARM::CPSR)
+                                   .addImm(byte3)
+                                   .addImm(ARMCC::AL)
+                                   .addReg(0));
+
+  // lsls temp, temp, #8
+  EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tLSLri)
+                                   .addReg(TempReg)
+                                   .addReg(ARM::CPSR)
+                                   .addReg(TempReg)
+                                   .addImm(8)
+                                   .addImm(ARMCC::AL)
+                                   .addReg(0));
+
+  // adds temp, #byte2
+  EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tADDi8)
+                                   .addReg(TempReg)
+                                   .addReg(ARM::CPSR)
+                                   .addReg(TempReg)
+                                   .addImm(byte2)
+                                   .addImm(ARMCC::AL)
+                                   .addReg(0));
+
+  // lsls temp, temp, #8
+  EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tLSLri)
+                                   .addReg(TempReg)
+                                   .addReg(ARM::CPSR)
+                                   .addReg(TempReg)
+                                   .addImm(8)
+                                   .addImm(ARMCC::AL)
+                                   .addReg(0));
+
+  // adds temp, #byte1
+  EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tADDi8)
+                                   .addReg(TempReg)
+                                   .addReg(ARM::CPSR)
+                                   .addReg(TempReg)
+                                   .addImm(byte1)
+                                   .addImm(ARMCC::AL)
+                                   .addReg(0));
+
+  // lsls temp, temp, #8
+  EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tLSLri)
+                                   .addReg(TempReg)
+                                   .addReg(ARM::CPSR)
+                                   .addReg(TempReg)
+                                   .addImm(8)
+                                   .addImm(ARMCC::AL)
+                                   .addReg(0));
+
+  // adds temp, #byte0 (low byte)
+  EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tADDi8)
+                                   .addReg(TempReg)
+                                   .addReg(ARM::CPSR)
+                                   .addReg(TempReg)
+                                   .addImm(byte0)
+                                   .addImm(ARMCC::AL)
+                                   .addReg(0));
+
+  // cmp scratch, temp
+  EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tCMPr)
+                                   .addReg(ScratchReg)
+                                   .addReg(TempReg)
+                                   .addImm(ARMCC::AL)
+                                   .addReg(0));
+
+  // Restore registers if spilled (pop in reverse order of push: R2, then R3)
+  if (NeedSpillR2) {
+    // pop {r2}
+    EmitToStreamer(
+        *OutStreamer,
+        MCInstBuilder(ARM::tPOP).addImm(ARMCC::AL).addReg(0).addReg(ARM::R2));
+  }
+
+  // Restore r3 if spilled
+  if (NeedSpillR3) {
+    // pop {r3}
+    EmitToStreamer(
+        *OutStreamer,
+        MCInstBuilder(ARM::tPOP).addImm(ARMCC::AL).addReg(0).addReg(ARM::R3));
+  }
+
+  // beq .Lpass (branch if types match, i.e., scratch == temp)
+  MCSymbol *Pass = OutContext.createTempSymbol();
+  EmitToStreamer(*OutStreamer,
+                 MCInstBuilder(ARM::tBcc)
+                     .addExpr(MCSymbolRefExpr::create(Pass, OutContext))
+                     .addImm(ARMCC::EQ)
+                     .addReg(ARM::CPSR));
+
+  // bkpt #0 (trap with encoded diagnostic)
+  EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tBKPT).addImm(0));
+
+  OutStreamer->emitLabel(Pass);
+}
+
+void ARMAsmPrinter::LowerKCFI_CHECK(const MachineInstr &MI) {
+  Register AddrReg = MI.getOperand(0).getReg();
+  const int64_t Type = MI.getOperand(1).getImm();
+
+  // Get the call instruction that follows this KCFI_CHECK.
+  assert(std::next(MI.getIterator())->isCall() &&
+         "KCFI_CHECK not followed by a call instruction");
+  const MachineInstr &Call = *std::next(MI.getIterator());
+
+  // Adjust the offset for patchable-function-prefix.
+  int64_t PrefixNops = 0;
+  MI.getMF()
+      ->getFunction()
+      .getFnAttribute("patchable-function-prefix")
+      .getValueAsString()
+      .getAsInteger(10, PrefixNops);
+
+  // Emit the appropriate instruction sequence based on the opcode variant.
+  switch (MI.getOpcode()) {
+  case ARM::KCFI_CHECK_ARM:
+    EmitKCFI_CHECK_ARM32(AddrReg, Type, Call, PrefixNops);
+    break;
+  case ARM::KCFI_CHECK_Thumb2:
+    EmitKCFI_CHECK_Thumb2(AddrReg, Type, Call, PrefixNops);
+    break;
+  case ARM::KCFI_CHECK_Thumb1:
+    EmitKCFI_CHECK_Thumb1(AddrReg, Type, Call, PrefixNops);
+    break;
+  default:
+    llvm_unreachable("Unexpected KCFI_CHECK opcode");
+  }
+}
+
 void ARMAsmPrinter::emitInstruction(const MachineInstr *MI) {
   ARM_MC::verifyInstructionPredicates(MI->getOpcode(),
                                       getSubtargetInfo().getFeatureBits());
@@ -1504,6 +1933,11 @@ void ARMAsmPrinter::emitInstruction(const MachineInstr *MI) {
   switch (Opc) {
   case ARM::t2MOVi32imm: llvm_unreachable("Should be lowered by thumb2it pass");
   case ARM::DBG_VALUE: llvm_unreachable("Should be handled by generic printing");
+  case ARM::KCFI_CHECK_ARM:
+  case ARM::KCFI_CHECK_Thumb2:
+  case ARM::KCFI_CHECK_Thumb1:
+    LowerKCFI_CHECK(*MI);
+    return;
   case ARM::LEApcrel:
   case ARM::tLEApcrel:
   case ARM::t2LEApcrel: {
diff --git a/llvm/lib/Target/ARM/ARMAsmPrinter.h b/llvm/lib/Target/ARM/ARMAsmPrinter.h
index 2b067c7..9e92b5a 100644
--- a/llvm/lib/Target/ARM/ARMAsmPrinter.h
+++ b/llvm/lib/Target/ARM/ARMAsmPrinter.h
@@ -123,9 +123,20 @@ public:
   void LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI);
   void LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI);
 
+  // KCFI check lowering
+  void LowerKCFI_CHECK(const MachineInstr &MI);
+
 private:
   void EmitSled(const MachineInstr &MI, SledKind Kind);
 
+  // KCFI check emission helpers
+  void EmitKCFI_CHECK_ARM32(Register AddrReg, int64_t Type,
+                            const MachineInstr &Call, int64_t PrefixNops);
+  void EmitKCFI_CHECK_Thumb2(Register AddrReg, int64_t Type,
+                             const MachineInstr &Call, int64_t PrefixNops);
+  void EmitKCFI_CHECK_Thumb1(Register AddrReg, int64_t Type,
+                             const MachineInstr &Call, int64_t PrefixNops);
+
   // Helpers for emitStartOfAsmFile() and emitEndOfAsmFile()
   void emitAttributes();
 
diff --git a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 0d7b6d1..fffb6373 100644
--- a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -2301,6 +2301,8 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
       for (unsigned i = 2, e = MBBI->getNumOperands(); i != e; ++i)
         NewMI->addOperand(MBBI->getOperand(i));
 
+      NewMI->setCFIType(*MBB.getParent(), MI.getCFIType());
+
       // Update call info and delete the pseudo instruction TCRETURN.
       if (MI.isCandidateForAdditionalCallInfo())
         MI.getMF()->moveAdditionalCallInfo(&MI, &*NewMI);
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index b1a668e..8122db2 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -2849,6 +2849,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
   if (isTailCall) {
     MF.getFrameInfo().setHasTailCall();
     SDValue Ret = DAG.getNode(ARMISD::TC_RETURN, dl, MVT::Other, Ops);
+    if (CLI.CFIType)
+      Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
    DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
    DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
    return Ret;
@@ -2856,6 +2858,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
 
   // Returns a chain and a flag for retval copy to use.
   Chain = DAG.getNode(CallOpc, dl, {MVT::Other, MVT::Glue}, Ops);
+  if (CLI.CFIType)
+    Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
   DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
   InGlue = Chain.getValue(1);
   DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
@@ -12008,6 +12012,71 @@ static void genTPLoopBody(MachineBasicBlock *TpLoopBody,
       .add(predOps(ARMCC::AL));
 }
 
+bool ARMTargetLowering::supportKCFIBundles() const {
+  // KCFI is supported in all ARM/Thumb modes
+  return true;
+}
+
+MachineInstr *
+ARMTargetLowering::EmitKCFICheck(MachineBasicBlock &MBB,
+                                 MachineBasicBlock::instr_iterator &MBBI,
+                                 const TargetInstrInfo *TII) const {
+  assert(MBBI->isCall() && MBBI->getCFIType() &&
+         "Invalid call instruction for a KCFI check");
+
+  MachineOperand *TargetOp = nullptr;
+  switch (MBBI->getOpcode()) {
+  // ARM mode opcodes
+  case ARM::BLX:
+  case ARM::BLX_pred:
+  case ARM::BLX_noip:
+  case ARM::BLX_pred_noip:
+  case ARM::BX_CALL:
+    TargetOp = &MBBI->getOperand(0);
+    break;
+  case ARM::TCRETURNri:
+  case ARM::TCRETURNrinotr12:
+  case ARM::TAILJMPr:
+  case ARM::TAILJMPr4:
+    TargetOp = &MBBI->getOperand(0);
+    break;
+  // Thumb mode opcodes (Thumb1 and Thumb2)
+  // Note: Most Thumb call instructions have predicate operands before the
+  // target register. Format: tBLXr pred, predreg, target_register, ...
+  case ARM::tBLXr:      // Thumb1/Thumb2: BLX register (requires V5T)
+  case ARM::tBLXr_noip: // Thumb1/Thumb2: BLX register, no IP clobber
+  case ARM::tBX_CALL:   // Thumb1 only: BX call (push LR, BX)
+    TargetOp = &MBBI->getOperand(2);
+    break;
+  // Tail call instructions don't have predicates, target is operand 0
+  case ARM::tTAILJMPr: // Thumb1/Thumb2: Tail call via register
+    TargetOp = &MBBI->getOperand(0);
+    break;
+  default:
+    llvm_unreachable("Unexpected CFI call opcode");
+  }
+
+  assert(TargetOp && TargetOp->isReg() && "Invalid target operand");
+  TargetOp->setIsRenamable(false);
+
+  // Select the appropriate KCFI_CHECK variant based on the instruction set
+  unsigned KCFICheckOpcode;
+  if (Subtarget->isThumb()) {
+    if (Subtarget->isThumb2()) {
+      KCFICheckOpcode = ARM::KCFI_CHECK_Thumb2;
+    } else {
+      KCFICheckOpcode = ARM::KCFI_CHECK_Thumb1;
+    }
+  } else {
+    KCFICheckOpcode = ARM::KCFI_CHECK_ARM;
+  }
+
+  return BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(KCFICheckOpcode))
+      .addReg(TargetOp->getReg())
+      .addImm(MBBI->getCFIType())
+      .getInstr();
+}
+
 MachineBasicBlock *
 ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                                MachineBasicBlock *BB) const {
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index 70aa001..8c5e0cf 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -447,6 +447,12 @@ class VectorType;
     void AdjustInstrPostInstrSelection(MachineInstr &MI,
                                        SDNode *Node) const override;
 
+    bool supportKCFIBundles() const override;
+
+    MachineInstr *EmitKCFICheck(MachineBasicBlock &MBB,
+                                MachineBasicBlock::instr_iterator &MBBI,
+                                const TargetInstrInfo *TII) const override;
+
     SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const;
     SDValue PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const;
     SDValue PerformCMOVToBFICombine(SDNode *N, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td
index 282ff53..53be167 100644
--- a/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -6536,6 +6536,36 @@ def CMP_SWAP_64 : PseudoInst<(outs GPRPair:$Rd, GPRPair:$addr_temp_out),
 def : Pat<(atomic_fence (timm), 0), (MEMBARRIER)>;
 
 //===----------------------------------------------------------------------===//
+// KCFI check pseudo-instruction.
+//===----------------------------------------------------------------------===//
+// KCFI_CHECK pseudo-instruction for Kernel Control-Flow Integrity.
+// Expands to a sequence that verifies the function pointer's type hash.
+// Different sizes for different architectures due to different expansions.
+
+def KCFI_CHECK_ARM
+    : PseudoInst<(outs), (ins GPR:$ptr, i32imm:$type), NoItinerary, []>,
+      Sched<[]>,
+      Requires<[IsARM]> {
+  let Size = 28; // 7 instructions (bic, ldr, 4x eor, beq, udf)
+}
+
+def KCFI_CHECK_Thumb2
+    : PseudoInst<(outs), (ins GPR:$ptr, i32imm:$type), NoItinerary, []>,
+      Sched<[]>,
+      Requires<[IsThumb2]> {
+  let Size =
+      32; // worst-case 9 instructions (push, bic, ldr, 4x eor, pop, beq.w, udf)
+}
+
+def KCFI_CHECK_Thumb1
+    : PseudoInst<(outs), (ins GPR:$ptr, i32imm:$type), NoItinerary, []>,
+      Sched<[]>,
+      Requires<[IsThumb1Only]> {
+  let Size = 50; // worst-case 25 instructions (pushes, bic helper, type
+                 // building, cmp, pops)
+}
+
+//===----------------------------------------------------------------------===//
 // Instructions used for emitting unwind opcodes on Windows.
 //===----------------------------------------------------------------------===//
 let isPseudo = 1 in {
diff --git a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
index 406f4c1..597d311 100644
--- a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
+++ b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
@@ -1036,6 +1036,7 @@ bool LowOverheadLoop::ValidateLiveOuts() {
   while (!Worklist.empty()) {
     MachineInstr *MI = Worklist.pop_back_val();
     if (MI->getOpcode() == ARM::MQPRCopy) {
+      LLVM_DEBUG(dbgs() << "  Must generate copy as VMOV: " << *MI);
       VMOVCopies.insert(MI);
       MachineInstr *CopySrc =
           RDI.getUniqueReachingMIDef(MI, MI->getOperand(1).getReg());
@@ -1045,6 +1046,20 @@ bool LowOverheadLoop::ValidateLiveOuts() {
         LLVM_DEBUG(dbgs() << "  Unable to handle live out: " << *MI);
         VMOVCopies.clear();
         return false;
+      } else if (isVectorPredicated(MI)) {
+        // If this is a predicated instruction with merging semantics,
+        // check where it gets its false lanes from, if any.
+        int InactiveIdx = findVPTInactiveOperandIdx(*MI);
+        if (InactiveIdx != -1) {
+          SmallPtrSet<MachineInstr *, 2> Defs;
+          MachineInstr *FalseSrc = RDI.getUniqueReachingMIDef(
+              MI, MI->getOperand(InactiveIdx).getReg());
+          if (FalseSrc) {
+            LLVM_DEBUG(dbgs()
+                       << "  Must check source of false lanes for: " << *MI);
+            Worklist.push_back(FalseSrc);
+          }
+        }
       }
     }
   }
diff --git a/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/llvm/lib/Target/ARM/ARMTargetMachine.cpp
index 86740a9..590d4c7 100644
--- a/llvm/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetMachine.cpp
@@ -111,6 +111,7 @@ extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeARMTarget() {
   initializeMVELaneInterleavingPass(Registry);
   initializeARMFixCortexA57AES1742098Pass(Registry);
   initializeARMDAGToDAGISelLegacyPass(Registry);
+  initializeKCFIPass(Registry);
 }
 
 static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
@@ -487,6 +488,9 @@ void ARMPassConfig::addPreSched2() {
     // proper scheduling.
     addPass(createARMExpandPseudoPass());
 
+    // Emit KCFI checks for indirect calls.
+    addPass(createKCFIPass());
+
    if (getOptLevel() != CodeGenOptLevel::None) {
      // When optimising for size, always run the Thumb2SizeReduction pass before
      // IfConversion. Otherwise, check whether IT blocks are restricted
@@ -517,9 +521,12 @@ void ARMPassConfig::addPreSched2() {
 void ARMPassConfig::addPreEmitPass() {
   addPass(createThumb2SizeReductionPass());
 
-  // Constant island pass work on unbundled instructions.
+  // Unpack bundles for:
+  // - Thumb2: Constant island pass requires unbundled instructions
+  // - KCFI: KCFI_CHECK pseudo instructions need to be unbundled for AsmPrinter
   addPass(createUnpackMachineBundles([](const MachineFunction &MF) {
-    return MF.getSubtarget<ARMSubtarget>().isThumb2();
+    return MF.getSubtarget<ARMSubtarget>().isThumb2() ||
+           MF.getFunction().getParent()->getModuleFlag("kcfi");
   }));
 
   // Don't optimize barriers or block placement at -O0.
@@ -530,6 +537,7 @@ void ARMPassConfig::addPreEmitPass2() {
+
   // Inserts fixup instructions before unsafe AES operations. Instructions may
   // be inserted at the start of blocks and at within blocks so this pass has to
   // come before those below.
diff --git a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
index 431ce38..f5653d4 100644
--- a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -805,6 +805,16 @@ int llvm::findFirstVPTPredOperandIdx(const MachineInstr &MI) {
   return -1;
 }
 
+int llvm::findVPTInactiveOperandIdx(const MachineInstr &MI) {
+  const MCInstrDesc &MCID = MI.getDesc();
+
+  for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i)
+    if (MCID.operands()[i].OperandType == ARM::OPERAND_VPRED_R)
+      return i + ARM::SUBOP_vpred_r_inactive;
+
+  return -1;
+}
+
 ARMVCC::VPTCodes llvm::getVPTInstrPredicate(const MachineInstr &MI,
                                             Register &PredReg) {
   int PIdx = findFirstVPTPredOperandIdx(MI);
diff --git a/llvm/lib/Target/ARM/Thumb2InstrInfo.h b/llvm/lib/Target/ARM/Thumb2InstrInfo.h
index 3ec3a621..1b0bf2d 100644
--- a/llvm/lib/Target/ARM/Thumb2InstrInfo.h
+++ b/llvm/lib/Target/ARM/Thumb2InstrInfo.h
@@ -90,6 +90,9 @@ inline ARMVCC::VPTCodes getVPTInstrPredicate(const MachineInstr &MI) {
   Register PredReg;
   return getVPTInstrPredicate(MI, PredReg);
 }
 
+// Identify the input operand in an MVE predicated instruction which
+// contributes the values of any inactive vector lanes.
+int findVPTInactiveOperandIdx(const MachineInstr &MI);
 // Recomputes the Block Mask of Instr, a VPT or VPST instruction.
 // This rebuilds the block mask of the instruction depending on the predicates
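For reference, the sequence that EmitKCFI_CHECK_ARM32 places in front of an indirect call looks roughly like the sketch below. This is an illustrative reconstruction from the comments in the patch, not compiler output; it assumes the call target is in r0, r12 is free as the scratch register, there are no patchable-function-prefix NOPs, and 0x12345678 stands in for the expected KCFI type hash.

    bic   r12, r0, #1            @ clear the Thumb bit so the load is word-aligned
    ldr   r12, [r12, #-4]        @ load the type hash emitted before the function
    eor   r12, r12, #0x78        @ XOR in the expected hash one byte at a time
    eor   r12, r12, #0x5600
    eor   r12, r12, #0x340000
    eors  r12, r12, #0x12000000  @ last EOR sets the flags
    beq   .Lpass                 @ zero means the hashes matched
    udf   #0x83e0                @ trap; ESR = 0x8000 | (0x1f << 5) | reg(r0)
.Lpass:
    blx   r0

The Thumb2 variant follows the same shape with t2 encodings and an 8-bit UDF immediate, while the Thumb1 variant rebuilds the expected hash in a low register and compares with CMP before trapping via BKPT, as described in the emission helpers above.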
