diff options
author | quic-areg <aregmi@quicinc.com> | 2025-07-18 10:27:59 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-07-18 10:27:59 -0500 |
commit | ac7ceb3dabfac548caa993e7b77bbadc78af4464 (patch) | |
tree | 05faf3ce6e8d2c34feedc84787d7e2011b10baf1 /llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp | |
parent | 32f0fc597f92f98f1be81abbd07f5164377668ef (diff) | |
download | llvm-ac7ceb3dabfac548caa993e7b77bbadc78af4464.zip llvm-ac7ceb3dabfac548caa993e7b77bbadc78af4464.tar.gz llvm-ac7ceb3dabfac548caa993e7b77bbadc78af4464.tar.bz2 |
[Hexagon][llvm-objdump] Improve disassembly of Hexagon bundles (#145807)
Hexagon instructions are VLIW "bundles" of up to four instruction words
encoded as a single MCInst with operands for each sub-instruction.
Previously, the disassembler's getInstruction() returned the full
bundle, which made it difficult to work with llvm-objdump.
For example, since all instructions are bundles, and bundles do not
branch, branch targets could not be printed.
This patch modifies the Hexagon disassembler to return individual
sub-instructions instead of entire bundles, enabling correct printing of
branch targets and relocations. It also introduces
`MCDisassembler::getInstructionBundle` for cases where the full bundle
is still needed.
By default, llvm-objdump separates instructions with newlines. However,
this does not work well for Hexagon syntax:
{ inst1
inst2
inst3
inst4 <branch> } :endloop0
Instructions may be followed by a closing brace, a closing brace with
`:endloop`, or a newline. Branches must appear within the braces.
To address this, `PrettyPrinter::getInstructionSeparator()` is added and
overridden for Hexagon.
Diffstat (limited to 'llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp')
-rw-r--r-- | llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp | 109 |
1 files changed, 85 insertions, 24 deletions
diff --git a/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp b/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp index 5bd3170..22cff7c 100644 --- a/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp +++ b/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp @@ -43,12 +43,12 @@ namespace { class HexagonDisassembler : public MCDisassembler { public: std::unique_ptr<MCInstrInfo const> const MCII; - std::unique_ptr<MCInst *> CurrentBundle; + mutable std::unique_ptr<MCInst> CurrentBundle; mutable MCInst const *CurrentExtender; HexagonDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx, MCInstrInfo const *MCII) - : MCDisassembler(STI, Ctx), MCII(MCII), CurrentBundle(new MCInst *), + : MCDisassembler(STI, Ctx), MCII(MCII), CurrentBundle(nullptr), CurrentExtender(nullptr) {} DecodeStatus getSingleInstruction(MCInst &Instr, MCInst &MCB, @@ -57,7 +57,23 @@ public: DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address, raw_ostream &CStream) const override; + + DecodeStatus getInstructionBundle(MCInst &Instr, uint64_t &Size, + ArrayRef<uint8_t> Bytes, uint64_t Address, + raw_ostream &CStream) const override; + void remapInstruction(MCInst &Instr) const; + +private: + bool makeBundle(ArrayRef<uint8_t> Bytes, uint64_t Address, + uint64_t &BytesToSkip, raw_ostream &CS) const; + + void resetBundle() const { + CurrentBundle.reset(); + CurrentInstruction = nullptr; + } + + mutable MCOperand *CurrentInstruction = nullptr; }; static uint64_t fullValue(HexagonDisassembler const &Disassembler, MCInst &MI, @@ -171,43 +187,88 @@ LLVMInitializeHexagonDisassembler() { createHexagonDisassembler); } -DecodeStatus HexagonDisassembler::getInstruction(MCInst &MI, uint64_t &Size, - ArrayRef<uint8_t> Bytes, - uint64_t Address, - raw_ostream &CS) const { - CommentStream = &CS; - - DecodeStatus Result = DecodeStatus::Success; +bool HexagonDisassembler::makeBundle(ArrayRef<uint8_t> Bytes, uint64_t Address, + uint64_t &BytesToSkip, + raw_ostream &CS) const { bool Complete = false; - Size = 0; + DecodeStatus Result = DecodeStatus::Success; - *CurrentBundle = &MI; - MI.setOpcode(Hexagon::BUNDLE); - MI.addOperand(MCOperand::createImm(0)); + CurrentBundle.reset(new MCInst); + CurrentBundle->setOpcode(Hexagon::BUNDLE); + CurrentBundle->addOperand(MCOperand::createImm(0)); while (Result == Success && !Complete) { if (Bytes.size() < HEXAGON_INSTR_SIZE) - return MCDisassembler::Fail; + return false; MCInst *Inst = getContext().createMCInst(); - Result = getSingleInstruction(*Inst, MI, Bytes, Address, CS, Complete); - MI.addOperand(MCOperand::createInst(Inst)); - Size += HEXAGON_INSTR_SIZE; + Result = getSingleInstruction(*Inst, *CurrentBundle, Bytes, Address, CS, + Complete); + CurrentBundle->addOperand(MCOperand::createInst(Inst)); + BytesToSkip += HEXAGON_INSTR_SIZE; Bytes = Bytes.slice(HEXAGON_INSTR_SIZE); } if (Result == MCDisassembler::Fail) - return Result; - if (Size > HEXAGON_MAX_PACKET_SIZE) - return MCDisassembler::Fail; + return false; + if (BytesToSkip > HEXAGON_MAX_PACKET_SIZE) + return false; const auto ArchSTI = Hexagon_MC::getArchSubtarget(&STI); const auto STI_ = (ArchSTI != nullptr) ? *ArchSTI : STI; - HexagonMCChecker Checker(getContext(), *MCII, STI_, MI, + HexagonMCChecker Checker(getContext(), *MCII, STI_, *CurrentBundle, *getContext().getRegisterInfo(), false); if (!Checker.check()) - return MCDisassembler::Fail; - remapInstruction(MI); + return false; + remapInstruction(*CurrentBundle); + return true; +} + +DecodeStatus HexagonDisassembler::getInstruction(MCInst &MI, uint64_t &Size, + ArrayRef<uint8_t> Bytes, + uint64_t Address, + raw_ostream &CS) const { + CommentStream = &CS; + + Size = 0; + uint64_t BytesToSkip = 0; + + if (!CurrentBundle) { + if (!makeBundle(Bytes, Address, BytesToSkip, CS)) { + Size = BytesToSkip; + resetBundle(); + return MCDisassembler::Fail; + } + CurrentInstruction = (CurrentBundle->begin() + 1); + } + + MI = *(CurrentInstruction->getInst()); + Size = HEXAGON_INSTR_SIZE; + if (++CurrentInstruction == CurrentBundle->end()) + resetBundle(); return MCDisassembler::Success; } +DecodeStatus HexagonDisassembler::getInstructionBundle(MCInst &MI, + uint64_t &Size, + ArrayRef<uint8_t> Bytes, + uint64_t Address, + raw_ostream &CS) const { + CommentStream = &CS; + Size = 0; + uint64_t BytesToSkip = 0; + assert(!CurrentBundle); + + if (!makeBundle(Bytes, Address, BytesToSkip, CS)) { + Size = BytesToSkip; + resetBundle(); + return MCDisassembler::Fail; + } + + MI = *CurrentBundle; + Size = HEXAGON_INSTR_SIZE * HexagonMCInstrInfo::bundleSize(MI); + resetBundle(); + + return Success; +} + void HexagonDisassembler::remapInstruction(MCInst &Instr) const { for (auto I: HexagonMCInstrInfo::bundleInstructions(Instr)) { auto &MI = const_cast<MCInst &>(*I.getInst()); @@ -482,7 +543,7 @@ DecodeStatus HexagonDisassembler::getSingleInstruction(MCInst &MI, MCInst &MCB, unsigned Offset = 1; bool Vector = HexagonMCInstrInfo::isVector(*MCII, MI); bool PrevVector = false; - auto Instructions = HexagonMCInstrInfo::bundleInstructions(**CurrentBundle); + auto Instructions = HexagonMCInstrInfo::bundleInstructions(*CurrentBundle); auto i = Instructions.end() - 1; for (auto n = Instructions.begin() - 1;; --i, ++Offset) { if (i == n) |