aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
diff options
context:
space:
mode:
authorSimon Tatham <simon.tatham@arm.com>2022-08-01 13:40:32 +0100
committerSimon Tatham <simon.tatham@arm.com>2022-08-08 10:49:51 +0100
commit72017e9b16b737c5bd7c1dd33abff36f368fa724 (patch)
tree493cbdf0631efa94878df0f5310c8baaa7c8f9fa /llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
parent1eee6de873974f55538df976bf7802f019eac70a (diff)
downloadllvm-72017e9b16b737c5bd7c1dd33abff36f368fa724.zip
llvm-72017e9b16b737c5bd7c1dd33abff36f368fa724.tar.gz
llvm-72017e9b16b737c5bd7c1dd33abff36f368fa724.tar.bz2
[llvm-objdump,ARM] Fix big-endian AArch32 disassembly.
The ABI for big-endian AArch32, as specified by AAELF32, is above- averagely complicated. Relocatable object files are expected to store instruction encodings in byte order matching the ELF file's endianness (so, big-endian for a BE ELF file). But executable images can //either// do that //or// store instructions little-endian regardless of data and ELF endianness (to support BE32 and BE8 platforms respectively). They signal the latter by setting the EF_ARM_BE8 flag in the ELF header. (In the case of the Thumb instruction set, this all means that each 16-bit halfword of a Thumb instruction is stored in one or other endianness. The two halfwords of a 32-bit Thumb instruction must appear in the same order no matter what, because the first halfword is the one that must avoid overlapping the encoding of any 16-bit Thumb instruction.) llvm-objdump was unconditionally expecting Arm instructions to be stored little-endian. So it would correctly disassemble a BE8 image, but if you gave it a BE32 image or a BE object file, it would retrieve every instruction in byte-swapped form and disassemble it to nonsense. (Even an object file output by LLVM itself, because ARMMCCodeEmitter outputs instructions big-endian in big-endian mode, which is correct for writing an object file.) This patch allows llvm-objdump to correctly disassemble all three of those classes of Arm ELF file. It does it by introducing a new SubtargetFeature for big-endian instructions, setting it from the ELF image type and flags during llvm-objdump setup, and teaching both ARMDisassembler and llvm-objdump itself to pay attention to it when retrieving instruction data from a section being disassembled. Differential Revision: https://reviews.llvm.org/D130902
Diffstat (limited to 'llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp')
-rw-r--r--llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp20
1 files changed, 14 insertions, 6 deletions
diff --git a/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index f814959..f15cbb7 100644
--- a/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -131,6 +131,9 @@ class ARMDisassembler : public MCDisassembler {
public:
ARMDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) :
MCDisassembler(STI, Ctx) {
+ InstructionEndianness = STI.getFeatureBits()[ARM::ModeBigEndianInstructions]
+ ? llvm::support::big
+ : llvm::support::little;
}
~ARMDisassembler() override = default;
@@ -156,6 +159,8 @@ private:
DecodeStatus AddThumbPredicate(MCInst&) const;
void UpdateThumbVFPPredicate(DecodeStatus &, MCInst&) const;
+
+ llvm::support::endianness InstructionEndianness;
};
} // end anonymous namespace
@@ -765,7 +770,8 @@ uint64_t ARMDisassembler::suggestBytesToSkip(ArrayRef<uint8_t> Bytes,
if (Bytes.size() < 2)
return 2;
- uint16_t Insn16 = (Bytes[1] << 8) | Bytes[0];
+ uint16_t Insn16 = llvm::support::endian::read<uint16_t>(
+ Bytes.data(), InstructionEndianness);
return Insn16 < 0xE800 ? 2 : 4;
}
@@ -794,9 +800,9 @@ DecodeStatus ARMDisassembler::getARMInstruction(MCInst &MI, uint64_t &Size,
return MCDisassembler::Fail;
}
- // Encoded as a small-endian 32-bit word in the stream.
- uint32_t Insn =
- (Bytes[3] << 24) | (Bytes[2] << 16) | (Bytes[1] << 8) | (Bytes[0] << 0);
+ // Encoded as a 32-bit word in the stream.
+ uint32_t Insn = llvm::support::endian::read<uint32_t>(Bytes.data(),
+ InstructionEndianness);
// Calling the auto-generated decoder function.
DecodeStatus Result =
@@ -1084,7 +1090,8 @@ DecodeStatus ARMDisassembler::getThumbInstruction(MCInst &MI, uint64_t &Size,
return MCDisassembler::Fail;
}
- uint16_t Insn16 = (Bytes[1] << 8) | Bytes[0];
+ uint16_t Insn16 = llvm::support::endian::read<uint16_t>(
+ Bytes.data(), InstructionEndianness);
DecodeStatus Result =
decodeInstruction(DecoderTableThumb16, MI, Insn16, Address, this, STI);
if (Result != MCDisassembler::Fail) {
@@ -1138,7 +1145,8 @@ DecodeStatus ARMDisassembler::getThumbInstruction(MCInst &MI, uint64_t &Size,
}
uint32_t Insn32 =
- (Bytes[3] << 8) | (Bytes[2] << 0) | (Bytes[1] << 24) | (Bytes[0] << 16);
+ (uint32_t(Insn16) << 16) | llvm::support::endian::read<uint16_t>(
+ Bytes.data() + 2, InstructionEndianness);
Result =
decodeInstruction(DecoderTableMVE32, MI, Insn32, Address, this, STI);