[llvm-objdump,ARM] Fix big-endian AArch32 disassembly.

The ABI for big-endian AArch32, as specified by AAELF32, is above-averagely complicated. Relocatable object files are expected to store instruction encodings in byte order matching the ELF file's endianness (so, big-endian for a BE ELF file). But executable images can //either// do that //or// store instructions little-endian regardless of data and ELF endianness (to support BE32 and BE8 platforms respectively). They signal the latter by setting the EF_ARM_BE8 flag in the ELF header. (In the case of the Thumb instruction set, this all means that each 16-bit halfword of a Thumb instruction is stored in one or other endianness. The two halfwords of a 32-bit Thumb instruction must appear in the same order no matter what, because the first halfword is the one that must avoid overlapping the encoding of any 16-bit Thumb instruction.) llvm-objdump was unconditionally expecting Arm instructions to be stored little-endian. So it would correctly disassemble a BE8 image, but if you gave it a BE32 image or a BE object file, it would retrieve every instruction in byte-swapped form and disassemble it to nonsense. (Even an object file output by LLVM itself, because ARMMCCodeEmitter outputs instructions big-endian in big-endian mode, which is correct for writing an object file.) This patch allows llvm-objdump to correctly disassemble all three of those classes of Arm ELF file. It does it by introducing a new SubtargetFeature for big-endian instructions, setting it from the ELF image type and flags during llvm-objdump setup, and teaching both ARMDisassembler and llvm-objdump itself to pay attention to it when retrieving instruction data from a section being disassembled. Differential Revision: https://reviews.llvm.org/D130902
author: Simon Tatham <simon.tatham@arm.com> 2022-08-01 13:40:32 +0100
committer: Simon Tatham <simon.tatham@arm.com> 2022-08-08 10:49:51 +0100
commit: 72017e9b16b737c5bd7c1dd33abff36f368fa724 (patch)
tree: 493cbdf0631efa94878df0f5310c8baaa7c8f9fa /llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
parent: 1eee6de873974f55538df976bf7802f019eac70a (diff)
download: llvm-72017e9b16b737c5bd7c1dd33abff36f368fa724.zip
llvm-72017e9b16b737c5bd7c1dd33abff36f368fa724.tar.gz
llvm-72017e9b16b737c5bd7c1dd33abff36f368fa724.tar.bz2
1 files changed, 14 insertions, 6 deletions
diff --git a/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index f814959..f15cbb7 100644
--- a/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -131,6 +131,9 @@ class ARMDisassembler : public MCDisassembler {
 public:
   ARMDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) :
     MCDisassembler(STI, Ctx) {
+    InstructionEndianness = STI.getFeatureBits()[ARM::ModeBigEndianInstructions]
+                                ? llvm::support::big
+                                : llvm::support::little;
   }
 
   ~ARMDisassembler() override = default;
@@ -156,6 +159,8 @@ private:
 
   DecodeStatus AddThumbPredicate(MCInst&) const;
   void UpdateThumbVFPPredicate(DecodeStatus &, MCInst&) const;
+
+  llvm::support::endianness InstructionEndianness;
 };
 
 } // end anonymous namespace
@@ -765,7 +770,8 @@ uint64_t ARMDisassembler::suggestBytesToSkip(ArrayRef<uint8_t> Bytes,
   if (Bytes.size() < 2)
     return 2;
 
-  uint16_t Insn16 = (Bytes[1] << 8) | Bytes[0];
+  uint16_t Insn16 = llvm::support::endian::read<uint16_t>(
+      Bytes.data(), InstructionEndianness);
   return Insn16 < 0xE800 ? 2 : 4;
 }
 
@@ -794,9 +800,9 @@ DecodeStatus ARMDisassembler::getARMInstruction(MCInst &MI, uint64_t &Size,
     return MCDisassembler::Fail;
   }
 
-  // Encoded as a small-endian 32-bit word in the stream.
-  uint32_t Insn =
-      (Bytes[3] << 24) | (Bytes[2] << 16) | (Bytes[1] << 8) | (Bytes[0] << 0);
+  // Encoded as a 32-bit word in the stream.
+  uint32_t Insn = llvm::support::endian::read<uint32_t>(Bytes.data(),
+                                                        InstructionEndianness);
 
   // Calling the auto-generated decoder function.
   DecodeStatus Result =
@@ -1084,7 +1090,8 @@ DecodeStatus ARMDisassembler::getThumbInstruction(MCInst &MI, uint64_t &Size,
     return MCDisassembler::Fail;
   }
 
-  uint16_t Insn16 = (Bytes[1] << 8) | Bytes[0];
+  uint16_t Insn16 = llvm::support::endian::read<uint16_t>(
+      Bytes.data(), InstructionEndianness);
   DecodeStatus Result =
       decodeInstruction(DecoderTableThumb16, MI, Insn16, Address, this, STI);
   if (Result != MCDisassembler::Fail) {
@@ -1138,7 +1145,8 @@ DecodeStatus ARMDisassembler::getThumbInstruction(MCInst &MI, uint64_t &Size,
   }
 
   uint32_t Insn32 =
-      (Bytes[3] << 8) | (Bytes[2] << 0) | (Bytes[1] << 24) | (Bytes[0] << 16);
+      (uint32_t(Insn16) << 16) | llvm::support::endian::read<uint16_t>(
+                                     Bytes.data() + 2, InstructionEndianness);
 
   Result =
       decodeInstruction(DecoderTableMVE32, MI, Insn32, Address, this, STI);
author	Simon Tatham <simon.tatham@arm.com>	2022-08-01 13:40:32 +0100
committer	Simon Tatham <simon.tatham@arm.com>	2022-08-08 10:49:51 +0100
commit	72017e9b16b737c5bd7c1dd33abff36f368fa724 (patch)
tree	493cbdf0631efa94878df0f5310c8baaa7c8f9fa /llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
parent	1eee6de873974f55538df976bf7802f019eac70a (diff)
download	llvm-72017e9b16b737c5bd7c1dd33abff36f368fa724.zip llvm-72017e9b16b737c5bd7c1dd33abff36f368fa724.tar.gz llvm-72017e9b16b737c5bd7c1dd33abff36f368fa724.tar.bz2