diff options
author | Fangrui Song <i@maskray.me> | 2024-12-16 21:05:08 -0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-12-16 21:05:08 -0800 |
commit | c6ff809ae9acbc90455dc8b58b2dae84a13366cf (patch) | |
tree | 61f7a2c7b5ce534298ad01e296a75711566d47af | |
parent | e2a94a97bdf26198ab254d61ee4be23a140dab2d (diff) | |
download | llvm-c6ff809ae9acbc90455dc8b58b2dae84a13366cf.zip llvm-c6ff809ae9acbc90455dc8b58b2dae84a13366cf.tar.gz llvm-c6ff809ae9acbc90455dc8b58b2dae84a13366cf.tar.bz2 |
[llvm-mc] Add --hex to disassemble hex bytes
`--disassemble`/`--cdis` parse each input byte as a decimal, binary (`0b`
prefix), octal (`0o` prefix), or hexadecimal (`0x` prefix) integer. While the
hexadecimal form is the most commonly used, requiring a 0x prefix for each
byte (`0x48 0x29 0xc3`) is cumbersome.
Tools like xxd -p and rz-asm use a plain hex dump form without the 0x
prefix or space separator. This patch adds --hex to disassemble such hex
bytes with optional whitespace.
```
% rz-asm -a x86 -b 64 -d 4829c34829c4
sub rbx, rax
sub rsp, rax
% llvm-mc -triple=x86_64 --cdis --hex --output-asm-variant=1 <<< 4829c34829c4
.text
sub rbx, rax
sub rsp, rax
```
Pull Request: https://github.com/llvm/llvm-project/pull/119992
-rw-r--r-- | llvm/docs/CommandGuide/llvm-mc.rst | 4 | ||||
-rw-r--r-- | llvm/test/MC/Disassembler/X86/hex-bytes.txt | 62 | ||||
-rw-r--r-- | llvm/tools/llvm-mc/Disassembler.cpp | 33 | ||||
-rw-r--r-- | llvm/tools/llvm-mc/Disassembler.h | 2 | ||||
-rw-r--r-- | llvm/tools/llvm-mc/llvm-mc.cpp | 8 |
5 files changed, 98 insertions, 11 deletions
diff --git a/llvm/docs/CommandGuide/llvm-mc.rst b/llvm/docs/CommandGuide/llvm-mc.rst index c5d2f93..8d6346f 100644 --- a/llvm/docs/CommandGuide/llvm-mc.rst +++ b/llvm/docs/CommandGuide/llvm-mc.rst @@ -92,6 +92,10 @@ End-user Options Generate DWARF debugging info for assembly source files. +.. option:: --hex + + Take raw hexadecimal bytes as input for disassembly. Whitespace is ignored. + .. option:: --large-code-model Create CFI directives that assume the code might be more than 2 GB. diff --git a/llvm/test/MC/Disassembler/X86/hex-bytes.txt b/llvm/test/MC/Disassembler/X86/hex-bytes.txt new file mode 100644 index 0000000..7d94ca9 --- /dev/null +++ b/llvm/test/MC/Disassembler/X86/hex-bytes.txt @@ -0,0 +1,62 @@ +# RUN: rm -rf %t && split-file %s %t && cd %t +# RUN: llvm-mc -triple=x86_64 --disassemble --hex a.s | FileCheck %s +# RUN: llvm-mc -triple=x86_64 --disassemble --hex decode1.s 2>&1 | FileCheck %s --check-prefix=DECODE1 --implicit-check-not=warning: +# RUN: not llvm-mc -triple=x86_64 --disassemble --hex decode2.s 2>&1 | FileCheck %s --check-prefix=DECODE2 --implicit-check-not=warning: +# RUN: not llvm-mc -triple=x86_64 --disassemble --hex err1.s 2>&1 | FileCheck %s --check-prefix=ERR1 --implicit-check-not=error: +# RUN: not llvm-mc -triple=x86_64 --disassemble --hex err2.s 2>&1 | FileCheck %s --check-prefix=ERR2 --implicit-check-not=error: + +#--- a.s +4883ec08 31 # comment +# comment + ed4829 c390 +[c3c3][4829c3] +[90] + +# CHECK: subq $8, %rsp +# CHECK-NEXT: xorl %ebp, %ebp +# CHECK-NEXT: subq %rax, %rbx +# CHECK-NEXT: nop +# CHECK-NEXT: retq +# CHECK-NEXT: retq +# CHECK-NEXT: subq %rax, %rbx +# CHECK-NEXT: nop +# CHECK-EMPTY: + +#--- decode1.s +4889 + +# DECODE1: 1:1: warning: invalid instruction encoding + +#--- decode2.s +[4889][4889] [4889]4889c3 + [4889] + +# DECODE2: 1:2: warning: invalid instruction encoding +# DECODE2: 1:8: warning: invalid instruction encoding +# DECODE2: 1:15: warning: invalid instruction encoding +# DECODE2: 2:3: warning: invalid 
instruction encoding + +#--- err1.s +0x31ed +0xcc +g0 + +# ERR1: 1:1: error: invalid input token +# ERR1: 2:1: error: invalid input token +# ERR1: 3:1: error: invalid input token +# ERR1: xorl %ebp, %ebp +# ERR1-NEXT: int3 +# ERR1-EMPTY: + +#--- err2.s +g +90c +cc +c + +# ERR2: 1:1: error: expected two hex digits +# ERR2: 2:3: error: expected two hex digits +# ERR2: 4:1: error: expected two hex digits +# ERR2: nop +# ERR2-NEXT: int3 +# ERR2-EMPTY: diff --git a/llvm/tools/llvm-mc/Disassembler.cpp b/llvm/tools/llvm-mc/Disassembler.cpp index a588058..30577fe 100644 --- a/llvm/tools/llvm-mc/Disassembler.cpp +++ b/llvm/tools/llvm-mc/Disassembler.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "Disassembler.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDisassembler/MCDisassembler.h" @@ -94,10 +95,8 @@ static bool SkipToToken(StringRef &Str) { } } - -static bool ByteArrayFromString(ByteArrayTy &ByteArray, - StringRef &Str, - SourceMgr &SM) { +static bool byteArrayFromString(ByteArrayTy &ByteArray, StringRef &Str, + SourceMgr &SM, bool HexBytes) { while (SkipToToken(Str)) { // Handled by higher level if (Str[0] == '[' || Str[0] == ']') @@ -109,7 +108,24 @@ static bool ByteArrayFromString(ByteArrayTy &ByteArray, // Convert to a byte and add to the byte vector. 
unsigned ByteVal; - if (Value.getAsInteger(0, ByteVal) || ByteVal > 255) { + if (HexBytes) { + if (Next < 2) { + SM.PrintMessage(SMLoc::getFromPointer(Value.data()), + SourceMgr::DK_Error, "expected two hex digits"); + Str = Str.substr(Next); + return true; + } + Next = 2; + unsigned C0 = hexDigitValue(Value[0]); + unsigned C1 = hexDigitValue(Value[1]); + if (C0 == -1u || C1 == -1u) { + SM.PrintMessage(SMLoc::getFromPointer(Value.data()), + SourceMgr::DK_Error, "invalid input token"); + Str = Str.substr(Next); + return true; + } + ByteVal = C0 * 16 + C1; + } else if (Value.getAsInteger(0, ByteVal) || ByteVal > 255) { // If we have an error, print it and skip to the end of line. SM.PrintMessage(SMLoc::getFromPointer(Value.data()), SourceMgr::DK_Error, "invalid input token"); @@ -130,9 +146,8 @@ static bool ByteArrayFromString(ByteArrayTy &ByteArray, int Disassembler::disassemble(const Target &T, const std::string &Triple, MCSubtargetInfo &STI, MCStreamer &Streamer, MemoryBuffer &Buffer, SourceMgr &SM, - MCContext &Ctx, - const MCTargetOptions &MCOptions) { - + MCContext &Ctx, const MCTargetOptions &MCOptions, + bool HexBytes) { std::unique_ptr<const MCRegisterInfo> MRI(T.createMCRegInfo(Triple)); if (!MRI) { errs() << "error: no register info for target " << Triple << "\n"; @@ -188,7 +203,7 @@ int Disassembler::disassemble(const Target &T, const std::string &Triple, } // It's a real token, get the bytes and emit them - ErrorOccurred |= ByteArrayFromString(ByteArray, Str, SM); + ErrorOccurred |= byteArrayFromString(ByteArray, Str, SM, HexBytes); if (!ByteArray.first.empty()) ErrorOccurred |= diff --git a/llvm/tools/llvm-mc/Disassembler.h b/llvm/tools/llvm-mc/Disassembler.h index d0226ab..5efffca 100644 --- a/llvm/tools/llvm-mc/Disassembler.h +++ b/llvm/tools/llvm-mc/Disassembler.h @@ -32,7 +32,7 @@ public: static int disassemble(const Target &T, const std::string &Triple, MCSubtargetInfo &STI, MCStreamer &Streamer, MemoryBuffer &Buffer, SourceMgr &SM, MCContext &Ctx, 
- const MCTargetOptions &MCOptions); + const MCTargetOptions &MCOptions, bool HexBytes); }; } // namespace llvm diff --git a/llvm/tools/llvm-mc/llvm-mc.cpp b/llvm/tools/llvm-mc/llvm-mc.cpp index 898d79b..fd93d7e 100644 --- a/llvm/tools/llvm-mc/llvm-mc.cpp +++ b/llvm/tools/llvm-mc/llvm-mc.cpp @@ -94,6 +94,12 @@ static cl::opt<bool> cl::desc("Prefer hex format for immediate values"), cl::cat(MCCategory)); +static cl::opt<bool> + HexBytes("hex", + cl::desc("Take raw hexadecimal bytes as input for disassembly. " + "Whitespace is ignored"), + cl::cat(MCCategory)); + static cl::list<std::string> DefineSymbol("defsym", cl::desc("Defines a symbol to be an integer constant"), @@ -592,7 +598,7 @@ int main(int argc, char **argv) { } if (disassemble) Res = Disassembler::disassemble(*TheTarget, TripleName, *STI, *Str, *Buffer, - SrcMgr, Ctx, MCOptions); + SrcMgr, Ctx, MCOptions, HexBytes); // Keep output if no errors. if (Res == 0) { |