aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorFangrui Song <i@maskray.me>2024-12-16 21:05:08 -0800
committerGitHub <noreply@github.com>2024-12-16 21:05:08 -0800
commitc6ff809ae9acbc90455dc8b58b2dae84a13366cf (patch)
tree61f7a2c7b5ce534298ad01e296a75711566d47af
parente2a94a97bdf26198ab254d61ee4be23a140dab2d (diff)
downloadllvm-c6ff809ae9acbc90455dc8b58b2dae84a13366cf.zip
llvm-c6ff809ae9acbc90455dc8b58b2dae84a13366cf.tar.gz
llvm-c6ff809ae9acbc90455dc8b58b2dae84a13366cf.tar.bz2
[llvm-mc] Add --hex to disassemble hex bytes
`--disassemble`/`--cdis` parses input bytes as decimal, 0bbin, 0ooct, or 0xhex. While the hexadecimal digit form is most commonly used, requiring a 0x prefix for each byte (`0x48 0x29 0xc3`) is cumbersome. Tools like xxd -p and rz-asm use a plain hex dump form without the 0x prefix or space separator.

This patch adds --hex to disassemble such hex bytes with optional whitespace.

```
% rz-asm -a x86 -b 64 -d 4829c34829c4
sub rbx, rax
sub rsp, rax
% llvm-mc -triple=x86_64 --cdis --hex --output-asm-variant=1 <<< 4829c34829c4
        .text
        sub     rbx, rax
        sub     rsp, rax
```

Pull Request: https://github.com/llvm/llvm-project/pull/119992
-rw-r--r--llvm/docs/CommandGuide/llvm-mc.rst4
-rw-r--r--llvm/test/MC/Disassembler/X86/hex-bytes.txt62
-rw-r--r--llvm/tools/llvm-mc/Disassembler.cpp33
-rw-r--r--llvm/tools/llvm-mc/Disassembler.h2
-rw-r--r--llvm/tools/llvm-mc/llvm-mc.cpp8
5 files changed, 98 insertions, 11 deletions
diff --git a/llvm/docs/CommandGuide/llvm-mc.rst b/llvm/docs/CommandGuide/llvm-mc.rst
index c5d2f93..8d6346f 100644
--- a/llvm/docs/CommandGuide/llvm-mc.rst
+++ b/llvm/docs/CommandGuide/llvm-mc.rst
@@ -92,6 +92,10 @@ End-user Options
Generate DWARF debugging info for assembly source files.
+.. option:: --hex
+
+ Take raw hexadecimal bytes as input for disassembly. Whitespace is ignored.
+
.. option:: --large-code-model
Create CFI directives that assume the code might be more than 2 GB.
diff --git a/llvm/test/MC/Disassembler/X86/hex-bytes.txt b/llvm/test/MC/Disassembler/X86/hex-bytes.txt
new file mode 100644
index 0000000..7d94ca9
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/hex-bytes.txt
@@ -0,0 +1,62 @@
+# RUN: rm -rf %t && split-file %s %t && cd %t
+# RUN: llvm-mc -triple=x86_64 --disassemble --hex a.s | FileCheck %s
+# RUN: llvm-mc -triple=x86_64 --disassemble --hex decode1.s 2>&1 | FileCheck %s --check-prefix=DECODE1 --implicit-check-not=warning:
+# RUN: not llvm-mc -triple=x86_64 --disassemble --hex decode2.s 2>&1 | FileCheck %s --check-prefix=DECODE2 --implicit-check-not=warning:
+# RUN: not llvm-mc -triple=x86_64 --disassemble --hex err1.s 2>&1 | FileCheck %s --check-prefix=ERR1 --implicit-check-not=error:
+# RUN: not llvm-mc -triple=x86_64 --disassemble --hex err2.s 2>&1 | FileCheck %s --check-prefix=ERR2 --implicit-check-not=error:
+
+#--- a.s
+4883ec08 31 # comment
+# comment
+ ed4829 c390
+[c3c3][4829c3]
+[90]
+
+# CHECK: subq $8, %rsp
+# CHECK-NEXT: xorl %ebp, %ebp
+# CHECK-NEXT: subq %rax, %rbx
+# CHECK-NEXT: nop
+# CHECK-NEXT: retq
+# CHECK-NEXT: retq
+# CHECK-NEXT: subq %rax, %rbx
+# CHECK-NEXT: nop
+# CHECK-EMPTY:
+
+#--- decode1.s
+4889
+
+# DECODE1: 1:1: warning: invalid instruction encoding
+
+#--- decode2.s
+[4889][4889] [4889]4889c3
+ [4889]
+
+# DECODE2: 1:2: warning: invalid instruction encoding
+# DECODE2: 1:8: warning: invalid instruction encoding
+# DECODE2: 1:15: warning: invalid instruction encoding
+# DECODE2: 2:3: warning: invalid instruction encoding
+
+#--- err1.s
+0x31ed
+0xcc
+g0
+
+# ERR1: 1:1: error: invalid input token
+# ERR1: 2:1: error: invalid input token
+# ERR1: 3:1: error: invalid input token
+# ERR1: xorl %ebp, %ebp
+# ERR1-NEXT: int3
+# ERR1-EMPTY:
+
+#--- err2.s
+g
+90c
+cc
+c
+
+# ERR2: 1:1: error: expected two hex digits
+# ERR2: 2:3: error: expected two hex digits
+# ERR2: 4:1: error: expected two hex digits
+# ERR2: nop
+# ERR2-NEXT: int3
+# ERR2-EMPTY:
diff --git a/llvm/tools/llvm-mc/Disassembler.cpp b/llvm/tools/llvm-mc/Disassembler.cpp
index a588058..30577fe 100644
--- a/llvm/tools/llvm-mc/Disassembler.cpp
+++ b/llvm/tools/llvm-mc/Disassembler.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "Disassembler.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
@@ -94,10 +95,8 @@ static bool SkipToToken(StringRef &Str) {
}
}
-
-static bool ByteArrayFromString(ByteArrayTy &ByteArray,
- StringRef &Str,
- SourceMgr &SM) {
+static bool byteArrayFromString(ByteArrayTy &ByteArray, StringRef &Str,
+ SourceMgr &SM, bool HexBytes) {
while (SkipToToken(Str)) {
// Handled by higher level
if (Str[0] == '[' || Str[0] == ']')
@@ -109,7 +108,24 @@ static bool ByteArrayFromString(ByteArrayTy &ByteArray,
// Convert to a byte and add to the byte vector.
unsigned ByteVal;
- if (Value.getAsInteger(0, ByteVal) || ByteVal > 255) {
+ if (HexBytes) {
+ if (Next < 2) {
+ SM.PrintMessage(SMLoc::getFromPointer(Value.data()),
+ SourceMgr::DK_Error, "expected two hex digits");
+ Str = Str.substr(Next);
+ return true;
+ }
+ Next = 2;
+ unsigned C0 = hexDigitValue(Value[0]);
+ unsigned C1 = hexDigitValue(Value[1]);
+ if (C0 == -1u || C1 == -1u) {
+ SM.PrintMessage(SMLoc::getFromPointer(Value.data()),
+ SourceMgr::DK_Error, "invalid input token");
+ Str = Str.substr(Next);
+ return true;
+ }
+ ByteVal = C0 * 16 + C1;
+ } else if (Value.getAsInteger(0, ByteVal) || ByteVal > 255) {
// If we have an error, print it and skip to the end of line.
SM.PrintMessage(SMLoc::getFromPointer(Value.data()), SourceMgr::DK_Error,
"invalid input token");
@@ -130,9 +146,8 @@ static bool ByteArrayFromString(ByteArrayTy &ByteArray,
int Disassembler::disassemble(const Target &T, const std::string &Triple,
MCSubtargetInfo &STI, MCStreamer &Streamer,
MemoryBuffer &Buffer, SourceMgr &SM,
- MCContext &Ctx,
- const MCTargetOptions &MCOptions) {
-
+ MCContext &Ctx, const MCTargetOptions &MCOptions,
+ bool HexBytes) {
std::unique_ptr<const MCRegisterInfo> MRI(T.createMCRegInfo(Triple));
if (!MRI) {
errs() << "error: no register info for target " << Triple << "\n";
@@ -188,7 +203,7 @@ int Disassembler::disassemble(const Target &T, const std::string &Triple,
}
// It's a real token, get the bytes and emit them
- ErrorOccurred |= ByteArrayFromString(ByteArray, Str, SM);
+ ErrorOccurred |= byteArrayFromString(ByteArray, Str, SM, HexBytes);
if (!ByteArray.first.empty())
ErrorOccurred |=
diff --git a/llvm/tools/llvm-mc/Disassembler.h b/llvm/tools/llvm-mc/Disassembler.h
index d0226ab..5efffca 100644
--- a/llvm/tools/llvm-mc/Disassembler.h
+++ b/llvm/tools/llvm-mc/Disassembler.h
@@ -32,7 +32,7 @@ public:
static int disassemble(const Target &T, const std::string &Triple,
MCSubtargetInfo &STI, MCStreamer &Streamer,
MemoryBuffer &Buffer, SourceMgr &SM, MCContext &Ctx,
- const MCTargetOptions &MCOptions);
+ const MCTargetOptions &MCOptions, bool HexBytes);
};
} // namespace llvm
diff --git a/llvm/tools/llvm-mc/llvm-mc.cpp b/llvm/tools/llvm-mc/llvm-mc.cpp
index 898d79b..fd93d7e 100644
--- a/llvm/tools/llvm-mc/llvm-mc.cpp
+++ b/llvm/tools/llvm-mc/llvm-mc.cpp
@@ -94,6 +94,12 @@ static cl::opt<bool>
cl::desc("Prefer hex format for immediate values"),
cl::cat(MCCategory));
+static cl::opt<bool>
+ HexBytes("hex",
+ cl::desc("Take raw hexadecimal bytes as input for disassembly. "
+ "Whitespace is ignored"),
+ cl::cat(MCCategory));
+
static cl::list<std::string>
DefineSymbol("defsym",
cl::desc("Defines a symbol to be an integer constant"),
@@ -592,7 +598,7 @@ int main(int argc, char **argv) {
}
if (disassemble)
Res = Disassembler::disassemble(*TheTarget, TripleName, *STI, *Str, *Buffer,
- SrcMgr, Ctx, MCOptions);
+ SrcMgr, Ctx, MCOptions, HexBytes);
// Keep output if no errors.
if (Res == 0) {