aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMatt Arsenault <Matthew.Arsenault@amd.com>2017-12-13 21:07:51 +0000
committerMatt Arsenault <Matthew.Arsenault@amd.com>2017-12-13 21:07:51 +0000
commitcad7fa857cf6d9949f25dcc5bc1f313f7ec2b0b6 (patch)
treef908e4b0cdd17b3597b031b8b96e823f454695f1
parenta9f77c6df755841ba1ae2b5c56d0ca4d53907433 (diff)
downloadllvm-cad7fa857cf6d9949f25dcc5bc1f313f7ec2b0b6.zip
llvm-cad7fa857cf6d9949f25dcc5bc1f313f7ec2b0b6.tar.gz
llvm-cad7fa857cf6d9949f25dcc5bc1f313f7ec2b0b6.tar.bz2
AMDGPU: Partially fix disassembly of MIMG instructions
Stores failed to decode at all since they didn't have a DecoderNamespace set. Loads worked, but did not change the register width displayed to match the numbmer of enabled channels. The number of printed registers for vaddr is still wrong, but I don't think that's encoded in the instruction so there's not much we can do about that. Image atomics are still broken. MIMG is the same encoding for SI/VI, but the image atomic classes are split up into encoding specific versions unlike every other MIMG instruction. They have isAsmParserOnly set on them for some reason. dmask is also special for these, so we probably should not have it as an explicit operand as it is now. llvm-svn: 320614
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp65
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h5
-rw-r--r--llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp42
-rw-r--r--llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h12
-rw-r--r--llvm/lib/Target/AMDGPU/MIMGInstructions.td9
-rw-r--r--llvm/lib/Target/AMDGPU/SIISelLowering.cpp3
-rw-r--r--llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp64
-rw-r--r--llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h6
-rw-r--r--llvm/test/MC/Disassembler/AMDGPU/mimg_vi.txt39
9 files changed, 167 insertions, 78 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
index 4944786..8156599 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
@@ -23,7 +23,6 @@
using namespace llvm;
#define GET_INSTRINFO_CTOR_DTOR
-#define GET_INSTRMAP_INFO
#include "AMDGPUGenInstrInfo.inc"
// Pin the vtable to this file.
@@ -56,59 +55,6 @@ bool AMDGPUInstrInfo::shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1,
return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
}
-static AMDGPU::Channels indexToChannel(unsigned Channel) {
- switch (Channel) {
- case 1:
- return AMDGPU::Channels_1;
- case 2:
- return AMDGPU::Channels_2;
- case 3:
- return AMDGPU::Channels_3;
- case 4:
- return AMDGPU::Channels_4;
- default:
- llvm_unreachable("invalid MIMG channel");
- }
-}
-
-// FIXME: Need to handle d16 images correctly.
-static unsigned rcToChannels(unsigned RCID) {
- switch (RCID) {
- case AMDGPU::VGPR_32RegClassID:
- return 1;
- case AMDGPU::VReg_64RegClassID:
- return 2;
- case AMDGPU::VReg_96RegClassID:
- return 3;
- case AMDGPU::VReg_128RegClassID:
- return 4;
- default:
- llvm_unreachable("invalid MIMG register class");
- }
-}
-
-int AMDGPUInstrInfo::getMaskedMIMGOp(unsigned Opc,
- unsigned NewChannels) const {
- AMDGPU::Channels Channel = indexToChannel(NewChannels);
- unsigned OrigChannels = rcToChannels(get(Opc).OpInfo[0].RegClass);
- if (NewChannels == OrigChannels)
- return Opc;
-
- switch (OrigChannels) {
- case 1:
- return AMDGPU::getMaskedMIMGOp1(Opc, Channel);
- case 2:
- return AMDGPU::getMaskedMIMGOp2(Opc, Channel);
- case 3:
- return AMDGPU::getMaskedMIMGOp3(Opc, Channel);
- case 4:
- return AMDGPU::getMaskedMIMGOp4(Opc, Channel);
- default:
- llvm_unreachable("invalid MIMG channel");
- }
-}
-
-
// This must be kept in sync with the SIEncodingFamily class in SIInstrInfo.td
enum SIEncodingFamily {
SI = 0,
@@ -118,17 +64,6 @@ enum SIEncodingFamily {
GFX9 = 4
};
-// Wrapper for Tablegen'd function. enum Subtarget is not defined in any
-// header files, so we need to wrap it in a function that takes unsigned
-// instead.
-namespace llvm {
-namespace AMDGPU {
-static int getMCOpcode(uint16_t Opcode, unsigned Gen) {
- return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
-}
-}
-}
-
static SIEncodingFamily subtargetEncodingFamily(const AMDGPUSubtarget &ST) {
switch (ST.getGeneration()) {
case AMDGPUSubtarget::SOUTHERN_ISLANDS:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
index 4ab0515..a9fcd48 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
@@ -22,6 +22,7 @@
#define GET_INSTRINFO_HEADER
#include "AMDGPUGenInstrInfo.inc"
+#undef GET_INSTRINFO_HEADER
namespace llvm {
@@ -49,10 +50,6 @@ public:
/// Return -1 if the target-specific opcode for the pseudo instruction does
/// not exist. If Opcode is not a pseudo instruction, this is identity.
int pseudoToMCOpcode(int Opcode) const;
-
- /// \brief Given a MIMG \p MI that writes any number of channels, return the
- /// equivalent opcode that writes \p NewChannels Channels.
- int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) const;
};
} // End llvm namespace
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index a33670c..4a3f2c9 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -234,6 +234,10 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
AMDGPU::OpName::src2_modifiers);
}
+ if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::MIMG)) {
+ Res = convertMIMGInst(MI);
+ }
+
if (Res && IsSDWA)
Res = convertSDWAInst(MI);
@@ -260,6 +264,42 @@ DecodeStatus AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const {
return MCDisassembler::Success;
}
+DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
+ int VDataIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
+ AMDGPU::OpName::vdata);
+
+ int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
+ AMDGPU::OpName::dmask);
+ unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf;
+ if (DMask == 0)
+ return MCDisassembler::Success;
+
+ unsigned ChannelCount = countPopulation(DMask);
+ if (ChannelCount == 1)
+ return MCDisassembler::Success;
+
+ int NewOpcode = AMDGPU::getMaskedMIMGOp(*MCII, MI.getOpcode(), ChannelCount);
+ assert(NewOpcode != -1 && "could not find matching mimg channel instruction");
+ auto RCID = MCII->get(NewOpcode).OpInfo[VDataIdx].RegClass;
+
+ // Widen the register to the correct number of enabled channels.
+ unsigned Vdata0 = MI.getOperand(VDataIdx).getReg();
+ auto NewVdata = MRI.getMatchingSuperReg(Vdata0, AMDGPU::sub0,
+ &MRI.getRegClass(RCID));
+ if (NewVdata == AMDGPU::NoRegister) {
+ // It's possible to encode this such that the low register + enabled
+ // components exceeds the register count.
+ return MCDisassembler::Success;
+ }
+
+ MI.setOpcode(NewOpcode);
+ // vaddr will be always appear as a single VGPR. This will look different than
+ // how it is usually emitted because the number of register components is not
+ // in the instruction encoding.
+ MI.getOperand(VDataIdx) = MCOperand::createReg(NewVdata);
+ return MCDisassembler::Success;
+}
+
const char* AMDGPUDisassembler::getRegClassName(unsigned RegClassID) const {
return getContext().getRegisterInfo()->
getRegClassName(&AMDGPUMCRegisterClasses[RegClassID]);
@@ -786,7 +826,7 @@ static MCSymbolizer *createAMDGPUSymbolizer(const Triple &/*TT*/,
static MCDisassembler *createAMDGPUDisassembler(const Target &T,
const MCSubtargetInfo &STI,
MCContext &Ctx) {
- return new AMDGPUDisassembler(STI, Ctx);
+ return new AMDGPUDisassembler(STI, Ctx, T.createMCInstrInfo());
}
extern "C" void LLVMInitializeAMDGPUDisassembler() {
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
index 18a91356..ce396eb 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
@@ -17,16 +17,18 @@
#define LLVM_LIB_TARGET_AMDGPU_DISASSEMBLER_AMDGPUDISASSEMBLER_H
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCDisassembler/MCRelocationInfo.h"
#include "llvm/MC/MCDisassembler/MCSymbolizer.h"
+
#include <algorithm>
#include <cstdint>
#include <memory>
namespace llvm {
-class MCContext;
class MCInst;
class MCOperand;
class MCSubtargetInfo;
@@ -38,13 +40,16 @@ class Twine;
class AMDGPUDisassembler : public MCDisassembler {
private:
+ std::unique_ptr<MCInstrInfo const> const MCII;
+ const MCRegisterInfo &MRI;
mutable ArrayRef<uint8_t> Bytes;
mutable uint32_t Literal;
mutable bool HasLiteral;
public:
- AMDGPUDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) :
- MCDisassembler(STI, Ctx) {}
+ AMDGPUDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
+ MCInstrInfo const *MCII) :
+ MCDisassembler(STI, Ctx), MCII(MCII), MRI(*Ctx.getRegisterInfo()) {}
~AMDGPUDisassembler() override = default;
@@ -64,6 +69,7 @@ public:
uint64_t Address) const;
DecodeStatus convertSDWAInst(MCInst &MI) const;
+ DecodeStatus convertMIMGInst(MCInst &MI) const;
MCOperand decodeOperand_VGPR_32(unsigned Val) const;
MCOperand decodeOperand_VS_32(unsigned Val) const;
diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
index 9622bcc..30a2df5 100644
--- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
@@ -63,13 +63,13 @@ multiclass MIMG_NoSampler <bits<7> op, string asm> {
class MIMG_Store_Helper <bits<7> op, string asm,
RegisterClass data_rc,
- RegisterClass addr_rc> : MIMG_Helper <
+ RegisterClass addr_rc,
+ string dns = ""> : MIMG_Helper <
(outs),
(ins data_rc:$vdata, addr_rc:$vaddr, SReg_256:$srsrc,
dmask:$dmask, unorm:$unorm, GLC:$glc, slc:$slc,
r128:$r128, tfe:$tfe, lwe:$lwe, da:$da),
- asm#" $vdata, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da"
- >, MIMGe<op> {
+ asm#" $vdata, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da", dns>, MIMGe<op> {
let ssamp = 0;
let mayLoad = 1; // TableGen requires this for matching with the intrinsics
let mayStore = 1;
@@ -81,7 +81,8 @@ class MIMG_Store_Helper <bits<7> op, string asm,
multiclass MIMG_Store_Addr_Helper <bits<7> op, string asm,
RegisterClass data_rc,
int channels> {
- def _V1 : MIMG_Store_Helper <op, asm, data_rc, VGPR_32>,
+ def _V1 : MIMG_Store_Helper <op, asm, data_rc, VGPR_32,
+ !if(!eq(channels, 1), "AMDGPU", "")>,
MIMG_Mask<asm#"_V1", channels>;
def _V2 : MIMG_Store_Helper <op, asm, data_rc, VReg_64>,
MIMG_Mask<asm#"_V2", channels>;
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 9208762..d026210 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -6652,7 +6652,8 @@ SDNode *SITargetLowering::adjustWritemask(MachineSDNode *&Node,
unsigned BitsSet = countPopulation(NewDmask);
const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
- int NewOpcode = TII->getMaskedMIMGOp(Node->getMachineOpcode(), BitsSet);
+ int NewOpcode = AMDGPU::getMaskedMIMGOp(*TII,
+ Node->getMachineOpcode(), BitsSet);
assert(NewOpcode != -1 &&
NewOpcode != static_cast<int>(Node->getMachineOpcode()) &&
"failed to find equivalent MIMG op");
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 5a59e04..819a7ad 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -23,6 +23,7 @@
#include "llvm/IR/Module.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSubtargetInfo.h"
@@ -39,7 +40,9 @@
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#define GET_INSTRINFO_NAMED_OPS
+#define GET_INSTRMAP_INFO
#include "AMDGPUGenInstrInfo.inc"
+#undef GET_INSTRMAP_INFO
#undef GET_INSTRINFO_NAMED_OPS
namespace {
@@ -100,6 +103,66 @@ static cl::opt<bool> EnablePackedInlinableLiterals(
namespace AMDGPU {
+LLVM_READNONE
+static inline Channels indexToChannel(unsigned Channel) {
+ switch (Channel) {
+ case 1:
+ return AMDGPU::Channels_1;
+ case 2:
+ return AMDGPU::Channels_2;
+ case 3:
+ return AMDGPU::Channels_3;
+ case 4:
+ return AMDGPU::Channels_4;
+ default:
+ llvm_unreachable("invalid MIMG channel");
+ }
+}
+
+
+// FIXME: Need to handle d16 images correctly.
+static unsigned rcToChannels(unsigned RCID) {
+ switch (RCID) {
+ case AMDGPU::VGPR_32RegClassID:
+ return 1;
+ case AMDGPU::VReg_64RegClassID:
+ return 2;
+ case AMDGPU::VReg_96RegClassID:
+ return 3;
+ case AMDGPU::VReg_128RegClassID:
+ return 4;
+ default:
+ llvm_unreachable("invalid MIMG register class");
+ }
+}
+
+int getMaskedMIMGOp(const MCInstrInfo &MII, unsigned Opc, unsigned NewChannels) {
+ AMDGPU::Channels Channel = AMDGPU::indexToChannel(NewChannels);
+ unsigned OrigChannels = rcToChannels(MII.get(Opc).OpInfo[0].RegClass);
+ if (NewChannels == OrigChannels)
+ return Opc;
+
+ switch (OrigChannels) {
+ case 1:
+ return AMDGPU::getMaskedMIMGOp1(Opc, Channel);
+ case 2:
+ return AMDGPU::getMaskedMIMGOp2(Opc, Channel);
+ case 3:
+ return AMDGPU::getMaskedMIMGOp3(Opc, Channel);
+ case 4:
+ return AMDGPU::getMaskedMIMGOp4(Opc, Channel);
+ default:
+ llvm_unreachable("invalid MIMG channel");
+ }
+}
+
+// Wrapper for Tablegen'd function. enum Subtarget is not defined in any
+// header files, so we need to wrap it in a function that takes unsigned
+// instead.
+int getMCOpcode(uint16_t Opcode, unsigned Gen) {
+ return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
+}
+
namespace IsaInfo {
IsaVersion getIsaVersion(const FeatureBitset &Features) {
@@ -833,6 +896,7 @@ bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
return isGCN3Encoding(ST) ?
isUInt<20>(EncodedOffset) : isUInt<8>(EncodedOffset);
}
+
} // end namespace AMDGPU
} // end namespace llvm
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index c4b7779..a215b44 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -156,6 +156,12 @@ unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU);
LLVM_READONLY
int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx);
+LLVM_READONLY
+int getMaskedMIMGOp(const MCInstrInfo &MII,
+ unsigned Opc, unsigned NewChannels);
+LLVM_READONLY
+int getMCOpcode(uint16_t Opcode, unsigned Gen);
+
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
const FeatureBitset &Features);
diff --git a/llvm/test/MC/Disassembler/AMDGPU/mimg_vi.txt b/llvm/test/MC/Disassembler/AMDGPU/mimg_vi.txt
new file mode 100644
index 0000000..5126968
--- /dev/null
+++ b/llvm/test/MC/Disassembler/AMDGPU/mimg_vi.txt
@@ -0,0 +1,39 @@
+# RUN: llvm-mc -arch=amdgcn -mcpu=tonga -disassemble -show-encoding < %s | FileCheck -check-prefix=VI %s
+
+# VI: image_load v[0:3], v4, s[8:15] dmask:0xf unorm ; encoding: [0x00,0x1f,0x00,0xf0,0x04,0x00,0x02,0x00]
+0x00 0x1f 0x00 0xf0 0x04 0x00 0x02 0x00
+
+# VI: image_load v[0:2], v4, s[8:15] dmask:0xe unorm ; encoding: [0x00,0x1e,0x00,0xf0,0x04,0x00,0x02,0x00]
+0x00 0x1e 0x00 0xf0 0x04 0x00 0x02 0x00
+
+# VI: image_load v[0:1], v0, s[0:7] dmask:0x3 unorm ; encoding: [0x00,0x13,0x00,0xf0,0x00,0x00,0x00,0x00]
+0x00 0x13 0x00 0xf0 0x00 0x00 0x00 0x00
+
+# VI: image_load v0, v0, s[0:7] dmask:0x1 unorm ; encoding: [0x00,0x11,0x00,0xf0,0x00,0x00,0x00,0x00]
+0x00 0x11 0x00 0xf0 0x00 0x00 0x00 0x00
+
+# VI: image_store v[0:3], v4, s[0:7] dmask:0xf unorm ; encoding: [0x00,0x1f,0x20,0xf0,0x04,0x00,0x00,0x00]
+0x00 0x1f 0x20 0xf0 0x04 0x00 0x00 0x00
+
+# VI: image_store v[0:2], v4, s[0:7] dmask:0xe unorm ; encoding: [0x00,0x1e,0x20,0xf0,0x04,0x00,0x00,0x00]
+0x00 0x1e 0x20 0xf0 0x04 0x00 0x00 0x00
+
+# VI: image_store v[0:1], v2, s[0:7] dmask:0x3 unorm ; encoding: [0x00,0x13,0x20,0xf0,0x02,0x00,0x00,0x00]
+0x00 0x13 0x20 0xf0 0x02 0x00 0x00 0x00
+
+# VI: image_store v0, v1, s[0:7] dmask:0x1 unorm ; encoding: [0x00,0x11,0x20,0xf0,0x01,0x00,0x00,0x00]
+0x00 0x11 0x20 0xf0 0x01 0x00 0x00 0x00
+
+# Test dmask == 0
+# VI: image_load v0, v4, s[8:15] unorm ; encoding: [0x00,0x10,0x00,0xf0,0x04,0x00,0x02,0x00]
+0x00 0x10 0x00 0xf0 0x04 0x00 0x02 0x00
+
+# Test out of bounds register width
+# VI: image_load v254, v0, s[0:7] dmask:0x7 unorm ; encoding: [0x00,0x17,0x00,0xf0,0x00,0xfe,0x00,0x00]
+0x00 0x17 0x00 0xf0 0x00 0xfe 0x00 0x00
+
+# VI: image_load v255, v0, s[0:7] dmask:0x1 unorm ; encoding: [0x00,0x11,0x00,0xf0,0x00,0xff,0x00,0x00]
+0x00 0x11 0x00 0xf0 0x00 0xff 0x00 0x00
+
+# VI: image_load v255, v0, s[0:7] dmask:0x3 unorm ; encoding: [0x00,0x13,0x00,0xf0,0x00,0xff,0x00,0x00]
+0x00 0x13 0x00 0xf0 0x00 0xff 0x00 0x00