aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp20
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h14
-rw-r--r--llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp21
-rw-r--r--llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp6
-rw-r--r--llvm/lib/Target/AMDGPU/GCNSubtarget.h2
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp125
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h6
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp19
-rw-r--r--llvm/lib/Target/AMDGPU/SIFoldOperands.cpp148
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrInfo.cpp12
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrInfo.td17
-rw-r--r--llvm/lib/Target/AMDGPU/SIRegisterInfo.td4
-rw-r--r--llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp106
-rw-r--r--llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h11
-rw-r--r--llvm/lib/Target/AMDGPU/VOP3PInstructions.td9
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/add.v2i16.ll67
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/sub.v2i16.ll4
-rw-r--r--llvm/test/CodeGen/AMDGPU/add.v2i16.ll73
-rw-r--r--llvm/test/CodeGen/AMDGPU/calling-conventions.ll4
-rw-r--r--llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll40
-rw-r--r--llvm/test/CodeGen/AMDGPU/fma.f16.ll8
-rw-r--r--llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll145
-rw-r--r--llvm/test/CodeGen/AMDGPU/fptosi.f16.ll4
-rw-r--r--llvm/test/CodeGen/AMDGPU/immv216.ll24
-rw-r--r--llvm/test/CodeGen/AMDGPU/integer-mad-patterns.ll813
-rw-r--r--llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.bf16.bf16.ll29
-rw-r--r--llvm/test/CodeGen/AMDGPU/pk_max_f16_literal.ll4
-rw-r--r--llvm/test/CodeGen/AMDGPU/reassoc-mul-add-1-to-mad.ll16
-rw-r--r--llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll1055
-rw-r--r--llvm/test/CodeGen/AMDGPU/sminmax.v2i16.ll16
-rw-r--r--llvm/test/CodeGen/AMDGPU/sub.v2i16.ll13
-rw-r--r--llvm/test/CodeGen/AMDGPU/v_sat_pk_u8_i16.ll46
-rw-r--r--llvm/test/MC/AMDGPU/gfx11_asm_vop3p.s59
-rw-r--r--llvm/test/MC/AMDGPU/gfx12_asm_vop3p.s57
-rw-r--r--llvm/test/MC/AMDGPU/literalv216.s20
-rw-r--r--llvm/test/MC/Disassembler/AMDGPU/gfx10_vop3p_literalv216.txt2
-rw-r--r--llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3p.txt56
-rw-r--r--llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3p.txt56
-rw-r--r--llvm/test/MC/Disassembler/AMDGPU/gfx9_vop3p.txt120
39 files changed, 1522 insertions, 1729 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index bffea82..48ee0d9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -317,26 +317,16 @@ void AMDGPUDAGToDAGISel::PreprocessISelDAG() {
}
}
-bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N,
- bool Negated) const {
+bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N) const {
if (N->isUndef())
return true;
const SIInstrInfo *TII = Subtarget->getInstrInfo();
- if (Negated) {
- if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
- return TII->isInlineConstant(-C->getAPIntValue());
+ if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
+ return TII->isInlineConstant(C->getAPIntValue());
- if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
- return TII->isInlineConstant(-C->getValueAPF().bitcastToAPInt());
-
- } else {
- if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
- return TII->isInlineConstant(C->getAPIntValue());
-
- if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
- return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt());
- }
+ if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
+ return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt());
return false;
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
index 374108a..df4a211 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
@@ -50,15 +50,13 @@ static inline bool getConstantValue(SDValue N, uint32_t &Out) {
}
// TODO: Handle undef as zero
-static inline SDNode *packConstantV2I16(const SDNode *N, SelectionDAG &DAG,
- bool Negate = false) {
+static inline SDNode *packConstantV2I16(const SDNode *N, SelectionDAG &DAG) {
assert(N->getOpcode() == ISD::BUILD_VECTOR && N->getNumOperands() == 2);
uint32_t LHSVal, RHSVal;
if (getConstantValue(N->getOperand(0), LHSVal) &&
getConstantValue(N->getOperand(1), RHSVal)) {
SDLoc SL(N);
- uint32_t K = Negate ? (-LHSVal & 0xffff) | (-RHSVal << 16)
- : (LHSVal & 0xffff) | (RHSVal << 16);
+ uint32_t K = (LHSVal & 0xffff) | (RHSVal << 16);
return DAG.getMachineNode(AMDGPU::S_MOV_B32, SL, N->getValueType(0),
DAG.getTargetConstant(K, SL, MVT::i32));
}
@@ -66,9 +64,6 @@ static inline SDNode *packConstantV2I16(const SDNode *N, SelectionDAG &DAG,
return nullptr;
}
-static inline SDNode *packNegConstantV2I16(const SDNode *N, SelectionDAG &DAG) {
- return packConstantV2I16(N, DAG, true);
-}
} // namespace
/// AMDGPU specific code to select AMDGPU machine instructions for
@@ -110,10 +105,7 @@ protected:
private:
std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const;
- bool isInlineImmediate(const SDNode *N, bool Negated = false) const;
- bool isNegInlineImmediate(const SDNode *N) const {
- return isInlineImmediate(N, true);
- }
+ bool isInlineImmediate(const SDNode *N) const;
bool isInlineImmediate16(int64_t Imm) const {
return AMDGPU::isInlinableLiteral16(Imm, Subtarget->hasInv2PiInlineImm());
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 5f2b7c0..b7f0438 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -1865,6 +1865,9 @@ static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
case AMDGPU::OPERAND_REG_IMM_V2FP32:
case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
case AMDGPU::OPERAND_REG_IMM_V2INT32:
+ case AMDGPU::OPERAND_REG_IMM_V2INT16:
+ case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
+ case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
case AMDGPU::OPERAND_KIMM32:
case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32:
return &APFloat::IEEEsingle();
@@ -1879,13 +1882,10 @@ static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
case AMDGPU::OPERAND_REG_INLINE_C_INT16:
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
- case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
- case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
- case AMDGPU::OPERAND_REG_IMM_V2INT16:
case AMDGPU::OPERAND_REG_IMM_V2FP16:
case AMDGPU::OPERAND_KIMM16:
return &APFloat::IEEEhalf();
@@ -2033,9 +2033,14 @@ bool AMDGPUOperand::isLiteralImm(MVT type) const {
// We allow fp literals with f16x2 operands assuming that the specified
// literal goes into the lower half and the upper half is zero. We also
// require that the literal may be losslessly converted to f16.
- MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
- (type == MVT::v2i16)? MVT::i16 :
- (type == MVT::v2f32)? MVT::f32 : type;
+ //
+ // For i16x2 operands, we assume that the specified literal is encoded as a
+ // single-precision float. This is pretty odd, but it matches SP3 and what
+ // happens in hardware.
+ MVT ExpectedType = (type == MVT::v2f16) ? MVT::f16
+ : (type == MVT::v2i16) ? MVT::f32
+ : (type == MVT::v2f32) ? MVT::f32
+ : type;
APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
@@ -3401,12 +3406,12 @@ bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
- return AMDGPU::isInlinableIntLiteralV216(Val);
+ return AMDGPU::isInlinableLiteralV2I16(Val);
if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
- return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
+ return AMDGPU::isInlinableLiteralV2F16(Val);
return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
}
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 67be7b0..9dff3f6 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -182,6 +182,9 @@ static DecodeStatus decodeSplitBarrier(MCInst &Inst, unsigned Val,
DECODE_SrcOp(decodeOperand_##RegClass##_Imm##ImmWidth, 9, OpWidth, Imm, \
false, ImmWidth)
+#define DECODE_OPERAND_SRC_REG_OR_IMM_9_TYPED(Name, OpWidth, ImmWidth) \
+ DECODE_SrcOp(decodeOperand_##Name, 9, OpWidth, Imm, false, ImmWidth)
+
// Decoder for Src(9-bit encoding) AGPR or immediate. Set Imm{9} to 1 (set acc)
// and decode using 'enum10' from decodeSrcOp.
#define DECODE_OPERAND_SRC_REG_OR_IMM_A9(RegClass, OpWidth, ImmWidth) \
@@ -262,6 +265,9 @@ DECODE_OPERAND_SRC_REG_OR_IMM_9(VReg_256, OPW256, 64)
DECODE_OPERAND_SRC_REG_OR_IMM_9(VReg_512, OPW512, 32)
DECODE_OPERAND_SRC_REG_OR_IMM_9(VReg_1024, OPW1024, 32)
+DECODE_OPERAND_SRC_REG_OR_IMM_9_TYPED(VS_32_ImmV2I16, OPW32, 32)
+DECODE_OPERAND_SRC_REG_OR_IMM_9_TYPED(VS_32_ImmV2F16, OPW32, 16)
+
DECODE_OPERAND_SRC_REG_OR_IMM_A9(AReg_64, OPW64, 64)
DECODE_OPERAND_SRC_REG_OR_IMM_A9(AReg_128, OPW128, 32)
DECODE_OPERAND_SRC_REG_OR_IMM_A9(AReg_256, OPW256, 64)
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 91a7093..b85eb76 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -1096,7 +1096,7 @@ public:
bool hasDstSelForwardingHazard() const { return GFX940Insts; }
// Cannot use op_sel with v_dot instructions.
- bool hasDOTOpSelHazard() const { return GFX940Insts; }
+ bool hasDOTOpSelHazard() const { return GFX940Insts || GFX11Insts; }
// Does not have HW interlocks for VALU writing and then reading SGPRs.
bool hasVDecCoExecHazard() const {
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
index ef1b85f..6c7977e 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -460,56 +460,84 @@ void AMDGPUInstPrinter::printImmediateInt16(uint32_t Imm,
}
}
-void AMDGPUInstPrinter::printImmediate16(uint32_t Imm,
- const MCSubtargetInfo &STI,
- raw_ostream &O) {
- int16_t SImm = static_cast<int16_t>(Imm);
- if (isInlinableIntLiteral(SImm)) {
- O << SImm;
- return;
- }
-
+// This must accept a 32-bit immediate value to correctly handle packed 16-bit
+// operations.
+static bool printImmediateFloat16(uint32_t Imm, const MCSubtargetInfo &STI,
+ raw_ostream &O) {
if (Imm == 0x3C00)
- O<< "1.0";
+ O << "1.0";
else if (Imm == 0xBC00)
- O<< "-1.0";
+ O << "-1.0";
else if (Imm == 0x3800)
- O<< "0.5";
+ O << "0.5";
else if (Imm == 0xB800)
- O<< "-0.5";
+ O << "-0.5";
else if (Imm == 0x4000)
- O<< "2.0";
+ O << "2.0";
else if (Imm == 0xC000)
- O<< "-2.0";
+ O << "-2.0";
else if (Imm == 0x4400)
- O<< "4.0";
+ O << "4.0";
else if (Imm == 0xC400)
- O<< "-4.0";
- else if (Imm == 0x3118 &&
- STI.hasFeature(AMDGPU::FeatureInv2PiInlineImm)) {
+ O << "-4.0";
+ else if (Imm == 0x3118 && STI.hasFeature(AMDGPU::FeatureInv2PiInlineImm))
O << "0.15915494";
- } else {
- uint64_t Imm16 = static_cast<uint16_t>(Imm);
- O << formatHex(Imm16);
- }
-}
+ else
+ return false;
-void AMDGPUInstPrinter::printImmediateV216(uint32_t Imm,
- const MCSubtargetInfo &STI,
- raw_ostream &O) {
- uint16_t Lo16 = static_cast<uint16_t>(Imm);
- printImmediate16(Lo16, STI, O);
+ return true;
}
-void AMDGPUInstPrinter::printImmediate32(uint32_t Imm,
+void AMDGPUInstPrinter::printImmediate16(uint32_t Imm,
const MCSubtargetInfo &STI,
raw_ostream &O) {
+ int16_t SImm = static_cast<int16_t>(Imm);
+ if (isInlinableIntLiteral(SImm)) {
+ O << SImm;
+ return;
+ }
+
+ uint16_t HImm = static_cast<uint16_t>(Imm);
+ if (printImmediateFloat16(HImm, STI, O))
+ return;
+
+ uint64_t Imm16 = static_cast<uint16_t>(Imm);
+ O << formatHex(Imm16);
+}
+
+void AMDGPUInstPrinter::printImmediateV216(uint32_t Imm, uint8_t OpType,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
int32_t SImm = static_cast<int32_t>(Imm);
- if (SImm >= -16 && SImm <= 64) {
+ if (isInlinableIntLiteral(SImm)) {
O << SImm;
return;
}
+ switch (OpType) {
+ case AMDGPU::OPERAND_REG_IMM_V2INT16:
+ case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
+ case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
+ if (printImmediateFloat32(Imm, STI, O))
+ return;
+ break;
+ case AMDGPU::OPERAND_REG_IMM_V2FP16:
+ case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
+ case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
+ if (isUInt<16>(Imm) &&
+ printImmediateFloat16(static_cast<uint16_t>(Imm), STI, O))
+ return;
+ break;
+ default:
+ llvm_unreachable("bad operand type");
+ }
+
+ O << formatHex(static_cast<uint64_t>(Imm));
+}
+
+bool AMDGPUInstPrinter::printImmediateFloat32(uint32_t Imm,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
if (Imm == llvm::bit_cast<uint32_t>(0.0f))
O << "0.0";
else if (Imm == llvm::bit_cast<uint32_t>(1.0f))
@@ -532,7 +560,24 @@ void AMDGPUInstPrinter::printImmediate32(uint32_t Imm,
STI.hasFeature(AMDGPU::FeatureInv2PiInlineImm))
O << "0.15915494";
else
- O << formatHex(static_cast<uint64_t>(Imm));
+ return false;
+
+ return true;
+}
+
+void AMDGPUInstPrinter::printImmediate32(uint32_t Imm,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ int32_t SImm = static_cast<int32_t>(Imm);
+ if (isInlinableIntLiteral(SImm)) {
+ O << SImm;
+ return;
+ }
+
+ if (printImmediateFloat32(Imm, STI, O))
+ return;
+
+ O << formatHex(static_cast<uint64_t>(Imm));
}
void AMDGPUInstPrinter::printImmediate64(uint64_t Imm,
@@ -755,25 +800,11 @@ void AMDGPUInstPrinter::printRegularOperand(const MCInst *MI, unsigned OpNo,
break;
case AMDGPU::OPERAND_REG_IMM_V2INT16:
case AMDGPU::OPERAND_REG_IMM_V2FP16:
- if (!isUInt<16>(Op.getImm()) &&
- STI.hasFeature(AMDGPU::FeatureVOP3Literal)) {
- printImmediate32(Op.getImm(), STI, O);
- break;
- }
-
- // Deal with 16-bit FP inline immediates not working.
- if (OpTy == AMDGPU::OPERAND_REG_IMM_V2FP16) {
- printImmediate16(static_cast<uint16_t>(Op.getImm()), STI, O);
- break;
- }
- [[fallthrough]];
case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
- printImmediateInt16(static_cast<uint16_t>(Op.getImm()), STI, O);
- break;
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
- printImmediateV216(Op.getImm(), STI, O);
+ printImmediateV216(Op.getImm(), OpTy, STI, O);
break;
case MCOI::OPERAND_UNKNOWN:
case MCOI::OPERAND_PCREL:
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
index f2f985f..e3958f8 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
@@ -88,8 +88,10 @@ private:
raw_ostream &O);
void printImmediate16(uint32_t Imm, const MCSubtargetInfo &STI,
raw_ostream &O);
- void printImmediateV216(uint32_t Imm, const MCSubtargetInfo &STI,
- raw_ostream &O);
+ void printImmediateV216(uint32_t Imm, uint8_t OpType,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ bool printImmediateFloat32(uint32_t Imm, const MCSubtargetInfo &STI,
+ raw_ostream &O);
void printImmediate32(uint32_t Imm, const MCSubtargetInfo &STI,
raw_ostream &O);
void printImmediate64(uint64_t Imm, const MCSubtargetInfo &STI,
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
index b403d69..de1abaf 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
@@ -284,22 +284,15 @@ AMDGPUMCCodeEmitter::getLitEncoding(const MCOperand &MO,
// which does not have f16 support?
return getLit16Encoding(static_cast<uint16_t>(Imm), STI);
case AMDGPU::OPERAND_REG_IMM_V2INT16:
- case AMDGPU::OPERAND_REG_IMM_V2FP16: {
- if (!isUInt<16>(Imm) && STI.hasFeature(AMDGPU::FeatureVOP3Literal))
- return getLit32Encoding(static_cast<uint32_t>(Imm), STI);
- if (OpInfo.OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
- return getLit16Encoding(static_cast<uint16_t>(Imm), STI);
- [[fallthrough]];
- }
case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
- return getLit16IntEncoding(static_cast<uint16_t>(Imm), STI);
+ return AMDGPU::getInlineEncodingV2I16(static_cast<uint32_t>(Imm))
+ .value_or(255);
+ case AMDGPU::OPERAND_REG_IMM_V2FP16:
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
- case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
- uint16_t Lo16 = static_cast<uint16_t>(Imm);
- uint32_t Encoding = getLit16Encoding(Lo16, STI);
- return Encoding;
- }
+ case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
+ return AMDGPU::getInlineEncodingV2F16(static_cast<uint32_t>(Imm))
+ .value_or(255);
case AMDGPU::OPERAND_KIMM32:
case AMDGPU::OPERAND_KIMM16:
return MO.getImm();
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 709de61..aa7639a 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -208,9 +208,7 @@ bool SIFoldOperands::canUseImmWithOpSel(FoldCandidate &Fold) const {
assert(Old.isReg() && Fold.isImm());
if (!(TSFlags & SIInstrFlags::IsPacked) || (TSFlags & SIInstrFlags::IsMAI) ||
- (ST->hasDOTOpSelHazard() && (TSFlags & SIInstrFlags::IsDOT)) ||
- isUInt<16>(Fold.ImmToFold) ||
- !AMDGPU::isFoldableLiteralV216(Fold.ImmToFold, ST->hasInv2PiInlineImm()))
+ (ST->hasDOTOpSelHazard() && (TSFlags & SIInstrFlags::IsDOT)))
return false;
unsigned Opcode = MI->getOpcode();
@@ -234,42 +232,123 @@ bool SIFoldOperands::tryFoldImmWithOpSel(FoldCandidate &Fold) const {
MachineOperand &Old = MI->getOperand(Fold.UseOpNo);
unsigned Opcode = MI->getOpcode();
int OpNo = MI->getOperandNo(&Old);
+ uint8_t OpType = TII->get(Opcode).operands()[OpNo].OperandType;
+
+ // If the literal can be inlined as-is, apply it and short-circuit the
+ // tests below. The main motivation for this is to avoid unintuitive
+ // uses of opsel.
+ if (AMDGPU::isInlinableLiteralV216(Fold.ImmToFold, OpType)) {
+ Old.ChangeToImmediate(Fold.ImmToFold);
+ return true;
+ }
- // Set op_sel/op_sel_hi on this operand or bail out if op_sel is
- // already set.
+ // Refer to op_sel/op_sel_hi and check if we can change the immediate and
+ // op_sel in a way that allows an inline constant.
int ModIdx = -1;
- if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0))
+ unsigned SrcIdx = ~0;
+ if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0)) {
ModIdx = AMDGPU::OpName::src0_modifiers;
- else if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1))
+ SrcIdx = 0;
+ } else if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1)) {
ModIdx = AMDGPU::OpName::src1_modifiers;
- else if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2))
+ SrcIdx = 1;
+ } else if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2)) {
ModIdx = AMDGPU::OpName::src2_modifiers;
+ SrcIdx = 2;
+ }
assert(ModIdx != -1);
ModIdx = AMDGPU::getNamedOperandIdx(Opcode, ModIdx);
MachineOperand &Mod = MI->getOperand(ModIdx);
- unsigned Val = Mod.getImm();
- if ((Val & SISrcMods::OP_SEL_0) || !(Val & SISrcMods::OP_SEL_1))
+ unsigned ModVal = Mod.getImm();
+
+ uint16_t ImmLo = static_cast<uint16_t>(
+ Fold.ImmToFold >> (ModVal & SISrcMods::OP_SEL_0 ? 16 : 0));
+ uint16_t ImmHi = static_cast<uint16_t>(
+ Fold.ImmToFold >> (ModVal & SISrcMods::OP_SEL_1 ? 16 : 0));
+ uint32_t Imm = (static_cast<uint32_t>(ImmHi) << 16) | ImmLo;
+ unsigned NewModVal = ModVal & ~(SISrcMods::OP_SEL_0 | SISrcMods::OP_SEL_1);
+
+ // Helper function that attempts to inline the given value with a newly
+ // chosen opsel pattern.
+ auto tryFoldToInline = [&](uint32_t Imm) -> bool {
+ if (AMDGPU::isInlinableLiteralV216(Imm, OpType)) {
+ Mod.setImm(NewModVal | SISrcMods::OP_SEL_1);
+ Old.ChangeToImmediate(Imm);
+ return true;
+ }
+
+ // Try to shuffle the halves around and leverage opsel to get an inline
+ // constant.
+ uint16_t Lo = static_cast<uint16_t>(Imm);
+ uint16_t Hi = static_cast<uint16_t>(Imm >> 16);
+ if (Lo == Hi) {
+ if (AMDGPU::isInlinableLiteralV216(Lo, OpType)) {
+ Mod.setImm(NewModVal);
+ Old.ChangeToImmediate(Lo);
+ return true;
+ }
+
+ if (static_cast<int16_t>(Lo) < 0) {
+ int32_t SExt = static_cast<int16_t>(Lo);
+ if (AMDGPU::isInlinableLiteralV216(SExt, OpType)) {
+ Mod.setImm(NewModVal);
+ Old.ChangeToImmediate(SExt);
+ return true;
+ }
+ }
+
+ // This check is only useful for integer instructions
+ if (OpType == AMDGPU::OPERAND_REG_IMM_V2INT16 ||
+ OpType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16) {
+ if (AMDGPU::isInlinableLiteralV216(Lo << 16, OpType)) {
+ Mod.setImm(NewModVal | SISrcMods::OP_SEL_0 | SISrcMods::OP_SEL_1);
+ Old.ChangeToImmediate(static_cast<uint32_t>(Lo) << 16);
+ return true;
+ }
+ }
+ } else {
+ uint32_t Swapped = (static_cast<uint32_t>(Lo) << 16) | Hi;
+ if (AMDGPU::isInlinableLiteralV216(Swapped, OpType)) {
+ Mod.setImm(NewModVal | SISrcMods::OP_SEL_0);
+ Old.ChangeToImmediate(Swapped);
+ return true;
+ }
+ }
+
return false;
+ };
- // Only apply the following transformation if that operand requires
- // a packed immediate.
- // If upper part is all zero we do not need op_sel_hi.
- if (!(Fold.ImmToFold & 0xffff)) {
- MachineOperand New =
- MachineOperand::CreateImm((Fold.ImmToFold >> 16) & 0xffff);
- if (!TII->isOperandLegal(*MI, OpNo, &New))
- return false;
- Mod.setImm(Mod.getImm() | SISrcMods::OP_SEL_0);
- Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1);
- Old.ChangeToImmediate((Fold.ImmToFold >> 16) & 0xffff);
+ if (tryFoldToInline(Imm))
return true;
+
+ // Replace integer addition by subtraction and vice versa if it allows
+ // folding the immediate to an inline constant.
+ //
+ // We should only ever get here for SrcIdx == 1 due to canonicalization
+ // earlier in the pipeline, but we double-check here to be safe / fully
+ // general.
+ bool IsUAdd = Opcode == AMDGPU::V_PK_ADD_U16;
+ bool IsUSub = Opcode == AMDGPU::V_PK_SUB_U16;
+ if (SrcIdx == 1 && (IsUAdd || IsUSub)) {
+ unsigned ClampIdx =
+ AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::clamp);
+ bool Clamp = MI->getOperand(ClampIdx).getImm() != 0;
+
+ if (!Clamp) {
+ uint16_t NegLo = -static_cast<uint16_t>(Imm);
+ uint16_t NegHi = -static_cast<uint16_t>(Imm >> 16);
+ uint32_t NegImm = (static_cast<uint32_t>(NegHi) << 16) | NegLo;
+
+ if (tryFoldToInline(NegImm)) {
+ unsigned NegOpcode =
+ IsUAdd ? AMDGPU::V_PK_SUB_U16 : AMDGPU::V_PK_ADD_U16;
+ MI->setDesc(TII->get(NegOpcode));
+ return true;
+ }
+ }
}
- MachineOperand New = MachineOperand::CreateImm(Fold.ImmToFold & 0xffff);
- if (!TII->isOperandLegal(*MI, OpNo, &New))
- return false;
- Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1);
- Old.ChangeToImmediate(Fold.ImmToFold & 0xffff);
- return true;
+
+ return false;
}
bool SIFoldOperands::updateOperand(FoldCandidate &Fold) const {
@@ -277,8 +356,19 @@ bool SIFoldOperands::updateOperand(FoldCandidate &Fold) const {
MachineOperand &Old = MI->getOperand(Fold.UseOpNo);
assert(Old.isReg());
- if (Fold.isImm() && canUseImmWithOpSel(Fold))
- return tryFoldImmWithOpSel(Fold);
+ if (Fold.isImm() && canUseImmWithOpSel(Fold)) {
+ if (tryFoldImmWithOpSel(Fold))
+ return true;
+
+ // We can't represent the candidate as an inline constant. Try as a literal
+ // with the original opsel, checking constant bus limitations.
+ MachineOperand New = MachineOperand::CreateImm(Fold.ImmToFold);
+ int OpNo = MI->getOperandNo(&Old);
+ if (!TII->isOperandLegal(*MI, OpNo, &New))
+ return false;
+ Old.ChangeToImmediate(Fold.ImmToFold);
+ return true;
+ }
if ((Fold.isImm() || Fold.isFI() || Fold.isGlobal()) && Fold.needsShrink()) {
MachineBasicBlock *MBB = MI->getParent();
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 396d22c..6799292 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -4153,15 +4153,15 @@ bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
case AMDGPU::OPERAND_REG_IMM_V2INT16:
case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
- return (isInt<16>(Imm) || isUInt<16>(Imm)) &&
- AMDGPU::isInlinableIntLiteral((int16_t)Imm);
+ return AMDGPU::isInlinableLiteralV2I16(Imm);
+ case AMDGPU::OPERAND_REG_IMM_V2FP16:
+ case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
+ case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
+ return AMDGPU::isInlinableLiteralV2F16(Imm);
case AMDGPU::OPERAND_REG_IMM_FP16:
case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
- case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
- case AMDGPU::OPERAND_REG_IMM_V2FP16:
- case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
- case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
+ case AMDGPU::OPERAND_REG_INLINE_AC_FP16: {
if (isInt<16>(Imm) || isUInt<16>(Imm)) {
// A few special case instructions have 16-bit operands on subtargets
// where 16-bit instructions are not legal.
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 50724fd..f07b8fa0 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -860,23 +860,6 @@ def ShiftAmt32Imm : ImmLeaf <i32, [{
return Imm < 32;
}]>;
-def getNegV2I16Imm : SDNodeXForm<build_vector, [{
- return SDValue(packNegConstantV2I16(N, *CurDAG), 0);
-}]>;
-
-def NegSubInlineConstV216 : PatLeaf<(build_vector), [{
- assert(N->getNumOperands() == 2);
- assert(N->getOperand(0).getValueType().getSizeInBits() == 16);
- SDValue Src0 = N->getOperand(0);
- SDValue Src1 = N->getOperand(1);
- if (Src0 == Src1)
- return isNegInlineImmediate(Src0.getNode());
-
- return (isNullConstantOrUndef(Src0) && isNegInlineImmediate(Src1.getNode())) ||
- (isNullConstantOrUndef(Src1) && isNegInlineImmediate(Src0.getNode()));
-}], getNegV2I16Imm>;
-
-
def fp16_zeros_high_16bits : PatLeaf<(f16 VGPR_32:$src), [{
return fp16SrcZerosHighBits(N->getOpcode());
}]>;
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index c94b894..1d197dc 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -1152,11 +1152,11 @@ class RegOrF32 <string RegisterClass, string OperandTypePrefix>
class RegOrV2B16 <string RegisterClass, string OperandTypePrefix>
: RegOrImmOperand <RegisterClass, OperandTypePrefix # "_V2INT16",
- !subst("_v2b16", "V2B16", NAME), "_Imm16">;
+ !subst("_v2b16", "V2B16", NAME), "_ImmV2I16">;
class RegOrV2F16 <string RegisterClass, string OperandTypePrefix>
: RegOrImmOperand <RegisterClass, OperandTypePrefix # "_V2FP16",
- !subst("_v2f16", "V2F16", NAME), "_Imm16">;
+ !subst("_v2f16", "V2F16", NAME), "_ImmV2F16">;
class RegOrF64 <string RegisterClass, string OperandTypePrefix>
: RegOrImmOperand <RegisterClass, OperandTypePrefix # "_FP64",
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index a91d771..26ba257 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -2506,53 +2506,95 @@ bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
Val == 0x3118; // 1/2pi
}
-bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi) {
- assert(HasInv2Pi);
-
- if (isInt<16>(Literal) || isUInt<16>(Literal)) {
- int16_t Trunc = static_cast<int16_t>(Literal);
- return AMDGPU::isInlinableLiteral16(Trunc, HasInv2Pi);
+std::optional<unsigned> getInlineEncodingV216(bool IsFloat, uint32_t Literal) {
+ // Unfortunately, the Instruction Set Architecture Reference Guide is
+ // misleading about how the inline operands work for (packed) 16-bit
+ // instructions. In a nutshell, the actual HW behavior is:
+ //
+ // - integer encodings (-16 .. 64) are always produced as sign-extended
+ // 32-bit values
+ // - float encodings are produced as:
+ // - for F16 instructions: corresponding half-precision float values in
+ // the LSBs, 0 in the MSBs
+ // - for IU16 instructions: corresponding single-precision float value
+ int32_t Signed = static_cast<int32_t>(Literal);
+ if (Signed >= 0 && Signed <= 64)
+ return 128 + Signed;
+
+ if (Signed >= -16 && Signed <= -1)
+ return 192 + std::abs(Signed);
+
+ if (IsFloat) {
+ // clang-format off
+ switch (Literal) {
+ case 0x3800: return 240; // 0.5
+ case 0xB800: return 241; // -0.5
+ case 0x3C00: return 242; // 1.0
+ case 0xBC00: return 243; // -1.0
+ case 0x4000: return 244; // 2.0
+ case 0xC000: return 245; // -2.0
+ case 0x4400: return 246; // 4.0
+ case 0xC400: return 247; // -4.0
+ case 0x3118: return 248; // 1.0 / (2.0 * pi)
+ default: break;
+ }
+ // clang-format on
+ } else {
+ // clang-format off
+ switch (Literal) {
+ case 0x3F000000: return 240; // 0.5
+ case 0xBF000000: return 241; // -0.5
+ case 0x3F800000: return 242; // 1.0
+ case 0xBF800000: return 243; // -1.0
+ case 0x40000000: return 244; // 2.0
+ case 0xC0000000: return 245; // -2.0
+ case 0x40800000: return 246; // 4.0
+ case 0xC0800000: return 247; // -4.0
+ case 0x3E22F983: return 248; // 1.0 / (2.0 * pi)
+ default: break;
+ }
+ // clang-format on
}
- if (!(Literal & 0xffff))
- return AMDGPU::isInlinableLiteral16(Literal >> 16, HasInv2Pi);
- int16_t Lo16 = static_cast<int16_t>(Literal);
- int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
- return Lo16 == Hi16 && isInlinableLiteral16(Lo16, HasInv2Pi);
+ return {};
}
-bool isInlinableIntLiteralV216(int32_t Literal) {
- int16_t Lo16 = static_cast<int16_t>(Literal);
- if (isInt<16>(Literal) || isUInt<16>(Literal))
- return isInlinableIntLiteral(Lo16);
+// Returns the inline-constant encoding of the literal for a V_PK_*_IU16
+// instruction, or nullopt if the literal cannot be inlined.
+std::optional<unsigned> getInlineEncodingV2I16(uint32_t Literal) {
+ return getInlineEncodingV216(false, Literal);
+}
- int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
- if (!(Literal & 0xffff))
- return isInlinableIntLiteral(Hi16);
- return Lo16 == Hi16 && isInlinableIntLiteral(Lo16);
+// Returns the inline-constant encoding of the literal for a V_PK_*_F16
+// instruction, or nullopt if the literal cannot be inlined.
+std::optional<unsigned> getInlineEncodingV2F16(uint32_t Literal) {
+ return getInlineEncodingV216(true, Literal);
}
-bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi, uint8_t OpType) {
+// Whether the given literal can be inlined for a V_PK_* instruction.
+bool isInlinableLiteralV216(uint32_t Literal, uint8_t OpType) {
switch (OpType) {
+ case AMDGPU::OPERAND_REG_IMM_V2INT16:
+ case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
+ case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
+ return getInlineEncodingV216(false, Literal).has_value();
case AMDGPU::OPERAND_REG_IMM_V2FP16:
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
- return isInlinableLiteralV216(Literal, HasInv2Pi);
+ case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
+ return getInlineEncodingV216(true, Literal).has_value();
default:
- return isInlinableIntLiteralV216(Literal);
+ llvm_unreachable("bad packed operand type");
}
}
-bool isFoldableLiteralV216(int32_t Literal, bool HasInv2Pi) {
- assert(HasInv2Pi);
-
- int16_t Lo16 = static_cast<int16_t>(Literal);
- if (isInt<16>(Literal) || isUInt<16>(Literal))
- return true;
+// Whether the given literal can be inlined for a V_PK_*_IU16 instruction.
+bool isInlinableLiteralV2I16(uint32_t Literal) {
+ return getInlineEncodingV2I16(Literal).has_value();
+}
- int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
- if (!(Literal & 0xffff))
- return true;
- return Lo16 == Hi16;
+// Whether the given literal can be inlined for a V_PK_*_F16 instruction.
+bool isInlinableLiteralV2F16(uint32_t Literal) {
+ return getInlineEncodingV2F16(Literal).has_value();
}
bool isValid32BitLiteral(uint64_t Val, bool IsFP64) {
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 3c9f330..50c7417 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -1291,16 +1291,19 @@ LLVM_READNONE
bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi);
LLVM_READNONE
-bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi);
+std::optional<unsigned> getInlineEncodingV2I16(uint32_t Literal);
LLVM_READNONE
-bool isInlinableIntLiteralV216(int32_t Literal);
+std::optional<unsigned> getInlineEncodingV2F16(uint32_t Literal);
LLVM_READNONE
-bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi, uint8_t OpType);
+bool isInlinableLiteralV216(uint32_t Literal, uint8_t OpType);
LLVM_READNONE
-bool isFoldableLiteralV216(int32_t Literal, bool HasInv2Pi);
+bool isInlinableLiteralV2I16(uint32_t Literal);
+
+LLVM_READNONE
+bool isInlinableLiteralV2F16(uint32_t Literal);
LLVM_READNONE
bool isValid32BitLiteral(uint64_t Val, bool IsFP64);
diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index 7f52501..985b77b 100644
--- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -125,15 +125,6 @@ defm V_PK_LSHRREV_B16 : VOP3PInst<"v_pk_lshrrev_b16", VOP3P_Profile<VOP_V2I16_V2
let SubtargetPredicate = HasVOP3PInsts in {
-// Undo sub x, c -> add x, -c canonicalization since c is more likely
-// an inline immediate than -c.
-// The constant will be emitted as a mov, and folded later.
-// TODO: We could directly encode the immediate now
-def : GCNPat<
- (add (v2i16 (VOP3PMods v2i16:$src0, i32:$src0_modifiers)), NegSubInlineConstV216:$src1),
- (V_PK_SUB_U16 $src0_modifiers, $src0, SRCMODS.OP_SEL_1, NegSubInlineConstV216:$src1)
->;
-
// Integer operations with clamp bit set.
class VOP3PSatPat<SDPatternOperator pat, Instruction inst> : GCNPat<
(pat (v2i16 (VOP3PMods v2i16:$src0, i32:$src0_modifiers)),
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/add.v2i16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/add.v2i16.ll
index e4cabab..496ee9f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/add.v2i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/add.v2i16.ll
@@ -172,8 +172,7 @@ define <2 x i16> @v_add_v2i16_neg_inline_imm_splat(<2 x i16> %a) {
; GFX9-LABEL: v_add_v2i16_neg_inline_imm_splat:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v1, 0xffc0ffc0
-; GFX9-NEXT: v_pk_add_u16 v0, v0, v1
+; GFX9-NEXT: v_pk_sub_u16 v0, v0, 64 op_sel_hi:[1,0]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_add_v2i16_neg_inline_imm_splat:
@@ -188,7 +187,7 @@ define <2 x i16> @v_add_v2i16_neg_inline_imm_splat(<2 x i16> %a) {
; GFX10-LABEL: v_add_v2i16_neg_inline_imm_splat:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_pk_add_u16 v0, 0xffc0, v0 op_sel_hi:[0,1]
+; GFX10-NEXT: v_pk_sub_u16 v0, v0, 64 op_sel_hi:[1,0]
; GFX10-NEXT: s_setpc_b64 s[30:31]
%add = add <2 x i16> %a, <i16 -64, i16 -64>
ret <2 x i16> %add
@@ -609,3 +608,65 @@ define amdgpu_ps i32 @s_add_v2i16_fneg_lhs_fneg_rhs(<2 x half> inreg %a, <2 x ha
%cast = bitcast <2 x i16> %add to i32
ret i32 %cast
}
+
+define <2 x i16> @add_inline_imm_neg1_0(<2 x i16> %x) {
+; GFX7-LABEL: add_inline_imm_neg1_0:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_add_i32_e32 v0, vcc, -1, v0
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: add_inline_imm_neg1_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_pk_sub_u16 v0, v0, 1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: add_inline_imm_neg1_0:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX8-NEXT: v_add_u16_e32 v0, -1, v0
+; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: add_inline_imm_neg1_0:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_pk_sub_u16 v0, v0, 1
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+ %y = add <2 x i16> %x, <i16 -1, i16 0>
+ ret <2 x i16> %y
+}
+
+define <2 x i16> @add_inline_imm_1_0(<2 x i16> %x) {
+; GFX7-LABEL: add_inline_imm_1_0:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_add_i32_e32 v0, vcc, 1, v0
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: add_inline_imm_1_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_pk_add_u16 v0, v0, 1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: add_inline_imm_1_0:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX8-NEXT: v_add_u16_e32 v0, 1, v0
+; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: add_inline_imm_1_0:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_pk_add_u16 v0, v0, 1
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+ %y = add <2 x i16> %x, <i16 1, i16 0>
+ ret <2 x i16> %y
+}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sub.v2i16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sub.v2i16.ll
index aa7aa6b..5613501 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sub.v2i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sub.v2i16.ll
@@ -156,13 +156,13 @@ define <2 x i16> @v_sub_v2i16_neg_inline_imm_splat(<2 x i16> %a) {
; GFX10-LABEL: v_sub_v2i16_neg_inline_imm_splat:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_pk_sub_i16 v0, v0, 0xffc0 op_sel_hi:[1,0]
+; GFX10-NEXT: v_pk_sub_i16 v0, v0, 0xffc0ffc0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_sub_v2i16_neg_inline_imm_splat:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_pk_sub_i16 v0, v0, 0xffc0 op_sel_hi:[1,0]
+; GFX11-NEXT: v_pk_sub_i16 v0, v0, 0xffc0ffc0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%sub = sub <2 x i16> %a, <i16 -64, i16 -64>
ret <2 x i16> %sub
diff --git a/llvm/test/CodeGen/AMDGPU/add.v2i16.ll b/llvm/test/CodeGen/AMDGPU/add.v2i16.ll
index b90d68a..7cf58a2 100644
--- a/llvm/test/CodeGen/AMDGPU/add.v2i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/add.v2i16.ll
@@ -437,7 +437,7 @@ define amdgpu_kernel void @v_test_add_v2i16_inline_neg1(ptr addrspace(1) %out, p
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_load_dword v0, v0, s[2:3] glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_pk_sub_u16 v0, v0, 1 op_sel_hi:[1,0]
+; GFX9-NEXT: v_pk_add_u16 v0, v0, -1
; GFX9-NEXT: global_store_dword v1, v0, s[0:1]
; GFX9-NEXT: s_endpgm
;
@@ -449,7 +449,7 @@ define amdgpu_kernel void @v_test_add_v2i16_inline_neg1(ptr addrspace(1) %out, p
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: global_load_dword v0, v0, s[2:3] glc dlc
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: v_pk_sub_u16 v0, v0, 1 op_sel_hi:[1,0]
+; GFX10-NEXT: v_pk_add_u16 v0, v0, -1
; GFX10-NEXT: global_store_dword v1, v0, s[0:1]
; GFX10-NEXT: s_endpgm
;
@@ -460,7 +460,7 @@ define amdgpu_kernel void @v_test_add_v2i16_inline_neg1(ptr addrspace(1) %out, p
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_load_b32 v0, v0, s[2:3] glc dlc
; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: v_pk_sub_u16 v0, v0, 1 op_sel_hi:[1,0]
+; GFX11-NEXT: v_pk_add_u16 v0, v0, -1
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -566,8 +566,7 @@ define amdgpu_kernel void @v_test_add_v2i16_inline_fp_split(ptr addrspace(1) %ou
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_load_dword v0, v0, s[2:3] glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_mov_b32 s2, 1.0
-; GFX9-NEXT: v_pk_add_u16 v0, v0, s2
+; GFX9-NEXT: v_pk_add_u16 v0, v0, 1.0
; GFX9-NEXT: global_store_dword v1, v0, s[0:1]
; GFX9-NEXT: s_endpgm
;
@@ -579,7 +578,7 @@ define amdgpu_kernel void @v_test_add_v2i16_inline_fp_split(ptr addrspace(1) %ou
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: global_load_dword v0, v0, s[2:3] glc dlc
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: v_pk_add_u16 v0, 0x3f80, v0 op_sel:[1,0] op_sel_hi:[0,1]
+; GFX10-NEXT: v_pk_add_u16 v0, v0, 1.0
; GFX10-NEXT: global_store_dword v1, v0, s[0:1]
; GFX10-NEXT: s_endpgm
;
@@ -590,7 +589,7 @@ define amdgpu_kernel void @v_test_add_v2i16_inline_fp_split(ptr addrspace(1) %ou
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_load_b32 v0, v0, s[2:3] glc dlc
; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: v_pk_add_u16 v0, 0x3f80, v0 op_sel:[1,0] op_sel_hi:[0,1]
+; GFX11-NEXT: v_pk_add_u16 v0, v0, 1.0
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -990,6 +989,66 @@ define amdgpu_kernel void @v_test_add_v2i16_sext_to_v2i64(ptr addrspace(1) %out,
ret void
}
+define <2 x i16> @add_inline_imm_neg1_0(<2 x i16> %x) {
+; VI-LABEL: add_inline_imm_neg1_0:
+; VI: ; %bb.0:
+; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT: v_and_b32_e32 v1, 0xffff0000, v0
+; VI-NEXT: v_add_u16_e32 v0, -1, v0
+; VI-NEXT: v_or_b32_e32 v0, v0, v1
+; VI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: add_inline_imm_neg1_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_pk_sub_u16 v0, v0, 1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: add_inline_imm_neg1_0:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_pk_sub_u16 v0, v0, 1
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: add_inline_imm_neg1_0:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_pk_sub_u16 v0, v0, 1
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %y = add <2 x i16> %x, <i16 -1, i16 0>
+ ret <2 x i16> %y
+}
+
+define <2 x i16> @add_inline_imm_1_0(<2 x i16> %x) {
+; VI-LABEL: add_inline_imm_1_0:
+; VI: ; %bb.0:
+; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT: v_and_b32_e32 v1, 0xffff0000, v0
+; VI-NEXT: v_add_u16_e32 v0, 1, v0
+; VI-NEXT: v_or_b32_e32 v0, v0, v1
+; VI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: add_inline_imm_1_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_pk_add_u16 v0, v0, 1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: add_inline_imm_1_0:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_pk_add_u16 v0, v0, 1
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: add_inline_imm_1_0:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_pk_add_u16 v0, v0, 1
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %y = add <2 x i16> %x, <i16 1, i16 0>
+ ret <2 x i16> %y
+}
+
declare i32 @llvm.amdgcn.workitem.id.x() #0
attributes #0 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/calling-conventions.ll b/llvm/test/CodeGen/AMDGPU/calling-conventions.ll
index cb89841b..d63ebde 100644
--- a/llvm/test/CodeGen/AMDGPU/calling-conventions.ll
+++ b/llvm/test/CodeGen/AMDGPU/calling-conventions.ll
@@ -431,7 +431,7 @@ define amdgpu_ps void @ps_mesa_v2i16(<2 x i16> %arg0) {
;
; GFX11-LABEL: ps_mesa_v2i16:
; GFX11: ; %bb.0:
-; GFX11-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0]
+; GFX11-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
; GFX11-NEXT: global_store_b32 v[0:1], v0, off
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -468,7 +468,7 @@ define amdgpu_ps void @ps_mesa_inreg_v2i16(<2 x i16> inreg %arg0) {
;
; GFX11-LABEL: ps_mesa_inreg_v2i16:
; GFX11: ; %bb.0:
-; GFX11-NEXT: v_pk_sub_u16 v0, s0, -1 op_sel_hi:[1,0]
+; GFX11-NEXT: v_pk_add_u16 v0, s0, 1 op_sel_hi:[1,0]
; GFX11-NEXT: global_store_b32 v[0:1], v0, off
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
diff --git a/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll b/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll
index 329f0a2..dfc8361 100644
--- a/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll
+++ b/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll
@@ -597,7 +597,7 @@ define <2 x i16> @chain_hi_to_lo_group_other_dep(ptr addrspace(3) %ptr) {
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: ds_read_u16_d16_hi v1, v0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: v_pk_sub_u16 v1, v1, -12 op_sel_hi:[1,0]
+; GCN-NEXT: v_pk_add_u16 v1, v1, 12 op_sel_hi:[1,0]
; GCN-NEXT: ds_read_u16_d16 v1, v0 offset:2
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v0, v1
@@ -608,7 +608,7 @@ define <2 x i16> @chain_hi_to_lo_group_other_dep(ptr addrspace(3) %ptr) {
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: ds_read_u16_d16_hi v1, v0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-NEXT: v_pk_sub_u16 v1, v1, -12 op_sel_hi:[1,0]
+; GFX10-NEXT: v_pk_add_u16 v1, v1, 12 op_sel_hi:[1,0]
; GFX10-NEXT: ds_read_u16_d16 v1, v0 offset:2
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v0, v1
@@ -619,7 +619,7 @@ define <2 x i16> @chain_hi_to_lo_group_other_dep(ptr addrspace(3) %ptr) {
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: ds_load_u16_d16_hi v1, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: v_pk_sub_u16 v1, v1, -12 op_sel_hi:[1,0]
+; GFX11-NEXT: v_pk_add_u16 v1, v1, 12 op_sel_hi:[1,0]
; GFX11-NEXT: ds_load_u16_d16 v1, v0 offset:2
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_mov_b32_e32 v0, v1
@@ -643,7 +643,7 @@ define <2 x i16> @chain_hi_to_lo_group_other_dep_multi_chain(ptr addrspace(3) %p
; GFX900-NEXT: ds_read_u16_d16_hi v0, v0
; GFX900-NEXT: s_mov_b32 s4, 0xffff
; GFX900-NEXT: s_waitcnt lgkmcnt(0)
-; GFX900-NEXT: v_pk_sub_u16 v0, v0, -12 op_sel_hi:[1,0]
+; GFX900-NEXT: v_pk_add_u16 v0, v0, 12 op_sel_hi:[1,0]
; GFX900-NEXT: v_bfi_b32 v0, s4, v1, v0
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -654,7 +654,7 @@ define <2 x i16> @chain_hi_to_lo_group_other_dep_multi_chain(ptr addrspace(3) %p
; FLATSCR-NEXT: ds_read_u16_d16_hi v0, v0
; FLATSCR-NEXT: s_mov_b32 s0, 0xffff
; FLATSCR-NEXT: s_waitcnt lgkmcnt(0)
-; FLATSCR-NEXT: v_pk_sub_u16 v0, v0, -12 op_sel_hi:[1,0]
+; FLATSCR-NEXT: v_pk_add_u16 v0, v0, 12 op_sel_hi:[1,0]
; FLATSCR-NEXT: v_bfi_b32 v0, s0, v1, v0
; FLATSCR-NEXT: s_setpc_b64 s[30:31]
;
@@ -664,7 +664,7 @@ define <2 x i16> @chain_hi_to_lo_group_other_dep_multi_chain(ptr addrspace(3) %p
; GFX10-NEXT: ds_read_u16 v1, v0 offset:2
; GFX10-NEXT: ds_read_u16_d16_hi v0, v0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-NEXT: v_pk_sub_u16 v0, v0, -12 op_sel_hi:[1,0]
+; GFX10-NEXT: v_pk_add_u16 v0, v0, 12 op_sel_hi:[1,0]
; GFX10-NEXT: v_bfi_b32 v0, 0xffff, v1, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
@@ -674,7 +674,7 @@ define <2 x i16> @chain_hi_to_lo_group_other_dep_multi_chain(ptr addrspace(3) %p
; GFX11-NEXT: ds_load_u16 v1, v0 offset:2
; GFX11-NEXT: ds_load_u16_d16_hi v0, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: v_pk_sub_u16 v0, v0, -12 op_sel_hi:[1,0]
+; GFX11-NEXT: v_pk_add_u16 v0, v0, 12 op_sel_hi:[1,0]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_bfi_b32 v0, 0xffff, v1, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
@@ -694,7 +694,7 @@ define <2 x i16> @chain_hi_to_lo_private_other_dep(ptr addrspace(5) %ptr) {
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: buffer_load_short_d16_hi v1, v0, s[0:3], 0 offen
; GFX900-NEXT: s_waitcnt vmcnt(0)
-; GFX900-NEXT: v_pk_sub_u16 v1, v1, -12 op_sel_hi:[1,0]
+; GFX900-NEXT: v_pk_add_u16 v1, v1, 12 op_sel_hi:[1,0]
; GFX900-NEXT: buffer_load_short_d16 v1, v0, s[0:3], 0 offen offset:2
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v0, v1
@@ -705,7 +705,7 @@ define <2 x i16> @chain_hi_to_lo_private_other_dep(ptr addrspace(5) %ptr) {
; FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; FLATSCR-NEXT: scratch_load_short_d16_hi v1, v0, off
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; FLATSCR-NEXT: v_pk_sub_u16 v1, v1, -12 op_sel_hi:[1,0]
+; FLATSCR-NEXT: v_pk_add_u16 v1, v1, 12 op_sel_hi:[1,0]
; FLATSCR-NEXT: scratch_load_short_d16 v1, v0, off offset:2
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: v_mov_b32_e32 v0, v1
@@ -716,7 +716,7 @@ define <2 x i16> @chain_hi_to_lo_private_other_dep(ptr addrspace(5) %ptr) {
; GFX10_DEFAULT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10_DEFAULT-NEXT: buffer_load_short_d16_hi v1, v0, s[0:3], 0 offen
; GFX10_DEFAULT-NEXT: s_waitcnt vmcnt(0)
-; GFX10_DEFAULT-NEXT: v_pk_sub_u16 v1, v1, -12 op_sel_hi:[1,0]
+; GFX10_DEFAULT-NEXT: v_pk_add_u16 v1, v1, 12 op_sel_hi:[1,0]
; GFX10_DEFAULT-NEXT: buffer_load_short_d16 v1, v0, s[0:3], 0 offen offset:2
; GFX10_DEFAULT-NEXT: s_waitcnt vmcnt(0)
; GFX10_DEFAULT-NEXT: v_mov_b32_e32 v0, v1
@@ -727,7 +727,7 @@ define <2 x i16> @chain_hi_to_lo_private_other_dep(ptr addrspace(5) %ptr) {
; FLATSCR_GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; FLATSCR_GFX10-NEXT: scratch_load_short_d16_hi v1, v0, off
; FLATSCR_GFX10-NEXT: s_waitcnt vmcnt(0)
-; FLATSCR_GFX10-NEXT: v_pk_sub_u16 v1, v1, -12 op_sel_hi:[1,0]
+; FLATSCR_GFX10-NEXT: v_pk_add_u16 v1, v1, 12 op_sel_hi:[1,0]
; FLATSCR_GFX10-NEXT: scratch_load_short_d16 v1, v0, off offset:2
; FLATSCR_GFX10-NEXT: s_waitcnt vmcnt(0)
; FLATSCR_GFX10-NEXT: v_mov_b32_e32 v0, v1
@@ -738,7 +738,7 @@ define <2 x i16> @chain_hi_to_lo_private_other_dep(ptr addrspace(5) %ptr) {
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: scratch_load_d16_hi_b16 v1, v0, off
; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: v_pk_sub_u16 v1, v1, -12 op_sel_hi:[1,0]
+; GFX11-NEXT: v_pk_add_u16 v1, v1, 12 op_sel_hi:[1,0]
; GFX11-NEXT: scratch_load_d16_b16 v1, v0, off offset:2
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_mov_b32_e32 v0, v1
@@ -762,7 +762,7 @@ define <2 x i16> @chain_hi_to_lo_global_other_dep(ptr addrspace(1) %ptr) {
; GFX900-NEXT: global_load_short_d16_hi v0, v[0:1], off glc
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_mov_b32 s4, 0xffff
-; GFX900-NEXT: v_pk_sub_u16 v0, v0, -12 op_sel_hi:[1,0]
+; GFX900-NEXT: v_pk_add_u16 v0, v0, 12 op_sel_hi:[1,0]
; GFX900-NEXT: v_bfi_b32 v0, s4, v2, v0
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -774,7 +774,7 @@ define <2 x i16> @chain_hi_to_lo_global_other_dep(ptr addrspace(1) %ptr) {
; FLATSCR-NEXT: global_load_short_d16_hi v0, v[0:1], off glc
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: s_mov_b32 s0, 0xffff
-; FLATSCR-NEXT: v_pk_sub_u16 v0, v0, -12 op_sel_hi:[1,0]
+; FLATSCR-NEXT: v_pk_add_u16 v0, v0, 12 op_sel_hi:[1,0]
; FLATSCR-NEXT: v_bfi_b32 v0, s0, v2, v0
; FLATSCR-NEXT: s_setpc_b64 s[30:31]
;
@@ -785,7 +785,7 @@ define <2 x i16> @chain_hi_to_lo_global_other_dep(ptr addrspace(1) %ptr) {
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_load_short_d16_hi v0, v[0:1], off glc dlc
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: v_pk_sub_u16 v0, v0, -12 op_sel_hi:[1,0]
+; GFX10-NEXT: v_pk_add_u16 v0, v0, 12 op_sel_hi:[1,0]
; GFX10-NEXT: v_bfi_b32 v0, 0xffff, v2, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
@@ -796,7 +796,7 @@ define <2 x i16> @chain_hi_to_lo_global_other_dep(ptr addrspace(1) %ptr) {
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_load_d16_hi_b16 v0, v[0:1], off glc dlc
; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: v_pk_sub_u16 v0, v0, -12 op_sel_hi:[1,0]
+; GFX11-NEXT: v_pk_add_u16 v0, v0, 12 op_sel_hi:[1,0]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_bfi_b32 v0, 0xffff, v2, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
@@ -820,7 +820,7 @@ define <2 x i16> @chain_hi_to_lo_flat_other_dep(ptr addrspace(0) %ptr) {
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_mov_b32 s4, 0xffff
; GFX900-NEXT: s_waitcnt lgkmcnt(0)
-; GFX900-NEXT: v_pk_sub_u16 v0, v0, -12 op_sel_hi:[1,0]
+; GFX900-NEXT: v_pk_add_u16 v0, v0, 12 op_sel_hi:[1,0]
; GFX900-NEXT: v_bfi_b32 v0, s4, v2, v0
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -833,7 +833,7 @@ define <2 x i16> @chain_hi_to_lo_flat_other_dep(ptr addrspace(0) %ptr) {
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: s_mov_b32 s0, 0xffff
; FLATSCR-NEXT: s_waitcnt lgkmcnt(0)
-; FLATSCR-NEXT: v_pk_sub_u16 v0, v0, -12 op_sel_hi:[1,0]
+; FLATSCR-NEXT: v_pk_add_u16 v0, v0, 12 op_sel_hi:[1,0]
; FLATSCR-NEXT: v_bfi_b32 v0, s0, v2, v0
; FLATSCR-NEXT: s_setpc_b64 s[30:31]
;
@@ -846,7 +846,7 @@ define <2 x i16> @chain_hi_to_lo_flat_other_dep(ptr addrspace(0) %ptr) {
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: flat_load_short_d16_hi v0, v[0:1] glc dlc
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_pk_sub_u16 v0, v0, -12 op_sel_hi:[1,0]
+; GFX10-NEXT: v_pk_add_u16 v0, v0, 12 op_sel_hi:[1,0]
; GFX10-NEXT: v_bfi_b32 v0, 0xffff, v2, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
@@ -857,7 +857,7 @@ define <2 x i16> @chain_hi_to_lo_flat_other_dep(ptr addrspace(0) %ptr) {
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: flat_load_d16_hi_b16 v0, v[0:1] glc dlc
; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_pk_sub_u16 v0, v0, -12 op_sel_hi:[1,0]
+; GFX11-NEXT: v_pk_add_u16 v0, v0, 12 op_sel_hi:[1,0]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_bfi_b32 v0, 0xffff, v2, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/fma.f16.ll b/llvm/test/CodeGen/AMDGPU/fma.f16.ll
index 7894f6b..e12de1d 100644
--- a/llvm/test/CodeGen/AMDGPU/fma.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fma.f16.ll
@@ -255,8 +255,8 @@ define <2 x i32> @test_D139469_v2f16(<2 x half> %arg) {
; GFX10-GISEL: ; %bb.0: ; %bb
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0x211e211e
-; GFX10-GISEL-NEXT: v_pk_mul_f16 v2, 0x291e, v0 op_sel_hi:[0,1]
-; GFX10-GISEL-NEXT: v_pk_fma_f16 v0, 0x291e, v0, v1 op_sel_hi:[0,1,1]
+; GFX10-GISEL-NEXT: v_pk_mul_f16 v2, 0x291e291e, v0
+; GFX10-GISEL-NEXT: v_pk_fma_f16 v0, 0x291e291e, v0, v1
; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX10-GISEL-NEXT: v_cmp_gt_f16_e32 vcc_lo, 0, v2
; GFX10-GISEL-NEXT: v_cmp_gt_f16_e64 s4, 0, v0
@@ -288,9 +288,9 @@ define <2 x i32> @test_D139469_v2f16(<2 x half> %arg) {
; GFX11-GISEL: ; %bb.0: ; %bb
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT: v_mov_b32_e32 v1, 0x211e211e
-; GFX11-GISEL-NEXT: v_pk_mul_f16 v2, 0x291e, v0 op_sel_hi:[0,1]
+; GFX11-GISEL-NEXT: v_pk_mul_f16 v2, 0x291e291e, v0
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_pk_fma_f16 v0, 0x291e, v0, v1 op_sel_hi:[0,1,1]
+; GFX11-GISEL-NEXT: v_pk_fma_f16 v0, 0x291e291e, v0, v1
; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v2
; GFX11-GISEL-NEXT: v_cmp_gt_f16_e32 vcc_lo, 0, v2
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
diff --git a/llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll b/llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll
index 0ff5ea6..3e658c6 100644
--- a/llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll
@@ -77,11 +77,29 @@ define <2 x half> @v_mul_42_v2f16(<2 x half> %x) {
; GFX9-GISEL-NEXT: v_pk_mul_f16 v0, v0, v1
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1011-LABEL: v_mul_42_v2f16:
-; GFX1011: ; %bb.0:
-; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1011-NEXT: v_pk_mul_f16 v0, 0x5140, v0 op_sel_hi:[0,1]
-; GFX1011-NEXT: s_setpc_b64 s[30:31]
+; GFX10-SDAG-LABEL: v_mul_42_v2f16:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_pk_mul_f16 v0, 0x5140, v0 op_sel_hi:[0,1]
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_mul_42_v2f16:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_pk_mul_f16 v0, 0x51405140, v0
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: v_mul_42_v2f16:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_pk_mul_f16 v0, 0x5140, v0 op_sel_hi:[0,1]
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_mul_42_v2f16:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_pk_mul_f16 v0, 0x51405140, v0
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%mul = fmul <2 x half> %x, <half 42.0, half 42.0>
ret <2 x half> %mul
}
@@ -3192,11 +3210,29 @@ define <2 x half> @v_mul_16_v2f16(<2 x half> %x) {
; GFX9-GISEL-NEXT: v_pk_mul_f16 v0, v0, v1
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1011-LABEL: v_mul_16_v2f16:
-; GFX1011: ; %bb.0:
-; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1011-NEXT: v_pk_mul_f16 v0, 0x4c00, v0 op_sel_hi:[0,1]
-; GFX1011-NEXT: s_setpc_b64 s[30:31]
+; GFX10-SDAG-LABEL: v_mul_16_v2f16:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_pk_mul_f16 v0, 0x4c00, v0 op_sel_hi:[0,1]
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_mul_16_v2f16:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_pk_mul_f16 v0, 0x4c004c00, v0
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: v_mul_16_v2f16:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_pk_mul_f16 v0, 0x4c00, v0 op_sel_hi:[0,1]
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_mul_16_v2f16:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_pk_mul_f16 v0, 0x4c004c00, v0
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%mul = fmul <2 x half> %x, <half 16.0, half 16.0>
ret <2 x half> %mul
}
@@ -3216,11 +3252,29 @@ define <2 x half> @v_mul_neg16_v2f16(<2 x half> %x) {
; GFX9-GISEL-NEXT: v_pk_mul_f16 v0, v0, v1
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1011-LABEL: v_mul_neg16_v2f16:
-; GFX1011: ; %bb.0:
-; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1011-NEXT: v_pk_mul_f16 v0, 0xcc00, v0 op_sel_hi:[0,1]
-; GFX1011-NEXT: s_setpc_b64 s[30:31]
+; GFX10-SDAG-LABEL: v_mul_neg16_v2f16:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_pk_mul_f16 v0, 0xcc00, v0 op_sel_hi:[0,1]
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_mul_neg16_v2f16:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_pk_mul_f16 v0, 0xcc00cc00, v0
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: v_mul_neg16_v2f16:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_pk_mul_f16 v0, 0xcc00, v0 op_sel_hi:[0,1]
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_mul_neg16_v2f16:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_pk_mul_f16 v0, 0xcc00cc00, v0
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%mul = fmul <2 x half> %x, <half -16.0, half -16.0>
ret <2 x half> %mul
}
@@ -3242,12 +3296,33 @@ define <2 x half> @v_mul_fabs_16_v2f16(<2 x half> %x) {
; GFX9-GISEL-NEXT: v_pk_mul_f16 v0, v0, v1
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1011-LABEL: v_mul_fabs_16_v2f16:
-; GFX1011: ; %bb.0:
-; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1011-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0
-; GFX1011-NEXT: v_pk_mul_f16 v0, 0x4c00, v0 op_sel_hi:[0,1]
-; GFX1011-NEXT: s_setpc_b64 s[30:31]
+; GFX10-SDAG-LABEL: v_mul_fabs_16_v2f16:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0
+; GFX10-SDAG-NEXT: v_pk_mul_f16 v0, 0x4c00, v0 op_sel_hi:[0,1]
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_mul_fabs_16_v2f16:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0
+; GFX10-GISEL-NEXT: v_pk_mul_f16 v0, 0x4c004c00, v0
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: v_mul_fabs_16_v2f16:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0
+; GFX11-SDAG-NEXT: v_pk_mul_f16 v0, 0x4c00, v0 op_sel_hi:[0,1]
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_mul_fabs_16_v2f16:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0
+; GFX11-GISEL-NEXT: v_pk_mul_f16 v0, 0x4c004c00, v0
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%x.fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
%mul = fmul <2 x half> %x.fabs, <half 16.0, half 16.0>
ret <2 x half> %mul
@@ -3268,11 +3343,29 @@ define <2 x half> @v_fma_mul_add_32_v2f16(<2 x half> %x, <2 x half> %y) {
; GFX9-GISEL-NEXT: v_pk_fma_f16 v0, v0, v2, v1
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1011-LABEL: v_fma_mul_add_32_v2f16:
-; GFX1011: ; %bb.0:
-; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1011-NEXT: v_pk_fma_f16 v0, 0x5000, v0, v1 op_sel_hi:[0,1,1]
-; GFX1011-NEXT: s_setpc_b64 s[30:31]
+; GFX10-SDAG-LABEL: v_fma_mul_add_32_v2f16:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_pk_fma_f16 v0, 0x5000, v0, v1 op_sel_hi:[0,1,1]
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_fma_mul_add_32_v2f16:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_pk_fma_f16 v0, 0x50005000, v0, v1
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: v_fma_mul_add_32_v2f16:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_pk_fma_f16 v0, 0x5000, v0, v1 op_sel_hi:[0,1,1]
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_fma_mul_add_32_v2f16:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_pk_fma_f16 v0, 0x50005000, v0, v1
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%mul = fmul contract <2 x half> %x, <half 32.0, half 32.0>
%fma = fadd contract <2 x half> %mul, %y
ret <2 x half> %fma
diff --git a/llvm/test/CodeGen/AMDGPU/fptosi.f16.ll b/llvm/test/CodeGen/AMDGPU/fptosi.f16.ll
index 3afcc7d..afb3a02 100644
--- a/llvm/test/CodeGen/AMDGPU/fptosi.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fptosi.f16.ll
@@ -480,7 +480,7 @@ define amdgpu_kernel void @fptosi_f16_to_i1(ptr addrspace(1) %out, half %in) {
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NEXT: v_cmp_eq_f16_e64 s[4:5], 0xbc00, s4
+; VI-NEXT: v_cmp_eq_f16_e64 s[4:5], -1.0, s4
; VI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; VI-NEXT: buffer_store_byte v0, off, s[0:3], 0
; VI-NEXT: s_endpgm
@@ -492,7 +492,7 @@ define amdgpu_kernel void @fptosi_f16_to_i1(ptr addrspace(1) %out, half %in) {
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: v_cmp_eq_f16_e64 s2, 0xbc00, s2
+; GFX11-NEXT: v_cmp_eq_f16_e64 s2, -1.0, s2
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, s2
; GFX11-NEXT: s_mov_b32 s2, -1
diff --git a/llvm/test/CodeGen/AMDGPU/immv216.ll b/llvm/test/CodeGen/AMDGPU/immv216.ll
index 8c33004..b66ca71 100644
--- a/llvm/test/CodeGen/AMDGPU/immv216.ll
+++ b/llvm/test/CodeGen/AMDGPU/immv216.ll
@@ -580,7 +580,7 @@ define amdgpu_kernel void @add_inline_imm_64_v2f16(ptr addrspace(1) %out, <2 x h
; GFX9: s_mov_b32 [[K:s[0-9]+]], 0x38003800
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
-; GFX10: v_pk_mul_lo_u16 v0, 0x3800, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x38,0x00,0x00]
+; GFX10: v_pk_mul_lo_u16 v0, 0x38003800, v0 ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x38,0x00,0x38]
define <2 x i16> @mul_inline_imm_0.5_v2i16(<2 x i16> %x) {
%y = mul <2 x i16> %x, bitcast (<2 x half> <half 0.5, half 0.5> to <2 x i16>)
ret <2 x i16> %y
@@ -590,7 +590,7 @@ define <2 x i16> @mul_inline_imm_0.5_v2i16(<2 x i16> %x) {
; GFX9: s_mov_b32 [[K:s[0-9]+]], 0xb800b800
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
-; GFX10: v_pk_mul_lo_u16 v0, 0xb800, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0xb8,0x00,0x00]
+; GFX10: v_pk_mul_lo_u16 v0, 0xb800b800, v0 ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0xb8,0x00,0xb8]
define <2 x i16> @mul_inline_imm_neg_0.5_v2i16(<2 x i16> %x) {
%y = mul <2 x i16> %x, bitcast (<2 x half> <half -0.5, half -0.5> to <2 x i16>)
ret <2 x i16> %y
@@ -600,7 +600,7 @@ define <2 x i16> @mul_inline_imm_neg_0.5_v2i16(<2 x i16> %x) {
; GFX9: s_mov_b32 [[K:s[0-9]+]], 0x3c003c00
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
-; GFX10: v_pk_mul_lo_u16 v0, 0x3c00, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x3c,0x00,0x00]
+; GFX10: v_pk_mul_lo_u16 v0, 0x3c003c00, v0 ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x3c,0x00,0x3c]
define <2 x i16> @mul_inline_imm_1.0_v2i16(<2 x i16> %x) {
%y = mul <2 x i16> %x, bitcast (<2 x half> <half 1.0, half 1.0> to <2 x i16>)
ret <2 x i16> %y
@@ -610,27 +610,25 @@ define <2 x i16> @mul_inline_imm_1.0_v2i16(<2 x i16> %x) {
; GFX9: s_mov_b32 [[K:s[0-9]+]], 0xbc00bc00
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
-; GFX10: v_pk_mul_lo_u16 v0, 0xbc00, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0xbc,0x00,0x00]
+; GFX10: v_pk_mul_lo_u16 v0, 0xbc00bc00, v0 ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0xbc,0x00,0xbc]
define <2 x i16> @mul_inline_imm_neg_1.0_v2i16(<2 x i16> %x) {
%y = mul <2 x i16> %x, bitcast (<2 x half> <half -1.0, half -1.0> to <2 x i16>)
ret <2 x i16> %y
}
; GCN-LABEL: {{^}}shl_inline_imm_2.0_v2i16:
-; GFX9: s_mov_b32 [[K:s[0-9]+]], 0x40004000
-; GFX9: v_pk_lshlrev_b16 v0, v0, [[K]]
+; GFX9: v_pk_lshlrev_b16 v0, v0, 2.0 op_sel:[0,1]
-; GFX10: v_pk_lshlrev_b16 v0, v0, 0x4000 op_sel_hi:[1,0] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x40,0x00,0x00]
+; GFX10: v_pk_lshlrev_b16 v0, v0, 2.0 op_sel:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xe9,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}}]
define <2 x i16> @shl_inline_imm_2.0_v2i16(<2 x i16> %x) {
%y = shl <2 x i16> bitcast (<2 x half> <half 2.0, half 2.0> to <2 x i16>), %x
ret <2 x i16> %y
}
; GCN-LABEL: {{^}}shl_inline_imm_neg_2.0_v2i16:
-; GFX9: s_mov_b32 [[K:s[0-9]+]], 0xc000c000
-; GFX9: v_pk_lshlrev_b16 v0, v0, [[K]]
+; GFX9: v_pk_lshlrev_b16 v0, v0, -2.0 op_sel:[0,1]
-; GFX10: v_pk_lshlrev_b16 v0, v0, 0xc000 op_sel_hi:[1,0] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0xc0,0x00,0x00]
+; GFX10: v_pk_lshlrev_b16 v0, v0, -2.0 op_sel:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xeb,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}}]
define <2 x i16> @shl_inline_imm_neg_2.0_v2i16(<2 x i16> %x) {
%y = shl <2 x i16> bitcast (<2 x half> <half -2.0, half -2.0> to <2 x i16>), %x
ret <2 x i16> %y
@@ -640,7 +638,7 @@ define <2 x i16> @shl_inline_imm_neg_2.0_v2i16(<2 x i16> %x) {
; GFX9: s_mov_b32 [[K:s[0-9]+]], 0x44004400
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
-; GFX10: v_pk_mul_lo_u16 v0, 0x4400, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x44,0x00,0x00]
+; GFX10: v_pk_mul_lo_u16 v0, 0x44004400, v0 ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x44,0x00,0x44]
define <2 x i16> @mul_inline_imm_4.0_v2i16(<2 x i16> %x) {
%y = mul <2 x i16> %x, bitcast (<2 x half> <half 4.0, half 4.0> to <2 x i16>)
ret <2 x i16> %y
@@ -651,7 +649,7 @@ define <2 x i16> @mul_inline_imm_4.0_v2i16(<2 x i16> %x) {
; GFX9: s_mov_b32 [[K:s[0-9]+]], 0xc400c400
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
-; GFX10: v_pk_mul_lo_u16 v0, 0xc400, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0xc4,0x00,0x00]
+; GFX10: v_pk_mul_lo_u16 v0, 0xc400c400, v0 ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0xc4,0x00,0xc4]
define <2 x i16> @mul_inline_imm_neg_4.0_v2i16(<2 x i16> %x) {
%y = mul <2 x i16> %x, bitcast (<2 x half> <half -4.0, half -4.0> to <2 x i16>)
ret <2 x i16> %y
@@ -661,7 +659,7 @@ define <2 x i16> @mul_inline_imm_neg_4.0_v2i16(<2 x i16> %x) {
; GFX9: s_mov_b32 [[K:s[0-9]+]], 0x31183118
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
-; GFX10: v_pk_mul_lo_u16 v0, 0x3118, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x18,0x31,0x00,0x00]
+; GFX10: v_pk_mul_lo_u16 v0, 0x31183118, v0 ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x18,0x31,0x18,0x31]
define <2 x i16> @mul_inline_imm_inv2pi_v2i16(<2 x i16> %x) {
%y = mul <2 x i16> %x, bitcast (<2 x half> <half 0xH3118, half 0xH3118> to <2 x i16>)
ret <2 x i16> %y
diff --git a/llvm/test/CodeGen/AMDGPU/integer-mad-patterns.ll b/llvm/test/CodeGen/AMDGPU/integer-mad-patterns.ll
index e2a3749..8874240 100644
--- a/llvm/test/CodeGen/AMDGPU/integer-mad-patterns.ll
+++ b/llvm/test/CodeGen/AMDGPU/integer-mad-patterns.ll
@@ -473,89 +473,47 @@ define <2 x i16> @clpeak_imad_pat_v2i16(<2 x i16> %x, <2 x i16> %y) {
; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v1, v0
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-SDAG-LABEL: clpeak_imad_pat_v2i16:
-; GFX9-SDAG: ; %bb.0: ; %entry
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0]
-; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v2, v0, v1
-; GFX9-SDAG-NEXT: v_pk_add_u16 v0, v2, v0
-; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v1, v2, -1 op_sel_hi:[1,0]
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v2, v0, -1 op_sel_hi:[1,0]
-; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: clpeak_imad_pat_v2i16:
-; GFX9-GISEL: ; %bb.0: ; %entry
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
-; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v2, v0, v1
-; GFX9-GISEL-NEXT: v_pk_add_u16 v0, v2, v0
-; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX9-GISEL-NEXT: v_pk_add_u16 v1, v2, 1 op_sel_hi:[1,0]
-; GFX9-GISEL-NEXT: v_pk_add_u16 v2, v0, 1 op_sel_hi:[1,0]
-; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v2
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-SDAG-LABEL: clpeak_imad_pat_v2i16:
-; GFX10-SDAG: ; %bb.0: ; %entry
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0]
-; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v2, v0, v1
-; GFX10-SDAG-NEXT: v_pk_add_u16 v0, v2, v0
-; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v1, v2, -1 op_sel_hi:[1,0]
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v2, v0, -1 op_sel_hi:[1,0]
-; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
-; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-GISEL-LABEL: clpeak_imad_pat_v2i16:
-; GFX10-GISEL: ; %bb.0: ; %entry
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
-; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v2, v0, v1
-; GFX10-GISEL-NEXT: v_pk_add_u16 v0, v2, v0
-; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX10-GISEL-NEXT: v_pk_add_u16 v1, v2, 1 op_sel_hi:[1,0]
-; GFX10-GISEL-NEXT: v_pk_add_u16 v2, v0, 1 op_sel_hi:[1,0]
-; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v2
-; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: clpeak_imad_pat_v2i16:
+; GFX9: ; %bb.0: ; %entry
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
+; GFX9-NEXT: v_pk_mul_lo_u16 v2, v0, v1
+; GFX9-NEXT: v_pk_add_u16 v0, v2, v0
+; GFX9-NEXT: v_pk_mul_lo_u16 v0, v0, v1
+; GFX9-NEXT: v_pk_add_u16 v1, v2, 1 op_sel_hi:[1,0]
+; GFX9-NEXT: v_pk_add_u16 v2, v0, 1 op_sel_hi:[1,0]
+; GFX9-NEXT: v_pk_mul_lo_u16 v0, v0, v1
+; GFX9-NEXT: v_pk_mul_lo_u16 v0, v0, v2
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SDAG-LABEL: clpeak_imad_pat_v2i16:
-; GFX11-SDAG: ; %bb.0: ; %entry
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0]
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v2, v0, v1
-; GFX11-SDAG-NEXT: v_pk_add_u16 v0, v2, v0
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v1, v2, -1 op_sel_hi:[1,0]
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v2, v0, -1 op_sel_hi:[1,0]
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
-; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX10-LABEL: clpeak_imad_pat_v2i16:
+; GFX10: ; %bb.0: ; %entry
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
+; GFX10-NEXT: v_pk_mul_lo_u16 v2, v0, v1
+; GFX10-NEXT: v_pk_add_u16 v0, v2, v0
+; GFX10-NEXT: v_pk_mul_lo_u16 v0, v0, v1
+; GFX10-NEXT: v_pk_add_u16 v1, v2, 1 op_sel_hi:[1,0]
+; GFX10-NEXT: v_pk_add_u16 v2, v0, 1 op_sel_hi:[1,0]
+; GFX10-NEXT: v_pk_mul_lo_u16 v0, v0, v1
+; GFX10-NEXT: v_pk_mul_lo_u16 v0, v0, v2
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-GISEL-LABEL: clpeak_imad_pat_v2i16:
-; GFX11-GISEL: ; %bb.0: ; %entry
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_pk_mul_lo_u16 v2, v0, v1
-; GFX11-GISEL-NEXT: v_pk_add_u16 v0, v2, v0
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX11-GISEL-NEXT: v_pk_add_u16 v1, v2, 1 op_sel_hi:[1,0]
-; GFX11-GISEL-NEXT: v_pk_add_u16 v2, v0, 1 op_sel_hi:[1,0]
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX11-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v2
-; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX11-LABEL: clpeak_imad_pat_v2i16:
+; GFX11: ; %bb.0: ; %entry
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_pk_mul_lo_u16 v2, v0, v1
+; GFX11-NEXT: v_pk_add_u16 v0, v2, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_pk_mul_lo_u16 v0, v0, v1
+; GFX11-NEXT: v_pk_add_u16 v1, v2, 1 op_sel_hi:[1,0]
+; GFX11-NEXT: v_pk_add_u16 v2, v0, 1 op_sel_hi:[1,0]
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_pk_mul_lo_u16 v0, v0, v1
+; GFX11-NEXT: v_pk_mul_lo_u16 v0, v0, v2
+; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:
%y18 = add <2 x i16> %x, <i16 1, i16 1>
%add = mul <2 x i16> %y18, %y
@@ -733,18 +691,18 @@ define <3 x i16> @clpeak_imad_pat_v3i16(<3 x i16> %x, <3 x i16> %y) {
; GFX9-SDAG-LABEL: clpeak_imad_pat_v3i16:
; GFX9-SDAG: ; %bb.0: ; %entry
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v1, v1, -1
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0]
+; GFX9-SDAG-NEXT: v_pk_add_u16 v1, v1, 1
+; GFX9-SDAG-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v4, v0, v2
; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v5, v1, v3
; GFX9-SDAG-NEXT: v_pk_add_u16 v1, v5, v1
; GFX9-SDAG-NEXT: v_pk_add_u16 v0, v4, v0
; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v2, v4, -1 op_sel_hi:[1,0]
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v3, v5, -1
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v4, v1, -1
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v5, v0, -1 op_sel_hi:[1,0]
+; GFX9-SDAG-NEXT: v_pk_add_u16 v2, v4, 1 op_sel_hi:[1,0]
+; GFX9-SDAG-NEXT: v_pk_add_u16 v3, v5, 1
+; GFX9-SDAG-NEXT: v_pk_add_u16 v4, v1, 1
+; GFX9-SDAG-NEXT: v_pk_add_u16 v5, v0, 1 op_sel_hi:[1,0]
; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3
; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v5
@@ -775,18 +733,18 @@ define <3 x i16> @clpeak_imad_pat_v3i16(<3 x i16> %x, <3 x i16> %y) {
; GFX10-SDAG-LABEL: clpeak_imad_pat_v3i16:
; GFX10-SDAG: ; %bb.0: ; %entry
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v1, v1, -1
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0]
+; GFX10-SDAG-NEXT: v_pk_add_u16 v1, v1, 1
+; GFX10-SDAG-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v4, v1, v3
; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v5, v0, v2
; GFX10-SDAG-NEXT: v_pk_add_u16 v1, v4, v1
; GFX10-SDAG-NEXT: v_pk_add_u16 v0, v5, v0
; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3
; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v2, v5, -1 op_sel_hi:[1,0]
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v3, v4, -1
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v4, v1, -1
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v5, v0, -1 op_sel_hi:[1,0]
+; GFX10-SDAG-NEXT: v_pk_add_u16 v2, v5, 1 op_sel_hi:[1,0]
+; GFX10-SDAG-NEXT: v_pk_add_u16 v3, v4, 1
+; GFX10-SDAG-NEXT: v_pk_add_u16 v4, v1, 1
+; GFX10-SDAG-NEXT: v_pk_add_u16 v5, v0, 1 op_sel_hi:[1,0]
; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3
; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v5
@@ -817,8 +775,8 @@ define <3 x i16> @clpeak_imad_pat_v3i16(<3 x i16> %x, <3 x i16> %y) {
; GFX11-SDAG-LABEL: clpeak_imad_pat_v3i16:
; GFX11-SDAG: ; %bb.0: ; %entry
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v1, v1, -1
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0]
+; GFX11-SDAG-NEXT: v_pk_add_u16 v1, v1, 1
+; GFX11-SDAG-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v4, v1, v3
; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v5, v0, v2
@@ -828,11 +786,11 @@ define <3 x i16> @clpeak_imad_pat_v3i16(<3 x i16> %x, <3 x i16> %y) {
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3
; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v2, v5, -1 op_sel_hi:[1,0]
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v3, v4, -1
+; GFX11-SDAG-NEXT: v_pk_add_u16 v2, v5, 1 op_sel_hi:[1,0]
+; GFX11-SDAG-NEXT: v_pk_add_u16 v3, v4, 1
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v4, v1, -1
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v5, v0, -1 op_sel_hi:[1,0]
+; GFX11-SDAG-NEXT: v_pk_add_u16 v4, v1, 1
+; GFX11-SDAG-NEXT: v_pk_add_u16 v5, v0, 1 op_sel_hi:[1,0]
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3
@@ -1130,18 +1088,18 @@ define <4 x i16> @clpeak_imad_pat_v4i16(<4 x i16> %x, <4 x i16> %y) {
; GFX9-SDAG-LABEL: clpeak_imad_pat_v4i16:
; GFX9-SDAG: ; %bb.0: ; %entry
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v1, v1, -1 op_sel_hi:[1,0]
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0]
+; GFX9-SDAG-NEXT: v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0]
+; GFX9-SDAG-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v4, v0, v2
; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v5, v1, v3
; GFX9-SDAG-NEXT: v_pk_add_u16 v1, v5, v1
; GFX9-SDAG-NEXT: v_pk_add_u16 v0, v4, v0
; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v2, v4, -1 op_sel_hi:[1,0]
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v3, v5, -1 op_sel_hi:[1,0]
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v4, v1, -1 op_sel_hi:[1,0]
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v5, v0, -1 op_sel_hi:[1,0]
+; GFX9-SDAG-NEXT: v_pk_add_u16 v2, v4, 1 op_sel_hi:[1,0]
+; GFX9-SDAG-NEXT: v_pk_add_u16 v3, v5, 1 op_sel_hi:[1,0]
+; GFX9-SDAG-NEXT: v_pk_add_u16 v4, v1, 1 op_sel_hi:[1,0]
+; GFX9-SDAG-NEXT: v_pk_add_u16 v5, v0, 1 op_sel_hi:[1,0]
; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3
; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v5
@@ -1172,18 +1130,18 @@ define <4 x i16> @clpeak_imad_pat_v4i16(<4 x i16> %x, <4 x i16> %y) {
; GFX10-SDAG-LABEL: clpeak_imad_pat_v4i16:
; GFX10-SDAG: ; %bb.0: ; %entry
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v1, v1, -1 op_sel_hi:[1,0]
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0]
+; GFX10-SDAG-NEXT: v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0]
+; GFX10-SDAG-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v4, v1, v3
; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v5, v0, v2
; GFX10-SDAG-NEXT: v_pk_add_u16 v1, v4, v1
; GFX10-SDAG-NEXT: v_pk_add_u16 v0, v5, v0
; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3
; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v2, v5, -1 op_sel_hi:[1,0]
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v3, v4, -1 op_sel_hi:[1,0]
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v4, v1, -1 op_sel_hi:[1,0]
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v5, v0, -1 op_sel_hi:[1,0]
+; GFX10-SDAG-NEXT: v_pk_add_u16 v2, v5, 1 op_sel_hi:[1,0]
+; GFX10-SDAG-NEXT: v_pk_add_u16 v3, v4, 1 op_sel_hi:[1,0]
+; GFX10-SDAG-NEXT: v_pk_add_u16 v4, v1, 1 op_sel_hi:[1,0]
+; GFX10-SDAG-NEXT: v_pk_add_u16 v5, v0, 1 op_sel_hi:[1,0]
; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3
; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v5
@@ -1214,8 +1172,8 @@ define <4 x i16> @clpeak_imad_pat_v4i16(<4 x i16> %x, <4 x i16> %y) {
; GFX11-SDAG-LABEL: clpeak_imad_pat_v4i16:
; GFX11-SDAG: ; %bb.0: ; %entry
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v1, v1, -1 op_sel_hi:[1,0]
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0]
+; GFX11-SDAG-NEXT: v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0]
+; GFX11-SDAG-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v4, v1, v3
; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v5, v0, v2
@@ -1225,11 +1183,11 @@ define <4 x i16> @clpeak_imad_pat_v4i16(<4 x i16> %x, <4 x i16> %y) {
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3
; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v2, v5, -1 op_sel_hi:[1,0]
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v3, v4, -1 op_sel_hi:[1,0]
+; GFX11-SDAG-NEXT: v_pk_add_u16 v2, v5, 1 op_sel_hi:[1,0]
+; GFX11-SDAG-NEXT: v_pk_add_u16 v3, v4, 1 op_sel_hi:[1,0]
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v4, v1, -1 op_sel_hi:[1,0]
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v5, v0, -1 op_sel_hi:[1,0]
+; GFX11-SDAG-NEXT: v_pk_add_u16 v4, v1, 1 op_sel_hi:[1,0]
+; GFX11-SDAG-NEXT: v_pk_add_u16 v5, v0, 1 op_sel_hi:[1,0]
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3
@@ -1555,89 +1513,47 @@ define <2 x i16> @clpeak_umad_pat_v2i16(<2 x i16> %x, <2 x i16> %y) {
; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v1, v0
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-SDAG-LABEL: clpeak_umad_pat_v2i16:
-; GFX9-SDAG: ; %bb.0: ; %entry
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0]
-; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v2, v0, v1
-; GFX9-SDAG-NEXT: v_pk_add_u16 v0, v2, v0
-; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v1, v2, -1 op_sel_hi:[1,0]
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v2, v0, -1 op_sel_hi:[1,0]
-; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: clpeak_umad_pat_v2i16:
-; GFX9-GISEL: ; %bb.0: ; %entry
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
-; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v2, v0, v1
-; GFX9-GISEL-NEXT: v_pk_add_u16 v0, v2, v0
-; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX9-GISEL-NEXT: v_pk_add_u16 v1, v2, 1 op_sel_hi:[1,0]
-; GFX9-GISEL-NEXT: v_pk_add_u16 v2, v0, 1 op_sel_hi:[1,0]
-; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v2
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-SDAG-LABEL: clpeak_umad_pat_v2i16:
-; GFX10-SDAG: ; %bb.0: ; %entry
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0]
-; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v2, v0, v1
-; GFX10-SDAG-NEXT: v_pk_add_u16 v0, v2, v0
-; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v1, v2, -1 op_sel_hi:[1,0]
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v2, v0, -1 op_sel_hi:[1,0]
-; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
-; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-GISEL-LABEL: clpeak_umad_pat_v2i16:
-; GFX10-GISEL: ; %bb.0: ; %entry
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
-; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v2, v0, v1
-; GFX10-GISEL-NEXT: v_pk_add_u16 v0, v2, v0
-; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX10-GISEL-NEXT: v_pk_add_u16 v1, v2, 1 op_sel_hi:[1,0]
-; GFX10-GISEL-NEXT: v_pk_add_u16 v2, v0, 1 op_sel_hi:[1,0]
-; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v2
-; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: clpeak_umad_pat_v2i16:
+; GFX9: ; %bb.0: ; %entry
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
+; GFX9-NEXT: v_pk_mul_lo_u16 v2, v0, v1
+; GFX9-NEXT: v_pk_add_u16 v0, v2, v0
+; GFX9-NEXT: v_pk_mul_lo_u16 v0, v0, v1
+; GFX9-NEXT: v_pk_add_u16 v1, v2, 1 op_sel_hi:[1,0]
+; GFX9-NEXT: v_pk_add_u16 v2, v0, 1 op_sel_hi:[1,0]
+; GFX9-NEXT: v_pk_mul_lo_u16 v0, v0, v1
+; GFX9-NEXT: v_pk_mul_lo_u16 v0, v0, v2
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SDAG-LABEL: clpeak_umad_pat_v2i16:
-; GFX11-SDAG: ; %bb.0: ; %entry
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0]
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v2, v0, v1
-; GFX11-SDAG-NEXT: v_pk_add_u16 v0, v2, v0
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v1, v2, -1 op_sel_hi:[1,0]
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v2, v0, -1 op_sel_hi:[1,0]
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
-; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX10-LABEL: clpeak_umad_pat_v2i16:
+; GFX10: ; %bb.0: ; %entry
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
+; GFX10-NEXT: v_pk_mul_lo_u16 v2, v0, v1
+; GFX10-NEXT: v_pk_add_u16 v0, v2, v0
+; GFX10-NEXT: v_pk_mul_lo_u16 v0, v0, v1
+; GFX10-NEXT: v_pk_add_u16 v1, v2, 1 op_sel_hi:[1,0]
+; GFX10-NEXT: v_pk_add_u16 v2, v0, 1 op_sel_hi:[1,0]
+; GFX10-NEXT: v_pk_mul_lo_u16 v0, v0, v1
+; GFX10-NEXT: v_pk_mul_lo_u16 v0, v0, v2
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-GISEL-LABEL: clpeak_umad_pat_v2i16:
-; GFX11-GISEL: ; %bb.0: ; %entry
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_pk_mul_lo_u16 v2, v0, v1
-; GFX11-GISEL-NEXT: v_pk_add_u16 v0, v2, v0
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX11-GISEL-NEXT: v_pk_add_u16 v1, v2, 1 op_sel_hi:[1,0]
-; GFX11-GISEL-NEXT: v_pk_add_u16 v2, v0, 1 op_sel_hi:[1,0]
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX11-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v2
-; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX11-LABEL: clpeak_umad_pat_v2i16:
+; GFX11: ; %bb.0: ; %entry
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_pk_mul_lo_u16 v2, v0, v1
+; GFX11-NEXT: v_pk_add_u16 v0, v2, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_pk_mul_lo_u16 v0, v0, v1
+; GFX11-NEXT: v_pk_add_u16 v1, v2, 1 op_sel_hi:[1,0]
+; GFX11-NEXT: v_pk_add_u16 v2, v0, 1 op_sel_hi:[1,0]
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_pk_mul_lo_u16 v0, v0, v1
+; GFX11-NEXT: v_pk_mul_lo_u16 v0, v0, v2
+; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:
%y18 = add <2 x i16> %x, <i16 1, i16 1>
%add = mul <2 x i16> %y18, %y
@@ -1815,18 +1731,18 @@ define <3 x i16> @clpeak_umad_pat_v3i16(<3 x i16> %x, <3 x i16> %y) {
; GFX9-SDAG-LABEL: clpeak_umad_pat_v3i16:
; GFX9-SDAG: ; %bb.0: ; %entry
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v1, v1, -1
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0]
+; GFX9-SDAG-NEXT: v_pk_add_u16 v1, v1, 1
+; GFX9-SDAG-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v4, v0, v2
; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v5, v1, v3
; GFX9-SDAG-NEXT: v_pk_add_u16 v1, v5, v1
; GFX9-SDAG-NEXT: v_pk_add_u16 v0, v4, v0
; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v2, v4, -1 op_sel_hi:[1,0]
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v3, v5, -1
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v4, v1, -1
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v5, v0, -1 op_sel_hi:[1,0]
+; GFX9-SDAG-NEXT: v_pk_add_u16 v2, v4, 1 op_sel_hi:[1,0]
+; GFX9-SDAG-NEXT: v_pk_add_u16 v3, v5, 1
+; GFX9-SDAG-NEXT: v_pk_add_u16 v4, v1, 1
+; GFX9-SDAG-NEXT: v_pk_add_u16 v5, v0, 1 op_sel_hi:[1,0]
; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3
; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v5
@@ -1857,18 +1773,18 @@ define <3 x i16> @clpeak_umad_pat_v3i16(<3 x i16> %x, <3 x i16> %y) {
; GFX10-SDAG-LABEL: clpeak_umad_pat_v3i16:
; GFX10-SDAG: ; %bb.0: ; %entry
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v1, v1, -1
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0]
+; GFX10-SDAG-NEXT: v_pk_add_u16 v1, v1, 1
+; GFX10-SDAG-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v4, v1, v3
; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v5, v0, v2
; GFX10-SDAG-NEXT: v_pk_add_u16 v1, v4, v1
; GFX10-SDAG-NEXT: v_pk_add_u16 v0, v5, v0
; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3
; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v2, v5, -1 op_sel_hi:[1,0]
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v3, v4, -1
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v4, v1, -1
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v5, v0, -1 op_sel_hi:[1,0]
+; GFX10-SDAG-NEXT: v_pk_add_u16 v2, v5, 1 op_sel_hi:[1,0]
+; GFX10-SDAG-NEXT: v_pk_add_u16 v3, v4, 1
+; GFX10-SDAG-NEXT: v_pk_add_u16 v4, v1, 1
+; GFX10-SDAG-NEXT: v_pk_add_u16 v5, v0, 1 op_sel_hi:[1,0]
; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3
; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v5
@@ -1899,8 +1815,8 @@ define <3 x i16> @clpeak_umad_pat_v3i16(<3 x i16> %x, <3 x i16> %y) {
; GFX11-SDAG-LABEL: clpeak_umad_pat_v3i16:
; GFX11-SDAG: ; %bb.0: ; %entry
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v1, v1, -1
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0]
+; GFX11-SDAG-NEXT: v_pk_add_u16 v1, v1, 1
+; GFX11-SDAG-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v4, v1, v3
; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v5, v0, v2
@@ -1910,11 +1826,11 @@ define <3 x i16> @clpeak_umad_pat_v3i16(<3 x i16> %x, <3 x i16> %y) {
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3
; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v2, v5, -1 op_sel_hi:[1,0]
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v3, v4, -1
+; GFX11-SDAG-NEXT: v_pk_add_u16 v2, v5, 1 op_sel_hi:[1,0]
+; GFX11-SDAG-NEXT: v_pk_add_u16 v3, v4, 1
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v4, v1, -1
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v5, v0, -1 op_sel_hi:[1,0]
+; GFX11-SDAG-NEXT: v_pk_add_u16 v4, v1, 1
+; GFX11-SDAG-NEXT: v_pk_add_u16 v5, v0, 1 op_sel_hi:[1,0]
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3
@@ -2212,18 +2128,18 @@ define <4 x i16> @clpeak_umad_pat_v4i16(<4 x i16> %x, <4 x i16> %y) {
; GFX9-SDAG-LABEL: clpeak_umad_pat_v4i16:
; GFX9-SDAG: ; %bb.0: ; %entry
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v1, v1, -1 op_sel_hi:[1,0]
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0]
+; GFX9-SDAG-NEXT: v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0]
+; GFX9-SDAG-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v4, v0, v2
; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v5, v1, v3
; GFX9-SDAG-NEXT: v_pk_add_u16 v1, v5, v1
; GFX9-SDAG-NEXT: v_pk_add_u16 v0, v4, v0
; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v2, v4, -1 op_sel_hi:[1,0]
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v3, v5, -1 op_sel_hi:[1,0]
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v4, v1, -1 op_sel_hi:[1,0]
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v5, v0, -1 op_sel_hi:[1,0]
+; GFX9-SDAG-NEXT: v_pk_add_u16 v2, v4, 1 op_sel_hi:[1,0]
+; GFX9-SDAG-NEXT: v_pk_add_u16 v3, v5, 1 op_sel_hi:[1,0]
+; GFX9-SDAG-NEXT: v_pk_add_u16 v4, v1, 1 op_sel_hi:[1,0]
+; GFX9-SDAG-NEXT: v_pk_add_u16 v5, v0, 1 op_sel_hi:[1,0]
; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3
; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v5
@@ -2254,18 +2170,18 @@ define <4 x i16> @clpeak_umad_pat_v4i16(<4 x i16> %x, <4 x i16> %y) {
; GFX10-SDAG-LABEL: clpeak_umad_pat_v4i16:
; GFX10-SDAG: ; %bb.0: ; %entry
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v1, v1, -1 op_sel_hi:[1,0]
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0]
+; GFX10-SDAG-NEXT: v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0]
+; GFX10-SDAG-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v4, v1, v3
; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v5, v0, v2
; GFX10-SDAG-NEXT: v_pk_add_u16 v1, v4, v1
; GFX10-SDAG-NEXT: v_pk_add_u16 v0, v5, v0
; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3
; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v2, v5, -1 op_sel_hi:[1,0]
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v3, v4, -1 op_sel_hi:[1,0]
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v4, v1, -1 op_sel_hi:[1,0]
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v5, v0, -1 op_sel_hi:[1,0]
+; GFX10-SDAG-NEXT: v_pk_add_u16 v2, v5, 1 op_sel_hi:[1,0]
+; GFX10-SDAG-NEXT: v_pk_add_u16 v3, v4, 1 op_sel_hi:[1,0]
+; GFX10-SDAG-NEXT: v_pk_add_u16 v4, v1, 1 op_sel_hi:[1,0]
+; GFX10-SDAG-NEXT: v_pk_add_u16 v5, v0, 1 op_sel_hi:[1,0]
; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3
; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v5
@@ -2296,8 +2212,8 @@ define <4 x i16> @clpeak_umad_pat_v4i16(<4 x i16> %x, <4 x i16> %y) {
; GFX11-SDAG-LABEL: clpeak_umad_pat_v4i16:
; GFX11-SDAG: ; %bb.0: ; %entry
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v1, v1, -1 op_sel_hi:[1,0]
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0]
+; GFX11-SDAG-NEXT: v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0]
+; GFX11-SDAG-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v4, v1, v3
; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v5, v0, v2
@@ -2307,11 +2223,11 @@ define <4 x i16> @clpeak_umad_pat_v4i16(<4 x i16> %x, <4 x i16> %y) {
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3
; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v2, v5, -1 op_sel_hi:[1,0]
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v3, v4, -1 op_sel_hi:[1,0]
+; GFX11-SDAG-NEXT: v_pk_add_u16 v2, v5, 1 op_sel_hi:[1,0]
+; GFX11-SDAG-NEXT: v_pk_add_u16 v3, v4, 1 op_sel_hi:[1,0]
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v4, v1, -1 op_sel_hi:[1,0]
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v5, v0, -1 op_sel_hi:[1,0]
+; GFX11-SDAG-NEXT: v_pk_add_u16 v4, v1, 1 op_sel_hi:[1,0]
+; GFX11-SDAG-NEXT: v_pk_add_u16 v5, v0, 1 op_sel_hi:[1,0]
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3
@@ -7277,143 +7193,74 @@ define <2 x i16> @clpeak_imad_pat_v2i16_x2(<2 x i16> %x, <2 x i16> %y) {
; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v1, v0
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-SDAG-LABEL: clpeak_imad_pat_v2i16_x2:
-; GFX9-SDAG: ; %bb.0: ; %entry
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0]
-; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v2, v0, v1
-; GFX9-SDAG-NEXT: v_pk_add_u16 v0, v2, v0
-; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v1, v2, -1 op_sel_hi:[1,0]
-; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v2, v0, v1
-; GFX9-SDAG-NEXT: v_pk_add_u16 v1, v2, v1
-; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v1, v0
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v1, v2, -1 op_sel_hi:[1,0]
-; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v2, v0, v1
-; GFX9-SDAG-NEXT: v_pk_add_u16 v1, v2, v1
-; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v1, v0
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v1, v2, -1 op_sel_hi:[1,0]
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v2, v0, -1 op_sel_hi:[1,0]
-; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: clpeak_imad_pat_v2i16_x2:
-; GFX9-GISEL: ; %bb.0: ; %entry
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
-; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v2, v0, v1
-; GFX9-GISEL-NEXT: v_pk_add_u16 v0, v2, v0
-; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX9-GISEL-NEXT: v_pk_add_u16 v1, v2, 1 op_sel_hi:[1,0]
-; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v2, v0, v1
-; GFX9-GISEL-NEXT: v_pk_add_u16 v1, v2, v1
-; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v0, v1, v0
-; GFX9-GISEL-NEXT: v_pk_add_u16 v1, v2, 1 op_sel_hi:[1,0]
-; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v2, v0, v1
-; GFX9-GISEL-NEXT: v_pk_add_u16 v1, v2, v1
-; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v0, v1, v0
-; GFX9-GISEL-NEXT: v_pk_add_u16 v1, v2, 1 op_sel_hi:[1,0]
-; GFX9-GISEL-NEXT: v_pk_add_u16 v2, v0, 1 op_sel_hi:[1,0]
-; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v2
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-SDAG-LABEL: clpeak_imad_pat_v2i16_x2:
-; GFX10-SDAG: ; %bb.0: ; %entry
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0]
-; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v2, v0, v1
-; GFX10-SDAG-NEXT: v_pk_add_u16 v0, v2, v0
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v2, v2, -1 op_sel_hi:[1,0]
-; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v1, v0, v2
-; GFX10-SDAG-NEXT: v_pk_add_u16 v2, v1, v2
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v1, v1, -1 op_sel_hi:[1,0]
-; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v2, v0
-; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v2, v0, v1
-; GFX10-SDAG-NEXT: v_pk_add_u16 v1, v2, v1
-; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v1, v0
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v1, v2, -1 op_sel_hi:[1,0]
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v2, v0, -1 op_sel_hi:[1,0]
-; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
-; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-GISEL-LABEL: clpeak_imad_pat_v2i16_x2:
-; GFX10-GISEL: ; %bb.0: ; %entry
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
-; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v2, v0, v1
-; GFX10-GISEL-NEXT: v_pk_add_u16 v0, v2, v0
-; GFX10-GISEL-NEXT: v_pk_add_u16 v2, v2, 1 op_sel_hi:[1,0]
-; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v1, v0, v2
-; GFX10-GISEL-NEXT: v_pk_add_u16 v2, v1, v2
-; GFX10-GISEL-NEXT: v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0]
-; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v0, v2, v0
-; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v2, v0, v1
-; GFX10-GISEL-NEXT: v_pk_add_u16 v1, v2, v1
-; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v0, v1, v0
-; GFX10-GISEL-NEXT: v_pk_add_u16 v1, v2, 1 op_sel_hi:[1,0]
-; GFX10-GISEL-NEXT: v_pk_add_u16 v2, v0, 1 op_sel_hi:[1,0]
-; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v2
-; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: clpeak_imad_pat_v2i16_x2:
+; GFX9: ; %bb.0: ; %entry
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
+; GFX9-NEXT: v_pk_mul_lo_u16 v2, v0, v1
+; GFX9-NEXT: v_pk_add_u16 v0, v2, v0
+; GFX9-NEXT: v_pk_mul_lo_u16 v0, v0, v1
+; GFX9-NEXT: v_pk_add_u16 v1, v2, 1 op_sel_hi:[1,0]
+; GFX9-NEXT: v_pk_mul_lo_u16 v2, v0, v1
+; GFX9-NEXT: v_pk_add_u16 v1, v2, v1
+; GFX9-NEXT: v_pk_mul_lo_u16 v0, v1, v0
+; GFX9-NEXT: v_pk_add_u16 v1, v2, 1 op_sel_hi:[1,0]
+; GFX9-NEXT: v_pk_mul_lo_u16 v2, v0, v1
+; GFX9-NEXT: v_pk_add_u16 v1, v2, v1
+; GFX9-NEXT: v_pk_mul_lo_u16 v0, v1, v0
+; GFX9-NEXT: v_pk_add_u16 v1, v2, 1 op_sel_hi:[1,0]
+; GFX9-NEXT: v_pk_add_u16 v2, v0, 1 op_sel_hi:[1,0]
+; GFX9-NEXT: v_pk_mul_lo_u16 v0, v0, v1
+; GFX9-NEXT: v_pk_mul_lo_u16 v0, v0, v2
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SDAG-LABEL: clpeak_imad_pat_v2i16_x2:
-; GFX11-SDAG: ; %bb.0: ; %entry
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0]
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v2, v0, v1
-; GFX11-SDAG-NEXT: v_pk_add_u16 v0, v2, v0
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v2, v2, -1 op_sel_hi:[1,0]
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v1, v0, v2
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-SDAG-NEXT: v_pk_add_u16 v2, v1, v2
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v1, v1, -1 op_sel_hi:[1,0]
-; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v0, v2, v0
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v2, v0, v1
-; GFX11-SDAG-NEXT: v_pk_add_u16 v1, v2, v1
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v0, v1, v0
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v1, v2, -1 op_sel_hi:[1,0]
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v2, v0, -1 op_sel_hi:[1,0]
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
-; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX10-LABEL: clpeak_imad_pat_v2i16_x2:
+; GFX10: ; %bb.0: ; %entry
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
+; GFX10-NEXT: v_pk_mul_lo_u16 v2, v0, v1
+; GFX10-NEXT: v_pk_add_u16 v0, v2, v0
+; GFX10-NEXT: v_pk_add_u16 v2, v2, 1 op_sel_hi:[1,0]
+; GFX10-NEXT: v_pk_mul_lo_u16 v0, v0, v1
+; GFX10-NEXT: v_pk_mul_lo_u16 v1, v0, v2
+; GFX10-NEXT: v_pk_add_u16 v2, v1, v2
+; GFX10-NEXT: v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0]
+; GFX10-NEXT: v_pk_mul_lo_u16 v0, v2, v0
+; GFX10-NEXT: v_pk_mul_lo_u16 v2, v0, v1
+; GFX10-NEXT: v_pk_add_u16 v1, v2, v1
+; GFX10-NEXT: v_pk_mul_lo_u16 v0, v1, v0
+; GFX10-NEXT: v_pk_add_u16 v1, v2, 1 op_sel_hi:[1,0]
+; GFX10-NEXT: v_pk_add_u16 v2, v0, 1 op_sel_hi:[1,0]
+; GFX10-NEXT: v_pk_mul_lo_u16 v0, v0, v1
+; GFX10-NEXT: v_pk_mul_lo_u16 v0, v0, v2
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-GISEL-LABEL: clpeak_imad_pat_v2i16_x2:
-; GFX11-GISEL: ; %bb.0: ; %entry
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_pk_mul_lo_u16 v2, v0, v1
-; GFX11-GISEL-NEXT: v_pk_add_u16 v0, v2, v0
-; GFX11-GISEL-NEXT: v_pk_add_u16 v2, v2, 1 op_sel_hi:[1,0]
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX11-GISEL-NEXT: v_pk_mul_lo_u16 v1, v0, v2
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_pk_add_u16 v2, v1, v2
-; GFX11-GISEL-NEXT: v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0]
-; GFX11-GISEL-NEXT: v_pk_mul_lo_u16 v0, v2, v0
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_pk_mul_lo_u16 v2, v0, v1
-; GFX11-GISEL-NEXT: v_pk_add_u16 v1, v2, v1
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_pk_mul_lo_u16 v0, v1, v0
-; GFX11-GISEL-NEXT: v_pk_add_u16 v1, v2, 1 op_sel_hi:[1,0]
-; GFX11-GISEL-NEXT: v_pk_add_u16 v2, v0, 1 op_sel_hi:[1,0]
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX11-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v2
-; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX11-LABEL: clpeak_imad_pat_v2i16_x2:
+; GFX11: ; %bb.0: ; %entry
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_pk_mul_lo_u16 v2, v0, v1
+; GFX11-NEXT: v_pk_add_u16 v0, v2, v0
+; GFX11-NEXT: v_pk_add_u16 v2, v2, 1 op_sel_hi:[1,0]
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_pk_mul_lo_u16 v0, v0, v1
+; GFX11-NEXT: v_pk_mul_lo_u16 v1, v0, v2
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_pk_add_u16 v2, v1, v2
+; GFX11-NEXT: v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0]
+; GFX11-NEXT: v_pk_mul_lo_u16 v0, v2, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_pk_mul_lo_u16 v2, v0, v1
+; GFX11-NEXT: v_pk_add_u16 v1, v2, v1
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_pk_mul_lo_u16 v0, v1, v0
+; GFX11-NEXT: v_pk_add_u16 v1, v2, 1 op_sel_hi:[1,0]
+; GFX11-NEXT: v_pk_add_u16 v2, v0, 1 op_sel_hi:[1,0]
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_pk_mul_lo_u16 v0, v0, v1
+; GFX11-NEXT: v_pk_mul_lo_u16 v0, v0, v2
+; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:
%y38 = add <2 x i16> %x, <i16 1, i16 1>
%add = mul <2 x i16> %y38, %y
@@ -7654,143 +7501,74 @@ define <2 x i16> @clpeak_umad_pat_v2i16_x2(<2 x i16> %x, <2 x i16> %y) {
; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v1, v0
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-SDAG-LABEL: clpeak_umad_pat_v2i16_x2:
-; GFX9-SDAG: ; %bb.0: ; %entry
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0]
-; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v2, v0, v1
-; GFX9-SDAG-NEXT: v_pk_add_u16 v0, v2, v0
-; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v1, v2, -1 op_sel_hi:[1,0]
-; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v2, v0, v1
-; GFX9-SDAG-NEXT: v_pk_add_u16 v1, v2, v1
-; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v1, v0
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v1, v2, -1 op_sel_hi:[1,0]
-; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v2, v0, v1
-; GFX9-SDAG-NEXT: v_pk_add_u16 v1, v2, v1
-; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v1, v0
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v1, v2, -1 op_sel_hi:[1,0]
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v2, v0, -1 op_sel_hi:[1,0]
-; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: clpeak_umad_pat_v2i16_x2:
-; GFX9-GISEL: ; %bb.0: ; %entry
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
-; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v2, v0, v1
-; GFX9-GISEL-NEXT: v_pk_add_u16 v0, v2, v0
-; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX9-GISEL-NEXT: v_pk_add_u16 v1, v2, 1 op_sel_hi:[1,0]
-; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v2, v0, v1
-; GFX9-GISEL-NEXT: v_pk_add_u16 v1, v2, v1
-; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v0, v1, v0
-; GFX9-GISEL-NEXT: v_pk_add_u16 v1, v2, 1 op_sel_hi:[1,0]
-; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v2, v0, v1
-; GFX9-GISEL-NEXT: v_pk_add_u16 v1, v2, v1
-; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v0, v1, v0
-; GFX9-GISEL-NEXT: v_pk_add_u16 v1, v2, 1 op_sel_hi:[1,0]
-; GFX9-GISEL-NEXT: v_pk_add_u16 v2, v0, 1 op_sel_hi:[1,0]
-; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v2
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-SDAG-LABEL: clpeak_umad_pat_v2i16_x2:
-; GFX10-SDAG: ; %bb.0: ; %entry
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0]
-; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v2, v0, v1
-; GFX10-SDAG-NEXT: v_pk_add_u16 v0, v2, v0
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v2, v2, -1 op_sel_hi:[1,0]
-; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v1, v0, v2
-; GFX10-SDAG-NEXT: v_pk_add_u16 v2, v1, v2
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v1, v1, -1 op_sel_hi:[1,0]
-; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v2, v0
-; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v2, v0, v1
-; GFX10-SDAG-NEXT: v_pk_add_u16 v1, v2, v1
-; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v1, v0
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v1, v2, -1 op_sel_hi:[1,0]
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v2, v0, -1 op_sel_hi:[1,0]
-; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
-; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-GISEL-LABEL: clpeak_umad_pat_v2i16_x2:
-; GFX10-GISEL: ; %bb.0: ; %entry
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
-; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v2, v0, v1
-; GFX10-GISEL-NEXT: v_pk_add_u16 v0, v2, v0
-; GFX10-GISEL-NEXT: v_pk_add_u16 v2, v2, 1 op_sel_hi:[1,0]
-; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v1, v0, v2
-; GFX10-GISEL-NEXT: v_pk_add_u16 v2, v1, v2
-; GFX10-GISEL-NEXT: v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0]
-; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v0, v2, v0
-; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v2, v0, v1
-; GFX10-GISEL-NEXT: v_pk_add_u16 v1, v2, v1
-; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v0, v1, v0
-; GFX10-GISEL-NEXT: v_pk_add_u16 v1, v2, 1 op_sel_hi:[1,0]
-; GFX10-GISEL-NEXT: v_pk_add_u16 v2, v0, 1 op_sel_hi:[1,0]
-; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v2
-; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: clpeak_umad_pat_v2i16_x2:
+; GFX9: ; %bb.0: ; %entry
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
+; GFX9-NEXT: v_pk_mul_lo_u16 v2, v0, v1
+; GFX9-NEXT: v_pk_add_u16 v0, v2, v0
+; GFX9-NEXT: v_pk_mul_lo_u16 v0, v0, v1
+; GFX9-NEXT: v_pk_add_u16 v1, v2, 1 op_sel_hi:[1,0]
+; GFX9-NEXT: v_pk_mul_lo_u16 v2, v0, v1
+; GFX9-NEXT: v_pk_add_u16 v1, v2, v1
+; GFX9-NEXT: v_pk_mul_lo_u16 v0, v1, v0
+; GFX9-NEXT: v_pk_add_u16 v1, v2, 1 op_sel_hi:[1,0]
+; GFX9-NEXT: v_pk_mul_lo_u16 v2, v0, v1
+; GFX9-NEXT: v_pk_add_u16 v1, v2, v1
+; GFX9-NEXT: v_pk_mul_lo_u16 v0, v1, v0
+; GFX9-NEXT: v_pk_add_u16 v1, v2, 1 op_sel_hi:[1,0]
+; GFX9-NEXT: v_pk_add_u16 v2, v0, 1 op_sel_hi:[1,0]
+; GFX9-NEXT: v_pk_mul_lo_u16 v0, v0, v1
+; GFX9-NEXT: v_pk_mul_lo_u16 v0, v0, v2
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SDAG-LABEL: clpeak_umad_pat_v2i16_x2:
-; GFX11-SDAG: ; %bb.0: ; %entry
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0]
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v2, v0, v1
-; GFX11-SDAG-NEXT: v_pk_add_u16 v0, v2, v0
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v2, v2, -1 op_sel_hi:[1,0]
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v1, v0, v2
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-SDAG-NEXT: v_pk_add_u16 v2, v1, v2
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v1, v1, -1 op_sel_hi:[1,0]
-; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v0, v2, v0
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v2, v0, v1
-; GFX11-SDAG-NEXT: v_pk_add_u16 v1, v2, v1
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v0, v1, v0
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v1, v2, -1 op_sel_hi:[1,0]
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v2, v0, -1 op_sel_hi:[1,0]
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
-; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX10-LABEL: clpeak_umad_pat_v2i16_x2:
+; GFX10: ; %bb.0: ; %entry
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
+; GFX10-NEXT: v_pk_mul_lo_u16 v2, v0, v1
+; GFX10-NEXT: v_pk_add_u16 v0, v2, v0
+; GFX10-NEXT: v_pk_add_u16 v2, v2, 1 op_sel_hi:[1,0]
+; GFX10-NEXT: v_pk_mul_lo_u16 v0, v0, v1
+; GFX10-NEXT: v_pk_mul_lo_u16 v1, v0, v2
+; GFX10-NEXT: v_pk_add_u16 v2, v1, v2
+; GFX10-NEXT: v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0]
+; GFX10-NEXT: v_pk_mul_lo_u16 v0, v2, v0
+; GFX10-NEXT: v_pk_mul_lo_u16 v2, v0, v1
+; GFX10-NEXT: v_pk_add_u16 v1, v2, v1
+; GFX10-NEXT: v_pk_mul_lo_u16 v0, v1, v0
+; GFX10-NEXT: v_pk_add_u16 v1, v2, 1 op_sel_hi:[1,0]
+; GFX10-NEXT: v_pk_add_u16 v2, v0, 1 op_sel_hi:[1,0]
+; GFX10-NEXT: v_pk_mul_lo_u16 v0, v0, v1
+; GFX10-NEXT: v_pk_mul_lo_u16 v0, v0, v2
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-GISEL-LABEL: clpeak_umad_pat_v2i16_x2:
-; GFX11-GISEL: ; %bb.0: ; %entry
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_pk_mul_lo_u16 v2, v0, v1
-; GFX11-GISEL-NEXT: v_pk_add_u16 v0, v2, v0
-; GFX11-GISEL-NEXT: v_pk_add_u16 v2, v2, 1 op_sel_hi:[1,0]
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX11-GISEL-NEXT: v_pk_mul_lo_u16 v1, v0, v2
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_pk_add_u16 v2, v1, v2
-; GFX11-GISEL-NEXT: v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0]
-; GFX11-GISEL-NEXT: v_pk_mul_lo_u16 v0, v2, v0
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_pk_mul_lo_u16 v2, v0, v1
-; GFX11-GISEL-NEXT: v_pk_add_u16 v1, v2, v1
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_pk_mul_lo_u16 v0, v1, v0
-; GFX11-GISEL-NEXT: v_pk_add_u16 v1, v2, 1 op_sel_hi:[1,0]
-; GFX11-GISEL-NEXT: v_pk_add_u16 v2, v0, 1 op_sel_hi:[1,0]
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX11-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v2
-; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX11-LABEL: clpeak_umad_pat_v2i16_x2:
+; GFX11: ; %bb.0: ; %entry
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_pk_mul_lo_u16 v2, v0, v1
+; GFX11-NEXT: v_pk_add_u16 v0, v2, v0
+; GFX11-NEXT: v_pk_add_u16 v2, v2, 1 op_sel_hi:[1,0]
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_pk_mul_lo_u16 v0, v0, v1
+; GFX11-NEXT: v_pk_mul_lo_u16 v1, v0, v2
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_pk_add_u16 v2, v1, v2
+; GFX11-NEXT: v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0]
+; GFX11-NEXT: v_pk_mul_lo_u16 v0, v2, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_pk_mul_lo_u16 v2, v0, v1
+; GFX11-NEXT: v_pk_add_u16 v1, v2, v1
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_pk_mul_lo_u16 v0, v1, v0
+; GFX11-NEXT: v_pk_add_u16 v1, v2, 1 op_sel_hi:[1,0]
+; GFX11-NEXT: v_pk_add_u16 v2, v0, 1 op_sel_hi:[1,0]
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_pk_mul_lo_u16 v0, v0, v1
+; GFX11-NEXT: v_pk_mul_lo_u16 v0, v0, v2
+; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:
%y38 = add <2 x i16> %x, <i16 1, i16 1>
%add = mul <2 x i16> %y38, %y
@@ -8373,6 +8151,24 @@ define i64 @mul_u24_add64(i32 %x, i32 %y, i64 %z) {
; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v4, v2
; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: mul_u24_add64:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v0
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_mad_u64_u32 v[0:1], null, v5, v4, v[2:3]
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: mul_u24_add64:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_mul_u32_u24_e32 v4, v0, v1
+; GFX11-GISEL-NEXT: v_mul_hi_u32_u24_e32 v1, v0, v1
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v4, v2
+; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%mul = call i64 @llvm.amdgcn.mul.u24.i64(i32 %x, i32 %y)
%add = add i64 %mul, %z
ret i64 %add
@@ -8410,6 +8206,15 @@ define i64 @mul_u24_zext_add64(i32 %x, i32 %y, i64 %z) {
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v3, vcc_lo
; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: mul_u24_zext_add64:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_mul_u32_u24_e32 v0, v0, v1
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v3, vcc_lo
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%mul = call i32 @llvm.amdgcn.mul.u24(i32 %x, i32 %y)
%mul.zext = zext i32 %mul to i64
%add = add i64 %mul.zext, %z
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.bf16.bf16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.bf16.bf16.ll
index 54bd78e..66f159f 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.bf16.bf16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.bf16.bf16.ll
@@ -75,26 +75,15 @@ entry:
; Make sure we do not violate constant bus restriction with 3 scalar inputs and seemingly inlinable literal.
define amdgpu_ps void @test_llvm_amdgcn_fdot2_bf16_bf16_sis(
-; SDAG-GFX11-LABEL: test_llvm_amdgcn_fdot2_bf16_bf16_sis:
-; SDAG-GFX11: ; %bb.0: ; %entry
-; SDAG-GFX11-NEXT: v_mov_b32_e32 v2, s1
-; SDAG-GFX11-NEXT: s_mov_b32 s1, 0x10001
-; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
-; SDAG-GFX11-NEXT: v_dot2_bf16_bf16 v2, s0, s1, v2
-; SDAG-GFX11-NEXT: global_store_b16 v[0:1], v2, off
-; SDAG-GFX11-NEXT: s_nop 0
-; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
-; SDAG-GFX11-NEXT: s_endpgm
-;
-; GISEL-GFX11-LABEL: test_llvm_amdgcn_fdot2_bf16_bf16_sis:
-; GISEL-GFX11: ; %bb.0: ; %entry
-; GISEL-GFX11-NEXT: v_mov_b32_e32 v2, 0x10001
-; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GISEL-GFX11-NEXT: v_dot2_bf16_bf16 v2, s0, v2, s1
-; GISEL-GFX11-NEXT: global_store_b16 v[0:1], v2, off
-; GISEL-GFX11-NEXT: s_nop 0
-; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
-; GISEL-GFX11-NEXT: s_endpgm
+; GFX11-LABEL: test_llvm_amdgcn_fdot2_bf16_bf16_sis:
+; GFX11: ; %bb.0: ; %entry
+; GFX11-NEXT: v_mov_b32_e32 v2, s1
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_dot2_bf16_bf16 v2, s0, 0x10001, v2
+; GFX11-NEXT: global_store_b16 v[0:1], v2, off
+; GFX11-NEXT: s_nop 0
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
<2 x i16> inreg %a,
i16 inreg %c) {
diff --git a/llvm/test/CodeGen/AMDGPU/pk_max_f16_literal.ll b/llvm/test/CodeGen/AMDGPU/pk_max_f16_literal.ll
index 81918f5..e96570d 100644
--- a/llvm/test/CodeGen/AMDGPU/pk_max_f16_literal.ll
+++ b/llvm/test/CodeGen/AMDGPU/pk_max_f16_literal.ll
@@ -23,7 +23,7 @@ bb:
%tmp1 = zext i32 %tmp to i64
%tmp2 = getelementptr inbounds <2 x half>, ptr addrspace(1) %arg, i64 %tmp1
%tmp3 = load <2 x half>, ptr addrspace(1) %tmp2, align 4
- %tmp4 = tail call <2 x half> @llvm.maxnum.v2f16(<2 x half> %tmp3, <2 x half> <half 0xH3C00, half 0xH0000>)
+ %tmp4 = tail call <2 x half> @llvm.maxnum.v2f16(<2 x half> %tmp3, <2 x half> <half 0xH3C00, half 0xH0000>)
store <2 x half> %tmp4, ptr addrspace(1) %tmp2, align 4
ret void
}
@@ -96,7 +96,7 @@ bb:
; GCN-LABEL: {{^}}test_pk_max_f16_literal_0_41c8:
; GFX9: s_mov_b32 [[C:s[0-9]+]], 0x41c80000
; GFX9: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, [[C]]{{$}}
-; GFX10: v_pk_max_f16 v{{[0-9]+}}, 0x41c8, v{{[0-9]+}} op_sel:[1,0] op_sel_hi:[0,1]{{$}}
+; GFX10: v_pk_max_f16 v{{[0-9]+}}, 0x41c80000, v{{[0-9]+}}{{$}}
define amdgpu_kernel void @test_pk_max_f16_literal_0_41c8(ptr addrspace(1) nocapture %arg) {
bb:
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
diff --git a/llvm/test/CodeGen/AMDGPU/reassoc-mul-add-1-to-mad.ll b/llvm/test/CodeGen/AMDGPU/reassoc-mul-add-1-to-mad.ll
index 536b2d0..3c654e9 100644
--- a/llvm/test/CodeGen/AMDGPU/reassoc-mul-add-1-to-mad.ll
+++ b/llvm/test/CodeGen/AMDGPU/reassoc-mul-add-1-to-mad.ll
@@ -1622,14 +1622,14 @@ define <2 x i16> @v_mul_add_1_v2i16(<2 x i16> %x, <2 x i16> %y) {
; GFX9-LABEL: v_mul_add_1_v2i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_pk_sub_u16 v1, v1, -1 op_sel_hi:[1,0]
+; GFX9-NEXT: v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0]
; GFX9-NEXT: v_pk_mul_lo_u16 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_mul_add_1_v2i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_pk_sub_u16 v1, v1, -1 op_sel_hi:[1,0]
+; GFX10-NEXT: v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0]
; GFX10-NEXT: v_pk_mul_lo_u16 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
%add = add <2 x i16> %y, <i16 1, i16 1>
@@ -1665,14 +1665,14 @@ define <2 x i16> @v_mul_add_1_v2i16_commute(<2 x i16> %x, <2 x i16> %y) {
; GFX9-LABEL: v_mul_add_1_v2i16_commute:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_pk_sub_u16 v1, v1, -1 op_sel_hi:[1,0]
+; GFX9-NEXT: v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0]
; GFX9-NEXT: v_pk_mul_lo_u16 v0, v1, v0
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_mul_add_1_v2i16_commute:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_pk_sub_u16 v1, v1, -1 op_sel_hi:[1,0]
+; GFX10-NEXT: v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0]
; GFX10-NEXT: v_pk_mul_lo_u16 v0, v1, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
%add = add <2 x i16> %y, <i16 1, i16 1>
@@ -1886,14 +1886,14 @@ define <2 x i16> @v_mul_add_2_v2i16(<2 x i16> %x, <2 x i16> %y) {
; GFX9-LABEL: v_mul_add_2_v2i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_pk_sub_u16 v1, v1, -2 op_sel_hi:[1,0]
+; GFX9-NEXT: v_pk_add_u16 v1, v1, 2 op_sel_hi:[1,0]
; GFX9-NEXT: v_pk_mul_lo_u16 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_mul_add_2_v2i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_pk_sub_u16 v1, v1, -2 op_sel_hi:[1,0]
+; GFX10-NEXT: v_pk_add_u16 v1, v1, 2 op_sel_hi:[1,0]
; GFX10-NEXT: v_pk_mul_lo_u16 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
%add = add <2 x i16> %y, <i16 2, i16 2>
@@ -2929,14 +2929,14 @@ define <2 x i16> @v_mul_5_add_1_v2i16(<2 x i16> %arg) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_mul_lo_u16 v0, v0, 5 op_sel_hi:[1,0]
-; GFX9-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0]
+; GFX9-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_mul_5_add_1_v2i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_pk_mul_lo_u16 v0, v0, 5 op_sel_hi:[1,0]
-; GFX10-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0]
+; GFX10-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
; GFX10-NEXT: s_setpc_b64 s[30:31]
%mul = mul <2 x i16> %arg, <i16 5, i16 5>
%add = add <2 x i16> %mul, <i16 1, i16 1>
diff --git a/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll b/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll
index a8ae8c0..73f2834 100644
--- a/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll
+++ b/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll
@@ -2399,7 +2399,7 @@ define amdgpu_kernel void @v_test_v2i16_x_sub_0_1_0(ptr addrspace(1) %out, ptr a
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: global_load_dword v1, v0, s[2:3]
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: v_pk_sub_i16 v1, v1, 0xc400 op_sel:[0,1] op_sel_hi:[1,0]
+; GFX10-NEXT: v_pk_sub_i16 v1, v1, 0xc4000000
; GFX10-NEXT: global_store_dword v0, v1, s[0:1]
; GFX10-NEXT: s_endpgm
;
@@ -2410,7 +2410,7 @@ define amdgpu_kernel void @v_test_v2i16_x_sub_0_1_0(ptr addrspace(1) %out, ptr a
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_load_b32 v1, v0, s[2:3]
; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: v_pk_sub_i16 v1, v1, 0xc400 op_sel:[0,1] op_sel_hi:[1,0]
+; GFX11-NEXT: v_pk_sub_i16 v1, v1, 0xc4000000
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -2534,7 +2534,7 @@ define amdgpu_kernel void @v_test_v2i16_x_sub_0_neg1_0(ptr addrspace(1) %out, pt
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: global_load_dword v1, v0, s[2:3]
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: v_pk_sub_i16 v1, v1, 0x4400 op_sel:[0,1] op_sel_hi:[1,0]
+; GFX10-NEXT: v_pk_sub_i16 v1, v1, 0x44000000
; GFX10-NEXT: global_store_dword v0, v1, s[0:1]
; GFX10-NEXT: s_endpgm
;
@@ -2545,7 +2545,7 @@ define amdgpu_kernel void @v_test_v2i16_x_sub_0_neg1_0(ptr addrspace(1) %out, pt
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_load_b32 v1, v0, s[2:3]
; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: v_pk_sub_i16 v1, v1, 0x4400 op_sel:[0,1] op_sel_hi:[1,0]
+; GFX11-NEXT: v_pk_sub_i16 v1, v1, 0x44000000
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -2645,76 +2645,40 @@ define amdgpu_kernel void @v_test_v2i16_x_add_neg32_neg32(ptr addrspace(1) %out,
; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
; VI-GISEL-NEXT: s_endpgm
;
-; GFX9-SDAG-LABEL: v_test_v2i16_x_add_neg32_neg32:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-SDAG-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v1, v1, 32 op_sel_hi:[1,0]
-; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX9-SDAG-NEXT: s_endpgm
-;
-; GFX9-GISEL-LABEL: v_test_v2i16_x_add_neg32_neg32:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0xffe0ffe0
-; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-GISEL-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX9-GISEL-NEXT: v_pk_add_u16 v1, v1, v2
-; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX9-GISEL-NEXT: s_endpgm
-;
-; GFX10-SDAG-LABEL: v_test_v2i16_x_add_neg32_neg32:
-; GFX10-SDAG: ; %bb.0:
-; GFX10-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-SDAG-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v1, v1, 32 op_sel_hi:[1,0]
-; GFX10-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX10-SDAG-NEXT: s_endpgm
-;
-; GFX10-GISEL-LABEL: v_test_v2i16_x_add_neg32_neg32:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-GISEL-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX10-GISEL-NEXT: v_pk_add_u16 v1, 0xffe0, v1 op_sel_hi:[0,1]
-; GFX10-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX10-GISEL-NEXT: s_endpgm
+; GFX9-LABEL: v_test_v2i16_x_add_neg32_neg32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: global_load_dword v1, v0, s[2:3]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_pk_sub_u16 v1, v1, 32 op_sel_hi:[1,0]
+; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX9-NEXT: s_endpgm
;
-; GFX11-SDAG-LABEL: v_test_v2i16_x_add_neg32_neg32:
-; GFX11-SDAG: ; %bb.0:
-; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
-; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-SDAG-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v1, v1, 32 op_sel_hi:[1,0]
-; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX11-SDAG-NEXT: s_nop 0
-; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
-; GFX11-SDAG-NEXT: s_endpgm
+; GFX10-LABEL: v_test_v2i16_x_add_neg32_neg32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: global_load_dword v1, v0, s[2:3]
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: v_pk_sub_u16 v1, v1, 32 op_sel_hi:[1,0]
+; GFX10-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX10-NEXT: s_endpgm
;
-; GFX11-GISEL-LABEL: v_test_v2i16_x_add_neg32_neg32:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
-; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-GISEL-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX11-GISEL-NEXT: v_pk_add_u16 v1, 0xffe0, v1 op_sel_hi:[0,1]
-; GFX11-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX11-GISEL-NEXT: s_nop 0
-; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
-; GFX11-GISEL-NEXT: s_endpgm
+; GFX11-LABEL: v_test_v2i16_x_add_neg32_neg32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: v_pk_sub_u16 v1, v1, 32 op_sel_hi:[1,0]
+; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext
@@ -2803,76 +2767,40 @@ define amdgpu_kernel void @v_test_v2i16_x_add_0_neg32(ptr addrspace(1) %out, ptr
; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
; VI-GISEL-NEXT: s_endpgm
;
-; GFX9-SDAG-LABEL: v_test_v2i16_x_add_0_neg32:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-SDAG-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v1, v1, 32 op_sel:[0,1] op_sel_hi:[1,0]
-; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX9-SDAG-NEXT: s_endpgm
-;
-; GFX9-GISEL-LABEL: v_test_v2i16_x_add_0_neg32:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0xffe00000
-; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-GISEL-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX9-GISEL-NEXT: v_pk_add_u16 v1, v1, v2
-; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX9-GISEL-NEXT: s_endpgm
-;
-; GFX10-SDAG-LABEL: v_test_v2i16_x_add_0_neg32:
-; GFX10-SDAG: ; %bb.0:
-; GFX10-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-SDAG-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v1, v1, 32 op_sel:[0,1] op_sel_hi:[1,0]
-; GFX10-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX10-SDAG-NEXT: s_endpgm
-;
-; GFX10-GISEL-LABEL: v_test_v2i16_x_add_0_neg32:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-GISEL-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX10-GISEL-NEXT: v_pk_add_u16 v1, 0xffe0, v1 op_sel:[1,0] op_sel_hi:[0,1]
-; GFX10-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX10-GISEL-NEXT: s_endpgm
+; GFX9-LABEL: v_test_v2i16_x_add_0_neg32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: global_load_dword v1, v0, s[2:3]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_pk_sub_u16 v1, v1, 32 op_sel:[0,1] op_sel_hi:[1,0]
+; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX9-NEXT: s_endpgm
;
-; GFX11-SDAG-LABEL: v_test_v2i16_x_add_0_neg32:
-; GFX11-SDAG: ; %bb.0:
-; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
-; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-SDAG-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v1, v1, 32 op_sel:[0,1] op_sel_hi:[1,0]
-; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX11-SDAG-NEXT: s_nop 0
-; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
-; GFX11-SDAG-NEXT: s_endpgm
+; GFX10-LABEL: v_test_v2i16_x_add_0_neg32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: global_load_dword v1, v0, s[2:3]
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: v_pk_sub_u16 v1, v1, 32 op_sel:[0,1] op_sel_hi:[1,0]
+; GFX10-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX10-NEXT: s_endpgm
;
-; GFX11-GISEL-LABEL: v_test_v2i16_x_add_0_neg32:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
-; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-GISEL-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX11-GISEL-NEXT: v_pk_add_u16 v1, 0xffe0, v1 op_sel:[1,0] op_sel_hi:[0,1]
-; GFX11-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX11-GISEL-NEXT: s_nop 0
-; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
-; GFX11-GISEL-NEXT: s_endpgm
+; GFX11-LABEL: v_test_v2i16_x_add_0_neg32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: v_pk_sub_u16 v1, v1, 32 op_sel:[0,1] op_sel_hi:[1,0]
+; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext
@@ -2963,76 +2891,40 @@ define amdgpu_kernel void @v_test_v2i16_x_add_neg32_0(ptr addrspace(1) %out, ptr
; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
; VI-GISEL-NEXT: s_endpgm
;
-; GFX9-SDAG-LABEL: v_test_v2i16_x_add_neg32_0:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-SDAG-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v1, v1, 32
-; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX9-SDAG-NEXT: s_endpgm
-;
-; GFX9-GISEL-LABEL: v_test_v2i16_x_add_neg32_0:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0xffe0
-; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-GISEL-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX9-GISEL-NEXT: v_pk_add_u16 v1, v1, v2
-; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX9-GISEL-NEXT: s_endpgm
-;
-; GFX10-SDAG-LABEL: v_test_v2i16_x_add_neg32_0:
-; GFX10-SDAG: ; %bb.0:
-; GFX10-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-SDAG-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v1, v1, 32
-; GFX10-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX10-SDAG-NEXT: s_endpgm
-;
-; GFX10-GISEL-LABEL: v_test_v2i16_x_add_neg32_0:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-GISEL-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX10-GISEL-NEXT: v_pk_add_u16 v1, 0xffe0, v1
-; GFX10-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX10-GISEL-NEXT: s_endpgm
+; GFX9-LABEL: v_test_v2i16_x_add_neg32_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: global_load_dword v1, v0, s[2:3]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_pk_sub_u16 v1, v1, 32
+; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX9-NEXT: s_endpgm
;
-; GFX11-SDAG-LABEL: v_test_v2i16_x_add_neg32_0:
-; GFX11-SDAG: ; %bb.0:
-; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
-; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-SDAG-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v1, v1, 32
-; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX11-SDAG-NEXT: s_nop 0
-; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
-; GFX11-SDAG-NEXT: s_endpgm
+; GFX10-LABEL: v_test_v2i16_x_add_neg32_0:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: global_load_dword v1, v0, s[2:3]
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: v_pk_sub_u16 v1, v1, 32
+; GFX10-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX10-NEXT: s_endpgm
;
-; GFX11-GISEL-LABEL: v_test_v2i16_x_add_neg32_0:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
-; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-GISEL-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX11-GISEL-NEXT: v_pk_add_u16 v1, 0xffe0, v1
-; GFX11-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX11-GISEL-NEXT: s_nop 0
-; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
-; GFX11-GISEL-NEXT: s_endpgm
+; GFX11-LABEL: v_test_v2i16_x_add_neg32_0:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: v_pk_sub_u16 v1, v1, 32
+; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext
@@ -3128,75 +3020,40 @@ define amdgpu_kernel void @v_test_v2i16_x_add_neg16_neg16(ptr addrspace(1) %out,
; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
; VI-GISEL-NEXT: s_endpgm
;
-; GFX9-SDAG-LABEL: v_test_v2i16_x_add_neg16_neg16:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-SDAG-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v1, v1, 16 op_sel_hi:[1,0]
-; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX9-SDAG-NEXT: s_endpgm
-;
-; GFX9-GISEL-LABEL: v_test_v2i16_x_add_neg16_neg16:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-GISEL-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX9-GISEL-NEXT: v_pk_add_u16 v1, v1, -16 op_sel_hi:[1,0]
-; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX9-GISEL-NEXT: s_endpgm
-;
-; GFX10-SDAG-LABEL: v_test_v2i16_x_add_neg16_neg16:
-; GFX10-SDAG: ; %bb.0:
-; GFX10-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-SDAG-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v1, v1, 16 op_sel_hi:[1,0]
-; GFX10-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX10-SDAG-NEXT: s_endpgm
-;
-; GFX10-GISEL-LABEL: v_test_v2i16_x_add_neg16_neg16:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-GISEL-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX10-GISEL-NEXT: v_pk_add_u16 v1, v1, -16 op_sel_hi:[1,0]
-; GFX10-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX10-GISEL-NEXT: s_endpgm
+; GFX9-LABEL: v_test_v2i16_x_add_neg16_neg16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: global_load_dword v1, v0, s[2:3]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_pk_add_u16 v1, v1, -16 op_sel_hi:[1,0]
+; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX9-NEXT: s_endpgm
;
-; GFX11-SDAG-LABEL: v_test_v2i16_x_add_neg16_neg16:
-; GFX11-SDAG: ; %bb.0:
-; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
-; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-SDAG-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v1, v1, 16 op_sel_hi:[1,0]
-; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX11-SDAG-NEXT: s_nop 0
-; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
-; GFX11-SDAG-NEXT: s_endpgm
+; GFX10-LABEL: v_test_v2i16_x_add_neg16_neg16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: global_load_dword v1, v0, s[2:3]
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: v_pk_add_u16 v1, v1, -16 op_sel_hi:[1,0]
+; GFX10-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX10-NEXT: s_endpgm
;
-; GFX11-GISEL-LABEL: v_test_v2i16_x_add_neg16_neg16:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
-; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-GISEL-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX11-GISEL-NEXT: v_pk_add_u16 v1, v1, -16 op_sel_hi:[1,0]
-; GFX11-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX11-GISEL-NEXT: s_nop 0
-; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
-; GFX11-GISEL-NEXT: s_endpgm
+; GFX11-LABEL: v_test_v2i16_x_add_neg16_neg16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: v_pk_add_u16 v1, v1, -16 op_sel_hi:[1,0]
+; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext
@@ -3285,75 +3142,40 @@ define amdgpu_kernel void @v_test_v2i16_x_add_0_neg16(ptr addrspace(1) %out, ptr
; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
; VI-GISEL-NEXT: s_endpgm
;
-; GFX9-SDAG-LABEL: v_test_v2i16_x_add_0_neg16:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-SDAG-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v1, v1, 16 op_sel:[0,1] op_sel_hi:[1,0]
-; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX9-SDAG-NEXT: s_endpgm
-;
-; GFX9-GISEL-LABEL: v_test_v2i16_x_add_0_neg16:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-GISEL-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX9-GISEL-NEXT: v_pk_add_u16 v1, v1, -16 op_sel:[0,1] op_sel_hi:[1,0]
-; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX9-GISEL-NEXT: s_endpgm
-;
-; GFX10-SDAG-LABEL: v_test_v2i16_x_add_0_neg16:
-; GFX10-SDAG: ; %bb.0:
-; GFX10-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-SDAG-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v1, v1, 16 op_sel:[0,1] op_sel_hi:[1,0]
-; GFX10-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX10-SDAG-NEXT: s_endpgm
-;
-; GFX10-GISEL-LABEL: v_test_v2i16_x_add_0_neg16:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-GISEL-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX10-GISEL-NEXT: v_pk_add_u16 v1, v1, -16 op_sel:[0,1] op_sel_hi:[1,0]
-; GFX10-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX10-GISEL-NEXT: s_endpgm
+; GFX9-LABEL: v_test_v2i16_x_add_0_neg16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: global_load_dword v1, v0, s[2:3]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_pk_sub_u16 v1, v1, 16 op_sel:[0,1] op_sel_hi:[1,0]
+; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX9-NEXT: s_endpgm
;
-; GFX11-SDAG-LABEL: v_test_v2i16_x_add_0_neg16:
-; GFX11-SDAG: ; %bb.0:
-; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
-; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-SDAG-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v1, v1, 16 op_sel:[0,1] op_sel_hi:[1,0]
-; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX11-SDAG-NEXT: s_nop 0
-; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
-; GFX11-SDAG-NEXT: s_endpgm
+; GFX10-LABEL: v_test_v2i16_x_add_0_neg16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: global_load_dword v1, v0, s[2:3]
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: v_pk_sub_u16 v1, v1, 16 op_sel:[0,1] op_sel_hi:[1,0]
+; GFX10-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX10-NEXT: s_endpgm
;
-; GFX11-GISEL-LABEL: v_test_v2i16_x_add_0_neg16:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
-; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-GISEL-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX11-GISEL-NEXT: v_pk_add_u16 v1, v1, -16 op_sel:[0,1] op_sel_hi:[1,0]
-; GFX11-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX11-GISEL-NEXT: s_nop 0
-; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
-; GFX11-GISEL-NEXT: s_endpgm
+; GFX11-LABEL: v_test_v2i16_x_add_0_neg16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: v_pk_sub_u16 v1, v1, 16 op_sel:[0,1] op_sel_hi:[1,0]
+; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext
@@ -3444,75 +3266,40 @@ define amdgpu_kernel void @v_test_v2i16_x_add_neg16_0(ptr addrspace(1) %out, ptr
; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
; VI-GISEL-NEXT: s_endpgm
;
-; GFX9-SDAG-LABEL: v_test_v2i16_x_add_neg16_0:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-SDAG-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v1, v1, 16
-; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX9-SDAG-NEXT: s_endpgm
-;
-; GFX9-GISEL-LABEL: v_test_v2i16_x_add_neg16_0:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-GISEL-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX9-GISEL-NEXT: v_pk_add_u16 v1, v1, -16
-; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX9-GISEL-NEXT: s_endpgm
-;
-; GFX10-SDAG-LABEL: v_test_v2i16_x_add_neg16_0:
-; GFX10-SDAG: ; %bb.0:
-; GFX10-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-SDAG-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v1, v1, 16
-; GFX10-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX10-SDAG-NEXT: s_endpgm
-;
-; GFX10-GISEL-LABEL: v_test_v2i16_x_add_neg16_0:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-GISEL-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX10-GISEL-NEXT: v_pk_add_u16 v1, v1, -16
-; GFX10-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX10-GISEL-NEXT: s_endpgm
+; GFX9-LABEL: v_test_v2i16_x_add_neg16_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: global_load_dword v1, v0, s[2:3]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_pk_sub_u16 v1, v1, 16
+; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX9-NEXT: s_endpgm
;
-; GFX11-SDAG-LABEL: v_test_v2i16_x_add_neg16_0:
-; GFX11-SDAG: ; %bb.0:
-; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
-; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-SDAG-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v1, v1, 16
-; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX11-SDAG-NEXT: s_nop 0
-; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
-; GFX11-SDAG-NEXT: s_endpgm
+; GFX10-LABEL: v_test_v2i16_x_add_neg16_0:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: global_load_dword v1, v0, s[2:3]
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: v_pk_sub_u16 v1, v1, 16
+; GFX10-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX10-NEXT: s_endpgm
;
-; GFX11-GISEL-LABEL: v_test_v2i16_x_add_neg16_0:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
-; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-GISEL-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX11-GISEL-NEXT: v_pk_add_u16 v1, v1, -16
-; GFX11-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX11-GISEL-NEXT: s_nop 0
-; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
-; GFX11-GISEL-NEXT: s_endpgm
+; GFX11-LABEL: v_test_v2i16_x_add_neg16_0:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: v_pk_sub_u16 v1, v1, 16
+; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext
@@ -3613,9 +3400,9 @@ define amdgpu_kernel void @v_test_v2i16_x_add_neg_fpone(ptr addrspace(1) %out, p
; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-SDAG-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX9-SDAG-NEXT: s_mov_b32 s2, 0x3c003c00
+; GFX9-SDAG-NEXT: s_mov_b32 s2, 0xc400c400
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v1, v1, s2
+; GFX9-SDAG-NEXT: v_pk_add_u16 v1, v1, s2
; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
; GFX9-SDAG-NEXT: s_endpgm
;
@@ -3631,53 +3418,29 @@ define amdgpu_kernel void @v_test_v2i16_x_add_neg_fpone(ptr addrspace(1) %out, p
; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
; GFX9-GISEL-NEXT: s_endpgm
;
-; GFX10-SDAG-LABEL: v_test_v2i16_x_add_neg_fpone:
-; GFX10-SDAG: ; %bb.0:
-; GFX10-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-SDAG-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v1, v1, 0x3c00 op_sel_hi:[1,0]
-; GFX10-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX10-SDAG-NEXT: s_endpgm
-;
-; GFX10-GISEL-LABEL: v_test_v2i16_x_add_neg_fpone:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-GISEL-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX10-GISEL-NEXT: v_pk_add_u16 v1, 0xc400, v1 op_sel_hi:[0,1]
-; GFX10-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX10-GISEL-NEXT: s_endpgm
-;
-; GFX11-SDAG-LABEL: v_test_v2i16_x_add_neg_fpone:
-; GFX11-SDAG: ; %bb.0:
-; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
-; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-SDAG-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v1, v1, 0x3c00 op_sel_hi:[1,0]
-; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX11-SDAG-NEXT: s_nop 0
-; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
-; GFX11-SDAG-NEXT: s_endpgm
+; GFX10-LABEL: v_test_v2i16_x_add_neg_fpone:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: global_load_dword v1, v0, s[2:3]
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: v_pk_add_u16 v1, 0xc400c400, v1
+; GFX10-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX10-NEXT: s_endpgm
;
-; GFX11-GISEL-LABEL: v_test_v2i16_x_add_neg_fpone:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
-; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-GISEL-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX11-GISEL-NEXT: v_pk_add_u16 v1, 0xc400, v1 op_sel_hi:[0,1]
-; GFX11-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX11-GISEL-NEXT: s_nop 0
-; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
-; GFX11-GISEL-NEXT: s_endpgm
+; GFX11-LABEL: v_test_v2i16_x_add_neg_fpone:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: v_pk_add_u16 v1, 0xc400c400, v1
+; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext
@@ -3778,9 +3541,9 @@ define amdgpu_kernel void @v_test_v2i16_x_add_neg_negfpone(ptr addrspace(1) %out
; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-SDAG-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX9-SDAG-NEXT: s_mov_b32 s2, 0xbc00bc00
+; GFX9-SDAG-NEXT: s_mov_b32 s2, 0x44004400
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v1, v1, s2
+; GFX9-SDAG-NEXT: v_pk_add_u16 v1, v1, s2
; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
; GFX9-SDAG-NEXT: s_endpgm
;
@@ -3796,53 +3559,29 @@ define amdgpu_kernel void @v_test_v2i16_x_add_neg_negfpone(ptr addrspace(1) %out
; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
; GFX9-GISEL-NEXT: s_endpgm
;
-; GFX10-SDAG-LABEL: v_test_v2i16_x_add_neg_negfpone:
-; GFX10-SDAG: ; %bb.0:
-; GFX10-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-SDAG-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v1, v1, 0xbc00 op_sel_hi:[1,0]
-; GFX10-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX10-SDAG-NEXT: s_endpgm
-;
-; GFX10-GISEL-LABEL: v_test_v2i16_x_add_neg_negfpone:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-GISEL-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX10-GISEL-NEXT: v_pk_add_u16 v1, 0x4400, v1 op_sel_hi:[0,1]
-; GFX10-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX10-GISEL-NEXT: s_endpgm
-;
-; GFX11-SDAG-LABEL: v_test_v2i16_x_add_neg_negfpone:
-; GFX11-SDAG: ; %bb.0:
-; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
-; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-SDAG-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v1, v1, 0xbc00 op_sel_hi:[1,0]
-; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX11-SDAG-NEXT: s_nop 0
-; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
-; GFX11-SDAG-NEXT: s_endpgm
+; GFX10-LABEL: v_test_v2i16_x_add_neg_negfpone:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: global_load_dword v1, v0, s[2:3]
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: v_pk_add_u16 v1, 0x44004400, v1
+; GFX10-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX10-NEXT: s_endpgm
;
-; GFX11-GISEL-LABEL: v_test_v2i16_x_add_neg_negfpone:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
-; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-GISEL-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX11-GISEL-NEXT: v_pk_add_u16 v1, 0x4400, v1 op_sel_hi:[0,1]
-; GFX11-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX11-GISEL-NEXT: s_nop 0
-; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
-; GFX11-GISEL-NEXT: s_endpgm
+; GFX11-LABEL: v_test_v2i16_x_add_neg_negfpone:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: v_pk_add_u16 v1, 0x44004400, v1
+; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext
@@ -3937,77 +3676,40 @@ define amdgpu_kernel void @v_test_v2i16_x_add_neg_fptwo(ptr addrspace(1) %out, p
; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
; VI-GISEL-NEXT: s_endpgm
;
-; GFX9-SDAG-LABEL: v_test_v2i16_x_add_neg_fptwo:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-SDAG-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX9-SDAG-NEXT: s_mov_b32 s2, 0xc000c000
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v1, v1, s2
-; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX9-SDAG-NEXT: s_endpgm
-;
-; GFX9-GISEL-LABEL: v_test_v2i16_x_add_neg_fptwo:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0x40004000
-; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-GISEL-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX9-GISEL-NEXT: v_pk_add_u16 v1, v1, v2
-; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX9-GISEL-NEXT: s_endpgm
-;
-; GFX10-SDAG-LABEL: v_test_v2i16_x_add_neg_fptwo:
-; GFX10-SDAG: ; %bb.0:
-; GFX10-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-SDAG-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v1, v1, 0xc000 op_sel_hi:[1,0]
-; GFX10-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX10-SDAG-NEXT: s_endpgm
-;
-; GFX10-GISEL-LABEL: v_test_v2i16_x_add_neg_fptwo:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-GISEL-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX10-GISEL-NEXT: v_pk_add_u16 v1, 0x4000, v1 op_sel_hi:[0,1]
-; GFX10-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX10-GISEL-NEXT: s_endpgm
+; GFX9-LABEL: v_test_v2i16_x_add_neg_fptwo:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: global_load_dword v1, v0, s[2:3]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_pk_add_u16 v1, v1, 2.0 op_sel:[0,1]
+; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX9-NEXT: s_endpgm
;
-; GFX11-SDAG-LABEL: v_test_v2i16_x_add_neg_fptwo:
-; GFX11-SDAG: ; %bb.0:
-; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
-; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-SDAG-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v1, v1, 0xc000 op_sel_hi:[1,0]
-; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX11-SDAG-NEXT: s_nop 0
-; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
-; GFX11-SDAG-NEXT: s_endpgm
+; GFX10-LABEL: v_test_v2i16_x_add_neg_fptwo:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: global_load_dword v1, v0, s[2:3]
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: v_pk_add_u16 v1, v1, 2.0 op_sel:[0,1]
+; GFX10-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX10-NEXT: s_endpgm
;
-; GFX11-GISEL-LABEL: v_test_v2i16_x_add_neg_fptwo:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
-; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-GISEL-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX11-GISEL-NEXT: v_pk_add_u16 v1, 0x4000, v1 op_sel_hi:[0,1]
-; GFX11-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX11-GISEL-NEXT: s_nop 0
-; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
-; GFX11-GISEL-NEXT: s_endpgm
+; GFX11-LABEL: v_test_v2i16_x_add_neg_fptwo:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: v_pk_add_u16 v1, v1, 2.0 op_sel:[0,1]
+; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext
@@ -4102,77 +3804,40 @@ define amdgpu_kernel void @v_test_v2i16_x_add_neg_negfptwo(ptr addrspace(1) %out
; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
; VI-GISEL-NEXT: s_endpgm
;
-; GFX9-SDAG-LABEL: v_test_v2i16_x_add_neg_negfptwo:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-SDAG-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX9-SDAG-NEXT: s_mov_b32 s2, 0x40004000
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v1, v1, s2
-; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX9-SDAG-NEXT: s_endpgm
-;
-; GFX9-GISEL-LABEL: v_test_v2i16_x_add_neg_negfptwo:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0xc000c000
-; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-GISEL-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX9-GISEL-NEXT: v_pk_add_u16 v1, v1, v2
-; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX9-GISEL-NEXT: s_endpgm
-;
-; GFX10-SDAG-LABEL: v_test_v2i16_x_add_neg_negfptwo:
-; GFX10-SDAG: ; %bb.0:
-; GFX10-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-SDAG-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v1, v1, 0x4000 op_sel_hi:[1,0]
-; GFX10-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX10-SDAG-NEXT: s_endpgm
-;
-; GFX10-GISEL-LABEL: v_test_v2i16_x_add_neg_negfptwo:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-GISEL-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX10-GISEL-NEXT: v_pk_add_u16 v1, 0xc000, v1 op_sel_hi:[0,1]
-; GFX10-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX10-GISEL-NEXT: s_endpgm
+; GFX9-LABEL: v_test_v2i16_x_add_neg_negfptwo:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: global_load_dword v1, v0, s[2:3]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_pk_add_u16 v1, v1, -2.0 op_sel:[0,1]
+; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX9-NEXT: s_endpgm
;
-; GFX11-SDAG-LABEL: v_test_v2i16_x_add_neg_negfptwo:
-; GFX11-SDAG: ; %bb.0:
-; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
-; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-SDAG-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v1, v1, 0x4000 op_sel_hi:[1,0]
-; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX11-SDAG-NEXT: s_nop 0
-; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
-; GFX11-SDAG-NEXT: s_endpgm
+; GFX10-LABEL: v_test_v2i16_x_add_neg_negfptwo:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: global_load_dword v1, v0, s[2:3]
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: v_pk_add_u16 v1, v1, -2.0 op_sel:[0,1]
+; GFX10-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX10-NEXT: s_endpgm
;
-; GFX11-GISEL-LABEL: v_test_v2i16_x_add_neg_negfptwo:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
-; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-GISEL-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX11-GISEL-NEXT: v_pk_add_u16 v1, 0xc000, v1 op_sel_hi:[0,1]
-; GFX11-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX11-GISEL-NEXT: s_nop 0
-; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
-; GFX11-GISEL-NEXT: s_endpgm
+; GFX11-LABEL: v_test_v2i16_x_add_neg_negfptwo:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: v_pk_add_u16 v1, v1, -2.0 op_sel:[0,1]
+; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext
@@ -4260,76 +3925,40 @@ define amdgpu_kernel void @v_test_v2i16_x_add_undef_neg32(ptr addrspace(1) %out,
; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
; VI-GISEL-NEXT: s_endpgm
;
-; GFX9-SDAG-LABEL: v_test_v2i16_x_add_undef_neg32:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-SDAG-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v1, v1, 32 op_sel:[0,1] op_sel_hi:[1,0]
-; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX9-SDAG-NEXT: s_endpgm
-;
-; GFX9-GISEL-LABEL: v_test_v2i16_x_add_undef_neg32:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0xffe00000
-; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-GISEL-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX9-GISEL-NEXT: v_pk_add_u16 v1, v1, v2
-; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX9-GISEL-NEXT: s_endpgm
-;
-; GFX10-SDAG-LABEL: v_test_v2i16_x_add_undef_neg32:
-; GFX10-SDAG: ; %bb.0:
-; GFX10-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-SDAG-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v1, v1, 32 op_sel:[0,1] op_sel_hi:[1,0]
-; GFX10-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX10-SDAG-NEXT: s_endpgm
-;
-; GFX10-GISEL-LABEL: v_test_v2i16_x_add_undef_neg32:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-GISEL-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX10-GISEL-NEXT: v_pk_add_u16 v1, 0xffe0, v1 op_sel:[1,0] op_sel_hi:[0,1]
-; GFX10-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX10-GISEL-NEXT: s_endpgm
+; GFX9-LABEL: v_test_v2i16_x_add_undef_neg32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: global_load_dword v1, v0, s[2:3]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_pk_sub_u16 v1, v1, 32 op_sel:[0,1] op_sel_hi:[1,0]
+; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX9-NEXT: s_endpgm
;
-; GFX11-SDAG-LABEL: v_test_v2i16_x_add_undef_neg32:
-; GFX11-SDAG: ; %bb.0:
-; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
-; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-SDAG-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v1, v1, 32 op_sel:[0,1] op_sel_hi:[1,0]
-; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX11-SDAG-NEXT: s_nop 0
-; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
-; GFX11-SDAG-NEXT: s_endpgm
+; GFX10-LABEL: v_test_v2i16_x_add_undef_neg32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: global_load_dword v1, v0, s[2:3]
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: v_pk_sub_u16 v1, v1, 32 op_sel:[0,1] op_sel_hi:[1,0]
+; GFX10-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX10-NEXT: s_endpgm
;
-; GFX11-GISEL-LABEL: v_test_v2i16_x_add_undef_neg32:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
-; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-GISEL-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX11-GISEL-NEXT: v_pk_add_u16 v1, 0xffe0, v1 op_sel:[1,0] op_sel_hi:[0,1]
-; GFX11-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX11-GISEL-NEXT: s_nop 0
-; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
-; GFX11-GISEL-NEXT: s_endpgm
+; GFX11-LABEL: v_test_v2i16_x_add_undef_neg32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: v_pk_sub_u16 v1, v1, 32 op_sel:[0,1] op_sel_hi:[1,0]
+; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext
@@ -4455,7 +4084,7 @@ define amdgpu_kernel void @v_test_v2i16_x_add_neg32_undef(ptr addrspace(1) %out,
; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-GISEL-NEXT: global_load_dword v1, v0, s[2:3]
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX10-GISEL-NEXT: v_pk_add_u16 v1, 0xffe0, v1 op_sel_hi:[0,1]
+; GFX10-GISEL-NEXT: v_pk_add_u16 v1, 0xffffffe0, v1
; GFX10-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
; GFX10-GISEL-NEXT: s_endpgm
;
@@ -4479,7 +4108,7 @@ define amdgpu_kernel void @v_test_v2i16_x_add_neg32_undef(ptr addrspace(1) %out,
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT: global_load_b32 v1, v0, s[2:3]
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX11-GISEL-NEXT: v_pk_add_u16 v1, 0xffe0, v1 op_sel_hi:[0,1]
+; GFX11-GISEL-NEXT: v_pk_add_u16 v1, 0xffffffe0, v1
; GFX11-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11-GISEL-NEXT: s_nop 0
; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
diff --git a/llvm/test/CodeGen/AMDGPU/sminmax.v2i16.ll b/llvm/test/CodeGen/AMDGPU/sminmax.v2i16.ll
index 9a6851c..b237703 100644
--- a/llvm/test/CodeGen/AMDGPU/sminmax.v2i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/sminmax.v2i16.ll
@@ -6,7 +6,7 @@
; GFX9: s_load_dword [[VAL:s[0-9]+]]
; GFX9: v_pk_sub_i16 [[SUB:v[0-9]+]], 0, [[VAL]]
; GFX9: v_pk_max_i16 [[MAX:v[0-9]+]], [[VAL]], [[SUB]]
-; GFX9: v_pk_sub_u16 [[ADD:v[0-9]+]], [[MAX]], -2 op_sel_hi:[1,0]
+; GFX9: v_pk_add_u16 [[ADD:v[0-9]+]], [[MAX]], 2 op_sel_hi:[1,0]
; CIVI: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 16
; CIVI: s_sub_i32
@@ -30,7 +30,7 @@ define amdgpu_kernel void @s_abs_v2i16(ptr addrspace(1) %out, <2 x i16> %val) #0
; GFX9: global_load_dword [[VAL:v[0-9]+]]
; GFX9: v_pk_sub_i16 [[SUB:v[0-9]+]], 0, [[VAL]]
; GFX9: v_pk_max_i16 [[MAX:v[0-9]+]], [[VAL]], [[SUB]]
-; GFX9: v_pk_sub_u16 [[ADD:v[0-9]+]], [[MAX]], -2 op_sel_hi:[1,0]
+; GFX9: v_pk_add_u16 [[ADD:v[0-9]+]], [[MAX]], 2 op_sel_hi:[1,0]
; VI-DAG: v_mov_b32_e32 [[TWO:v[0-9]+]], 2
; VI-DAG: v_lshrrev_b32_e32 v{{[0-9]+}}, 16,
@@ -70,7 +70,7 @@ define amdgpu_kernel void @v_abs_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %
; GFX9: s_load_dword [[VAL:s[0-9]+]]
; GFX9: v_pk_sub_i16 [[SUB:v[0-9]+]], 0, [[VAL]]
; GFX9: v_pk_max_i16 [[MAX:v[0-9]+]], [[VAL]], [[SUB]]
-; GFX9: v_pk_sub_u16 [[ADD:v[0-9]+]], [[MAX]], -2 op_sel_hi:[1,0]
+; GFX9: v_pk_add_u16 [[ADD:v[0-9]+]], [[MAX]], 2 op_sel_hi:[1,0]
define amdgpu_kernel void @s_abs_v2i16_2(ptr addrspace(1) %out, <2 x i16> %val) #0 {
%z0 = insertelement <2 x i16> undef, i16 0, i16 0
%z1 = insertelement <2 x i16> %z0, i16 0, i16 1
@@ -88,7 +88,7 @@ define amdgpu_kernel void @s_abs_v2i16_2(ptr addrspace(1) %out, <2 x i16> %val)
; GFX9: global_load_dword [[VAL:v[0-9]+]]
; GFX9: v_pk_sub_i16 [[SUB:v[0-9]+]], 0, [[VAL]]
; GFX9: v_pk_max_i16 [[MAX:v[0-9]+]], [[VAL]], [[SUB]]
-; GFX9: v_pk_sub_u16 [[ADD:v[0-9]+]], [[MAX]], -2 op_sel_hi:[1,0]
+; GFX9: v_pk_add_u16 [[ADD:v[0-9]+]], [[MAX]], 2 op_sel_hi:[1,0]
define amdgpu_kernel void @v_abs_v2i16_2(ptr addrspace(1) %out, ptr addrspace(1) %src) #0 {
%z0 = insertelement <2 x i16> undef, i16 0, i16 0
%z1 = insertelement <2 x i16> %z0, i16 0, i16 1
@@ -111,8 +111,8 @@ define amdgpu_kernel void @v_abs_v2i16_2(ptr addrspace(1) %out, ptr addrspace(1)
; GFX9-DAG: v_pk_sub_i16 [[SUB1:v[0-9]+]], 0, s[[#LOAD + 3]]
; GFX9-DAG: v_pk_max_i16 [[MAX0:v[0-9]+]], s[[#LOAD + 2]], [[SUB0]]
; GFX9-DAG: v_pk_max_i16 [[MAX1:v[0-9]+]], s[[#LOAD + 3]], [[SUB1]]
-; GFX9-DAG: v_pk_sub_u16 [[ADD0:v[0-9]+]], [[MAX0]], -2 op_sel_hi:[1,0]
-; GFX9-DAG: v_pk_sub_u16 [[ADD1:v[0-9]+]], [[MAX1]], -2 op_sel_hi:[1,0]
+; GFX9-DAG: v_pk_add_u16 [[ADD0:v[0-9]+]], [[MAX0]], 2 op_sel_hi:[1,0]
+; GFX9-DAG: v_pk_add_u16 [[ADD1:v[0-9]+]], [[MAX1]], 2 op_sel_hi:[1,0]
define amdgpu_kernel void @s_abs_v4i16(ptr addrspace(1) %out, <4 x i16> %val) #0 {
%z0 = insertelement <4 x i16> undef, i16 0, i16 0
%z1 = insertelement <4 x i16> %z0, i16 0, i16 1
@@ -135,11 +135,11 @@ define amdgpu_kernel void @s_abs_v4i16(ptr addrspace(1) %out, <4 x i16> %val) #0
; GFX9-DAG: v_pk_sub_i16 [[SUB0:v[0-9]+]], 0, v[[VAL0]]
; GFX9-DAG: v_pk_max_i16 [[MAX0:v[0-9]+]], v[[VAL0]], [[SUB0]]
-; GFX9-DAG: v_pk_sub_u16 [[ADD0:v[0-9]+]], [[MAX0]], -2 op_sel_hi:[1,0]
+; GFX9-DAG: v_pk_add_u16 [[ADD0:v[0-9]+]], [[MAX0]], 2 op_sel_hi:[1,0]
; GFX9-DAG: v_pk_sub_i16 [[SUB1:v[0-9]+]], 0, v[[VAL1]]
; GFX9-DAG: v_pk_max_i16 [[MAX1:v[0-9]+]], v[[VAL1]], [[SUB1]]
-; GFX9-DAG: v_pk_sub_u16 [[ADD1:v[0-9]+]], [[MAX1]], -2 op_sel_hi:[1,0]
+; GFX9-DAG: v_pk_add_u16 [[ADD1:v[0-9]+]], [[MAX1]], 2 op_sel_hi:[1,0]
define amdgpu_kernel void @v_abs_v4i16(ptr addrspace(1) %out, ptr addrspace(1) %src) #0 {
%z0 = insertelement <4 x i16> undef, i16 0, i16 0
%z1 = insertelement <4 x i16> %z0, i16 0, i16 1
diff --git a/llvm/test/CodeGen/AMDGPU/sub.v2i16.ll b/llvm/test/CodeGen/AMDGPU/sub.v2i16.ll
index aedf06d..a2712ec 100644
--- a/llvm/test/CodeGen/AMDGPU/sub.v2i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/sub.v2i16.ll
@@ -427,7 +427,7 @@ define amdgpu_kernel void @v_test_sub_v2i16_inline_neg1(ptr addrspace(1) %out, p
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_mov_b32 s3, 0xf000
; GFX9-NEXT: s_mov_b32 s2, -1
-; GFX9-NEXT: v_pk_sub_i16 v0, v0, -1 op_sel_hi:[1,0]
+; GFX9-NEXT: v_pk_sub_i16 v0, v0, -1
; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX9-NEXT: s_endpgm
;
@@ -460,7 +460,7 @@ define amdgpu_kernel void @v_test_sub_v2i16_inline_neg1(ptr addrspace(1) %out, p
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_mov_b32 s3, 0x31016000
; GFX10-NEXT: s_mov_b32 s2, -1
-; GFX10-NEXT: v_pk_sub_i16 v0, v0, -1 op_sel_hi:[1,0]
+; GFX10-NEXT: v_pk_sub_i16 v0, v0, -1
; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX10-NEXT: s_endpgm
;
@@ -473,7 +473,7 @@ define amdgpu_kernel void @v_test_sub_v2i16_inline_neg1(ptr addrspace(1) %out, p
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
-; GFX11-NEXT: v_pk_sub_i16 v0, v0, -1 op_sel_hi:[1,0]
+; GFX11-NEXT: v_pk_sub_i16 v0, v0, -1
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -562,13 +562,12 @@ define amdgpu_kernel void @v_test_sub_v2i16_inline_fp_split(ptr addrspace(1) %ou
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT: s_mov_b32 s4, 1.0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_load_dword v0, v0, s[2:3] glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_mov_b32 s3, 0xf000
; GFX9-NEXT: s_mov_b32 s2, -1
-; GFX9-NEXT: v_pk_sub_i16 v0, v0, s4
+; GFX9-NEXT: v_pk_sub_i16 v0, v0, 1.0
; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX9-NEXT: s_endpgm
;
@@ -600,7 +599,7 @@ define amdgpu_kernel void @v_test_sub_v2i16_inline_fp_split(ptr addrspace(1) %ou
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_mov_b32 s3, 0x31016000
; GFX10-NEXT: s_mov_b32 s2, -1
-; GFX10-NEXT: v_pk_sub_i16 v0, v0, 0x3f80 op_sel:[0,1] op_sel_hi:[1,0]
+; GFX10-NEXT: v_pk_sub_i16 v0, v0, 1.0
; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX10-NEXT: s_endpgm
;
@@ -613,7 +612,7 @@ define amdgpu_kernel void @v_test_sub_v2i16_inline_fp_split(ptr addrspace(1) %ou
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
-; GFX11-NEXT: v_pk_sub_i16 v0, v0, 0x3f80 op_sel:[0,1] op_sel_hi:[1,0]
+; GFX11-NEXT: v_pk_sub_i16 v0, v0, 1.0
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
diff --git a/llvm/test/CodeGen/AMDGPU/v_sat_pk_u8_i16.ll b/llvm/test/CodeGen/AMDGPU/v_sat_pk_u8_i16.ll
index e46992c..819e5e8 100644
--- a/llvm/test/CodeGen/AMDGPU/v_sat_pk_u8_i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/v_sat_pk_u8_i16.ll
@@ -369,13 +369,13 @@ define <2 x i16> @vec_smax_smin(<2 x i16> %src) {
; SDAG-GFX9-NEXT: v_pk_min_i16 v0, v0, s4 op_sel_hi:[1,0]
; SDAG-GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: vec_smax_smin:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_pk_max_i16 v0, v0, 0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_pk_min_i16 v0, 0xff, v0 op_sel_hi:[0,1]
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; SDAG-GFX11-LABEL: vec_smax_smin:
+; SDAG-GFX11: ; %bb.0:
+; SDAG-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX11-NEXT: v_pk_max_i16 v0, v0, 0
+; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-NEXT: v_pk_min_i16 v0, 0xff, v0 op_sel_hi:[0,1]
+; SDAG-GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-VI-LABEL: vec_smax_smin:
; GISEL-VI: ; %bb.0:
@@ -396,6 +396,14 @@ define <2 x i16> @vec_smax_smin(<2 x i16> %src) {
; GISEL-GFX9-NEXT: v_mov_b32_e32 v1, 0xff00ff
; GISEL-GFX9-NEXT: v_pk_min_i16 v0, v0, v1
; GISEL-GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-GFX11-LABEL: vec_smax_smin:
+; GISEL-GFX11: ; %bb.0:
+; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX11-NEXT: v_pk_max_i16 v0, v0, 0
+; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT: v_pk_min_i16 v0, 0xff00ff, v0
+; GISEL-GFX11-NEXT: s_setpc_b64 s[30:31]
%src.max = call <2 x i16> @llvm.smax.v2i16(<2 x i16> %src, <2 x i16> <i16 0, i16 0>)
%src.clamp = call <2 x i16> @llvm.smin.v2i16(<2 x i16> %src.max, <2 x i16> <i16 255, i16 255>)
ret <2 x i16> %src.clamp
@@ -548,13 +556,13 @@ define <2 x i16> @vec_smin_smax(<2 x i16> %src) {
; SDAG-GFX9-NEXT: v_pk_max_i16 v0, v0, 0
; SDAG-GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: vec_smin_smax:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_pk_min_i16 v0, 0xff, v0 op_sel_hi:[0,1]
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_pk_max_i16 v0, v0, 0
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; SDAG-GFX11-LABEL: vec_smin_smax:
+; SDAG-GFX11: ; %bb.0:
+; SDAG-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX11-NEXT: v_pk_min_i16 v0, 0xff, v0 op_sel_hi:[0,1]
+; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-NEXT: v_pk_max_i16 v0, v0, 0
+; SDAG-GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-VI-LABEL: vec_smin_smax:
; GISEL-VI: ; %bb.0:
@@ -575,7 +583,17 @@ define <2 x i16> @vec_smin_smax(<2 x i16> %src) {
; GISEL-GFX9-NEXT: v_pk_min_i16 v0, v0, v1
; GISEL-GFX9-NEXT: v_pk_max_i16 v0, v0, 0
; GISEL-GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-GFX11-LABEL: vec_smin_smax:
+; GISEL-GFX11: ; %bb.0:
+; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX11-NEXT: v_pk_min_i16 v0, 0xff00ff, v0
+; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT: v_pk_max_i16 v0, v0, 0
+; GISEL-GFX11-NEXT: s_setpc_b64 s[30:31]
%src.min = call <2 x i16> @llvm.smin.v2i16(<2 x i16> %src, <2 x i16> <i16 255, i16 255>)
%src.clamp = call <2 x i16> @llvm.smax.v2i16(<2 x i16> %src.min, <2 x i16> <i16 0, i16 0>)
ret <2 x i16> %src.clamp
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX11: {{.*}}
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3p.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3p.s
index 45a320a..829b0eb 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3p.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3p.s
@@ -463,7 +463,7 @@ v_pk_add_i16 v5, ttmp15, src_scc
// GFX11: [0x05,0x40,0x02,0xcc,0x7b,0xfa,0x01,0x18]
v_pk_add_i16 v5, m0, 0.5
-// GFX11: [0x05,0x40,0x02,0xcc,0x7d,0xfe,0x01,0x18,0x00,0x38,0x00,0x00]
+// GFX11: [0x05,0x40,0x02,0xcc,0x7d,0xe0,0x01,0x18]
v_pk_add_i16 v5, exec_lo, -1
// GFX11: [0x05,0x40,0x02,0xcc,0x7e,0x82,0x01,0x18]
@@ -477,9 +477,12 @@ v_pk_add_i16 v5, null, exec_lo
v_pk_add_i16 v5, -1, exec_hi op_sel:[1,1] op_sel_hi:[0,0]
// GFX11: [0x05,0x58,0x02,0xcc,0xc1,0xfe,0x00,0x00]
-v_pk_add_i16 v5, 0.5, m0 op_sel:[0,0] op_sel_hi:[1,1]
+v_pk_add_i16 v5, 0x3800, m0 op_sel:[0,0] op_sel_hi:[1,1]
// GFX11: [0x05,0x40,0x02,0xcc,0xff,0xfa,0x00,0x18,0x00,0x38,0x00,0x00]
+v_pk_add_i16 v5, 0.5, m0 op_sel:[0,0] op_sel_hi:[1,1]
+// GFX11: [0x05,0x40,0x02,0xcc,0xf0,0xfa,0x00,0x18]
+
v_pk_add_i16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1]
// GFX11: [0x05,0x48,0x02,0xcc,0xfd,0xd4,0x00,0x10]
@@ -508,7 +511,7 @@ v_pk_add_u16 v5, ttmp15, src_scc
// GFX11: [0x05,0x40,0x0a,0xcc,0x7b,0xfa,0x01,0x18]
v_pk_add_u16 v5, m0, 0.5
-// GFX11: [0x05,0x40,0x0a,0xcc,0x7d,0xfe,0x01,0x18,0x00,0x38,0x00,0x00]
+// GFX11: [0x05,0x40,0x0a,0xcc,0x7d,0xe0,0x01,0x18]
v_pk_add_u16 v5, exec_lo, -1
// GFX11: [0x05,0x40,0x0a,0xcc,0x7e,0x82,0x01,0x18]
@@ -523,7 +526,7 @@ v_pk_add_u16 v5, -1, exec_hi op_sel:[1,1] op_sel_hi:[0,0]
// GFX11: [0x05,0x58,0x0a,0xcc,0xc1,0xfe,0x00,0x00]
v_pk_add_u16 v5, 0.5, m0 op_sel:[0,0] op_sel_hi:[1,1]
-// GFX11: [0x05,0x40,0x0a,0xcc,0xff,0xfa,0x00,0x18,0x00,0x38,0x00,0x00]
+// GFX11: [0x05,0x40,0x0a,0xcc,0xf0,0xfa,0x00,0x18]
v_pk_add_u16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1]
// GFX11: [0x05,0x48,0x0a,0xcc,0xfd,0xd4,0x00,0x10]
@@ -553,7 +556,7 @@ v_pk_ashrrev_i16 v5, ttmp15, src_scc
// GFX11: [0x05,0x40,0x06,0xcc,0x7b,0xfa,0x01,0x18]
v_pk_ashrrev_i16 v5, m0, 0.5
-// GFX11: [0x05,0x40,0x06,0xcc,0x7d,0xfe,0x01,0x18,0x00,0x38,0x00,0x00]
+// GFX11: [0x05,0x40,0x06,0xcc,0x7d,0xe0,0x01,0x18]
v_pk_ashrrev_i16 v5, exec_lo, -1
// GFX11: [0x05,0x40,0x06,0xcc,0x7e,0x82,0x01,0x18]
@@ -568,7 +571,7 @@ v_pk_ashrrev_i16 v5, -1, exec_hi op_sel:[1,1] op_sel_hi:[0,0]
// GFX11: [0x05,0x58,0x06,0xcc,0xc1,0xfe,0x00,0x00]
v_pk_ashrrev_i16 v5, 0.5, m0 op_sel:[0,0] op_sel_hi:[1,1]
-// GFX11: [0x05,0x40,0x06,0xcc,0xff,0xfa,0x00,0x18,0x00,0x38,0x00,0x00]
+// GFX11: [0x05,0x40,0x06,0xcc,0xf0,0xfa,0x00,0x18]
v_pk_ashrrev_i16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1]
// GFX11: [0x05,0x48,0x06,0xcc,0xfd,0xd4,0x00,0x10]
@@ -643,7 +646,7 @@ v_pk_lshlrev_b16 v5, ttmp15, src_scc
// GFX11: [0x05,0x40,0x04,0xcc,0x7b,0xfa,0x01,0x18]
v_pk_lshlrev_b16 v5, m0, 0.5
-// GFX11: [0x05,0x40,0x04,0xcc,0x7d,0xfe,0x01,0x18,0x00,0x38,0x00,0x00]
+// GFX11: [0x05,0x40,0x04,0xcc,0x7d,0xe0,0x01,0x18]
v_pk_lshlrev_b16 v5, exec_lo, -1
// GFX11: [0x05,0x40,0x04,0xcc,0x7e,0x82,0x01,0x18]
@@ -658,7 +661,7 @@ v_pk_lshlrev_b16 v5, -1, exec_hi op_sel:[1,1] op_sel_hi:[0,0]
// GFX11: [0x05,0x58,0x04,0xcc,0xc1,0xfe,0x00,0x00]
v_pk_lshlrev_b16 v5, 0.5, m0 op_sel:[0,0] op_sel_hi:[1,1]
-// GFX11: [0x05,0x40,0x04,0xcc,0xff,0xfa,0x00,0x18,0x00,0x38,0x00,0x00]
+// GFX11: [0x05,0x40,0x04,0xcc,0xf0,0xfa,0x00,0x18]
v_pk_lshlrev_b16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1]
// GFX11: [0x05,0x48,0x04,0xcc,0xfd,0xd4,0x00,0x10]
@@ -688,7 +691,7 @@ v_pk_lshrrev_b16 v5, ttmp15, src_scc
// GFX11: [0x05,0x40,0x05,0xcc,0x7b,0xfa,0x01,0x18]
v_pk_lshrrev_b16 v5, m0, 0.5
-// GFX11: [0x05,0x40,0x05,0xcc,0x7d,0xfe,0x01,0x18,0x00,0x38,0x00,0x00]
+// GFX11: [0x05,0x40,0x05,0xcc,0x7d,0xe0,0x01,0x18]
v_pk_lshrrev_b16 v5, exec_lo, -1
// GFX11: [0x05,0x40,0x05,0xcc,0x7e,0x82,0x01,0x18]
@@ -703,7 +706,7 @@ v_pk_lshrrev_b16 v5, -1, exec_hi op_sel:[1,1] op_sel_hi:[0,0]
// GFX11: [0x05,0x58,0x05,0xcc,0xc1,0xfe,0x00,0x00]
v_pk_lshrrev_b16 v5, 0.5, m0 op_sel:[0,0] op_sel_hi:[1,1]
-// GFX11: [0x05,0x40,0x05,0xcc,0xff,0xfa,0x00,0x18,0x00,0x38,0x00,0x00]
+// GFX11: [0x05,0x40,0x05,0xcc,0xf0,0xfa,0x00,0x18]
v_pk_lshrrev_b16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1]
// GFX11: [0x05,0x48,0x05,0xcc,0xfd,0xd4,0x00,0x10]
@@ -733,7 +736,7 @@ v_pk_mad_i16 v5, ttmp15, src_scc, ttmp15
// GFX11: [0x05,0x40,0x00,0xcc,0x7b,0xfa,0xed,0x19]
v_pk_mad_i16 v5, m0, 0.5, m0 op_sel_hi:[0,0,0]
-// GFX11: [0x05,0x00,0x00,0xcc,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00]
+// GFX11: [0x05,0x00,0x00,0xcc,0x7d,0xe0,0xf5,0x01]
v_pk_mad_i16 v5, exec_lo, -1, vcc_hi op_sel_hi:[0,0,1]
// GFX11: [0x05,0x40,0x00,0xcc,0x7e,0x82,0xad,0x01]
@@ -748,7 +751,7 @@ v_pk_mad_i16 v5, -1, exec_hi, src_scc op_sel:[0,0,0] op_sel_hi:[1,1,1]
// GFX11: [0x05,0x40,0x00,0xcc,0xc1,0xfe,0xf4,0x1b]
v_pk_mad_i16 v5, 0.5, m0, 0.5 op_sel:[1,0,0] op_sel_hi:[0,1,1]
-// GFX11: [0x05,0x48,0x00,0xcc,0xff,0xfa,0xfc,0x13,0x00,0x38,0x00,0x00]
+// GFX11: [0x05,0x48,0x00,0xcc,0xf0,0xfa,0xc0,0x13]
v_pk_mad_i16 v5, src_scc, vcc_lo, -1 op_sel:[0,1,0] op_sel_hi:[1,0,1]
// GFX11: [0x05,0x50,0x00,0xcc,0xfd,0xd4,0x04,0x0b]
@@ -778,7 +781,7 @@ v_pk_mad_u16 v5, ttmp15, src_scc, ttmp15
// GFX11: [0x05,0x40,0x09,0xcc,0x7b,0xfa,0xed,0x19]
v_pk_mad_u16 v5, m0, 0.5, m0 op_sel_hi:[0,0,0]
-// GFX11: [0x05,0x00,0x09,0xcc,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00]
+// GFX11: [0x05,0x00,0x09,0xcc,0x7d,0xe0,0xf5,0x01]
v_pk_mad_u16 v5, exec_lo, -1, vcc_hi op_sel_hi:[0,0,1]
// GFX11: [0x05,0x40,0x09,0xcc,0x7e,0x82,0xad,0x01]
@@ -793,7 +796,7 @@ v_pk_mad_u16 v5, -1, exec_hi, src_scc op_sel:[0,0,0] op_sel_hi:[1,1,1]
// GFX11: [0x05,0x40,0x09,0xcc,0xc1,0xfe,0xf4,0x1b]
v_pk_mad_u16 v5, 0.5, m0, 0.5 op_sel:[1,0,0] op_sel_hi:[0,1,1]
-// GFX11: [0x05,0x48,0x09,0xcc,0xff,0xfa,0xfc,0x13,0x00,0x38,0x00,0x00]
+// GFX11: [0x05,0x48,0x09,0xcc,0xf0,0xfa,0xc0,0x13]
v_pk_mad_u16 v5, src_scc, vcc_lo, -1 op_sel:[0,1,0] op_sel_hi:[1,0,1]
// GFX11: [0x05,0x50,0x09,0xcc,0xfd,0xd4,0x04,0x0b]
@@ -868,7 +871,7 @@ v_pk_max_i16 v5, ttmp15, src_scc
// GFX11: [0x05,0x40,0x07,0xcc,0x7b,0xfa,0x01,0x18]
v_pk_max_i16 v5, m0, 0.5
-// GFX11: [0x05,0x40,0x07,0xcc,0x7d,0xfe,0x01,0x18,0x00,0x38,0x00,0x00]
+// GFX11: [0x05,0x40,0x07,0xcc,0x7d,0xe0,0x01,0x18]
v_pk_max_i16 v5, exec_lo, -1
// GFX11: [0x05,0x40,0x07,0xcc,0x7e,0x82,0x01,0x18]
@@ -883,7 +886,7 @@ v_pk_max_i16 v5, -1, exec_hi op_sel:[1,1] op_sel_hi:[0,0]
// GFX11: [0x05,0x58,0x07,0xcc,0xc1,0xfe,0x00,0x00]
v_pk_max_i16 v5, 0.5, m0 op_sel:[0,0] op_sel_hi:[1,1]
-// GFX11: [0x05,0x40,0x07,0xcc,0xff,0xfa,0x00,0x18,0x00,0x38,0x00,0x00]
+// GFX11: [0x05,0x40,0x07,0xcc,0xf0,0xfa,0x00,0x18]
v_pk_max_i16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1]
// GFX11: [0x05,0x48,0x07,0xcc,0xfd,0xd4,0x00,0x10]
@@ -913,7 +916,7 @@ v_pk_max_u16 v5, ttmp15, src_scc
// GFX11: [0x05,0x40,0x0c,0xcc,0x7b,0xfa,0x01,0x18]
v_pk_max_u16 v5, m0, 0.5
-// GFX11: [0x05,0x40,0x0c,0xcc,0x7d,0xfe,0x01,0x18,0x00,0x38,0x00,0x00]
+// GFX11: [0x05,0x40,0x0c,0xcc,0x7d,0xe0,0x01,0x18]
v_pk_max_u16 v5, exec_lo, -1
// GFX11: [0x05,0x40,0x0c,0xcc,0x7e,0x82,0x01,0x18]
@@ -928,7 +931,7 @@ v_pk_max_u16 v5, -1, exec_hi op_sel:[1,1] op_sel_hi:[0,0]
// GFX11: [0x05,0x58,0x0c,0xcc,0xc1,0xfe,0x00,0x00]
v_pk_max_u16 v5, 0.5, m0 op_sel:[0,0] op_sel_hi:[1,1]
-// GFX11: [0x05,0x40,0x0c,0xcc,0xff,0xfa,0x00,0x18,0x00,0x38,0x00,0x00]
+// GFX11: [0x05,0x40,0x0c,0xcc,0xf0,0xfa,0x00,0x18]
v_pk_max_u16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1]
// GFX11: [0x05,0x48,0x0c,0xcc,0xfd,0xd4,0x00,0x10]
@@ -1003,7 +1006,7 @@ v_pk_min_i16 v5, ttmp15, src_scc
// GFX11: [0x05,0x40,0x08,0xcc,0x7b,0xfa,0x01,0x18]
v_pk_min_i16 v5, m0, 0.5
-// GFX11: [0x05,0x40,0x08,0xcc,0x7d,0xfe,0x01,0x18,0x00,0x38,0x00,0x00]
+// GFX11: [0x05,0x40,0x08,0xcc,0x7d,0xe0,0x01,0x18]
v_pk_min_i16 v5, exec_lo, -1
// GFX11: [0x05,0x40,0x08,0xcc,0x7e,0x82,0x01,0x18]
@@ -1018,7 +1021,7 @@ v_pk_min_i16 v5, -1, exec_hi op_sel:[1,1] op_sel_hi:[0,0]
// GFX11: [0x05,0x58,0x08,0xcc,0xc1,0xfe,0x00,0x00]
v_pk_min_i16 v5, 0.5, m0 op_sel:[0,0] op_sel_hi:[1,1]
-// GFX11: [0x05,0x40,0x08,0xcc,0xff,0xfa,0x00,0x18,0x00,0x38,0x00,0x00]
+// GFX11: [0x05,0x40,0x08,0xcc,0xf0,0xfa,0x00,0x18]
v_pk_min_i16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1]
// GFX11: [0x05,0x48,0x08,0xcc,0xfd,0xd4,0x00,0x10]
@@ -1048,7 +1051,7 @@ v_pk_min_u16 v5, ttmp15, src_scc
// GFX11: [0x05,0x40,0x0d,0xcc,0x7b,0xfa,0x01,0x18]
v_pk_min_u16 v5, m0, 0.5
-// GFX11: [0x05,0x40,0x0d,0xcc,0x7d,0xfe,0x01,0x18,0x00,0x38,0x00,0x00]
+// GFX11: [0x05,0x40,0x0d,0xcc,0x7d,0xe0,0x01,0x18]
v_pk_min_u16 v5, exec_lo, -1
// GFX11: [0x05,0x40,0x0d,0xcc,0x7e,0x82,0x01,0x18]
@@ -1063,7 +1066,7 @@ v_pk_min_u16 v5, -1, exec_hi op_sel:[1,1] op_sel_hi:[0,0]
// GFX11: [0x05,0x58,0x0d,0xcc,0xc1,0xfe,0x00,0x00]
v_pk_min_u16 v5, 0.5, m0 op_sel:[0,0] op_sel_hi:[1,1]
-// GFX11: [0x05,0x40,0x0d,0xcc,0xff,0xfa,0x00,0x18,0x00,0x38,0x00,0x00]
+// GFX11: [0x05,0x40,0x0d,0xcc,0xf0,0xfa,0x00,0x18]
v_pk_min_u16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1]
// GFX11: [0x05,0x48,0x0d,0xcc,0xfd,0xd4,0x00,0x10]
@@ -1138,7 +1141,7 @@ v_pk_mul_lo_u16 v5, ttmp15, src_scc
// GFX11: [0x05,0x40,0x01,0xcc,0x7b,0xfa,0x01,0x18]
v_pk_mul_lo_u16 v5, m0, 0.5
-// GFX11: [0x05,0x40,0x01,0xcc,0x7d,0xfe,0x01,0x18,0x00,0x38,0x00,0x00]
+// GFX11: [0x05,0x40,0x01,0xcc,0x7d,0xe0,0x01,0x18]
v_pk_mul_lo_u16 v5, exec_lo, -1
// GFX11: [0x05,0x40,0x01,0xcc,0x7e,0x82,0x01,0x18]
@@ -1153,7 +1156,7 @@ v_pk_mul_lo_u16 v5, -1, exec_hi op_sel:[1,1] op_sel_hi:[0,0]
// GFX11: [0x05,0x58,0x01,0xcc,0xc1,0xfe,0x00,0x00]
v_pk_mul_lo_u16 v5, 0.5, m0 op_sel:[0,0] op_sel_hi:[1,1]
-// GFX11: [0x05,0x40,0x01,0xcc,0xff,0xfa,0x00,0x18,0x00,0x38,0x00,0x00]
+// GFX11: [0x05,0x40,0x01,0xcc,0xf0,0xfa,0x00,0x18]
v_pk_mul_lo_u16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1]
// GFX11: [0x05,0x48,0x01,0xcc,0xfd,0xd4,0x00,0x10]
@@ -1183,7 +1186,7 @@ v_pk_sub_i16 v5, ttmp15, src_scc
// GFX11: [0x05,0x40,0x03,0xcc,0x7b,0xfa,0x01,0x18]
v_pk_sub_i16 v5, m0, 0.5
-// GFX11: [0x05,0x40,0x03,0xcc,0x7d,0xfe,0x01,0x18,0x00,0x38,0x00,0x00]
+// GFX11: [0x05,0x40,0x03,0xcc,0x7d,0xe0,0x01,0x18]
v_pk_sub_i16 v5, exec_lo, -1
// GFX11: [0x05,0x40,0x03,0xcc,0x7e,0x82,0x01,0x18]
@@ -1198,7 +1201,7 @@ v_pk_sub_i16 v5, -1, exec_hi op_sel:[1,1] op_sel_hi:[0,0]
// GFX11: [0x05,0x58,0x03,0xcc,0xc1,0xfe,0x00,0x00]
v_pk_sub_i16 v5, 0.5, m0 op_sel:[0,0] op_sel_hi:[1,1]
-// GFX11: [0x05,0x40,0x03,0xcc,0xff,0xfa,0x00,0x18,0x00,0x38,0x00,0x00]
+// GFX11: [0x05,0x40,0x03,0xcc,0xf0,0xfa,0x00,0x18]
v_pk_sub_i16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1]
// GFX11: [0x05,0x48,0x03,0xcc,0xfd,0xd4,0x00,0x10]
@@ -1228,7 +1231,7 @@ v_pk_sub_u16 v5, ttmp15, src_scc
// GFX11: [0x05,0x40,0x0b,0xcc,0x7b,0xfa,0x01,0x18]
v_pk_sub_u16 v5, m0, 0.5
-// GFX11: [0x05,0x40,0x0b,0xcc,0x7d,0xfe,0x01,0x18,0x00,0x38,0x00,0x00]
+// GFX11: [0x05,0x40,0x0b,0xcc,0x7d,0xe0,0x01,0x18]
v_pk_sub_u16 v5, exec_lo, -1
// GFX11: [0x05,0x40,0x0b,0xcc,0x7e,0x82,0x01,0x18]
@@ -1243,7 +1246,7 @@ v_pk_sub_u16 v5, -1, exec_hi op_sel:[1,1] op_sel_hi:[0,0]
// GFX11: [0x05,0x58,0x0b,0xcc,0xc1,0xfe,0x00,0x00]
v_pk_sub_u16 v5, 0.5, m0 op_sel:[0,0] op_sel_hi:[1,1]
-// GFX11: [0x05,0x40,0x0b,0xcc,0xff,0xfa,0x00,0x18,0x00,0x38,0x00,0x00]
+// GFX11: [0x05,0x40,0x0b,0xcc,0xf0,0xfa,0x00,0x18]
v_pk_sub_u16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1]
// GFX11: [0x05,0x48,0x0b,0xcc,0xfd,0xd4,0x00,0x10]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3p.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3p.s
index 9a21f7a..a8347fb 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3p.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3p.s
@@ -463,7 +463,7 @@ v_pk_add_i16 v5, ttmp15, src_scc
// GFX12: [0x05,0x40,0x02,0xcc,0x7b,0xfa,0x01,0x18]
v_pk_add_i16 v5, m0, 0.5
-// GFX12: [0x05,0x40,0x02,0xcc,0x7d,0xfe,0x01,0x18,0x00,0x38,0x00,0x00]
+// GFX12: [0x05,0x40,0x02,0xcc,0x7d,0xe0,0x01,0x18]
v_pk_add_i16 v5, exec_lo, -1
// GFX12: [0x05,0x40,0x02,0xcc,0x7e,0x82,0x01,0x18]
@@ -478,7 +478,7 @@ v_pk_add_i16 v5, -1, exec_hi op_sel:[1,1] op_sel_hi:[0,0]
// GFX12: [0x05,0x58,0x02,0xcc,0xc1,0xfe,0x00,0x00]
v_pk_add_i16 v5, 0.5, m0 op_sel:[0,0] op_sel_hi:[1,1]
-// GFX12: [0x05,0x40,0x02,0xcc,0xff,0xfa,0x00,0x18,0x00,0x38,0x00,0x00]
+// GFX12: [0x05,0x40,0x02,0xcc,0xf0,0xfa,0x00,0x18]
v_pk_add_i16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1]
// GFX12: [0x05,0x48,0x02,0xcc,0xfd,0xd4,0x00,0x10]
@@ -508,7 +508,7 @@ v_pk_add_u16 v5, ttmp15, src_scc
// GFX12: [0x05,0x40,0x0a,0xcc,0x7b,0xfa,0x01,0x18]
v_pk_add_u16 v5, m0, 0.5
-// GFX12: [0x05,0x40,0x0a,0xcc,0x7d,0xfe,0x01,0x18,0x00,0x38,0x00,0x00]
+// GFX12: [0x05,0x40,0x0a,0xcc,0x7d,0xe0,0x01,0x18]
v_pk_add_u16 v5, exec_lo, -1
// GFX12: [0x05,0x40,0x0a,0xcc,0x7e,0x82,0x01,0x18]
@@ -523,7 +523,7 @@ v_pk_add_u16 v5, -1, exec_hi op_sel:[1,1] op_sel_hi:[0,0]
// GFX12: [0x05,0x58,0x0a,0xcc,0xc1,0xfe,0x00,0x00]
v_pk_add_u16 v5, 0.5, m0 op_sel:[0,0] op_sel_hi:[1,1]
-// GFX12: [0x05,0x40,0x0a,0xcc,0xff,0xfa,0x00,0x18,0x00,0x38,0x00,0x00]
+// GFX12: [0x05,0x40,0x0a,0xcc,0xf0,0xfa,0x00,0x18]
v_pk_add_u16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1]
// GFX12: [0x05,0x48,0x0a,0xcc,0xfd,0xd4,0x00,0x10]
@@ -553,7 +553,7 @@ v_pk_ashrrev_i16 v5, ttmp15, src_scc
// GFX12: [0x05,0x40,0x06,0xcc,0x7b,0xfa,0x01,0x18]
v_pk_ashrrev_i16 v5, m0, 0.5
-// GFX12: [0x05,0x40,0x06,0xcc,0x7d,0xfe,0x01,0x18,0x00,0x38,0x00,0x00]
+// GFX12: [0x05,0x40,0x06,0xcc,0x7d,0xe0,0x01,0x18]
v_pk_ashrrev_i16 v5, exec_lo, -1
// GFX12: [0x05,0x40,0x06,0xcc,0x7e,0x82,0x01,0x18]
@@ -568,7 +568,7 @@ v_pk_ashrrev_i16 v5, -1, exec_hi op_sel:[1,1] op_sel_hi:[0,0]
// GFX12: [0x05,0x58,0x06,0xcc,0xc1,0xfe,0x00,0x00]
v_pk_ashrrev_i16 v5, 0.5, m0 op_sel:[0,0] op_sel_hi:[1,1]
-// GFX12: [0x05,0x40,0x06,0xcc,0xff,0xfa,0x00,0x18,0x00,0x38,0x00,0x00]
+// GFX12: [0x05,0x40,0x06,0xcc,0xf0,0xfa,0x00,0x18]
v_pk_ashrrev_i16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1]
// GFX12: [0x05,0x48,0x06,0xcc,0xfd,0xd4,0x00,0x10]
@@ -643,7 +643,7 @@ v_pk_lshlrev_b16 v5, ttmp15, src_scc
// GFX12: [0x05,0x40,0x04,0xcc,0x7b,0xfa,0x01,0x18]
v_pk_lshlrev_b16 v5, m0, 0.5
-// GFX12: [0x05,0x40,0x04,0xcc,0x7d,0xfe,0x01,0x18,0x00,0x38,0x00,0x00]
+// GFX12: [0x05,0x40,0x04,0xcc,0x7d,0xe0,0x01,0x18]
v_pk_lshlrev_b16 v5, exec_lo, -1
// GFX12: [0x05,0x40,0x04,0xcc,0x7e,0x82,0x01,0x18]
@@ -658,6 +658,9 @@ v_pk_lshlrev_b16 v5, -1, exec_hi op_sel:[1,1] op_sel_hi:[0,0]
// GFX12: [0x05,0x58,0x04,0xcc,0xc1,0xfe,0x00,0x00]
v_pk_lshlrev_b16 v5, 0.5, m0 op_sel:[0,0] op_sel_hi:[1,1]
+// GFX12: [0x05,0x40,0x04,0xcc,0xf0,0xfa,0x00,0x18]
+
+v_pk_lshlrev_b16 v5, 0x3800, m0 op_sel:[0,0] op_sel_hi:[1,1]
// GFX12: [0x05,0x40,0x04,0xcc,0xff,0xfa,0x00,0x18,0x00,0x38,0x00,0x00]
v_pk_lshlrev_b16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1]
@@ -688,7 +691,7 @@ v_pk_lshrrev_b16 v5, ttmp15, src_scc
// GFX12: [0x05,0x40,0x05,0xcc,0x7b,0xfa,0x01,0x18]
v_pk_lshrrev_b16 v5, m0, 0.5
-// GFX12: [0x05,0x40,0x05,0xcc,0x7d,0xfe,0x01,0x18,0x00,0x38,0x00,0x00]
+// GFX12: [0x05,0x40,0x05,0xcc,0x7d,0xe0,0x01,0x18]
v_pk_lshrrev_b16 v5, exec_lo, -1
// GFX12: [0x05,0x40,0x05,0xcc,0x7e,0x82,0x01,0x18]
@@ -703,7 +706,7 @@ v_pk_lshrrev_b16 v5, -1, exec_hi op_sel:[1,1] op_sel_hi:[0,0]
// GFX12: [0x05,0x58,0x05,0xcc,0xc1,0xfe,0x00,0x00]
v_pk_lshrrev_b16 v5, 0.5, m0 op_sel:[0,0] op_sel_hi:[1,1]
-// GFX12: [0x05,0x40,0x05,0xcc,0xff,0xfa,0x00,0x18,0x00,0x38,0x00,0x00]
+// GFX12: [0x05,0x40,0x05,0xcc,0xf0,0xfa,0x00,0x18]
v_pk_lshrrev_b16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1]
// GFX12: [0x05,0x48,0x05,0xcc,0xfd,0xd4,0x00,0x10]
@@ -733,7 +736,7 @@ v_pk_mad_i16 v5, ttmp15, src_scc, ttmp15
// GFX12: [0x05,0x40,0x00,0xcc,0x7b,0xfa,0xed,0x19]
v_pk_mad_i16 v5, m0, 0.5, m0 op_sel_hi:[0,0,0]
-// GFX12: [0x05,0x00,0x00,0xcc,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00]
+// GFX12: [0x05,0x00,0x00,0xcc,0x7d,0xe0,0xf5,0x01]
v_pk_mad_i16 v5, exec_lo, -1, vcc_hi op_sel_hi:[0,0,1]
// GFX12: [0x05,0x40,0x00,0xcc,0x7e,0x82,0xad,0x01]
@@ -748,7 +751,7 @@ v_pk_mad_i16 v5, -1, exec_hi, src_scc op_sel:[0,0,0] op_sel_hi:[1,1,1]
// GFX12: [0x05,0x40,0x00,0xcc,0xc1,0xfe,0xf4,0x1b]
v_pk_mad_i16 v5, 0.5, m0, 0.5 op_sel:[1,0,0] op_sel_hi:[0,1,1]
-// GFX12: [0x05,0x48,0x00,0xcc,0xff,0xfa,0xfc,0x13,0x00,0x38,0x00,0x00]
+// GFX12: [0x05,0x48,0x00,0xcc,0xf0,0xfa,0xc0,0x13]
v_pk_mad_i16 v5, src_scc, vcc_lo, -1 op_sel:[0,1,0] op_sel_hi:[1,0,1]
// GFX12: [0x05,0x50,0x00,0xcc,0xfd,0xd4,0x04,0x0b]
@@ -778,7 +781,7 @@ v_pk_mad_u16 v5, ttmp15, src_scc, ttmp15
// GFX12: [0x05,0x40,0x09,0xcc,0x7b,0xfa,0xed,0x19]
v_pk_mad_u16 v5, m0, 0.5, m0 op_sel_hi:[0,0,0]
-// GFX12: [0x05,0x00,0x09,0xcc,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00]
+// GFX12: [0x05,0x00,0x09,0xcc,0x7d,0xe0,0xf5,0x01]
v_pk_mad_u16 v5, exec_lo, -1, vcc_hi op_sel_hi:[0,0,1]
// GFX12: [0x05,0x40,0x09,0xcc,0x7e,0x82,0xad,0x01]
@@ -793,7 +796,7 @@ v_pk_mad_u16 v5, -1, exec_hi, src_scc op_sel:[0,0,0] op_sel_hi:[1,1,1]
// GFX12: [0x05,0x40,0x09,0xcc,0xc1,0xfe,0xf4,0x1b]
v_pk_mad_u16 v5, 0.5, m0, 0.5 op_sel:[1,0,0] op_sel_hi:[0,1,1]
-// GFX12: [0x05,0x48,0x09,0xcc,0xff,0xfa,0xfc,0x13,0x00,0x38,0x00,0x00]
+// GFX12: [0x05,0x48,0x09,0xcc,0xf0,0xfa,0xc0,0x13]
v_pk_mad_u16 v5, src_scc, vcc_lo, -1 op_sel:[0,1,0] op_sel_hi:[1,0,1]
// GFX12: [0x05,0x50,0x09,0xcc,0xfd,0xd4,0x04,0x0b]
@@ -868,7 +871,7 @@ v_pk_max_i16 v5, ttmp15, src_scc
// GFX12: [0x05,0x40,0x07,0xcc,0x7b,0xfa,0x01,0x18]
v_pk_max_i16 v5, m0, 0.5
-// GFX12: [0x05,0x40,0x07,0xcc,0x7d,0xfe,0x01,0x18,0x00,0x38,0x00,0x00]
+// GFX12: [0x05,0x40,0x07,0xcc,0x7d,0xe0,0x01,0x18]
v_pk_max_i16 v5, exec_lo, -1
// GFX12: [0x05,0x40,0x07,0xcc,0x7e,0x82,0x01,0x18]
@@ -883,7 +886,7 @@ v_pk_max_i16 v5, -1, exec_hi op_sel:[1,1] op_sel_hi:[0,0]
// GFX12: [0x05,0x58,0x07,0xcc,0xc1,0xfe,0x00,0x00]
v_pk_max_i16 v5, 0.5, m0 op_sel:[0,0] op_sel_hi:[1,1]
-// GFX12: [0x05,0x40,0x07,0xcc,0xff,0xfa,0x00,0x18,0x00,0x38,0x00,0x00]
+// GFX12: [0x05,0x40,0x07,0xcc,0xf0,0xfa,0x00,0x18]
v_pk_max_i16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1]
// GFX12: [0x05,0x48,0x07,0xcc,0xfd,0xd4,0x00,0x10]
@@ -913,7 +916,7 @@ v_pk_max_u16 v5, ttmp15, src_scc
// GFX12: [0x05,0x40,0x0c,0xcc,0x7b,0xfa,0x01,0x18]
v_pk_max_u16 v5, m0, 0.5
-// GFX12: [0x05,0x40,0x0c,0xcc,0x7d,0xfe,0x01,0x18,0x00,0x38,0x00,0x00]
+// GFX12: [0x05,0x40,0x0c,0xcc,0x7d,0xe0,0x01,0x18]
v_pk_max_u16 v5, exec_lo, -1
// GFX12: [0x05,0x40,0x0c,0xcc,0x7e,0x82,0x01,0x18]
@@ -928,7 +931,7 @@ v_pk_max_u16 v5, -1, exec_hi op_sel:[1,1] op_sel_hi:[0,0]
// GFX12: [0x05,0x58,0x0c,0xcc,0xc1,0xfe,0x00,0x00]
v_pk_max_u16 v5, 0.5, m0 op_sel:[0,0] op_sel_hi:[1,1]
-// GFX12: [0x05,0x40,0x0c,0xcc,0xff,0xfa,0x00,0x18,0x00,0x38,0x00,0x00]
+// GFX12: [0x05,0x40,0x0c,0xcc,0xf0,0xfa,0x00,0x18]
v_pk_max_u16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1]
// GFX12: [0x05,0x48,0x0c,0xcc,0xfd,0xd4,0x00,0x10]
@@ -1003,7 +1006,7 @@ v_pk_min_i16 v5, ttmp15, src_scc
// GFX12: [0x05,0x40,0x08,0xcc,0x7b,0xfa,0x01,0x18]
v_pk_min_i16 v5, m0, 0.5
-// GFX12: [0x05,0x40,0x08,0xcc,0x7d,0xfe,0x01,0x18,0x00,0x38,0x00,0x00]
+// GFX12: [0x05,0x40,0x08,0xcc,0x7d,0xe0,0x01,0x18]
v_pk_min_i16 v5, exec_lo, -1
// GFX12: [0x05,0x40,0x08,0xcc,0x7e,0x82,0x01,0x18]
@@ -1018,7 +1021,7 @@ v_pk_min_i16 v5, -1, exec_hi op_sel:[1,1] op_sel_hi:[0,0]
// GFX12: [0x05,0x58,0x08,0xcc,0xc1,0xfe,0x00,0x00]
v_pk_min_i16 v5, 0.5, m0 op_sel:[0,0] op_sel_hi:[1,1]
-// GFX12: [0x05,0x40,0x08,0xcc,0xff,0xfa,0x00,0x18,0x00,0x38,0x00,0x00]
+// GFX12: [0x05,0x40,0x08,0xcc,0xf0,0xfa,0x00,0x18]
v_pk_min_i16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1]
// GFX12: [0x05,0x48,0x08,0xcc,0xfd,0xd4,0x00,0x10]
@@ -1048,7 +1051,7 @@ v_pk_min_u16 v5, ttmp15, src_scc
// GFX12: [0x05,0x40,0x0d,0xcc,0x7b,0xfa,0x01,0x18]
v_pk_min_u16 v5, m0, 0.5
-// GFX12: [0x05,0x40,0x0d,0xcc,0x7d,0xfe,0x01,0x18,0x00,0x38,0x00,0x00]
+// GFX12: [0x05,0x40,0x0d,0xcc,0x7d,0xe0,0x01,0x18]
v_pk_min_u16 v5, exec_lo, -1
// GFX12: [0x05,0x40,0x0d,0xcc,0x7e,0x82,0x01,0x18]
@@ -1063,7 +1066,7 @@ v_pk_min_u16 v5, -1, exec_hi op_sel:[1,1] op_sel_hi:[0,0]
// GFX12: [0x05,0x58,0x0d,0xcc,0xc1,0xfe,0x00,0x00]
v_pk_min_u16 v5, 0.5, m0 op_sel:[0,0] op_sel_hi:[1,1]
-// GFX12: [0x05,0x40,0x0d,0xcc,0xff,0xfa,0x00,0x18,0x00,0x38,0x00,0x00]
+// GFX12: [0x05,0x40,0x0d,0xcc,0xf0,0xfa,0x00,0x18]
v_pk_min_u16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1]
// GFX12: [0x05,0x48,0x0d,0xcc,0xfd,0xd4,0x00,0x10]
@@ -1138,7 +1141,7 @@ v_pk_mul_lo_u16 v5, ttmp15, src_scc
// GFX12: [0x05,0x40,0x01,0xcc,0x7b,0xfa,0x01,0x18]
v_pk_mul_lo_u16 v5, m0, 0.5
-// GFX12: [0x05,0x40,0x01,0xcc,0x7d,0xfe,0x01,0x18,0x00,0x38,0x00,0x00]
+// GFX12: [0x05,0x40,0x01,0xcc,0x7d,0xe0,0x01,0x18]
v_pk_mul_lo_u16 v5, exec_lo, -1
// GFX12: [0x05,0x40,0x01,0xcc,0x7e,0x82,0x01,0x18]
@@ -1153,7 +1156,7 @@ v_pk_mul_lo_u16 v5, -1, exec_hi op_sel:[1,1] op_sel_hi:[0,0]
// GFX12: [0x05,0x58,0x01,0xcc,0xc1,0xfe,0x00,0x00]
v_pk_mul_lo_u16 v5, 0.5, m0 op_sel:[0,0] op_sel_hi:[1,1]
-// GFX12: [0x05,0x40,0x01,0xcc,0xff,0xfa,0x00,0x18,0x00,0x38,0x00,0x00]
+// GFX12: [0x05,0x40,0x01,0xcc,0xf0,0xfa,0x00,0x18]
v_pk_mul_lo_u16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1]
// GFX12: [0x05,0x48,0x01,0xcc,0xfd,0xd4,0x00,0x10]
@@ -1183,7 +1186,7 @@ v_pk_sub_i16 v5, ttmp15, src_scc
// GFX12: [0x05,0x40,0x03,0xcc,0x7b,0xfa,0x01,0x18]
v_pk_sub_i16 v5, m0, 0.5
-// GFX12: [0x05,0x40,0x03,0xcc,0x7d,0xfe,0x01,0x18,0x00,0x38,0x00,0x00]
+// GFX12: [0x05,0x40,0x03,0xcc,0x7d,0xe0,0x01,0x18]
v_pk_sub_i16 v5, exec_lo, -1
// GFX12: [0x05,0x40,0x03,0xcc,0x7e,0x82,0x01,0x18]
@@ -1198,7 +1201,7 @@ v_pk_sub_i16 v5, -1, exec_hi op_sel:[1,1] op_sel_hi:[0,0]
// GFX12: [0x05,0x58,0x03,0xcc,0xc1,0xfe,0x00,0x00]
v_pk_sub_i16 v5, 0.5, m0 op_sel:[0,0] op_sel_hi:[1,1]
-// GFX12: [0x05,0x40,0x03,0xcc,0xff,0xfa,0x00,0x18,0x00,0x38,0x00,0x00]
+// GFX12: [0x05,0x40,0x03,0xcc,0xf0,0xfa,0x00,0x18]
v_pk_sub_i16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1]
// GFX12: [0x05,0x48,0x03,0xcc,0xfd,0xd4,0x00,0x10]
@@ -1228,7 +1231,7 @@ v_pk_sub_u16 v5, ttmp15, src_scc
// GFX12: [0x05,0x40,0x0b,0xcc,0x7b,0xfa,0x01,0x18]
v_pk_sub_u16 v5, m0, 0.5
-// GFX12: [0x05,0x40,0x0b,0xcc,0x7d,0xfe,0x01,0x18,0x00,0x38,0x00,0x00]
+// GFX12: [0x05,0x40,0x0b,0xcc,0x7d,0xe0,0x01,0x18]
v_pk_sub_u16 v5, exec_lo, -1
// GFX12: [0x05,0x40,0x0b,0xcc,0x7e,0x82,0x01,0x18]
@@ -1243,7 +1246,7 @@ v_pk_sub_u16 v5, -1, exec_hi op_sel:[1,1] op_sel_hi:[0,0]
// GFX12: [0x05,0x58,0x0b,0xcc,0xc1,0xfe,0x00,0x00]
v_pk_sub_u16 v5, 0.5, m0 op_sel:[0,0] op_sel_hi:[1,1]
-// GFX12: [0x05,0x40,0x0b,0xcc,0xff,0xfa,0x00,0x18,0x00,0x38,0x00,0x00]
+// GFX12: [0x05,0x40,0x0b,0xcc,0xf0,0xfa,0x00,0x18]
v_pk_sub_u16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1]
// GFX12: [0x05,0x48,0x0b,0xcc,0xfd,0xd4,0x00,0x10]
diff --git a/llvm/test/MC/AMDGPU/literalv216.s b/llvm/test/MC/AMDGPU/literalv216.s
index 5b1c7a76..c695bc3 100644
--- a/llvm/test/MC/AMDGPU/literalv216.s
+++ b/llvm/test/MC/AMDGPU/literalv216.s
@@ -113,6 +113,10 @@ v_pk_add_f16 v1, 0x0001, v2
// GFX10: v_pk_add_f16 v1, 1, v2 ; encoding: [0x01,0x40,0x0f,0xcc,0x81,0x04,0x02,0x18]
v_pk_add_f16 v1, 0xffff, v2
+// NOGFX9: :[[@LINE-1]]:{{[0-9]+}}: error: literal operands are not supported
+// GFX10: v_pk_add_f16 v1, 0xffff, v2 ; encoding: [0x01,0x40,0x0f,0xcc,0xff,0x04,0x02,0x18,0xff,0xff,0x00,0x00]
+
+v_pk_add_f16 v1, 0xffffffff, v2
// GFX9: v_pk_add_f16 v1, -1, v2 ; encoding: [0x01,0x40,0x8f,0xd3,0xc1,0x04,0x02,0x18]
// GFX10: v_pk_add_f16 v1, -1, v2 ; encoding: [0x01,0x40,0x0f,0xcc,0xc1,0x04,0x02,0x18]
@@ -153,6 +157,10 @@ v_pk_add_f16 v1, 0x3118, v2
// GFX10: v_pk_add_f16 v1, 0.15915494, v2 ; encoding: [0x01,0x40,0x0f,0xcc,0xf8,0x04,0x02,0x18]
v_pk_add_f16 v1, 65535, v2
+// NOGFX9: :[[@LINE-1]]:{{[0-9]+}}: error: literal operands are not supported
+// GFX10: v_pk_add_f16 v1, 0xffff, v2 ; encoding: [0x01,0x40,0x0f,0xcc,0xff,0x04,0x02,0x18,0xff,0xff,0x00,0x00]
+
+v_pk_add_f16 v1, 4294967295, v2
// GFX9: v_pk_add_f16 v1, -1, v2 ; encoding: [0x01,0x40,0x8f,0xd3,0xc1,0x04,0x02,0x18]
// GFX10: v_pk_add_f16 v1, -1, v2 ; encoding: [0x01,0x40,0x0f,0xcc,0xc1,0x04,0x02,0x18]
@@ -242,7 +250,7 @@ v_pk_add_f16 v5, v1, 0.1234
v_pk_add_u16 v5, v1, 0.1234
// NOGFX9: :[[@LINE-1]]:{{[0-9]+}}: error: literal operands are not supported
-// GFX10: v_pk_add_u16 v5, v1, 0x2fe6 ; encoding: [0x05,0x40,0x0a,0xcc,0x01,0xff,0x01,0x18,0xe6,0x2f,0x00,0x00]
+// GFX10: v_pk_add_u16 v5, v1, 0x3dfcb924 ; encoding: [0x05,0x40,0x0a,0xcc,0x01,0xff,0x01,0x18,0x24,0xb9,0xfc,0x3d]
v_pk_fma_f16 v5, 0.1234, v2, v3
// NOGFX9: :[[@LINE-1]]:{{[0-9]+}}: error: literal operands are not supported
@@ -258,23 +266,23 @@ v_pk_fma_f16 v5, v1, v2, 0.1234
v_pk_mad_i16 v5, 0.1234, v2, v3
// NOGFX9: :[[@LINE-1]]:{{[0-9]+}}: error: literal operands are not supported
-// GFX10: v_pk_mad_i16 v5, 0x2fe6, v2, v3 ; encoding: [0x05,0x40,0x00,0xcc,0xff,0x04,0x0e,0x1c,0xe6,0x2f,0x00,0x00]
+// GFX10: v_pk_mad_i16 v5, 0x3dfcb924, v2, v3 ; encoding: [0x05,0x40,0x00,0xcc,0xff,0x04,0x0e,0x1c,0x24,0xb9,0xfc,0x3d]
v_pk_mad_i16 v5, v1, 0.1234, v3
// NOGFX9: :[[@LINE-1]]:{{[0-9]+}}: error: literal operands are not supported
-// GFX10: v_pk_mad_i16 v5, v1, 0x2fe6, v3 ; encoding: [0x05,0x40,0x00,0xcc,0x01,0xff,0x0d,0x1c,0xe6,0x2f,0x00,0x00]
+// GFX10: v_pk_mad_i16 v5, v1, 0x3dfcb924, v3 ; encoding: [0x05,0x40,0x00,0xcc,0x01,0xff,0x0d,0x1c,0x24,0xb9,0xfc,0x3d]
v_pk_mad_i16 v5, v1, v2, 0.1234
// NOGFX9: :[[@LINE-1]]:{{[0-9]+}}: error: literal operands are not supported
-// GFX10: v_pk_mad_i16 v5, v1, v2, 0x2fe6 ; encoding: [0x05,0x40,0x00,0xcc,0x01,0x05,0xfe,0x1b,0xe6,0x2f,0x00,0x00]
+// GFX10: v_pk_mad_i16 v5, v1, v2, 0x3dfcb924 ; encoding: [0x05,0x40,0x00,0xcc,0x01,0x05,0xfe,0x1b,0x24,0xb9,0xfc,0x3d]
v_pk_add_f16 v5, v1, 123456.0
// NOGFX9: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// NOGFX10: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
v_pk_add_u16 v5, v1, 123456.0
-// NOGFX9: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-// NOGFX10: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+// NOGFX9: :[[@LINE-1]]:{{[0-9]+}}: error: literal operands are not supported
+// GFX10: v_pk_add_u16 v5, v1, 0x47f12000 ; encoding: [0x05,0x40,0x0a,0xcc,0x01,0xff,0x01,0x18,0x00,0x20,0xf1,0x47]
//===----------------------------------------------------------------------===//
// Packed VOP2
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop3p_literalv216.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop3p_literalv216.txt
index e42d0de..a022c79 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop3p_literalv216.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop3p_literalv216.txt
@@ -79,7 +79,7 @@
# GFX10: v_pk_fma_f16 v5, -1, -2, -3 ; encoding: [0x05,0x40,0x0e,0xcc,0xc1,0x84,0x0d,0x1b]
0x05,0x40,0x0e,0xcc,0xc1,0x84,0x0d,0x1b
-# GFX10: v_pk_mad_i16 v5, 0x3c00, 0x4000, 0x4400 ; encoding: [0x05,0x40,0x00,0xcc,0xff,0xfe,0xfd,0x1b,0x00,0x3c,0x00,0x00]
+# GFX10: v_pk_mad_i16 v5, 1.0, 2.0, 4.0 ; encoding: [0x05,0x40,0x00,0xcc,0xf2,0xe8,0xd9,0x1b]
0x05,0x40,0x00,0xcc,0xf2,0xe8,0xd9,0x1b
# GFX10: v_pk_mad_u16 v5, -1, -2, -3 ; encoding: [0x05,0x40,0x09,0xcc,0xc1,0x84,0x0d,0x1b]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3p.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3p.txt
index bc2cb5f..838e6e0 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3p.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3p.txt
@@ -466,7 +466,7 @@
# GFX11: v_pk_add_i16 v5, ttmp15, src_scc ; encoding: [0x05,0x40,0x02,0xcc,0x7b,0xfa,0x01,0x18]
0x05,0x40,0x02,0xcc,0x7b,0xfa,0x01,0x18
-# GFX11: v_pk_add_i16 v5, m0, 0x3800
+# GFX11: v_pk_add_i16 v5, m0, 0.5 ; encoding: [0x05,0x40,0x02,0xcc,0x7d,0xe0,0x01,0x18]
0x05,0x40,0x02,0xcc,0x7d,0xe0,0x01,0x18
# GFX11: v_pk_add_i16 v5, exec_lo, -1 ; encoding: [0x05,0x40,0x02,0xcc,0x7e,0x82,0x01,0x18]
@@ -481,7 +481,7 @@
# GFX11: v_pk_add_i16 v5, -1, exec_hi ; encoding: [0x05,0x40,0x02,0xcc,0xc1,0xfe,0x00,0x18]
0x05,0x40,0x02,0xcc,0xc1,0xfe,0x00,0x18
-# GFX11: v_pk_add_i16 v5, 0x3800, m0 op_sel:[1,1] op_sel_hi:[0,0]
+# GFX11: v_pk_add_i16 v5, 0.5, m0 op_sel:[1,1] op_sel_hi:[0,0] ; encoding: [0x05,0x58,0x02,0xcc,0xf0,0xfa,0x00,0x00]
0x05,0x58,0x02,0xcc,0xf0,0xfa,0x00,0x00
# GFX11: v_pk_add_i16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1] ; encoding: [0x05,0x48,0x02,0xcc,0xfd,0xd4,0x00,0x10]
@@ -511,7 +511,7 @@
# GFX11: v_pk_add_u16 v5, ttmp15, src_scc ; encoding: [0x05,0x40,0x0a,0xcc,0x7b,0xfa,0x01,0x18]
0x05,0x40,0x0a,0xcc,0x7b,0xfa,0x01,0x18
-# GFX11: v_pk_add_u16 v5, m0, 0x3800
+# GFX11: v_pk_add_u16 v5, m0, 0.5 ; encoding: [0x05,0x40,0x0a,0xcc,0x7d,0xe0,0x01,0x18]
0x05,0x40,0x0a,0xcc,0x7d,0xe0,0x01,0x18
# GFX11: v_pk_add_u16 v5, exec_lo, -1 ; encoding: [0x05,0x40,0x0a,0xcc,0x7e,0x82,0x01,0x18]
@@ -526,7 +526,7 @@
# GFX11: v_pk_add_u16 v5, -1, exec_hi ; encoding: [0x05,0x40,0x0a,0xcc,0xc1,0xfe,0x00,0x18]
0x05,0x40,0x0a,0xcc,0xc1,0xfe,0x00,0x18
-# GFX11: v_pk_add_u16 v5, 0x3800, m0 op_sel:[1,1] op_sel_hi:[0,0]
+# GFX11: v_pk_add_u16 v5, 0.5, m0 op_sel:[1,1] op_sel_hi:[0,0] ; encoding: [0x05,0x58,0x0a,0xcc,0xf0,0xfa,0x00,0x00]
0x05,0x58,0x0a,0xcc,0xf0,0xfa,0x00,0x00
# GFX11: v_pk_add_u16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1] ; encoding: [0x05,0x48,0x0a,0xcc,0xfd,0xd4,0x00,0x10]
@@ -556,7 +556,7 @@
# GFX11: v_pk_ashrrev_i16 v5, ttmp15, src_scc ; encoding: [0x05,0x40,0x06,0xcc,0x7b,0xfa,0x01,0x18]
0x05,0x40,0x06,0xcc,0x7b,0xfa,0x01,0x18
-# GFX11: v_pk_ashrrev_i16 v5, m0, 0x3800
+# GFX11: v_pk_ashrrev_i16 v5, m0, 0.5 ; encoding: [0x05,0x40,0x06,0xcc,0x7d,0xe0,0x01,0x18]
0x05,0x40,0x06,0xcc,0x7d,0xe0,0x01,0x18
# GFX11: v_pk_ashrrev_i16 v5, exec_lo, -1 ; encoding: [0x05,0x40,0x06,0xcc,0x7e,0x82,0x01,0x18]
@@ -571,7 +571,7 @@
# GFX11: v_pk_ashrrev_i16 v5, -1, exec_hi ; encoding: [0x05,0x40,0x06,0xcc,0xc1,0xfe,0x00,0x18]
0x05,0x40,0x06,0xcc,0xc1,0xfe,0x00,0x18
-# GFX11: v_pk_ashrrev_i16 v5, 0x3800, m0 op_sel:[1,1] op_sel_hi:[0,0]
+# GFX11: v_pk_ashrrev_i16 v5, 0.5, m0 op_sel:[1,1] op_sel_hi:[0,0] ; encoding: [0x05,0x58,0x06,0xcc,0xf0,0xfa,0x00,0x00]
0x05,0x58,0x06,0xcc,0xf0,0xfa,0x00,0x00
# GFX11: v_pk_ashrrev_i16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1] ; encoding: [0x05,0x48,0x06,0xcc,0xfd,0xd4,0x00,0x10]
@@ -646,7 +646,7 @@
# GFX11: v_pk_lshlrev_b16 v5, ttmp15, src_scc ; encoding: [0x05,0x40,0x04,0xcc,0x7b,0xfa,0x01,0x18]
0x05,0x40,0x04,0xcc,0x7b,0xfa,0x01,0x18
-# GFX11: v_pk_lshlrev_b16 v5, m0, 0x3800
+# GFX11: v_pk_lshlrev_b16 v5, m0, 0.5 ; encoding: [0x05,0x40,0x04,0xcc,0x7d,0xe0,0x01,0x18]
0x05,0x40,0x04,0xcc,0x7d,0xe0,0x01,0x18
# GFX11: v_pk_lshlrev_b16 v5, exec_lo, -1 ; encoding: [0x05,0x40,0x04,0xcc,0x7e,0x82,0x01,0x18]
@@ -661,7 +661,7 @@
# GFX11: v_pk_lshlrev_b16 v5, -1, exec_hi ; encoding: [0x05,0x40,0x04,0xcc,0xc1,0xfe,0x00,0x18]
0x05,0x40,0x04,0xcc,0xc1,0xfe,0x00,0x18
-# GFX11: v_pk_lshlrev_b16 v5, 0x3800, m0 op_sel:[1,1] op_sel_hi:[0,0]
+# GFX11: v_pk_lshlrev_b16 v5, 0.5, m0 op_sel:[1,1] op_sel_hi:[0,0] ; encoding: [0x05,0x58,0x04,0xcc,0xf0,0xfa,0x00,0x00]
0x05,0x58,0x04,0xcc,0xf0,0xfa,0x00,0x00
# GFX11: v_pk_lshlrev_b16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1] ; encoding: [0x05,0x48,0x04,0xcc,0xfd,0xd4,0x00,0x10]
@@ -691,7 +691,7 @@
# GFX11: v_pk_lshrrev_b16 v5, ttmp15, src_scc ; encoding: [0x05,0x40,0x05,0xcc,0x7b,0xfa,0x01,0x18]
0x05,0x40,0x05,0xcc,0x7b,0xfa,0x01,0x18
-# GFX11: v_pk_lshrrev_b16 v5, m0, 0x3800
+# GFX11: v_pk_lshrrev_b16 v5, m0, 0.5 ; encoding: [0x05,0x40,0x05,0xcc,0x7d,0xe0,0x01,0x18]
0x05,0x40,0x05,0xcc,0x7d,0xe0,0x01,0x18
# GFX11: v_pk_lshrrev_b16 v5, exec_lo, -1 ; encoding: [0x05,0x40,0x05,0xcc,0x7e,0x82,0x01,0x18]
@@ -706,7 +706,7 @@
# GFX11: v_pk_lshrrev_b16 v5, -1, exec_hi ; encoding: [0x05,0x40,0x05,0xcc,0xc1,0xfe,0x00,0x18]
0x05,0x40,0x05,0xcc,0xc1,0xfe,0x00,0x18
-# GFX11: v_pk_lshrrev_b16 v5, 0x3800, m0 op_sel:[1,1] op_sel_hi:[0,0]
+# GFX11: v_pk_lshrrev_b16 v5, 0.5, m0 op_sel:[1,1] op_sel_hi:[0,0] ; encoding: [0x05,0x58,0x05,0xcc,0xf0,0xfa,0x00,0x00]
0x05,0x58,0x05,0xcc,0xf0,0xfa,0x00,0x00
# GFX11: v_pk_lshrrev_b16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1] ; encoding: [0x05,0x48,0x05,0xcc,0xfd,0xd4,0x00,0x10]
@@ -736,7 +736,7 @@
# GFX11: v_pk_mad_i16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x40,0x00,0xcc,0x7b,0xfa,0xed,0x19]
0x05,0x40,0x00,0xcc,0x7b,0xfa,0xed,0x19
-# GFX11: v_pk_mad_i16 v5, m0, 0x3800, m0
+# GFX11: v_pk_mad_i16 v5, m0, 0.5, m0 ; encoding: [0x05,0x40,0x00,0xcc,0x7d,0xe0,0xf5,0x19]
0x05,0x40,0x00,0xcc,0x7d,0xe0,0xf5,0x19
# GFX11: v_pk_mad_i16 v5, exec_lo, -1, vcc_hi op_sel_hi:[0,0,0] ; encoding: [0x05,0x00,0x00,0xcc,0x7e,0x82,0xad,0x01]
@@ -751,7 +751,7 @@
# GFX11: v_pk_mad_i16 v5, -1, exec_hi, src_scc op_sel:[1,1,1] op_sel_hi:[1,0,0] ; encoding: [0x05,0x38,0x00,0xcc,0xc1,0xfe,0xf4,0x0b]
0x05,0x38,0x00,0xcc,0xc1,0xfe,0xf4,0x0b
-# GFX11: v_pk_mad_i16 v5, 0x3800, m0, 0x3800 op_sel:[1,0,0] op_sel_hi:[0,1,1]
+# GFX11: v_pk_mad_i16 v5, 0.5, m0, 0.5 op_sel:[1,0,0] op_sel_hi:[0,1,1] ; encoding: [0x05,0x48,0x00,0xcc,0xf0,0xfa,0xc0,0x13]
0x05,0x48,0x00,0xcc,0xf0,0xfa,0xc0,0x13
# GFX11: v_pk_mad_i16 v5, src_scc, vcc_lo, -1 op_sel:[0,1,0] op_sel_hi:[1,0,1] ; encoding: [0x05,0x50,0x00,0xcc,0xfd,0xd4,0x04,0x0b]
@@ -781,7 +781,7 @@
# GFX11: v_pk_mad_u16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x40,0x09,0xcc,0x7b,0xfa,0xed,0x19]
0x05,0x40,0x09,0xcc,0x7b,0xfa,0xed,0x19
-# GFX11: v_pk_mad_u16 v5, m0, 0x3800, m0
+# GFX11: v_pk_mad_u16 v5, m0, 0.5, m0 ; encoding: [0x05,0x40,0x09,0xcc,0x7d,0xe0,0xf5,0x19]
0x05,0x40,0x09,0xcc,0x7d,0xe0,0xf5,0x19
# GFX11: v_pk_mad_u16 v5, exec_lo, -1, vcc_hi op_sel_hi:[0,0,0] ; encoding: [0x05,0x00,0x09,0xcc,0x7e,0x82,0xad,0x01]
@@ -796,7 +796,7 @@
# GFX11: v_pk_mad_u16 v5, -1, exec_hi, src_scc op_sel:[1,1,1] op_sel_hi:[1,0,0] ; encoding: [0x05,0x38,0x09,0xcc,0xc1,0xfe,0xf4,0x0b]
0x05,0x38,0x09,0xcc,0xc1,0xfe,0xf4,0x0b
-# GFX11: v_pk_mad_u16 v5, 0x3800, m0, 0x3800 op_sel:[1,0,0] op_sel_hi:[0,1,1]
+# GFX11: v_pk_mad_u16 v5, 0.5, m0, 0.5 op_sel:[1,0,0] op_sel_hi:[0,1,1] ; encoding: [0x05,0x48,0x09,0xcc,0xf0,0xfa,0xc0,0x13]
0x05,0x48,0x09,0xcc,0xf0,0xfa,0xc0,0x13
# GFX11: v_pk_mad_u16 v5, src_scc, vcc_lo, -1 op_sel:[0,1,0] op_sel_hi:[1,0,1] ; encoding: [0x05,0x50,0x09,0xcc,0xfd,0xd4,0x04,0x0b]
@@ -871,7 +871,7 @@
# GFX11: v_pk_max_i16 v5, ttmp15, src_scc ; encoding: [0x05,0x40,0x07,0xcc,0x7b,0xfa,0x01,0x18]
0x05,0x40,0x07,0xcc,0x7b,0xfa,0x01,0x18
-# GFX11: v_pk_max_i16 v5, m0, 0x3800
+# GFX11: v_pk_max_i16 v5, m0, 0.5 ; encoding: [0x05,0x40,0x07,0xcc,0x7d,0xe0,0x01,0x18]
0x05,0x40,0x07,0xcc,0x7d,0xe0,0x01,0x18
# GFX11: v_pk_max_i16 v5, exec_lo, -1 ; encoding: [0x05,0x40,0x07,0xcc,0x7e,0x82,0x01,0x18]
@@ -886,7 +886,7 @@
# GFX11: v_pk_max_i16 v5, -1, exec_hi ; encoding: [0x05,0x40,0x07,0xcc,0xc1,0xfe,0x00,0x18]
0x05,0x40,0x07,0xcc,0xc1,0xfe,0x00,0x18
-# GFX11: v_pk_max_i16 v5, 0x3800, m0 op_sel:[1,1] op_sel_hi:[0,0]
+# GFX11: v_pk_max_i16 v5, 0.5, m0 op_sel:[1,1] op_sel_hi:[0,0] ; encoding: [0x05,0x58,0x07,0xcc,0xf0,0xfa,0x00,0x00]
0x05,0x58,0x07,0xcc,0xf0,0xfa,0x00,0x00
# GFX11: v_pk_max_i16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1] ; encoding: [0x05,0x48,0x07,0xcc,0xfd,0xd4,0x00,0x10]
@@ -916,7 +916,7 @@
# GFX11: v_pk_max_u16 v5, ttmp15, src_scc ; encoding: [0x05,0x40,0x0c,0xcc,0x7b,0xfa,0x01,0x18]
0x05,0x40,0x0c,0xcc,0x7b,0xfa,0x01,0x18
-# GFX11: v_pk_max_u16 v5, m0, 0x3800
+# GFX11: v_pk_max_u16 v5, m0, 0.5 ; encoding: [0x05,0x40,0x0c,0xcc,0x7d,0xe0,0x01,0x18]
0x05,0x40,0x0c,0xcc,0x7d,0xe0,0x01,0x18
# GFX11: v_pk_max_u16 v5, exec_lo, -1 ; encoding: [0x05,0x40,0x0c,0xcc,0x7e,0x82,0x01,0x18]
@@ -931,7 +931,7 @@
# GFX11: v_pk_max_u16 v5, -1, exec_hi ; encoding: [0x05,0x40,0x0c,0xcc,0xc1,0xfe,0x00,0x18]
0x05,0x40,0x0c,0xcc,0xc1,0xfe,0x00,0x18
-# GFX11: v_pk_max_u16 v5, 0x3800, m0 op_sel:[1,1] op_sel_hi:[0,0]
+# GFX11: v_pk_max_u16 v5, 0.5, m0 op_sel:[1,1] op_sel_hi:[0,0] ; encoding: [0x05,0x58,0x0c,0xcc,0xf0,0xfa,0x00,0x00]
0x05,0x58,0x0c,0xcc,0xf0,0xfa,0x00,0x00
# GFX11: v_pk_max_u16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1] ; encoding: [0x05,0x48,0x0c,0xcc,0xfd,0xd4,0x00,0x10]
@@ -1006,7 +1006,7 @@
# GFX11: v_pk_min_i16 v5, ttmp15, src_scc ; encoding: [0x05,0x40,0x08,0xcc,0x7b,0xfa,0x01,0x18]
0x05,0x40,0x08,0xcc,0x7b,0xfa,0x01,0x18
-# GFX11: v_pk_min_i16 v5, m0, 0x3800
+# GFX11: v_pk_min_i16 v5, m0, 0.5 ; encoding: [0x05,0x40,0x08,0xcc,0x7d,0xe0,0x01,0x18]
0x05,0x40,0x08,0xcc,0x7d,0xe0,0x01,0x18
# GFX11: v_pk_min_i16 v5, exec_lo, -1 ; encoding: [0x05,0x40,0x08,0xcc,0x7e,0x82,0x01,0x18]
@@ -1021,7 +1021,7 @@
# GFX11: v_pk_min_i16 v5, -1, exec_hi ; encoding: [0x05,0x40,0x08,0xcc,0xc1,0xfe,0x00,0x18]
0x05,0x40,0x08,0xcc,0xc1,0xfe,0x00,0x18
-# GFX11: v_pk_min_i16 v5, 0x3800, m0 op_sel:[1,1] op_sel_hi:[0,0]
+# GFX11: v_pk_min_i16 v5, 0.5, m0 op_sel:[1,1] op_sel_hi:[0,0] ; encoding: [0x05,0x58,0x08,0xcc,0xf0,0xfa,0x00,0x00]
0x05,0x58,0x08,0xcc,0xf0,0xfa,0x00,0x00
# GFX11: v_pk_min_i16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1] ; encoding: [0x05,0x48,0x08,0xcc,0xfd,0xd4,0x00,0x10]
@@ -1051,7 +1051,7 @@
# GFX11: v_pk_min_u16 v5, ttmp15, src_scc ; encoding: [0x05,0x40,0x0d,0xcc,0x7b,0xfa,0x01,0x18]
0x05,0x40,0x0d,0xcc,0x7b,0xfa,0x01,0x18
-# GFX11: v_pk_min_u16 v5, m0, 0x3800
+# GFX11: v_pk_min_u16 v5, m0, 0.5 ; encoding: [0x05,0x40,0x0d,0xcc,0x7d,0xe0,0x01,0x18]
0x05,0x40,0x0d,0xcc,0x7d,0xe0,0x01,0x18
# GFX11: v_pk_min_u16 v5, exec_lo, -1 ; encoding: [0x05,0x40,0x0d,0xcc,0x7e,0x82,0x01,0x18]
@@ -1066,7 +1066,7 @@
# GFX11: v_pk_min_u16 v5, -1, exec_hi ; encoding: [0x05,0x40,0x0d,0xcc,0xc1,0xfe,0x00,0x18]
0x05,0x40,0x0d,0xcc,0xc1,0xfe,0x00,0x18
-# GFX11: v_pk_min_u16 v5, 0x3800, m0 op_sel:[1,1] op_sel_hi:[0,0]
+# GFX11: v_pk_min_u16 v5, 0.5, m0 op_sel:[1,1] op_sel_hi:[0,0] ; encoding: [0x05,0x58,0x0d,0xcc,0xf0,0xfa,0x00,0x00]
0x05,0x58,0x0d,0xcc,0xf0,0xfa,0x00,0x00
# GFX11: v_pk_min_u16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1] ; encoding: [0x05,0x48,0x0d,0xcc,0xfd,0xd4,0x00,0x10]
@@ -1141,7 +1141,7 @@
# GFX11: v_pk_mul_lo_u16 v5, ttmp15, src_scc ; encoding: [0x05,0x40,0x01,0xcc,0x7b,0xfa,0x01,0x18]
0x05,0x40,0x01,0xcc,0x7b,0xfa,0x01,0x18
-# GFX11: v_pk_mul_lo_u16 v5, m0, 0x3800
+# GFX11: v_pk_mul_lo_u16 v5, m0, 0.5 ; encoding: [0x05,0x40,0x01,0xcc,0x7d,0xe0,0x01,0x18]
0x05,0x40,0x01,0xcc,0x7d,0xe0,0x01,0x18
# GFX11: v_pk_mul_lo_u16 v5, exec_lo, -1 ; encoding: [0x05,0x40,0x01,0xcc,0x7e,0x82,0x01,0x18]
@@ -1156,7 +1156,7 @@
# GFX11: v_pk_mul_lo_u16 v5, -1, exec_hi ; encoding: [0x05,0x40,0x01,0xcc,0xc1,0xfe,0x00,0x18]
0x05,0x40,0x01,0xcc,0xc1,0xfe,0x00,0x18
-# GFX11: v_pk_mul_lo_u16 v5, 0x3800, m0 op_sel:[1,1] op_sel_hi:[0,0]
+# GFX11: v_pk_mul_lo_u16 v5, 0.5, m0 op_sel:[1,1] op_sel_hi:[0,0] ; encoding: [0x05,0x58,0x01,0xcc,0xf0,0xfa,0x00,0x00]
0x05,0x58,0x01,0xcc,0xf0,0xfa,0x00,0x00
# GFX11: v_pk_mul_lo_u16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1] ; encoding: [0x05,0x48,0x01,0xcc,0xfd,0xd4,0x00,0x10]
@@ -1186,7 +1186,7 @@
# GFX11: v_pk_sub_i16 v5, ttmp15, src_scc ; encoding: [0x05,0x40,0x03,0xcc,0x7b,0xfa,0x01,0x18]
0x05,0x40,0x03,0xcc,0x7b,0xfa,0x01,0x18
-# GFX11: v_pk_sub_i16 v5, m0, 0x3800
+# GFX11: v_pk_sub_i16 v5, m0, 0.5 ; encoding: [0x05,0x40,0x03,0xcc,0x7d,0xe0,0x01,0x18]
0x05,0x40,0x03,0xcc,0x7d,0xe0,0x01,0x18
# GFX11: v_pk_sub_i16 v5, exec_lo, -1 ; encoding: [0x05,0x40,0x03,0xcc,0x7e,0x82,0x01,0x18]
@@ -1201,7 +1201,7 @@
# GFX11: v_pk_sub_i16 v5, -1, exec_hi ; encoding: [0x05,0x40,0x03,0xcc,0xc1,0xfe,0x00,0x18]
0x05,0x40,0x03,0xcc,0xc1,0xfe,0x00,0x18
-# GFX11: v_pk_sub_i16 v5, 0x3800, m0 op_sel:[1,1] op_sel_hi:[0,0]
+# GFX11: v_pk_sub_i16 v5, 0.5, m0 op_sel:[1,1] op_sel_hi:[0,0] ; encoding: [0x05,0x58,0x03,0xcc,0xf0,0xfa,0x00,0x00]
0x05,0x58,0x03,0xcc,0xf0,0xfa,0x00,0x00
# GFX11: v_pk_sub_i16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1] ; encoding: [0x05,0x48,0x03,0xcc,0xfd,0xd4,0x00,0x10]
@@ -1231,7 +1231,7 @@
# GFX11: v_pk_sub_u16 v5, ttmp15, src_scc ; encoding: [0x05,0x40,0x0b,0xcc,0x7b,0xfa,0x01,0x18]
0x05,0x40,0x0b,0xcc,0x7b,0xfa,0x01,0x18
-# GFX11: v_pk_sub_u16 v5, m0, 0x3800
+# GFX11: v_pk_sub_u16 v5, m0, 0.5 ; encoding: [0x05,0x40,0x0b,0xcc,0x7d,0xe0,0x01,0x18]
0x05,0x40,0x0b,0xcc,0x7d,0xe0,0x01,0x18
# GFX11: v_pk_sub_u16 v5, exec_lo, -1 ; encoding: [0x05,0x40,0x0b,0xcc,0x7e,0x82,0x01,0x18]
@@ -1246,7 +1246,7 @@
# GFX11: v_pk_sub_u16 v5, -1, exec_hi ; encoding: [0x05,0x40,0x0b,0xcc,0xc1,0xfe,0x00,0x18]
0x05,0x40,0x0b,0xcc,0xc1,0xfe,0x00,0x18
-# GFX11: v_pk_sub_u16 v5, 0x3800, m0 op_sel:[1,1] op_sel_hi:[0,0]
+# GFX11: v_pk_sub_u16 v5, 0.5, m0 op_sel:[1,1] op_sel_hi:[0,0] ; encoding: [0x05,0x58,0x0b,0xcc,0xf0,0xfa,0x00,0x00]
0x05,0x58,0x0b,0xcc,0xf0,0xfa,0x00,0x00
# GFX11: v_pk_sub_u16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1] ; encoding: [0x05,0x48,0x0b,0xcc,0xfd,0xd4,0x00,0x10]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3p.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3p.txt
index 373cd71..44d8995 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3p.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3p.txt
@@ -463,7 +463,7 @@
# GFX12: v_pk_add_i16 v5, ttmp15, src_scc ; encoding: [0x05,0x40,0x02,0xcc,0x7b,0xfa,0x01,0x18]
0x05,0x40,0x02,0xcc,0x7b,0xfa,0x01,0x18
-# GFX12: v_pk_add_i16 v5, m0, 0x3800
+# GFX12: v_pk_add_i16 v5, m0, 0.5 ; encoding: [0x05,0x40,0x02,0xcc,0x7d,0xe0,0x01,0x18]
0x05,0x40,0x02,0xcc,0x7d,0xe0,0x01,0x18
# GFX12: v_pk_add_i16 v5, exec_lo, -1 ; encoding: [0x05,0x40,0x02,0xcc,0x7e,0x82,0x01,0x18]
@@ -478,7 +478,7 @@
# GFX12: v_pk_add_i16 v5, -1, exec_hi ; encoding: [0x05,0x40,0x02,0xcc,0xc1,0xfe,0x00,0x18]
0x05,0x40,0x02,0xcc,0xc1,0xfe,0x00,0x18
-# GFX12: v_pk_add_i16 v5, 0x3800, m0 op_sel:[1,1] op_sel_hi:[0,0]
+# GFX12: v_pk_add_i16 v5, 0.5, m0 op_sel:[1,1] op_sel_hi:[0,0] ; encoding: [0x05,0x58,0x02,0xcc,0xf0,0xfa,0x00,0x00]
0x05,0x58,0x02,0xcc,0xf0,0xfa,0x00,0x00
# GFX12: v_pk_add_i16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1] ; encoding: [0x05,0x48,0x02,0xcc,0xfd,0xd4,0x00,0x10]
@@ -508,7 +508,7 @@
# GFX12: v_pk_add_u16 v5, ttmp15, src_scc ; encoding: [0x05,0x40,0x0a,0xcc,0x7b,0xfa,0x01,0x18]
0x05,0x40,0x0a,0xcc,0x7b,0xfa,0x01,0x18
-# GFX12: v_pk_add_u16 v5, m0, 0x3800
+# GFX12: v_pk_add_u16 v5, m0, 0.5 ; encoding: [0x05,0x40,0x0a,0xcc,0x7d,0xe0,0x01,0x18]
0x05,0x40,0x0a,0xcc,0x7d,0xe0,0x01,0x18
# GFX12: v_pk_add_u16 v5, exec_lo, -1 ; encoding: [0x05,0x40,0x0a,0xcc,0x7e,0x82,0x01,0x18]
@@ -523,7 +523,7 @@
# GFX12: v_pk_add_u16 v5, -1, exec_hi ; encoding: [0x05,0x40,0x0a,0xcc,0xc1,0xfe,0x00,0x18]
0x05,0x40,0x0a,0xcc,0xc1,0xfe,0x00,0x18
-# GFX12: v_pk_add_u16 v5, 0x3800, m0 op_sel:[1,1] op_sel_hi:[0,0]
+# GFX12: v_pk_add_u16 v5, 0.5, m0 op_sel:[1,1] op_sel_hi:[0,0] ; encoding: [0x05,0x58,0x0a,0xcc,0xf0,0xfa,0x00,0x00]
0x05,0x58,0x0a,0xcc,0xf0,0xfa,0x00,0x00
# GFX12: v_pk_add_u16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1] ; encoding: [0x05,0x48,0x0a,0xcc,0xfd,0xd4,0x00,0x10]
@@ -553,7 +553,7 @@
# GFX12: v_pk_ashrrev_i16 v5, ttmp15, src_scc ; encoding: [0x05,0x40,0x06,0xcc,0x7b,0xfa,0x01,0x18]
0x05,0x40,0x06,0xcc,0x7b,0xfa,0x01,0x18
-# GFX12: v_pk_ashrrev_i16 v5, m0, 0x3800
+# GFX12: v_pk_ashrrev_i16 v5, m0, 0.5 ; encoding: [0x05,0x40,0x06,0xcc,0x7d,0xe0,0x01,0x18]
0x05,0x40,0x06,0xcc,0x7d,0xe0,0x01,0x18
# GFX12: v_pk_ashrrev_i16 v5, exec_lo, -1 ; encoding: [0x05,0x40,0x06,0xcc,0x7e,0x82,0x01,0x18]
@@ -568,7 +568,7 @@
# GFX12: v_pk_ashrrev_i16 v5, -1, exec_hi ; encoding: [0x05,0x40,0x06,0xcc,0xc1,0xfe,0x00,0x18]
0x05,0x40,0x06,0xcc,0xc1,0xfe,0x00,0x18
-# GFX12: v_pk_ashrrev_i16 v5, 0x3800, m0 op_sel:[1,1] op_sel_hi:[0,0]
+# GFX12: v_pk_ashrrev_i16 v5, 0.5, m0 op_sel:[1,1] op_sel_hi:[0,0] ; encoding: [0x05,0x58,0x06,0xcc,0xf0,0xfa,0x00,0x00]
0x05,0x58,0x06,0xcc,0xf0,0xfa,0x00,0x00
# GFX12: v_pk_ashrrev_i16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1] ; encoding: [0x05,0x48,0x06,0xcc,0xfd,0xd4,0x00,0x10]
@@ -643,7 +643,7 @@
# GFX12: v_pk_lshlrev_b16 v5, ttmp15, src_scc ; encoding: [0x05,0x40,0x04,0xcc,0x7b,0xfa,0x01,0x18]
0x05,0x40,0x04,0xcc,0x7b,0xfa,0x01,0x18
-# GFX12: v_pk_lshlrev_b16 v5, m0, 0x3800
+# GFX12: v_pk_lshlrev_b16 v5, m0, 0.5 ; encoding: [0x05,0x40,0x04,0xcc,0x7d,0xe0,0x01,0x18]
0x05,0x40,0x04,0xcc,0x7d,0xe0,0x01,0x18
# GFX12: v_pk_lshlrev_b16 v5, exec_lo, -1 ; encoding: [0x05,0x40,0x04,0xcc,0x7e,0x82,0x01,0x18]
@@ -658,7 +658,7 @@
# GFX12: v_pk_lshlrev_b16 v5, -1, exec_hi ; encoding: [0x05,0x40,0x04,0xcc,0xc1,0xfe,0x00,0x18]
0x05,0x40,0x04,0xcc,0xc1,0xfe,0x00,0x18
-# GFX12: v_pk_lshlrev_b16 v5, 0x3800, m0 op_sel:[1,1] op_sel_hi:[0,0]
+# GFX12: v_pk_lshlrev_b16 v5, 0.5, m0 op_sel:[1,1] op_sel_hi:[0,0] ; encoding: [0x05,0x58,0x04,0xcc,0xf0,0xfa,0x00,0x00]
0x05,0x58,0x04,0xcc,0xf0,0xfa,0x00,0x00
# GFX12: v_pk_lshlrev_b16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1] ; encoding: [0x05,0x48,0x04,0xcc,0xfd,0xd4,0x00,0x10]
@@ -688,7 +688,7 @@
# GFX12: v_pk_lshrrev_b16 v5, ttmp15, src_scc ; encoding: [0x05,0x40,0x05,0xcc,0x7b,0xfa,0x01,0x18]
0x05,0x40,0x05,0xcc,0x7b,0xfa,0x01,0x18
-# GFX12: v_pk_lshrrev_b16 v5, m0, 0x3800
+# GFX12: v_pk_lshrrev_b16 v5, m0, 0.5 ; encoding: [0x05,0x40,0x05,0xcc,0x7d,0xe0,0x01,0x18]
0x05,0x40,0x05,0xcc,0x7d,0xe0,0x01,0x18
# GFX12: v_pk_lshrrev_b16 v5, exec_lo, -1 ; encoding: [0x05,0x40,0x05,0xcc,0x7e,0x82,0x01,0x18]
@@ -703,7 +703,7 @@
# GFX12: v_pk_lshrrev_b16 v5, -1, exec_hi ; encoding: [0x05,0x40,0x05,0xcc,0xc1,0xfe,0x00,0x18]
0x05,0x40,0x05,0xcc,0xc1,0xfe,0x00,0x18
-# GFX12: v_pk_lshrrev_b16 v5, 0x3800, m0 op_sel:[1,1] op_sel_hi:[0,0]
+# GFX12: v_pk_lshrrev_b16 v5, 0.5, m0 op_sel:[1,1] op_sel_hi:[0,0] ; encoding: [0x05,0x58,0x05,0xcc,0xf0,0xfa,0x00,0x00]
0x05,0x58,0x05,0xcc,0xf0,0xfa,0x00,0x00
# GFX12: v_pk_lshrrev_b16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1] ; encoding: [0x05,0x48,0x05,0xcc,0xfd,0xd4,0x00,0x10]
@@ -733,7 +733,7 @@
# GFX12: v_pk_mad_i16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x40,0x00,0xcc,0x7b,0xfa,0xed,0x19]
0x05,0x40,0x00,0xcc,0x7b,0xfa,0xed,0x19
-# GFX12: v_pk_mad_i16 v5, m0, 0x3800, m0
+# GFX12: v_pk_mad_i16 v5, m0, 0.5, m0 ; encoding: [0x05,0x40,0x00,0xcc,0x7d,0xe0,0xf5,0x19]
0x05,0x40,0x00,0xcc,0x7d,0xe0,0xf5,0x19
# GFX12: v_pk_mad_i16 v5, exec_lo, -1, vcc_hi op_sel_hi:[0,0,0] ; encoding: [0x05,0x00,0x00,0xcc,0x7e,0x82,0xad,0x01]
@@ -748,7 +748,7 @@
# GFX12: v_pk_mad_i16 v5, -1, exec_hi, src_scc op_sel:[1,1,1] op_sel_hi:[1,0,0] ; encoding: [0x05,0x38,0x00,0xcc,0xc1,0xfe,0xf4,0x0b]
0x05,0x38,0x00,0xcc,0xc1,0xfe,0xf4,0x0b
-# GFX12: v_pk_mad_i16 v5, 0x3800, m0, 0x3800 op_sel:[1,0,0] op_sel_hi:[0,1,1]
+# GFX12: v_pk_mad_i16 v5, 0.5, m0, 0.5 op_sel:[1,0,0] op_sel_hi:[0,1,1] ; encoding: [0x05,0x48,0x00,0xcc,0xf0,0xfa,0xc0,0x13]
0x05,0x48,0x00,0xcc,0xf0,0xfa,0xc0,0x13
# GFX12: v_pk_mad_i16 v5, src_scc, vcc_lo, -1 op_sel:[0,1,0] op_sel_hi:[1,0,1] ; encoding: [0x05,0x50,0x00,0xcc,0xfd,0xd4,0x04,0x0b]
@@ -778,7 +778,7 @@
# GFX12: v_pk_mad_u16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x40,0x09,0xcc,0x7b,0xfa,0xed,0x19]
0x05,0x40,0x09,0xcc,0x7b,0xfa,0xed,0x19
-# GFX12: v_pk_mad_u16 v5, m0, 0x3800, m0
+# GFX12: v_pk_mad_u16 v5, m0, 0.5, m0 ; encoding: [0x05,0x40,0x09,0xcc,0x7d,0xe0,0xf5,0x19]
0x05,0x40,0x09,0xcc,0x7d,0xe0,0xf5,0x19
# GFX12: v_pk_mad_u16 v5, exec_lo, -1, vcc_hi op_sel_hi:[0,0,0] ; encoding: [0x05,0x00,0x09,0xcc,0x7e,0x82,0xad,0x01]
@@ -793,7 +793,7 @@
# GFX12: v_pk_mad_u16 v5, -1, exec_hi, src_scc op_sel:[1,1,1] op_sel_hi:[1,0,0] ; encoding: [0x05,0x38,0x09,0xcc,0xc1,0xfe,0xf4,0x0b]
0x05,0x38,0x09,0xcc,0xc1,0xfe,0xf4,0x0b
-# GFX12: v_pk_mad_u16 v5, 0x3800, m0, 0x3800 op_sel:[1,0,0] op_sel_hi:[0,1,1]
+# GFX12: v_pk_mad_u16 v5, 0.5, m0, 0.5 op_sel:[1,0,0] op_sel_hi:[0,1,1] ; encoding: [0x05,0x48,0x09,0xcc,0xf0,0xfa,0xc0,0x13]
0x05,0x48,0x09,0xcc,0xf0,0xfa,0xc0,0x13
# GFX12: v_pk_mad_u16 v5, src_scc, vcc_lo, -1 op_sel:[0,1,0] op_sel_hi:[1,0,1] ; encoding: [0x05,0x50,0x09,0xcc,0xfd,0xd4,0x04,0x0b]
@@ -868,7 +868,7 @@
# GFX12: v_pk_max_i16 v5, ttmp15, src_scc ; encoding: [0x05,0x40,0x07,0xcc,0x7b,0xfa,0x01,0x18]
0x05,0x40,0x07,0xcc,0x7b,0xfa,0x01,0x18
-# GFX12: v_pk_max_i16 v5, m0, 0x3800
+# GFX12: v_pk_max_i16 v5, m0, 0.5 ; encoding: [0x05,0x40,0x07,0xcc,0x7d,0xe0,0x01,0x18]
0x05,0x40,0x07,0xcc,0x7d,0xe0,0x01,0x18
# GFX12: v_pk_max_i16 v5, exec_lo, -1 ; encoding: [0x05,0x40,0x07,0xcc,0x7e,0x82,0x01,0x18]
@@ -883,7 +883,7 @@
# GFX12: v_pk_max_i16 v5, -1, exec_hi ; encoding: [0x05,0x40,0x07,0xcc,0xc1,0xfe,0x00,0x18]
0x05,0x40,0x07,0xcc,0xc1,0xfe,0x00,0x18
-# GFX12: v_pk_max_i16 v5, 0x3800, m0 op_sel:[1,1] op_sel_hi:[0,0]
+# GFX12: v_pk_max_i16 v5, 0.5, m0 op_sel:[1,1] op_sel_hi:[0,0] ; encoding: [0x05,0x58,0x07,0xcc,0xf0,0xfa,0x00,0x00]
0x05,0x58,0x07,0xcc,0xf0,0xfa,0x00,0x00
# GFX12: v_pk_max_i16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1] ; encoding: [0x05,0x48,0x07,0xcc,0xfd,0xd4,0x00,0x10]
@@ -913,7 +913,7 @@
# GFX12: v_pk_max_u16 v5, ttmp15, src_scc ; encoding: [0x05,0x40,0x0c,0xcc,0x7b,0xfa,0x01,0x18]
0x05,0x40,0x0c,0xcc,0x7b,0xfa,0x01,0x18
-# GFX12: v_pk_max_u16 v5, m0, 0x3800
+# GFX12: v_pk_max_u16 v5, m0, 0.5 ; encoding: [0x05,0x40,0x0c,0xcc,0x7d,0xe0,0x01,0x18]
0x05,0x40,0x0c,0xcc,0x7d,0xe0,0x01,0x18
# GFX12: v_pk_max_u16 v5, exec_lo, -1 ; encoding: [0x05,0x40,0x0c,0xcc,0x7e,0x82,0x01,0x18]
@@ -928,7 +928,7 @@
# GFX12: v_pk_max_u16 v5, -1, exec_hi ; encoding: [0x05,0x40,0x0c,0xcc,0xc1,0xfe,0x00,0x18]
0x05,0x40,0x0c,0xcc,0xc1,0xfe,0x00,0x18
-# GFX12: v_pk_max_u16 v5, 0x3800, m0 op_sel:[1,1] op_sel_hi:[0,0]
+# GFX12: v_pk_max_u16 v5, 0.5, m0 op_sel:[1,1] op_sel_hi:[0,0] ; encoding: [0x05,0x58,0x0c,0xcc,0xf0,0xfa,0x00,0x00]
0x05,0x58,0x0c,0xcc,0xf0,0xfa,0x00,0x00
# GFX12: v_pk_max_u16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1] ; encoding: [0x05,0x48,0x0c,0xcc,0xfd,0xd4,0x00,0x10]
@@ -1003,7 +1003,7 @@
# GFX12: v_pk_min_i16 v5, ttmp15, src_scc ; encoding: [0x05,0x40,0x08,0xcc,0x7b,0xfa,0x01,0x18]
0x05,0x40,0x08,0xcc,0x7b,0xfa,0x01,0x18
-# GFX12: v_pk_min_i16 v5, m0, 0x3800
+# GFX12: v_pk_min_i16 v5, m0, 0.5 ; encoding: [0x05,0x40,0x08,0xcc,0x7d,0xe0,0x01,0x18]
0x05,0x40,0x08,0xcc,0x7d,0xe0,0x01,0x18
# GFX12: v_pk_min_i16 v5, exec_lo, -1 ; encoding: [0x05,0x40,0x08,0xcc,0x7e,0x82,0x01,0x18]
@@ -1018,7 +1018,7 @@
# GFX12: v_pk_min_i16 v5, -1, exec_hi ; encoding: [0x05,0x40,0x08,0xcc,0xc1,0xfe,0x00,0x18]
0x05,0x40,0x08,0xcc,0xc1,0xfe,0x00,0x18
-# GFX12: v_pk_min_i16 v5, 0x3800, m0 op_sel:[1,1] op_sel_hi:[0,0]
+# GFX12: v_pk_min_i16 v5, 0.5, m0 op_sel:[1,1] op_sel_hi:[0,0] ; encoding: [0x05,0x58,0x08,0xcc,0xf0,0xfa,0x00,0x00]
0x05,0x58,0x08,0xcc,0xf0,0xfa,0x00,0x00
# GFX12: v_pk_min_i16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1] ; encoding: [0x05,0x48,0x08,0xcc,0xfd,0xd4,0x00,0x10]
@@ -1048,7 +1048,7 @@
# GFX12: v_pk_min_u16 v5, ttmp15, src_scc ; encoding: [0x05,0x40,0x0d,0xcc,0x7b,0xfa,0x01,0x18]
0x05,0x40,0x0d,0xcc,0x7b,0xfa,0x01,0x18
-# GFX12: v_pk_min_u16 v5, m0, 0x3800
+# GFX12: v_pk_min_u16 v5, m0, 0.5 ; encoding: [0x05,0x40,0x0d,0xcc,0x7d,0xe0,0x01,0x18]
0x05,0x40,0x0d,0xcc,0x7d,0xe0,0x01,0x18
# GFX12: v_pk_min_u16 v5, exec_lo, -1 ; encoding: [0x05,0x40,0x0d,0xcc,0x7e,0x82,0x01,0x18]
@@ -1063,7 +1063,7 @@
# GFX12: v_pk_min_u16 v5, -1, exec_hi ; encoding: [0x05,0x40,0x0d,0xcc,0xc1,0xfe,0x00,0x18]
0x05,0x40,0x0d,0xcc,0xc1,0xfe,0x00,0x18
-# GFX12: v_pk_min_u16 v5, 0x3800, m0 op_sel:[1,1] op_sel_hi:[0,0]
+# GFX12: v_pk_min_u16 v5, 0.5, m0 op_sel:[1,1] op_sel_hi:[0,0] ; encoding: [0x05,0x58,0x0d,0xcc,0xf0,0xfa,0x00,0x00]
0x05,0x58,0x0d,0xcc,0xf0,0xfa,0x00,0x00
# GFX12: v_pk_min_u16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1] ; encoding: [0x05,0x48,0x0d,0xcc,0xfd,0xd4,0x00,0x10]
@@ -1138,7 +1138,7 @@
# GFX12: v_pk_mul_lo_u16 v5, ttmp15, src_scc ; encoding: [0x05,0x40,0x01,0xcc,0x7b,0xfa,0x01,0x18]
0x05,0x40,0x01,0xcc,0x7b,0xfa,0x01,0x18
-# GFX12: v_pk_mul_lo_u16 v5, m0, 0x3800
+# GFX12: v_pk_mul_lo_u16 v5, m0, 0.5 ; encoding: [0x05,0x40,0x01,0xcc,0x7d,0xe0,0x01,0x18]
0x05,0x40,0x01,0xcc,0x7d,0xe0,0x01,0x18
# GFX12: v_pk_mul_lo_u16 v5, exec_lo, -1 ; encoding: [0x05,0x40,0x01,0xcc,0x7e,0x82,0x01,0x18]
@@ -1153,7 +1153,7 @@
# GFX12: v_pk_mul_lo_u16 v5, -1, exec_hi ; encoding: [0x05,0x40,0x01,0xcc,0xc1,0xfe,0x00,0x18]
0x05,0x40,0x01,0xcc,0xc1,0xfe,0x00,0x18
-# GFX12: v_pk_mul_lo_u16 v5, 0x3800, m0 op_sel:[1,1] op_sel_hi:[0,0]
+# GFX12: v_pk_mul_lo_u16 v5, 0.5, m0 op_sel:[1,1] op_sel_hi:[0,0] ; encoding: [0x05,0x58,0x01,0xcc,0xf0,0xfa,0x00,0x00]
0x05,0x58,0x01,0xcc,0xf0,0xfa,0x00,0x00
# GFX12: v_pk_mul_lo_u16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1] ; encoding: [0x05,0x48,0x01,0xcc,0xfd,0xd4,0x00,0x10]
@@ -1183,7 +1183,7 @@
# GFX12: v_pk_sub_i16 v5, ttmp15, src_scc ; encoding: [0x05,0x40,0x03,0xcc,0x7b,0xfa,0x01,0x18]
0x05,0x40,0x03,0xcc,0x7b,0xfa,0x01,0x18
-# GFX12: v_pk_sub_i16 v5, m0, 0x3800
+# GFX12: v_pk_sub_i16 v5, m0, 0.5 ; encoding: [0x05,0x40,0x03,0xcc,0x7d,0xe0,0x01,0x18]
0x05,0x40,0x03,0xcc,0x7d,0xe0,0x01,0x18
# GFX12: v_pk_sub_i16 v5, exec_lo, -1 ; encoding: [0x05,0x40,0x03,0xcc,0x7e,0x82,0x01,0x18]
@@ -1198,7 +1198,7 @@
# GFX12: v_pk_sub_i16 v5, -1, exec_hi ; encoding: [0x05,0x40,0x03,0xcc,0xc1,0xfe,0x00,0x18]
0x05,0x40,0x03,0xcc,0xc1,0xfe,0x00,0x18
-# GFX12: v_pk_sub_i16 v5, 0x3800, m0 op_sel:[1,1] op_sel_hi:[0,0]
+# GFX12: v_pk_sub_i16 v5, 0.5, m0 op_sel:[1,1] op_sel_hi:[0,0] ; encoding: [0x05,0x58,0x03,0xcc,0xf0,0xfa,0x00,0x00]
0x05,0x58,0x03,0xcc,0xf0,0xfa,0x00,0x00
# GFX12: v_pk_sub_i16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1] ; encoding: [0x05,0x48,0x03,0xcc,0xfd,0xd4,0x00,0x10]
@@ -1228,7 +1228,7 @@
# GFX12: v_pk_sub_u16 v5, ttmp15, src_scc ; encoding: [0x05,0x40,0x0b,0xcc,0x7b,0xfa,0x01,0x18]
0x05,0x40,0x0b,0xcc,0x7b,0xfa,0x01,0x18
-# GFX12: v_pk_sub_u16 v5, m0, 0x3800
+# GFX12: v_pk_sub_u16 v5, m0, 0.5 ; encoding: [0x05,0x40,0x0b,0xcc,0x7d,0xe0,0x01,0x18]
0x05,0x40,0x0b,0xcc,0x7d,0xe0,0x01,0x18
# GFX12: v_pk_sub_u16 v5, exec_lo, -1 ; encoding: [0x05,0x40,0x0b,0xcc,0x7e,0x82,0x01,0x18]
@@ -1243,7 +1243,7 @@
# GFX12: v_pk_sub_u16 v5, -1, exec_hi ; encoding: [0x05,0x40,0x0b,0xcc,0xc1,0xfe,0x00,0x18]
0x05,0x40,0x0b,0xcc,0xc1,0xfe,0x00,0x18
-# GFX12: v_pk_sub_u16 v5, 0x3800, m0 op_sel:[1,1] op_sel_hi:[0,0]
+# GFX12: v_pk_sub_u16 v5, 0.5, m0 op_sel:[1,1] op_sel_hi:[0,0] ; encoding: [0x05,0x58,0x0b,0xcc,0xf0,0xfa,0x00,0x00]
0x05,0x58,0x0b,0xcc,0xf0,0xfa,0x00,0x00
# GFX12: v_pk_sub_u16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1] ; encoding: [0x05,0x48,0x0b,0xcc,0xfd,0xd4,0x00,0x10]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx9_vop3p.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx9_vop3p.txt
index 215453d..003ece9 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx9_vop3p.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx9_vop3p.txt
@@ -42,10 +42,10 @@
# CHECK: v_pk_mad_i16 v5, -1, v2, v3 ; encoding: [0x05,0x40,0x80,0xd3,0xc1,0x04,0x0e,0x1c]
0x05,0x40,0x80,0xd3,0xc1,0x04,0x0e,0x1c
-# CHECK: v_pk_mad_i16 v5, 0x3800, v2, v3 ; encoding: [0x05,0x40,0x80,0xd3,0xff,0x04,0x0e,0x1c]
+# CHECK: v_pk_mad_i16 v5, 0.5, v2, v3 ; encoding: [0x05,0x40,0x80,0xd3,0xf0,0x04,0x0e,0x1c]
0x05,0x40,0x80,0xd3,0xf0,0x04,0x0e,0x1c
-# CHECK: v_pk_mad_i16 v5, 0xc400, v2, v3 ; encoding: [0x05,0x40,0x80,0xd3,0xff,0x04,0x0e,0x1c]
+# CHECK: v_pk_mad_i16 v5, -4.0, v2, v3 ; encoding: [0x05,0x40,0x80,0xd3,0xf7,0x04,0x0e,0x1c]
0x05,0x40,0x80,0xd3,0xf7,0x04,0x0e,0x1c
# CHECK: v_pk_mad_i16 v5, v1, v255, v3 ; encoding: [0x05,0x40,0x80,0xd3,0x01,0xff,0x0f,0x1c]
@@ -84,10 +84,10 @@
# CHECK: v_pk_mad_i16 v5, v1, -1, v3 ; encoding: [0x05,0x40,0x80,0xd3,0x01,0x83,0x0d,0x1c]
0x05,0x40,0x80,0xd3,0x01,0x83,0x0d,0x1c
-# CHECK: v_pk_mad_i16 v5, v1, 0x3800, v3 ; encoding: [0x05,0x40,0x80,0xd3,0x01,0xff,0x0d,0x1c]
+# CHECK: v_pk_mad_i16 v5, v1, 0.5, v3 ; encoding: [0x05,0x40,0x80,0xd3,0x01,0xe1,0x0d,0x1c]
0x05,0x40,0x80,0xd3,0x01,0xe1,0x0d,0x1c
-# CHECK: v_pk_mad_i16 v5, v1, 0xc400, v3 ; encoding: [0x05,0x40,0x80,0xd3,0x01,0xff,0x0d,0x1c]
+# CHECK: v_pk_mad_i16 v5, v1, -4.0, v3 ; encoding: [0x05,0x40,0x80,0xd3,0x01,0xef,0x0d,0x1c]
0x05,0x40,0x80,0xd3,0x01,0xef,0x0d,0x1c
# CHECK: v_pk_mad_i16 v5, v1, v2, v255 ; encoding: [0x05,0x40,0x80,0xd3,0x01,0x05,0xfe,0x1f]
@@ -126,10 +126,10 @@
# CHECK: v_pk_mad_i16 v5, v1, v2, -1 ; encoding: [0x05,0x40,0x80,0xd3,0x01,0x05,0x06,0x1b]
0x05,0x40,0x80,0xd3,0x01,0x05,0x06,0x1b
-# CHECK: v_pk_mad_i16 v5, v1, v2, 0x3800 ; encoding: [0x05,0x40,0x80,0xd3,0x01,0x05,0xfe,0x1b]
+# CHECK: v_pk_mad_i16 v5, v1, v2, 0.5 ; encoding: [0x05,0x40,0x80,0xd3,0x01,0x05,0xc2,0x1b]
0x05,0x40,0x80,0xd3,0x01,0x05,0xc2,0x1b
-# CHECK: v_pk_mad_i16 v5, v1, v2, 0xc400 ; encoding: [0x05,0x40,0x80,0xd3,0x01,0x05,0xfe,0x1b]
+# CHECK: v_pk_mad_i16 v5, v1, v2, -4.0 ; encoding: [0x05,0x40,0x80,0xd3,0x01,0x05,0xde,0x1b]
0x05,0x40,0x80,0xd3,0x01,0x05,0xde,0x1b
# CHECK: v_pk_mad_i16 v5, v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x05,0x48,0x80,0xd3,0x01,0x05,0x0e,0x1c]
@@ -201,10 +201,10 @@
# CHECK: v_pk_mul_lo_u16 v5, -1, v2 ; encoding: [0x05,0x40,0x81,0xd3,0xc1,0x04,0x02,0x18]
0x05,0x00,0x81,0xd3,0xc1,0x04,0x02,0x18
-# CHECK: v_pk_mul_lo_u16 v5, 0x3800, v2 ; encoding: [0x05,0x40,0x81,0xd3,0xff,0x04,0x02,0x18]
+# CHECK: v_pk_mul_lo_u16 v5, 0.5, v2 ; encoding: [0x05,0x40,0x81,0xd3,0xf0,0x04,0x02,0x18]
0x05,0x00,0x81,0xd3,0xf0,0x04,0x02,0x18
-# CHECK: v_pk_mul_lo_u16 v5, 0xc400, v2 ; encoding: [0x05,0x40,0x81,0xd3,0xff,0x04,0x02,0x18]
+# CHECK: v_pk_mul_lo_u16 v5, -4.0, v2 ; encoding: [0x05,0x40,0x81,0xd3,0xf7,0x04,0x02,0x18]
0x05,0x00,0x81,0xd3,0xf7,0x04,0x02,0x18
# CHECK: v_pk_mul_lo_u16 v5, v1, v255 ; encoding: [0x05,0x40,0x81,0xd3,0x01,0xff,0x03,0x18]
@@ -243,10 +243,10 @@
# CHECK: v_pk_mul_lo_u16 v5, v1, -1 ; encoding: [0x05,0x40,0x81,0xd3,0x01,0x83,0x01,0x18]
0x05,0x00,0x81,0xd3,0x01,0x83,0x01,0x18
-# CHECK: v_pk_mul_lo_u16 v5, v1, 0x3800 ; encoding: [0x05,0x40,0x81,0xd3,0x01,0xff,0x01,0x18]
+# CHECK: v_pk_mul_lo_u16 v5, v1, 0.5 ; encoding: [0x05,0x40,0x81,0xd3,0x01,0xe1,0x01,0x18]
0x05,0x00,0x81,0xd3,0x01,0xe1,0x01,0x18
-# CHECK: v_pk_mul_lo_u16 v5, v1, 0xc400 ; encoding: [0x05,0x40,0x81,0xd3,0x01,0xff,0x01,0x18]
+# CHECK: v_pk_mul_lo_u16 v5, v1, -4.0 ; encoding: [0x05,0x40,0x81,0xd3,0x01,0xef,0x01,0x18]
0x05,0x00,0x81,0xd3,0x01,0xef,0x01,0x18
# CHECK: v_pk_mul_lo_u16 v5, v1, v2 op_sel:[1,0] ; encoding: [0x05,0x48,0x81,0xd3,0x01,0x05,0x02,0x18]
@@ -309,10 +309,10 @@
# CHECK: v_pk_add_i16 v5, -1, v2 ; encoding: [0x05,0x40,0x82,0xd3,0xc1,0x04,0x02,0x18]
0x05,0x00,0x82,0xd3,0xc1,0x04,0x02,0x18
-# CHECK: v_pk_add_i16 v5, 0x3800, v2 ; encoding: [0x05,0x40,0x82,0xd3,0xff,0x04,0x02,0x18]
+# CHECK: v_pk_add_i16 v5, 0.5, v2 ; encoding: [0x05,0x40,0x82,0xd3,0xf0,0x04,0x02,0x18]
0x05,0x00,0x82,0xd3,0xf0,0x04,0x02,0x18
-# CHECK: v_pk_add_i16 v5, 0xc400, v2 ; encoding: [0x05,0x40,0x82,0xd3,0xff,0x04,0x02,0x18]
+# CHECK: v_pk_add_i16 v5, -4.0, v2 ; encoding: [0x05,0x40,0x82,0xd3,0xf7,0x04,0x02,0x18]
0x05,0x00,0x82,0xd3,0xf7,0x04,0x02,0x18
# CHECK: v_pk_add_i16 v5, v1, v255 ; encoding: [0x05,0x40,0x82,0xd3,0x01,0xff,0x03,0x18]
@@ -351,10 +351,10 @@
# CHECK: v_pk_add_i16 v5, v1, -1 ; encoding: [0x05,0x40,0x82,0xd3,0x01,0x83,0x01,0x18]
0x05,0x00,0x82,0xd3,0x01,0x83,0x01,0x18
-# CHECK: v_pk_add_i16 v5, v1, 0x3800 ; encoding: [0x05,0x40,0x82,0xd3,0x01,0xff,0x01,0x18]
+# CHECK: v_pk_add_i16 v5, v1, 0.5 ; encoding: [0x05,0x40,0x82,0xd3,0x01,0xe1,0x01,0x18]
0x05,0x00,0x82,0xd3,0x01,0xe1,0x01,0x18
-# CHECK: v_pk_add_i16 v5, v1, 0xc400 ; encoding: [0x05,0x40,0x82,0xd3,0x01,0xff,0x01,0x18]
+# CHECK: v_pk_add_i16 v5, v1, -4.0 ; encoding: [0x05,0x40,0x82,0xd3,0x01,0xef,0x01,0x18]
0x05,0x00,0x82,0xd3,0x01,0xef,0x01,0x18
# CHECK: v_pk_add_i16 v5, v1, v2 op_sel:[1,0] ; encoding: [0x05,0x48,0x82,0xd3,0x01,0x05,0x02,0x18]
@@ -420,10 +420,10 @@
# CHECK: v_pk_sub_i16 v5, -1, v2 ; encoding: [0x05,0x40,0x83,0xd3,0xc1,0x04,0x02,0x18]
0x05,0x00,0x83,0xd3,0xc1,0x04,0x02,0x18
-# CHECK: v_pk_sub_i16 v5, 0x3800, v2 ; encoding: [0x05,0x40,0x83,0xd3,0xff,0x04,0x02,0x18]
+# CHECK: v_pk_sub_i16 v5, 0.5, v2 ; encoding: [0x05,0x40,0x83,0xd3,0xf0,0x04,0x02,0x18]
0x05,0x00,0x83,0xd3,0xf0,0x04,0x02,0x18
-# CHECK: v_pk_sub_i16 v5, 0xc400, v2 ; encoding: [0x05,0x40,0x83,0xd3,0xff,0x04,0x02,0x18]
+# CHECK: v_pk_sub_i16 v5, -4.0, v2 ; encoding: [0x05,0x40,0x83,0xd3,0xf7,0x04,0x02,0x18]
0x05,0x00,0x83,0xd3,0xf7,0x04,0x02,0x18
# CHECK: v_pk_sub_i16 v5, v1, v255 ; encoding: [0x05,0x40,0x83,0xd3,0x01,0xff,0x03,0x18]
@@ -462,10 +462,10 @@
# CHECK: v_pk_sub_i16 v5, v1, -1 ; encoding: [0x05,0x40,0x83,0xd3,0x01,0x83,0x01,0x18]
0x05,0x00,0x83,0xd3,0x01,0x83,0x01,0x18
-# CHECK: v_pk_sub_i16 v5, v1, 0x3800 ; encoding: [0x05,0x40,0x83,0xd3,0x01,0xff,0x01,0x18]
+# CHECK: v_pk_sub_i16 v5, v1, 0.5 ; encoding: [0x05,0x40,0x83,0xd3,0x01,0xe1,0x01,0x18]
0x05,0x00,0x83,0xd3,0x01,0xe1,0x01,0x18
-# CHECK: v_pk_sub_i16 v5, v1, 0xc400 ; encoding: [0x05,0x40,0x83,0xd3,0x01,0xff,0x01,0x18]
+# CHECK: v_pk_sub_i16 v5, v1, -4.0 ; encoding: [0x05,0x40,0x83,0xd3,0x01,0xef,0x01,0x18]
0x05,0x00,0x83,0xd3,0x01,0xef,0x01,0x18
# CHECK: v_pk_sub_i16 v5, v1, v2 op_sel:[1,0] ; encoding: [0x05,0x48,0x83,0xd3,0x01,0x05,0x02,0x18]
@@ -531,10 +531,10 @@
# CHECK: v_pk_lshlrev_b16 v5, -1, v2 ; encoding: [0x05,0x40,0x84,0xd3,0xc1,0x04,0x02,0x18]
0x05,0x00,0x84,0xd3,0xc1,0x04,0x02,0x18
-# CHECK: v_pk_lshlrev_b16 v5, 0x3800, v2 ; encoding: [0x05,0x40,0x84,0xd3,0xff,0x04,0x02,0x18]
+# CHECK: v_pk_lshlrev_b16 v5, 0.5, v2 ; encoding: [0x05,0x40,0x84,0xd3,0xf0,0x04,0x02,0x18]
0x05,0x00,0x84,0xd3,0xf0,0x04,0x02,0x18
-# CHECK: v_pk_lshlrev_b16 v5, 0xc400, v2 ; encoding: [0x05,0x40,0x84,0xd3,0xff,0x04,0x02,0x18]
+# CHECK: v_pk_lshlrev_b16 v5, -4.0, v2 ; encoding: [0x05,0x40,0x84,0xd3,0xf7,0x04,0x02,0x18]
0x05,0x00,0x84,0xd3,0xf7,0x04,0x02,0x18
# CHECK: v_pk_lshlrev_b16 v5, v1, v255 ; encoding: [0x05,0x40,0x84,0xd3,0x01,0xff,0x03,0x18]
@@ -573,10 +573,10 @@
# CHECK: v_pk_lshlrev_b16 v5, v1, -1 ; encoding: [0x05,0x40,0x84,0xd3,0x01,0x83,0x01,0x18]
0x05,0x00,0x84,0xd3,0x01,0x83,0x01,0x18
-# CHECK: v_pk_lshlrev_b16 v5, v1, 0x3800 ; encoding: [0x05,0x40,0x84,0xd3,0x01,0xff,0x01,0x18]
+# CHECK: v_pk_lshlrev_b16 v5, v1, 0.5 ; encoding: [0x05,0x40,0x84,0xd3,0x01,0xe1,0x01,0x18]
0x05,0x00,0x84,0xd3,0x01,0xe1,0x01,0x18
-# CHECK: v_pk_lshlrev_b16 v5, v1, 0xc400 ; encoding: [0x05,0x40,0x84,0xd3,0x01,0xff,0x01,0x18]
+# CHECK: v_pk_lshlrev_b16 v5, v1, -4.0 ; encoding: [0x05,0x40,0x84,0xd3,0x01,0xef,0x01,0x18]
0x05,0x00,0x84,0xd3,0x01,0xef,0x01,0x18
# CHECK: v_pk_lshlrev_b16 v5, v1, v2 op_sel:[1,0] ; encoding: [0x05,0x48,0x84,0xd3,0x01,0x05,0x02,0x18]
@@ -639,10 +639,10 @@
# CHECK: v_pk_lshrrev_b16 v5, -1, v2 ; encoding: [0x05,0x40,0x85,0xd3,0xc1,0x04,0x02,0x18]
0x05,0x00,0x85,0xd3,0xc1,0x04,0x02,0x18
-# CHECK: v_pk_lshrrev_b16 v5, 0x3800, v2 ; encoding: [0x05,0x40,0x85,0xd3,0xff,0x04,0x02,0x18]
+# CHECK: v_pk_lshrrev_b16 v5, 0.5, v2 ; encoding: [0x05,0x40,0x85,0xd3,0xf0,0x04,0x02,0x18]
0x05,0x00,0x85,0xd3,0xf0,0x04,0x02,0x18
-# CHECK: v_pk_lshrrev_b16 v5, 0xc400, v2 ; encoding: [0x05,0x40,0x85,0xd3,0xff,0x04,0x02,0x18]
+# CHECK: v_pk_lshrrev_b16 v5, -4.0, v2 ; encoding: [0x05,0x40,0x85,0xd3,0xf7,0x04,0x02,0x18]
0x05,0x00,0x85,0xd3,0xf7,0x04,0x02,0x18
# CHECK: v_pk_lshrrev_b16 v5, v1, v255 ; encoding: [0x05,0x40,0x85,0xd3,0x01,0xff,0x03,0x18]
@@ -681,10 +681,10 @@
# CHECK: v_pk_lshrrev_b16 v5, v1, -1 ; encoding: [0x05,0x40,0x85,0xd3,0x01,0x83,0x01,0x18]
0x05,0x00,0x85,0xd3,0x01,0x83,0x01,0x18
-# CHECK: v_pk_lshrrev_b16 v5, v1, 0x3800 ; encoding: [0x05,0x40,0x85,0xd3,0x01,0xff,0x01,0x18]
+# CHECK: v_pk_lshrrev_b16 v5, v1, 0.5 ; encoding: [0x05,0x40,0x85,0xd3,0x01,0xe1,0x01,0x18]
0x05,0x00,0x85,0xd3,0x01,0xe1,0x01,0x18
-# CHECK: v_pk_lshrrev_b16 v5, v1, 0xc400 ; encoding: [0x05,0x40,0x85,0xd3,0x01,0xff,0x01,0x18]
+# CHECK: v_pk_lshrrev_b16 v5, v1, -4.0 ; encoding: [0x05,0x40,0x85,0xd3,0x01,0xef,0x01,0x18]
0x05,0x00,0x85,0xd3,0x01,0xef,0x01,0x18
# CHECK: v_pk_lshrrev_b16 v5, v1, v2 op_sel:[1,0] ; encoding: [0x05,0x48,0x85,0xd3,0x01,0x05,0x02,0x18]
@@ -747,10 +747,10 @@
# CHECK: v_pk_ashrrev_i16 v5, -1, v2 ; encoding: [0x05,0x40,0x86,0xd3,0xc1,0x04,0x02,0x18]
0x05,0x00,0x86,0xd3,0xc1,0x04,0x02,0x18
-# CHECK: v_pk_ashrrev_i16 v5, 0x3800, v2 ; encoding: [0x05,0x40,0x86,0xd3,0xff,0x04,0x02,0x18]
+# CHECK: v_pk_ashrrev_i16 v5, 0.5, v2 ; encoding: [0x05,0x40,0x86,0xd3,0xf0,0x04,0x02,0x18]
0x05,0x00,0x86,0xd3,0xf0,0x04,0x02,0x18
-# CHECK: v_pk_ashrrev_i16 v5, 0xc400, v2 ; encoding: [0x05,0x40,0x86,0xd3,0xff,0x04,0x02,0x18]
+# CHECK: v_pk_ashrrev_i16 v5, -4.0, v2 ; encoding: [0x05,0x40,0x86,0xd3,0xf7,0x04,0x02,0x18]
0x05,0x00,0x86,0xd3,0xf7,0x04,0x02,0x18
# CHECK: v_pk_ashrrev_i16 v5, v1, v255 ; encoding: [0x05,0x40,0x86,0xd3,0x01,0xff,0x03,0x18]
@@ -789,10 +789,10 @@
# CHECK: v_pk_ashrrev_i16 v5, v1, -1 ; encoding: [0x05,0x40,0x86,0xd3,0x01,0x83,0x01,0x18]
0x05,0x00,0x86,0xd3,0x01,0x83,0x01,0x18
-# CHECK: v_pk_ashrrev_i16 v5, v1, 0x3800 ; encoding: [0x05,0x40,0x86,0xd3,0x01,0xff,0x01,0x18]
+# CHECK: v_pk_ashrrev_i16 v5, v1, 0.5 ; encoding: [0x05,0x40,0x86,0xd3,0x01,0xe1,0x01,0x18]
0x05,0x00,0x86,0xd3,0x01,0xe1,0x01,0x18
-# CHECK: v_pk_ashrrev_i16 v5, v1, 0xc400 ; encoding: [0x05,0x40,0x86,0xd3,0x01,0xff,0x01,0x18]
+# CHECK: v_pk_ashrrev_i16 v5, v1, -4.0 ; encoding: [0x05,0x40,0x86,0xd3,0x01,0xef,0x01,0x18]
0x05,0x00,0x86,0xd3,0x01,0xef,0x01,0x18
# CHECK: v_pk_ashrrev_i16 v5, v1, v2 op_sel:[1,0] ; encoding: [0x05,0x48,0x86,0xd3,0x01,0x05,0x02,0x18]
@@ -855,10 +855,10 @@
# CHECK: v_pk_max_i16 v5, -1, v2 ; encoding: [0x05,0x40,0x87,0xd3,0xc1,0x04,0x02,0x18]
0x05,0x00,0x87,0xd3,0xc1,0x04,0x02,0x18
-# CHECK: v_pk_max_i16 v5, 0x3800, v2 ; encoding: [0x05,0x40,0x87,0xd3,0xff,0x04,0x02,0x18]
+# CHECK: v_pk_max_i16 v5, 0.5, v2 ; encoding: [0x05,0x40,0x87,0xd3,0xf0,0x04,0x02,0x18]
0x05,0x00,0x87,0xd3,0xf0,0x04,0x02,0x18
-# CHECK: v_pk_max_i16 v5, 0xc400, v2 ; encoding: [0x05,0x40,0x87,0xd3,0xff,0x04,0x02,0x18]
+# CHECK: v_pk_max_i16 v5, -4.0, v2 ; encoding: [0x05,0x40,0x87,0xd3,0xf7,0x04,0x02,0x18]
0x05,0x00,0x87,0xd3,0xf7,0x04,0x02,0x18
# CHECK: v_pk_max_i16 v5, v1, v255 ; encoding: [0x05,0x40,0x87,0xd3,0x01,0xff,0x03,0x18]
@@ -897,10 +897,10 @@
# CHECK: v_pk_max_i16 v5, v1, -1 ; encoding: [0x05,0x40,0x87,0xd3,0x01,0x83,0x01,0x18]
0x05,0x00,0x87,0xd3,0x01,0x83,0x01,0x18
-# CHECK: v_pk_max_i16 v5, v1, 0x3800 ; encoding: [0x05,0x40,0x87,0xd3,0x01,0xff,0x01,0x18]
+# CHECK: v_pk_max_i16 v5, v1, 0.5 ; encoding: [0x05,0x40,0x87,0xd3,0x01,0xe1,0x01,0x18]
0x05,0x00,0x87,0xd3,0x01,0xe1,0x01,0x18
-# CHECK: v_pk_max_i16 v5, v1, 0xc400 ; encoding: [0x05,0x40,0x87,0xd3,0x01,0xff,0x01,0x18]
+# CHECK: v_pk_max_i16 v5, v1, -4.0 ; encoding: [0x05,0x40,0x87,0xd3,0x01,0xef,0x01,0x18]
0x05,0x00,0x87,0xd3,0x01,0xef,0x01,0x18
# CHECK: v_pk_max_i16 v5, v1, v2 op_sel:[1,0] ; encoding: [0x05,0x48,0x87,0xd3,0x01,0x05,0x02,0x18]
@@ -963,10 +963,10 @@
# CHECK: v_pk_min_i16 v5, -1, v2 ; encoding: [0x05,0x40,0x88,0xd3,0xc1,0x04,0x02,0x18]
0x05,0x00,0x88,0xd3,0xc1,0x04,0x02,0x18
-# CHECK: v_pk_min_i16 v5, 0x3800, v2 ; encoding: [0x05,0x40,0x88,0xd3,0xff,0x04,0x02,0x18]
+# CHECK: v_pk_min_i16 v5, 0.5, v2 ; encoding: [0x05,0x40,0x88,0xd3,0xf0,0x04,0x02,0x18]
0x05,0x00,0x88,0xd3,0xf0,0x04,0x02,0x18
-# CHECK: v_pk_min_i16 v5, 0xc400, v2 ; encoding: [0x05,0x40,0x88,0xd3,0xff,0x04,0x02,0x18]
+# CHECK: v_pk_min_i16 v5, -4.0, v2 ; encoding: [0x05,0x40,0x88,0xd3,0xf7,0x04,0x02,0x18]
0x05,0x00,0x88,0xd3,0xf7,0x04,0x02,0x18
# CHECK: v_pk_min_i16 v5, v1, v255 ; encoding: [0x05,0x40,0x88,0xd3,0x01,0xff,0x03,0x18]
@@ -1005,10 +1005,10 @@
# CHECK: v_pk_min_i16 v5, v1, -1 ; encoding: [0x05,0x40,0x88,0xd3,0x01,0x83,0x01,0x18]
0x05,0x00,0x88,0xd3,0x01,0x83,0x01,0x18
-# CHECK: v_pk_min_i16 v5, v1, 0x3800 ; encoding: [0x05,0x40,0x88,0xd3,0x01,0xff,0x01,0x18]
+# CHECK: v_pk_min_i16 v5, v1, 0.5 ; encoding: [0x05,0x40,0x88,0xd3,0x01,0xe1,0x01,0x18]
0x05,0x00,0x88,0xd3,0x01,0xe1,0x01,0x18
-# CHECK: v_pk_min_i16 v5, v1, 0xc400 ; encoding: [0x05,0x40,0x88,0xd3,0x01,0xff,0x01,0x18]
+# CHECK: v_pk_min_i16 v5, v1, -4.0 ; encoding: [0x05,0x40,0x88,0xd3,0x01,0xef,0x01,0x18]
0x05,0x00,0x88,0xd3,0x01,0xef,0x01,0x18
# CHECK: v_pk_min_i16 v5, v1, v2 op_sel:[1,0] ; encoding: [0x05,0x48,0x88,0xd3,0x01,0x05,0x02,0x18]
@@ -1071,10 +1071,10 @@
# CHECK: v_pk_mad_u16 v5, -1, v2, v3 ; encoding: [0x05,0x40,0x89,0xd3,0xc1,0x04,0x0e,0x1c]
0x05,0x40,0x89,0xd3,0xc1,0x04,0x0e,0x1c
-# CHECK: v_pk_mad_u16 v5, 0x3800, v2, v3 ; encoding: [0x05,0x40,0x89,0xd3,0xff,0x04,0x0e,0x1c]
+# CHECK: v_pk_mad_u16 v5, 0.5, v2, v3 ; encoding: [0x05,0x40,0x89,0xd3,0xf0,0x04,0x0e,0x1c]
0x05,0x40,0x89,0xd3,0xf0,0x04,0x0e,0x1c
-# CHECK: v_pk_mad_u16 v5, 0xc400, v2, v3 ; encoding: [0x05,0x40,0x89,0xd3,0xff,0x04,0x0e,0x1c]
+# CHECK: v_pk_mad_u16 v5, -4.0, v2, v3 ; encoding: [0x05,0x40,0x89,0xd3,0xf7,0x04,0x0e,0x1c]
0x05,0x40,0x89,0xd3,0xf7,0x04,0x0e,0x1c
# CHECK: v_pk_mad_u16 v5, v1, v255, v3 ; encoding: [0x05,0x40,0x89,0xd3,0x01,0xff,0x0f,0x1c]
@@ -1113,10 +1113,10 @@
# CHECK: v_pk_mad_u16 v5, v1, -1, v3 ; encoding: [0x05,0x40,0x89,0xd3,0x01,0x83,0x0d,0x1c]
0x05,0x40,0x89,0xd3,0x01,0x83,0x0d,0x1c
-# CHECK: v_pk_mad_u16 v5, v1, 0x3800, v3 ; encoding: [0x05,0x40,0x89,0xd3,0x01,0xff,0x0d,0x1c]
+# CHECK: v_pk_mad_u16 v5, v1, 0.5, v3 ; encoding: [0x05,0x40,0x89,0xd3,0x01,0xe1,0x0d,0x1c]
0x05,0x40,0x89,0xd3,0x01,0xe1,0x0d,0x1c
-# CHECK: v_pk_mad_u16 v5, v1, 0xc400, v3 ; encoding: [0x05,0x40,0x89,0xd3,0x01,0xff,0x0d,0x1c]
+# CHECK: v_pk_mad_u16 v5, v1, -4.0, v3 ; encoding: [0x05,0x40,0x89,0xd3,0x01,0xef,0x0d,0x1c]
0x05,0x40,0x89,0xd3,0x01,0xef,0x0d,0x1c
# CHECK: v_pk_mad_u16 v5, v1, v2, v255 ; encoding: [0x05,0x40,0x89,0xd3,0x01,0x05,0xfe,0x1f]
@@ -1155,10 +1155,10 @@
# CHECK: v_pk_mad_u16 v5, v1, v2, -1 ; encoding: [0x05,0x40,0x89,0xd3,0x01,0x05,0x06,0x1b]
0x05,0x40,0x89,0xd3,0x01,0x05,0x06,0x1b
-# CHECK: v_pk_mad_u16 v5, v1, v2, 0x3800 ; encoding: [0x05,0x40,0x89,0xd3,0x01,0x05,0xfe,0x1b]
+# CHECK: v_pk_mad_u16 v5, v1, v2, 0.5 ; encoding: [0x05,0x40,0x89,0xd3,0x01,0x05,0xc2,0x1b]
0x05,0x40,0x89,0xd3,0x01,0x05,0xc2,0x1b
-# CHECK: v_pk_mad_u16 v5, v1, v2, 0xc400 ; encoding: [0x05,0x40,0x89,0xd3,0x01,0x05,0xfe,0x1b]
+# CHECK: v_pk_mad_u16 v5, v1, v2, -4.0 ; encoding: [0x05,0x40,0x89,0xd3,0x01,0x05,0xde,0x1b]
0x05,0x40,0x89,0xd3,0x01,0x05,0xde,0x1b
# CHECK: v_pk_mad_u16 v5, v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x05,0x48,0x89,0xd3,0x01,0x05,0x0e,0x1c]
@@ -1230,10 +1230,10 @@
# CHECK: v_pk_add_u16 v5, -1, v2 ; encoding: [0x05,0x40,0x8a,0xd3,0xc1,0x04,0x02,0x18]
0x05,0x00,0x8a,0xd3,0xc1,0x04,0x02,0x18
-# CHECK: v_pk_add_u16 v5, 0x3800, v2 ; encoding: [0x05,0x40,0x8a,0xd3,0xff,0x04,0x02,0x18]
+# CHECK: v_pk_add_u16 v5, 0.5, v2 ; encoding: [0x05,0x40,0x8a,0xd3,0xf0,0x04,0x02,0x18]
0x05,0x00,0x8a,0xd3,0xf0,0x04,0x02,0x18
-# CHECK: v_pk_add_u16 v5, 0xc400, v2 ; encoding: [0x05,0x40,0x8a,0xd3,0xff,0x04,0x02,0x18]
+# CHECK: v_pk_add_u16 v5, -4.0, v2 ; encoding: [0x05,0x40,0x8a,0xd3,0xf7,0x04,0x02,0x18]
0x05,0x00,0x8a,0xd3,0xf7,0x04,0x02,0x18
# CHECK: v_pk_add_u16 v5, v1, v255 ; encoding: [0x05,0x40,0x8a,0xd3,0x01,0xff,0x03,0x18]
@@ -1272,10 +1272,10 @@
# CHECK: v_pk_add_u16 v5, v1, -1 ; encoding: [0x05,0x40,0x8a,0xd3,0x01,0x83,0x01,0x18]
0x05,0x00,0x8a,0xd3,0x01,0x83,0x01,0x18
-# CHECK: v_pk_add_u16 v5, v1, 0x3800 ; encoding: [0x05,0x40,0x8a,0xd3,0x01,0xff,0x01,0x18]
+# CHECK: v_pk_add_u16 v5, v1, 0.5 ; encoding: [0x05,0x40,0x8a,0xd3,0x01,0xe1,0x01,0x18]
0x05,0x00,0x8a,0xd3,0x01,0xe1,0x01,0x18
-# CHECK: v_pk_add_u16 v5, v1, 0xc400 ; encoding: [0x05,0x40,0x8a,0xd3,0x01,0xff,0x01,0x18]
+# CHECK: v_pk_add_u16 v5, v1, -4.0 ; encoding: [0x05,0x40,0x8a,0xd3,0x01,0xef,0x01,0x18]
0x05,0x00,0x8a,0xd3,0x01,0xef,0x01,0x18
# CHECK: v_pk_add_u16 v5, v1, v2 op_sel:[1,0] ; encoding: [0x05,0x48,0x8a,0xd3,0x01,0x05,0x02,0x18]
@@ -1341,10 +1341,10 @@
# CHECK: v_pk_sub_u16 v5, -1, v2 ; encoding: [0x05,0x40,0x8b,0xd3,0xc1,0x04,0x02,0x18]
0x05,0x00,0x8b,0xd3,0xc1,0x04,0x02,0x18
-# CHECK: v_pk_sub_u16 v5, 0x3800, v2 ; encoding: [0x05,0x40,0x8b,0xd3,0xff,0x04,0x02,0x18]
+# CHECK: v_pk_sub_u16 v5, 0.5, v2 ; encoding: [0x05,0x40,0x8b,0xd3,0xf0,0x04,0x02,0x18]
0x05,0x00,0x8b,0xd3,0xf0,0x04,0x02,0x18
-# CHECK: v_pk_sub_u16 v5, 0xc400, v2 ; encoding: [0x05,0x40,0x8b,0xd3,0xff,0x04,0x02,0x18]
+# CHECK: v_pk_sub_u16 v5, -4.0, v2 ; encoding: [0x05,0x40,0x8b,0xd3,0xf7,0x04,0x02,0x18]
0x05,0x00,0x8b,0xd3,0xf7,0x04,0x02,0x18
# CHECK: v_pk_sub_u16 v5, v1, v255 ; encoding: [0x05,0x40,0x8b,0xd3,0x01,0xff,0x03,0x18]
@@ -1383,10 +1383,10 @@
# CHECK: v_pk_sub_u16 v5, v1, -1 ; encoding: [0x05,0x40,0x8b,0xd3,0x01,0x83,0x01,0x18]
0x05,0x00,0x8b,0xd3,0x01,0x83,0x01,0x18
-# CHECK: v_pk_sub_u16 v5, v1, 0x3800 ; encoding: [0x05,0x40,0x8b,0xd3,0x01,0xff,0x01,0x18]
+# CHECK: v_pk_sub_u16 v5, v1, 0.5 ; encoding: [0x05,0x40,0x8b,0xd3,0x01,0xe1,0x01,0x18]
0x05,0x00,0x8b,0xd3,0x01,0xe1,0x01,0x18
-# CHECK: v_pk_sub_u16 v5, v1, 0xc400 ; encoding: [0x05,0x40,0x8b,0xd3,0x01,0xff,0x01,0x18]
+# CHECK: v_pk_sub_u16 v5, v1, -4.0 ; encoding: [0x05,0x40,0x8b,0xd3,0x01,0xef,0x01,0x18]
0x05,0x00,0x8b,0xd3,0x01,0xef,0x01,0x18
# CHECK: v_pk_sub_u16 v5, v1, v2 op_sel:[1,0] ; encoding: [0x05,0x48,0x8b,0xd3,0x01,0x05,0x02,0x18]
@@ -1452,10 +1452,10 @@
# CHECK: v_pk_max_u16 v5, -1, v2 ; encoding: [0x05,0x40,0x8c,0xd3,0xc1,0x04,0x02,0x18]
0x05,0x00,0x8c,0xd3,0xc1,0x04,0x02,0x18
-# CHECK: v_pk_max_u16 v5, 0x3800, v2 ; encoding: [0x05,0x40,0x8c,0xd3,0xff,0x04,0x02,0x18]
+# CHECK: v_pk_max_u16 v5, 0.5, v2 ; encoding: [0x05,0x40,0x8c,0xd3,0xf0,0x04,0x02,0x18]
0x05,0x00,0x8c,0xd3,0xf0,0x04,0x02,0x18
-# CHECK: v_pk_max_u16 v5, 0xc400, v2 ; encoding: [0x05,0x40,0x8c,0xd3,0xff,0x04,0x02,0x18]
+# CHECK: v_pk_max_u16 v5, -4.0, v2 ; encoding: [0x05,0x40,0x8c,0xd3,0xf7,0x04,0x02,0x18]
0x05,0x00,0x8c,0xd3,0xf7,0x04,0x02,0x18
# CHECK: v_pk_max_u16 v5, v1, v255 ; encoding: [0x05,0x40,0x8c,0xd3,0x01,0xff,0x03,0x18]
@@ -1494,10 +1494,10 @@
# CHECK: v_pk_max_u16 v5, v1, -1 ; encoding: [0x05,0x40,0x8c,0xd3,0x01,0x83,0x01,0x18]
0x05,0x00,0x8c,0xd3,0x01,0x83,0x01,0x18
-# CHECK: v_pk_max_u16 v5, v1, 0x3800 ; encoding: [0x05,0x40,0x8c,0xd3,0x01,0xff,0x01,0x18]
+# CHECK: v_pk_max_u16 v5, v1, 0.5 ; encoding: [0x05,0x40,0x8c,0xd3,0x01,0xe1,0x01,0x18]
0x05,0x00,0x8c,0xd3,0x01,0xe1,0x01,0x18
-# CHECK: v_pk_max_u16 v5, v1, 0xc400 ; encoding: [0x05,0x40,0x8c,0xd3,0x01,0xff,0x01,0x18]
+# CHECK: v_pk_max_u16 v5, v1, -4.0 ; encoding: [0x05,0x40,0x8c,0xd3,0x01,0xef,0x01,0x18]
0x05,0x00,0x8c,0xd3,0x01,0xef,0x01,0x18
# CHECK: v_pk_max_u16 v5, v1, v2 op_sel:[1,0] ; encoding: [0x05,0x48,0x8c,0xd3,0x01,0x05,0x02,0x18]
@@ -1560,10 +1560,10 @@
# CHECK: v_pk_min_u16 v5, -1, v2 ; encoding: [0x05,0x40,0x8d,0xd3,0xc1,0x04,0x02,0x18]
0x05,0x00,0x8d,0xd3,0xc1,0x04,0x02,0x18
-# CHECK: v_pk_min_u16 v5, 0x3800, v2 ; encoding: [0x05,0x40,0x8d,0xd3,0xff,0x04,0x02,0x18]
+# CHECK: v_pk_min_u16 v5, 0.5, v2 ; encoding: [0x05,0x40,0x8d,0xd3,0xf0,0x04,0x02,0x18]
0x05,0x00,0x8d,0xd3,0xf0,0x04,0x02,0x18
-# CHECK: v_pk_min_u16 v5, 0xc400, v2 ; encoding: [0x05,0x40,0x8d,0xd3,0xff,0x04,0x02,0x18]
+# CHECK: v_pk_min_u16 v5, -4.0, v2 ; encoding: [0x05,0x40,0x8d,0xd3,0xf7,0x04,0x02,0x18]
0x05,0x00,0x8d,0xd3,0xf7,0x04,0x02,0x18
# CHECK: v_pk_min_u16 v5, v1, v255 ; encoding: [0x05,0x40,0x8d,0xd3,0x01,0xff,0x03,0x18]
@@ -1602,10 +1602,10 @@
# CHECK: v_pk_min_u16 v5, v1, -1 ; encoding: [0x05,0x40,0x8d,0xd3,0x01,0x83,0x01,0x18]
0x05,0x00,0x8d,0xd3,0x01,0x83,0x01,0x18
-# CHECK: v_pk_min_u16 v5, v1, 0x3800 ; encoding: [0x05,0x40,0x8d,0xd3,0x01,0xff,0x01,0x18]
+# CHECK: v_pk_min_u16 v5, v1, 0.5 ; encoding: [0x05,0x40,0x8d,0xd3,0x01,0xe1,0x01,0x18]
0x05,0x00,0x8d,0xd3,0x01,0xe1,0x01,0x18
-# CHECK: v_pk_min_u16 v5, v1, 0xc400 ; encoding: [0x05,0x40,0x8d,0xd3,0x01,0xff,0x01,0x18]
+# CHECK: v_pk_min_u16 v5, v1, -4.0 ; encoding: [0x05,0x40,0x8d,0xd3,0x01,0xef,0x01,0x18]
0x05,0x00,0x8d,0xd3,0x01,0xef,0x01,0x18
# CHECK: v_pk_min_u16 v5, v1, v2 op_sel:[1,0] ; encoding: [0x05,0x48,0x8d,0xd3,0x01,0x05,0x02,0x18]