aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--clang/lib/Basic/Targets/AMDGPU.cpp21
-rw-r--r--clang/test/Driver/amdgpu-macros.cl5
-rw-r--r--clang/test/Driver/amdgpu-mcpu.cl10
-rw-r--r--clang/test/Misc/target-invalid-cpu-note.c2
-rw-r--r--llvm/docs/AMDGPUUsage.rst304
-rw-r--r--llvm/include/llvm/BinaryFormat/ELF.h6
-rw-r--r--llvm/include/llvm/TargetParser/TargetParser.h10
-rw-r--r--llvm/lib/Object/ELFObjectFile.cpp10
-rw-r--r--llvm/lib/ObjectYAML/ELFYAML.cpp4
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPU.td87
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp7
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPURemoveIncompatibleFunctions.cpp6
-rw-r--r--llvm/lib/Target/AMDGPU/GCNProcessors.td22
-rw-r--r--llvm/lib/Target/AMDGPU/GCNSubtarget.h4
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp26
-rw-r--r--llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h11
-rw-r--r--llvm/lib/TargetParser/TargetParser.cpp46
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll19
-rw-r--r--llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll14
-rw-r--r--llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll10
-rw-r--r--llvm/test/CodeGen/AMDGPU/gds-allocation.ll1
-rw-r--r--llvm/test/CodeGen/AMDGPU/gds-atomic.ll1
-rw-r--r--llvm/test/CodeGen/AMDGPU/generic-targets-require-v6.ll18
-rw-r--r--llvm/test/CodeGen/AMDGPU/hsa-generic-target-features.ll31
-rw-r--r--llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.d16.dim.ll3
-rw-r--r--llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll3
-rw-r--r--llvm/test/CodeGen/AMDGPU/mad-mix.ll421
-rw-r--r--llvm/test/CodeGen/AMDGPU/unsupported-image-sample.ll12
-rw-r--r--llvm/test/Object/AMDGPU/elf-header-flags-mach.yaml29
-rw-r--r--llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll20
-rw-r--r--llvm/test/tools/llvm-readobj/ELF/AMDGPU/elf-headers.test12
-rw-r--r--llvm/tools/llvm-readobj/ELFDumper.cpp128
32 files changed, 1101 insertions, 202 deletions
diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp
index 141501e8..10cba6b 100644
--- a/clang/lib/Basic/Targets/AMDGPU.cpp
+++ b/clang/lib/Basic/Targets/AMDGPU.cpp
@@ -17,6 +17,7 @@
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/MacroBuilder.h"
#include "clang/Basic/TargetBuiltins.h"
+#include "llvm/ADT/SmallString.h"
using namespace clang;
using namespace clang::targets;
@@ -279,13 +280,25 @@ void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
if (GPUKind == llvm::AMDGPU::GK_NONE && !IsHIPHost)
return;
- StringRef CanonName = isAMDGCN(getTriple()) ? getArchNameAMDGCN(GPUKind)
- : getArchNameR600(GPUKind);
+ llvm::SmallString<16> CanonName =
+ (isAMDGCN(getTriple()) ? getArchNameAMDGCN(GPUKind)
+ : getArchNameR600(GPUKind));
+
+ // Sanitize the name of generic targets.
+ // e.g. gfx10.1-generic -> gfx10_1_generic
+ if (GPUKind >= llvm::AMDGPU::GK_AMDGCN_GENERIC_FIRST &&
+ GPUKind <= llvm::AMDGPU::GK_AMDGCN_GENERIC_LAST) {
+ std::replace(CanonName.begin(), CanonName.end(), '.', '_');
+ std::replace(CanonName.begin(), CanonName.end(), '-', '_');
+ }
+
Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
// Emit macros for gfx family e.g. gfx906 -> __GFX9__, gfx1030 -> __GFX10___
if (isAMDGCN(getTriple()) && !IsHIPHost) {
- assert(CanonName.starts_with("gfx") && "Invalid amdgcn canonical name");
- Builder.defineMacro(Twine("__") + Twine(CanonName.drop_back(2).upper()) +
+ assert(StringRef(CanonName).starts_with("gfx") &&
+ "Invalid amdgcn canonical name");
+ StringRef CanonFamilyName = getArchFamilyNameAMDGCN(GPUKind);
+ Builder.defineMacro(Twine("__") + Twine(CanonFamilyName.upper()) +
Twine("__"));
Builder.defineMacro("__amdgcn_processor__",
Twine("\"") + Twine(CanonName) + Twine("\""));
diff --git a/clang/test/Driver/amdgpu-macros.cl b/clang/test/Driver/amdgpu-macros.cl
index 81c22af..3b10444 100644
--- a/clang/test/Driver/amdgpu-macros.cl
+++ b/clang/test/Driver/amdgpu-macros.cl
@@ -131,6 +131,11 @@
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1200 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1200 -DFAMILY=GFX12
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1201 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1201 -DFAMILY=GFX12
+// RUN: %clang -E -dM -target amdgcn -mcpu=gfx9-generic %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx9_generic -DFAMILY=GFX9
+// RUN: %clang -E -dM -target amdgcn -mcpu=gfx10.1-generic %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx10_1_generic -DFAMILY=GFX10
+// RUN: %clang -E -dM -target amdgcn -mcpu=gfx10.3-generic %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx10_3_generic -DFAMILY=GFX10
+// RUN: %clang -E -dM -target amdgcn -mcpu=gfx11-generic %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx11_generic -DFAMILY=GFX11
+
// ARCH-GCN-DAG: #define FP_FAST_FMA 1
// FAST_FMAF-DAG: #define FP_FAST_FMAF 1
diff --git a/clang/test/Driver/amdgpu-mcpu.cl b/clang/test/Driver/amdgpu-mcpu.cl
index eeb16ae..6f18ea0 100644
--- a/clang/test/Driver/amdgpu-mcpu.cl
+++ b/clang/test/Driver/amdgpu-mcpu.cl
@@ -115,6 +115,11 @@
// RUN: %clang -### -target amdgcn -mcpu=gfx1200 %s 2>&1 | FileCheck --check-prefix=GFX1200 %s
// RUN: %clang -### -target amdgcn -mcpu=gfx1201 %s 2>&1 | FileCheck --check-prefix=GFX1201 %s
+// RUN: %clang -### -target amdgcn -mcpu=gfx9-generic %s 2>&1 | FileCheck --check-prefix=GFX9_GENERIC %s
+// RUN: %clang -### -target amdgcn -mcpu=gfx10.1-generic %s 2>&1 | FileCheck --check-prefix=GFX10_1_GENERIC %s
+// RUN: %clang -### -target amdgcn -mcpu=gfx10.3-generic %s 2>&1 | FileCheck --check-prefix=GFX10_3_GENERIC %s
+// RUN: %clang -### -target amdgcn -mcpu=gfx11-generic %s 2>&1 | FileCheck --check-prefix=GFX11_GENERIC %s
+
// GCNDEFAULT-NOT: -target-cpu
// GFX600: "-target-cpu" "gfx600"
// GFX601: "-target-cpu" "gfx601"
@@ -160,3 +165,8 @@
// GFX1151: "-target-cpu" "gfx1151"
// GFX1200: "-target-cpu" "gfx1200"
// GFX1201: "-target-cpu" "gfx1201"
+
+// GFX9_GENERIC: "-target-cpu" "gfx9-generic"
+// GFX10_1_GENERIC: "-target-cpu" "gfx10.1-generic"
+// GFX10_3_GENERIC: "-target-cpu" "gfx10.3-generic"
+// GFX11_GENERIC: "-target-cpu" "gfx11-generic"
diff --git a/clang/test/Misc/target-invalid-cpu-note.c b/clang/test/Misc/target-invalid-cpu-note.c
index 39ed02f..123b203 100644
--- a/clang/test/Misc/target-invalid-cpu-note.c
+++ b/clang/test/Misc/target-invalid-cpu-note.c
@@ -37,7 +37,7 @@
// RUN: not %clang_cc1 -triple amdgcn--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix AMDGCN
// AMDGCN: error: unknown target CPU 'not-a-cpu'
-// AMDGCN-NEXT: note: valid target CPU values are: gfx600, tahiti, gfx601, pitcairn, verde, gfx602, hainan, oland, gfx700, kaveri, gfx701, hawaii, gfx702, gfx703, kabini, mullins, gfx704, bonaire, gfx705, gfx801, carrizo, gfx802, iceland, tonga, gfx803, fiji, polaris10, polaris11, gfx805, tongapro, gfx810, stoney, gfx900, gfx902, gfx904, gfx906, gfx908, gfx909, gfx90a, gfx90c, gfx940, gfx941, gfx942, gfx1010, gfx1011, gfx1012, gfx1013, gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035, gfx1036, gfx1100, gfx1101, gfx1102, gfx1103, gfx1150, gfx1151, gfx1200, gfx1201{{$}}
+// AMDGCN-NEXT: note: valid target CPU values are: gfx600, tahiti, gfx601, pitcairn, verde, gfx602, hainan, oland, gfx700, kaveri, gfx701, hawaii, gfx702, gfx703, kabini, mullins, gfx704, bonaire, gfx705, gfx801, carrizo, gfx802, iceland, tonga, gfx803, fiji, polaris10, polaris11, gfx805, tongapro, gfx810, stoney, gfx900, gfx902, gfx904, gfx906, gfx908, gfx909, gfx90a, gfx90c, gfx940, gfx941, gfx942, gfx1010, gfx1011, gfx1012, gfx1013, gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035, gfx1036, gfx1100, gfx1101, gfx1102, gfx1103, gfx1150, gfx1151, gfx1200, gfx1201, gfx9-generic, gfx10.1-generic, gfx10.3-generic, gfx11-generic{{$}}
// RUN: not %clang_cc1 -triple wasm64--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix WEBASM
// WEBASM: error: unknown target CPU 'not-a-cpu'
diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index ebc7fda..970b5e2 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -520,6 +520,108 @@ Every processor supports every OS ABI (see :ref:`amdgpu-os`) with the following
=========== =============== ============ ===== ================= =============== =============== ======================
+Generic processors allow execution of a single code object on any of the processors that
+it supports. Such code objects may not perform as well as those for the non-generic processors.
+
+Generic processors are only available on code object V6 and above (see :ref:`amdgpu-elf-code-object`).
+
+Generic processor code objects are versioned (see :ref:`amdgpu-elf-header-e_flags-table-v6-onwards`) between 1 and 255.
+The version of non-generic code objects is always set to 0.
+
+For a generic code object, adding a new supported processor may require the code generated for the generic target to be changed
+so it can continue to execute on the previously supported processors as well as on the new one.
+When this happens, the generic code object version number is incremented at the same time as the generic target is updated.
+
+Each supported processor of a generic target is mapped to the version it was introduced in.
+A generic code object can execute on a supported processor if the version of the code object being loaded is
+greater than or equal to the version in which the processor was added to the generic target.
+
+ .. table:: AMDGPU Generic Processors
+ :name: amdgpu-generic-processor-table
+
+ ==================== ============== ================= ================== ================= =================================
+ Processor Target Supported Target Features Target Properties Target Restrictions
+ Triple Processors Supported
+ Architecture
+
+ ==================== ============== ================= ================== ================= =================================
+ ``gfx9-generic`` ``amdgcn`` - ``gfx900`` - xnack - Absolute flat - ``v_mad_mix`` instructions
+ - ``gfx902`` scratch are not available on
+ - ``gfx904`` ``gfx900``, ``gfx902``,
+ - ``gfx906`` ``gfx909``, ``gfx90c``
+ - ``gfx909`` - ``v_fma_mix`` instructions
+ - ``gfx90c`` are not available on ``gfx904``
+ - sramecc is not available on
+ ``gfx906``
+ - The following instructions
+ are not available on ``gfx906``:
+
+ - ``v_fmac_f32``
+ - ``v_xnor_b32``
+ - ``v_dot4_i32_i8``
+ - ``v_dot8_i32_i4``
+ - ``v_dot2_i32_i16``
+ - ``v_dot2_u32_u16``
+ - ``v_dot4_u32_u8``
+ - ``v_dot8_u32_u4``
+ - ``v_dot2_f32_f16``
+
+
+ ``gfx10.1-generic`` ``amdgcn`` - ``gfx1010`` - xnack - Absolute flat - The following instructions are
+ - ``gfx1011`` - wavefrontsize64 scratch not available on ``gfx1011``
+ - ``gfx1012`` - cumode and ``gfx1012``
+ - ``gfx1013``
+ - ``v_dot4_i32_i8``
+ - ``v_dot8_i32_i4``
+ - ``v_dot2_i32_i16``
+ - ``v_dot2_u32_u16``
+ - ``v_dot2c_f32_f16``
+ - ``v_dot4c_i32_i8``
+ - ``v_dot4_u32_u8``
+ - ``v_dot8_u32_u4``
+ - ``v_dot2_f32_f16``
+
+ - BVH Ray Tracing instructions
+ are not available on
+ ``gfx1013``
+
+
+ ``gfx10.3-generic`` ``amdgcn`` - ``gfx1030`` - wavefrontsize64 - Absolute flat No restrictions.
+ - ``gfx1031`` - cumode scratch
+ - ``gfx1032``
+ - ``gfx1033``
+ - ``gfx1034``
+ - ``gfx1035``
+ - ``gfx1036``
+
+
+ ``gfx11-generic`` ``amdgcn`` - ``gfx1100`` - wavefrontsize64 - Architected Various codegen pessimizations
+ - ``gfx1101`` - cumode flat scratch are applied to work around some
+ - ``gfx1102`` - Packed hazards specific to some targets
+ - ``gfx1103`` work-item within this family.
+ - ``gfx1150`` IDs
+ - ``gfx1151`` Not all VGPRs can be used on:
+
+ - ``gfx1100``
+ - ``gfx1101``
+ - ``gfx1151``
+
+ SALU floating point instructions
+ and single-use VGPR hint
+ instructions are not available
+ on:
+
+ - ``gfx1150``
+ - ``gfx1151``
+
+ SGPRs are not supported for src1
+ in dpp instructions for:
+
+ - ``gfx1150``
+ - ``gfx1151``
+ ==================== ============== ================= ================== ================= =================================
+
+
.. _amdgpu-target-features:
Target Features
@@ -533,7 +635,7 @@ generating the code. A mismatch of features may result in incorrect
execution, or a reduction in performance.
The target features supported by each processor is listed in
-:ref:`amdgpu-processor-table`.
+:ref:`amdgpu-processors`.
Target features are controlled by exactly one of the following Clang
options:
@@ -1443,6 +1545,7 @@ The AMDGPU backend uses the following ELF header:
- ``ELFABIVERSION_AMDGPU_HSA_V3``
- ``ELFABIVERSION_AMDGPU_HSA_V4``
- ``ELFABIVERSION_AMDGPU_HSA_V5``
+ - ``ELFABIVERSION_AMDGPU_HSA_V6``
- ``ELFABIVERSION_AMDGPU_PAL``
- ``ELFABIVERSION_AMDGPU_MESA3D``
``e_type`` - ``ET_REL``
@@ -1451,7 +1554,8 @@ The AMDGPU backend uses the following ELF header:
``e_entry`` 0
``e_flags`` See :ref:`amdgpu-elf-header-e_flags-v2-table`,
:ref:`amdgpu-elf-header-e_flags-table-v3`,
- and :ref:`amdgpu-elf-header-e_flags-table-v4-onwards`
+ :ref:`amdgpu-elf-header-e_flags-table-v4-v5`,
+ and :ref:`amdgpu-elf-header-e_flags-table-v6-onwards`
========================== ===============================
..
@@ -1471,6 +1575,7 @@ The AMDGPU backend uses the following ELF header:
``ELFABIVERSION_AMDGPU_HSA_V3`` 1
``ELFABIVERSION_AMDGPU_HSA_V4`` 2
``ELFABIVERSION_AMDGPU_HSA_V5`` 3
+ ``ELFABIVERSION_AMDGPU_HSA_V6`` 4
``ELFABIVERSION_AMDGPU_PAL`` 0
``ELFABIVERSION_AMDGPU_MESA3D`` 0
=============================== =====
@@ -1517,6 +1622,10 @@ The AMDGPU backend uses the following ELF header:
``-mcode-object-version=5``. This is the default code object
version if not specified.
+ * ``ELFABIVERSION_AMDGPU_HSA_V6`` is used to specify the version of AMD HSA
+ runtime ABI for code object V6. Specify using the Clang option
+ ``-mcode-object-version=6``.
+
* ``ELFABIVERSION_AMDGPU_PAL`` is used to specify the version of AMD PAL
runtime ABI.
@@ -1543,8 +1652,9 @@ The AMDGPU backend uses the following ELF header:
``NT_AMD_HSA_ISA_VERSION`` note record for code object V2 (see
:ref:`amdgpu-note-records-v2`) and in the ``EF_AMDGPU_MACH`` bit field of the
``e_flags`` for code object V3 and above (see
- :ref:`amdgpu-elf-header-e_flags-table-v3` and
- :ref:`amdgpu-elf-header-e_flags-table-v4-onwards`).
+ :ref:`amdgpu-elf-header-e_flags-table-v3`,
+ :ref:`amdgpu-elf-header-e_flags-table-v4-v5` and
+ :ref:`amdgpu-elf-header-e_flags-table-v6-onwards`).
``e_entry``
The entry point is 0 as the entry points for individual kernels must be
@@ -1615,8 +1725,8 @@ The AMDGPU backend uses the following ELF header:
:ref:`amdgpu-target-features`.
================================= ===== =============================
- .. table:: AMDGPU ELF Header ``e_flags`` for Code Object V4 and After
- :name: amdgpu-elf-header-e_flags-table-v4-onwards
+ .. table:: AMDGPU ELF Header ``e_flags`` for Code Object V4 and V5
+ :name: amdgpu-elf-header-e_flags-table-v4-v5
============================================ ===== ===================================
Name Value Description
@@ -1642,80 +1752,120 @@ The AMDGPU backend uses the following ELF header:
``EF_AMDGPU_FEATURE_SRAMECC_ON_V4`` 0xc00 SRAMECC enabled.
============================================ ===== ===================================
+ .. table:: AMDGPU ELF Header ``e_flags`` for Code Object V6 and After
+ :name: amdgpu-elf-header-e_flags-table-v6-onwards
+
+ ============================================ ========== =========================================
+ Name Value Description
+ ============================================ ========== =========================================
+ ``EF_AMDGPU_MACH`` 0x0ff AMDGPU processor selection
+ mask for
+ ``EF_AMDGPU_MACH_xxx`` values
+ defined in
+ :ref:`amdgpu-ef-amdgpu-mach-table`.
+ ``EF_AMDGPU_FEATURE_XNACK_V4`` 0x300 XNACK selection mask for
+ ``EF_AMDGPU_FEATURE_XNACK_*_V4``
+ values.
+ ``EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4`` 0x000 XNACK unsupported.
+ ``EF_AMDGPU_FEATURE_XNACK_ANY_V4`` 0x100 XNACK can have any value.
+ ``EF_AMDGPU_FEATURE_XNACK_OFF_V4`` 0x200 XNACK disabled.
+ ``EF_AMDGPU_FEATURE_XNACK_ON_V4`` 0x300 XNACK enabled.
+ ``EF_AMDGPU_FEATURE_SRAMECC_V4`` 0xc00 SRAMECC selection mask for
+ ``EF_AMDGPU_FEATURE_SRAMECC_*_V4``
+ values.
+ ``EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4`` 0x000 SRAMECC unsupported.
+ ``EF_AMDGPU_FEATURE_SRAMECC_ANY_V4`` 0x400 SRAMECC can have any value.
+ ``EF_AMDGPU_FEATURE_SRAMECC_OFF_V4`` 0x800 SRAMECC disabled,
+ ``EF_AMDGPU_FEATURE_SRAMECC_ON_V4`` 0xc00 SRAMECC enabled.
+ ``EF_AMDGPU_GENERIC_VERSION_V`` 0xff000000 Generic code object version selection
+ mask. This is a value between 1 and 255,
+ stored in the most significant byte
+ of EFLAGS.
+ See :ref:`amdgpu-generic-processor-table`
+ ============================================ ========== =========================================
+
.. table:: AMDGPU ``EF_AMDGPU_MACH`` Values
:name: amdgpu-ef-amdgpu-mach-table
- ==================================== ========== =============================
- Name Value Description (see
- :ref:`amdgpu-processor-table`)
- ==================================== ========== =============================
- ``EF_AMDGPU_MACH_NONE`` 0x000 *not specified*
- ``EF_AMDGPU_MACH_R600_R600`` 0x001 ``r600``
- ``EF_AMDGPU_MACH_R600_R630`` 0x002 ``r630``
- ``EF_AMDGPU_MACH_R600_RS880`` 0x003 ``rs880``
- ``EF_AMDGPU_MACH_R600_RV670`` 0x004 ``rv670``
- ``EF_AMDGPU_MACH_R600_RV710`` 0x005 ``rv710``
- ``EF_AMDGPU_MACH_R600_RV730`` 0x006 ``rv730``
- ``EF_AMDGPU_MACH_R600_RV770`` 0x007 ``rv770``
- ``EF_AMDGPU_MACH_R600_CEDAR`` 0x008 ``cedar``
- ``EF_AMDGPU_MACH_R600_CYPRESS`` 0x009 ``cypress``
- ``EF_AMDGPU_MACH_R600_JUNIPER`` 0x00a ``juniper``
- ``EF_AMDGPU_MACH_R600_REDWOOD`` 0x00b ``redwood``
- ``EF_AMDGPU_MACH_R600_SUMO`` 0x00c ``sumo``
- ``EF_AMDGPU_MACH_R600_BARTS`` 0x00d ``barts``
- ``EF_AMDGPU_MACH_R600_CAICOS`` 0x00e ``caicos``
- ``EF_AMDGPU_MACH_R600_CAYMAN`` 0x00f ``cayman``
- ``EF_AMDGPU_MACH_R600_TURKS`` 0x010 ``turks``
- *reserved* 0x011 - Reserved for ``r600``
- 0x01f architecture processors.
- ``EF_AMDGPU_MACH_AMDGCN_GFX600`` 0x020 ``gfx600``
- ``EF_AMDGPU_MACH_AMDGCN_GFX601`` 0x021 ``gfx601``
- ``EF_AMDGPU_MACH_AMDGCN_GFX700`` 0x022 ``gfx700``
- ``EF_AMDGPU_MACH_AMDGCN_GFX701`` 0x023 ``gfx701``
- ``EF_AMDGPU_MACH_AMDGCN_GFX702`` 0x024 ``gfx702``
- ``EF_AMDGPU_MACH_AMDGCN_GFX703`` 0x025 ``gfx703``
- ``EF_AMDGPU_MACH_AMDGCN_GFX704`` 0x026 ``gfx704``
- *reserved* 0x027 Reserved.
- ``EF_AMDGPU_MACH_AMDGCN_GFX801`` 0x028 ``gfx801``
- ``EF_AMDGPU_MACH_AMDGCN_GFX802`` 0x029 ``gfx802``
- ``EF_AMDGPU_MACH_AMDGCN_GFX803`` 0x02a ``gfx803``
- ``EF_AMDGPU_MACH_AMDGCN_GFX810`` 0x02b ``gfx810``
- ``EF_AMDGPU_MACH_AMDGCN_GFX900`` 0x02c ``gfx900``
- ``EF_AMDGPU_MACH_AMDGCN_GFX902`` 0x02d ``gfx902``
- ``EF_AMDGPU_MACH_AMDGCN_GFX904`` 0x02e ``gfx904``
- ``EF_AMDGPU_MACH_AMDGCN_GFX906`` 0x02f ``gfx906``
- ``EF_AMDGPU_MACH_AMDGCN_GFX908`` 0x030 ``gfx908``
- ``EF_AMDGPU_MACH_AMDGCN_GFX909`` 0x031 ``gfx909``
- ``EF_AMDGPU_MACH_AMDGCN_GFX90C`` 0x032 ``gfx90c``
- ``EF_AMDGPU_MACH_AMDGCN_GFX1010`` 0x033 ``gfx1010``
- ``EF_AMDGPU_MACH_AMDGCN_GFX1011`` 0x034 ``gfx1011``
- ``EF_AMDGPU_MACH_AMDGCN_GFX1012`` 0x035 ``gfx1012``
- ``EF_AMDGPU_MACH_AMDGCN_GFX1030`` 0x036 ``gfx1030``
- ``EF_AMDGPU_MACH_AMDGCN_GFX1031`` 0x037 ``gfx1031``
- ``EF_AMDGPU_MACH_AMDGCN_GFX1032`` 0x038 ``gfx1032``
- ``EF_AMDGPU_MACH_AMDGCN_GFX1033`` 0x039 ``gfx1033``
- ``EF_AMDGPU_MACH_AMDGCN_GFX602`` 0x03a ``gfx602``
- ``EF_AMDGPU_MACH_AMDGCN_GFX705`` 0x03b ``gfx705``
- ``EF_AMDGPU_MACH_AMDGCN_GFX805`` 0x03c ``gfx805``
- ``EF_AMDGPU_MACH_AMDGCN_GFX1035`` 0x03d ``gfx1035``
- ``EF_AMDGPU_MACH_AMDGCN_GFX1034`` 0x03e ``gfx1034``
- ``EF_AMDGPU_MACH_AMDGCN_GFX90A`` 0x03f ``gfx90a``
- ``EF_AMDGPU_MACH_AMDGCN_GFX940`` 0x040 ``gfx940``
- ``EF_AMDGPU_MACH_AMDGCN_GFX1100`` 0x041 ``gfx1100``
- ``EF_AMDGPU_MACH_AMDGCN_GFX1013`` 0x042 ``gfx1013``
- ``EF_AMDGPU_MACH_AMDGCN_GFX1150`` 0x043 ``gfx1150``
- ``EF_AMDGPU_MACH_AMDGCN_GFX1103`` 0x044 ``gfx1103``
- ``EF_AMDGPU_MACH_AMDGCN_GFX1036`` 0x045 ``gfx1036``
- ``EF_AMDGPU_MACH_AMDGCN_GFX1101`` 0x046 ``gfx1101``
- ``EF_AMDGPU_MACH_AMDGCN_GFX1102`` 0x047 ``gfx1102``
- ``EF_AMDGPU_MACH_AMDGCN_GFX1200`` 0x048 ``gfx1200``
- *reserved* 0x049 Reserved.
- ``EF_AMDGPU_MACH_AMDGCN_GFX1151`` 0x04a ``gfx1151``
- ``EF_AMDGPU_MACH_AMDGCN_GFX941`` 0x04b ``gfx941``
- ``EF_AMDGPU_MACH_AMDGCN_GFX942`` 0x04c ``gfx942``
- *reserved* 0x04d Reserved.
- ``EF_AMDGPU_MACH_AMDGCN_GFX1201`` 0x04e ``gfx1201``
- ==================================== ========== =============================
+ ========================================== ========== =============================
+ Name Value Description (see
+ :ref:`amdgpu-processor-table`)
+ ========================================== ========== =============================
+ ``EF_AMDGPU_MACH_NONE`` 0x000 *not specified*
+ ``EF_AMDGPU_MACH_R600_R600`` 0x001 ``r600``
+ ``EF_AMDGPU_MACH_R600_R630`` 0x002 ``r630``
+ ``EF_AMDGPU_MACH_R600_RS880`` 0x003 ``rs880``
+ ``EF_AMDGPU_MACH_R600_RV670`` 0x004 ``rv670``
+ ``EF_AMDGPU_MACH_R600_RV710`` 0x005 ``rv710``
+ ``EF_AMDGPU_MACH_R600_RV730`` 0x006 ``rv730``
+ ``EF_AMDGPU_MACH_R600_RV770`` 0x007 ``rv770``
+ ``EF_AMDGPU_MACH_R600_CEDAR`` 0x008 ``cedar``
+ ``EF_AMDGPU_MACH_R600_CYPRESS`` 0x009 ``cypress``
+ ``EF_AMDGPU_MACH_R600_JUNIPER`` 0x00a ``juniper``
+ ``EF_AMDGPU_MACH_R600_REDWOOD`` 0x00b ``redwood``
+ ``EF_AMDGPU_MACH_R600_SUMO`` 0x00c ``sumo``
+ ``EF_AMDGPU_MACH_R600_BARTS`` 0x00d ``barts``
+ ``EF_AMDGPU_MACH_R600_CAICOS`` 0x00e ``caicos``
+ ``EF_AMDGPU_MACH_R600_CAYMAN`` 0x00f ``cayman``
+ ``EF_AMDGPU_MACH_R600_TURKS`` 0x010 ``turks``
+ *reserved* 0x011 - Reserved for ``r600``
+ 0x01f architecture processors.
+ ``EF_AMDGPU_MACH_AMDGCN_GFX600`` 0x020 ``gfx600``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX601`` 0x021 ``gfx601``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX700`` 0x022 ``gfx700``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX701`` 0x023 ``gfx701``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX702`` 0x024 ``gfx702``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX703`` 0x025 ``gfx703``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX704`` 0x026 ``gfx704``
+ *reserved* 0x027 Reserved.
+ ``EF_AMDGPU_MACH_AMDGCN_GFX801`` 0x028 ``gfx801``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX802`` 0x029 ``gfx802``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX803`` 0x02a ``gfx803``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX810`` 0x02b ``gfx810``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX900`` 0x02c ``gfx900``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX902`` 0x02d ``gfx902``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX904`` 0x02e ``gfx904``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX906`` 0x02f ``gfx906``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX908`` 0x030 ``gfx908``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX909`` 0x031 ``gfx909``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX90C`` 0x032 ``gfx90c``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1010`` 0x033 ``gfx1010``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1011`` 0x034 ``gfx1011``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1012`` 0x035 ``gfx1012``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1030`` 0x036 ``gfx1030``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1031`` 0x037 ``gfx1031``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1032`` 0x038 ``gfx1032``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1033`` 0x039 ``gfx1033``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX602`` 0x03a ``gfx602``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX705`` 0x03b ``gfx705``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX805`` 0x03c ``gfx805``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1035`` 0x03d ``gfx1035``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1034`` 0x03e ``gfx1034``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX90A`` 0x03f ``gfx90a``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX940`` 0x040 ``gfx940``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1100`` 0x041 ``gfx1100``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1013`` 0x042 ``gfx1013``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1150`` 0x043 ``gfx1150``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1103`` 0x044 ``gfx1103``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1036`` 0x045 ``gfx1036``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1101`` 0x046 ``gfx1101``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1102`` 0x047 ``gfx1102``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1200`` 0x048 ``gfx1200``
+ *reserved* 0x049 Reserved.
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1151`` 0x04a ``gfx1151``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX941`` 0x04b ``gfx941``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX942`` 0x04c ``gfx942``
+ *reserved* 0x04d Reserved.
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1201`` 0x04e ``gfx1201``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1201`` 0x04e ``gfx1201``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1201`` 0x04e ``gfx1201``
+ *reserved* 0x04f Reserved.
+ *reserved* 0x050 Reserved.
+ ``EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC`` 0x051 ``gfx9-generic``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC`` 0x052 ``gfx10.1-generic``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC`` 0x053 ``gfx10.3-generic``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC`` 0x054 ``gfx11-generic``
+ ========================================== ========== =============================
Sections
--------
diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h
index efd41f9..3eddaee 100644
--- a/llvm/include/llvm/BinaryFormat/ELF.h
+++ b/llvm/include/llvm/BinaryFormat/ELF.h
@@ -790,11 +790,15 @@ enum : unsigned {
EF_AMDGPU_MACH_AMDGCN_GFX1201 = 0x04e,
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X4F = 0x04f,
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X50 = 0x050,
+ EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC = 0x051,
+ EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC = 0x052,
+ EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC = 0x053,
+ EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC = 0x054,
// clang-format on
// First/last AMDGCN-based processors.
EF_AMDGPU_MACH_AMDGCN_FIRST = EF_AMDGPU_MACH_AMDGCN_GFX600,
- EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX1201,
+ EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC,
// Indicates if the "xnack" target feature is enabled for all code contained
// in the object.
diff --git a/llvm/include/llvm/TargetParser/TargetParser.h b/llvm/include/llvm/TargetParser/TargetParser.h
index 6464285..7da1170 100644
--- a/llvm/include/llvm/TargetParser/TargetParser.h
+++ b/llvm/include/llvm/TargetParser/TargetParser.h
@@ -111,6 +111,14 @@ enum GPUKind : uint32_t {
GK_AMDGCN_FIRST = GK_GFX600,
GK_AMDGCN_LAST = GK_GFX1201,
+
+ GK_GFX9_GENERIC = 192,
+ GK_GFX10_1_GENERIC = 193,
+ GK_GFX10_3_GENERIC = 194,
+ GK_GFX11_GENERIC = 195,
+
+ GK_AMDGCN_GENERIC_FIRST = GK_GFX9_GENERIC,
+ GK_AMDGCN_GENERIC_LAST = GK_GFX11_GENERIC,
};
/// Instruction set architecture version.
@@ -147,6 +155,8 @@ enum ArchFeatureKind : uint32_t {
FEATURE_WGP = 1 << 9,
};
+StringRef getArchFamilyNameAMDGCN(GPUKind AK);
+
StringRef getArchNameAMDGCN(GPUKind AK);
StringRef getArchNameR600(GPUKind AK);
StringRef getCanonicalArchName(const Triple &T, StringRef Arch);
diff --git a/llvm/lib/Object/ELFObjectFile.cpp b/llvm/lib/Object/ELFObjectFile.cpp
index 38a9e0e..01949c6 100644
--- a/llvm/lib/Object/ELFObjectFile.cpp
+++ b/llvm/lib/Object/ELFObjectFile.cpp
@@ -514,6 +514,16 @@ StringRef ELFObjectFileBase::getAMDGPUCPUName() const {
return "gfx1200";
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1201:
return "gfx1201";
+
+ // Generic AMDGCN targets
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC:
+ return "gfx9-generic";
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC:
+ return "gfx10.1-generic";
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC:
+ return "gfx10.3-generic";
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC:
+ return "gfx11-generic";
default:
llvm_unreachable("Unknown EF_AMDGPU_MACH value");
}
diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp
index 1436e92..de1ef24 100644
--- a/llvm/lib/ObjectYAML/ELFYAML.cpp
+++ b/llvm/lib/ObjectYAML/ELFYAML.cpp
@@ -612,6 +612,10 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO,
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1151, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1200, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1201, EF_AMDGPU_MACH);
+ BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC, EF_AMDGPU_MACH);
+ BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC, EF_AMDGPU_MACH);
+ BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC, EF_AMDGPU_MACH);
+ BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC, EF_AMDGPU_MACH);
switch (Object->Header.ABIVersion) {
default:
// ELFOSABI_AMDGPU_PAL, ELFOSABI_AMDGPU_MESA3D support *_V3 flags.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 55dbc1a..4ab2b12 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -1002,6 +1002,12 @@ def FeatureGWS : SubtargetFeature<"gws",
"Has Global Wave Sync"
>;
+def FeatureRequiresCOV6 : SubtargetFeature<"requires-cov6",
+ "RequiresCOV6",
+ "true",
+ "Target Requires Code Object V6"
+>;
+
// Dummy feature used to disable assembler instructions.
def FeatureDisable : SubtargetFeature<"",
"FeatureDisable","true",
@@ -1212,6 +1218,17 @@ def FeatureISAVersion9_0_Common : FeatureSet<
FeatureImageInsts,
FeatureMadMacF32Insts]>;
+def FeatureISAVersion9_0_Consumer_Common : FeatureSet<
+ !listconcat(FeatureISAVersion9_0_Common.Features,
+ [FeatureImageGather4D16Bug,
+ FeatureDsSrc2Insts,
+ FeatureExtendedImageInsts,
+ FeatureGDS])>;
+
+def FeatureISAVersion9_Generic : FeatureSet<
+ !listconcat(FeatureISAVersion9_0_Consumer_Common.Features,
+ [FeatureRequiresCOV6])>;
+
def FeatureISAVersion9_0_MI_Common : FeatureSet<
!listconcat(FeatureISAVersion9_0_Common.Features,
[FeatureFmaMixInsts,
@@ -1230,43 +1247,27 @@ def FeatureISAVersion9_0_MI_Common : FeatureSet<
FeatureSupportsSRAMECC])>;
def FeatureISAVersion9_0_0 : FeatureSet<
- !listconcat(FeatureISAVersion9_0_Common.Features,
- [FeatureGDS,
- FeatureMadMixInsts,
- FeatureDsSrc2Insts,
- FeatureExtendedImageInsts,
- FeatureImageGather4D16Bug])>;
+ !listconcat(FeatureISAVersion9_0_Consumer_Common.Features,
+ [FeatureMadMixInsts])>;
def FeatureISAVersion9_0_2 : FeatureSet<
- !listconcat(FeatureISAVersion9_0_Common.Features,
- [FeatureGDS,
- FeatureMadMixInsts,
- FeatureDsSrc2Insts,
- FeatureExtendedImageInsts,
- FeatureImageGather4D16Bug])>;
+ !listconcat(FeatureISAVersion9_0_Consumer_Common.Features,
+ [FeatureMadMixInsts])>;
def FeatureISAVersion9_0_4 : FeatureSet<
- !listconcat(FeatureISAVersion9_0_Common.Features,
- [FeatureGDS,
- FeatureDsSrc2Insts,
- FeatureExtendedImageInsts,
- FeatureFmaMixInsts,
- FeatureImageGather4D16Bug])>;
+ !listconcat(FeatureISAVersion9_0_Consumer_Common.Features,
+ [FeatureFmaMixInsts])>;
def FeatureISAVersion9_0_6 : FeatureSet<
- !listconcat(FeatureISAVersion9_0_Common.Features,
- [FeatureGDS,
- HalfRate64Ops,
+ !listconcat(FeatureISAVersion9_0_Consumer_Common.Features,
+ [HalfRate64Ops,
FeatureFmaMixInsts,
- FeatureDsSrc2Insts,
- FeatureExtendedImageInsts,
FeatureDLInsts,
FeatureDot1Insts,
FeatureDot2Insts,
FeatureDot7Insts,
FeatureDot10Insts,
- FeatureSupportsSRAMECC,
- FeatureImageGather4D16Bug])>;
+ FeatureSupportsSRAMECC])>;
def FeatureISAVersion9_0_8 : FeatureSet<
!listconcat(FeatureISAVersion9_0_MI_Common.Features,
@@ -1279,13 +1280,9 @@ def FeatureISAVersion9_0_8 : FeatureSet<
FeatureImageGather4D16Bug])>;
def FeatureISAVersion9_0_9 : FeatureSet<
- !listconcat(FeatureISAVersion9_0_Common.Features,
- [FeatureGDS,
- FeatureMadMixInsts,
- FeatureDsSrc2Insts,
- FeatureExtendedImageInsts,
- FeatureImageInsts,
- FeatureImageGather4D16Bug])>;
+ !listconcat(FeatureISAVersion9_0_Consumer_Common.Features,
+ [FeatureMadMixInsts,
+ FeatureImageInsts])>;
def FeatureISAVersion9_0_A : FeatureSet<
!listconcat(FeatureISAVersion9_0_MI_Common.Features,
@@ -1301,12 +1298,8 @@ def FeatureISAVersion9_0_A : FeatureSet<
FeatureKernargPreload])>;
def FeatureISAVersion9_0_C : FeatureSet<
- !listconcat(FeatureISAVersion9_0_Common.Features,
- [FeatureGDS,
- FeatureMadMixInsts,
- FeatureDsSrc2Insts,
- FeatureExtendedImageInsts,
- FeatureImageGather4D16Bug])>;
+ !listconcat(FeatureISAVersion9_0_Consumer_Common.Features,
+ [FeatureMadMixInsts])>;
def FeatureISAVersion9_4_Common : FeatureSet<
[FeatureGFX9,
@@ -1387,6 +1380,10 @@ def FeatureISAVersion10_1_Common : FeatureSet<
FeatureFlatSegmentOffsetBug,
FeatureNegativeUnalignedScratchOffsetBug])>;
+def FeatureISAVersion10_1_Generic : FeatureSet<
+ !listconcat(FeatureISAVersion10_1_Common.Features,
+ [FeatureRequiresCOV6])>;
+
def FeatureISAVersion10_1_0 : FeatureSet<
!listconcat(FeatureISAVersion10_1_Common.Features,
[])>;
@@ -1426,6 +1423,10 @@ def FeatureISAVersion10_3_0 : FeatureSet<
FeatureDot10Insts,
FeatureShaderCyclesRegister])>;
+def FeatureISAVersion10_3_Generic: FeatureSet<
+ !listconcat(FeatureISAVersion10_3_0.Features,
+ [FeatureRequiresCOV6])>;
+
def FeatureISAVersion11_Common : FeatureSet<
[FeatureGFX11,
FeatureLDSBankCount32,
@@ -1447,6 +1448,16 @@ def FeatureISAVersion11_Common : FeatureSet<
FeaturePackedTID,
FeatureVcmpxPermlaneHazard]>;
+// There are few workarounds that need to be
+// added to all targets. This pessimizes codegen
+// a bit on the generic GFX11 target.
+def FeatureISAVersion11_Generic: FeatureSet<
+ !listconcat(FeatureISAVersion11_Common.Features,
+ [FeatureMSAALoadDstSelBug,
+ FeatureVALUTransUseHazard,
+ FeatureUserSGPRInit16Bug,
+ FeatureRequiresCOV6])>;
+
def FeatureISAVersion11_0_Common : FeatureSet<
!listconcat(FeatureISAVersion11_Common.Features,
[FeatureMSAALoadDstSelBug,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index db81e1e..5777a7c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -156,6 +156,13 @@ void AMDGPUAsmPrinter::emitFunctionBodyStart() {
const GCNSubtarget &STM = MF->getSubtarget<GCNSubtarget>();
const Function &F = MF->getFunction();
+ // TODO: We're checking this late, would be nice to check it earlier.
+ if (STM.requiresCodeObjectV6() && CodeObjectVersion < AMDGPU::AMDHSA_COV6) {
+ report_fatal_error(
+ STM.getCPU() + " is only available on code object version 6 or better",
+ /*gen_crash_diag*/ false);
+ }
+
// TODO: Which one is called first, emitStartOfAsmFile or
// emitFunctionBodyStart?
if (!getTargetStreamer()->getTargetID())
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURemoveIncompatibleFunctions.cpp b/llvm/lib/Target/AMDGPU/AMDGPURemoveIncompatibleFunctions.cpp
index 6f1236f..9d44b65 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURemoveIncompatibleFunctions.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURemoveIncompatibleFunctions.cpp
@@ -139,10 +139,10 @@ bool AMDGPURemoveIncompatibleFunctions::checkFunction(Function &F) {
const GCNSubtarget *ST =
static_cast<const GCNSubtarget *>(TM->getSubtargetImpl(F));
- // Check the GPU isn't generic. Generic is used for testing only
- // and we don't want this pass to interfere with it.
+ // Check the GPU isn't generic or generic-hsa. Generic is used for testing
+ // only and we don't want this pass to interfere with it.
StringRef GPUName = ST->getCPU();
- if (GPUName.empty() || GPUName.contains("generic"))
+ if (GPUName.empty() || GPUName.starts_with("generic"))
return false;
// Try to fetch the GPU's info. If we can't, it's likely an unknown processor
diff --git a/llvm/lib/Target/AMDGPU/GCNProcessors.td b/llvm/lib/Target/AMDGPU/GCNProcessors.td
index 96af1a6..4671e03 100644
--- a/llvm/lib/Target/AMDGPU/GCNProcessors.td
+++ b/llvm/lib/Target/AMDGPU/GCNProcessors.td
@@ -204,6 +204,11 @@ def : ProcessorModel<"gfx942", SIDPGFX940FullSpeedModel,
FeatureISAVersion9_4_2.Features
>;
+// [gfx900, gfx902, gfx904, gfx906, gfx909, gfx90c]
+def : ProcessorModel<"gfx9-generic", SIQuarterSpeedModel,
+ FeatureISAVersion9_Generic.Features
+>;
+
//===----------------------------------------------------------------------===//
// GCN GFX10.
//===----------------------------------------------------------------------===//
@@ -252,6 +257,16 @@ def : ProcessorModel<"gfx1036", GFX10SpeedModel,
FeatureISAVersion10_3_0.Features
>;
+// [gfx1010, gfx1011, gfx1012, gfx1013]
+def : ProcessorModel<"gfx10.1-generic", GFX10SpeedModel,
+ FeatureISAVersion10_1_Generic.Features
+>;
+
+// [gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035, gfx1036]
+def : ProcessorModel<"gfx10.3-generic", GFX10SpeedModel,
+ FeatureISAVersion10_3_Generic.Features
+>;
+
//===----------------------------------------------------------------------===//
// GCN GFX11.
//===----------------------------------------------------------------------===//
@@ -280,10 +295,17 @@ def : ProcessorModel<"gfx1151", GFX11SpeedModel,
FeatureISAVersion11_5_1.Features
>;
+// [gfx1100, gfx1101, gfx1102, gfx1103, gfx1150, gfx1151]
+def : ProcessorModel<"gfx11-generic", GFX11SpeedModel,
+ FeatureISAVersion11_Generic.Features
+>;
+
//===----------------------------------------------------------------------===//
// GCN GFX12.
//===----------------------------------------------------------------------===//
+// TODO: gfx12-generic ?
+
def : ProcessorModel<"gfx1200", GFX12SpeedModel,
FeatureISAVersion12.Features
>;
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 4f8eeaa..b13b4f7 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -224,6 +224,8 @@ protected:
bool HasVALUTransUseHazard = false;
bool HasForceStoreSC0SC1 = false;
+ bool RequiresCOV6 = false;
+
// Dummy feature to use for assembler in tablegen.
bool FeatureDisable = false;
@@ -1165,6 +1167,8 @@ public:
bool hasForceStoreSC0SC1() const { return HasForceStoreSC0SC1; }
+ bool requiresCodeObjectV6() const { return RequiresCOV6; }
+
bool hasVALUMaskWriteHazard() const { return getGeneration() == GFX11; }
/// Return if operations acting on VGPR tuples require even alignment.
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index 5e9b167..a25622c 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -115,6 +115,10 @@ StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) {
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1151: AK = GK_GFX1151; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1200: AK = GK_GFX1200; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1201: AK = GK_GFX1201; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC: AK = GK_GFX9_GENERIC; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC: AK = GK_GFX10_1_GENERIC; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC: AK = GK_GFX10_3_GENERIC; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC: AK = GK_GFX11_GENERIC; break;
case ELF::EF_AMDGPU_MACH_NONE: AK = GK_NONE; break;
default: AK = GK_NONE; break;
}
@@ -193,6 +197,10 @@ unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) {
case GK_GFX1151: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1151;
case GK_GFX1200: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1200;
case GK_GFX1201: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1201;
+ case GK_GFX9_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC;
+ case GK_GFX10_1_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC;
+ case GK_GFX10_3_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC;
+ case GK_GFX11_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC;
case GK_NONE: return ELF::EF_AMDGPU_MACH_NONE;
}
// clang-format on
@@ -659,6 +667,24 @@ unsigned AMDGPUTargetELFStreamer::getEFlagsV6() {
unsigned Flags = getEFlagsV4();
unsigned Version = ForceGenericVersion;
+ if (!Version) {
+ switch (parseArchAMDGCN(STI.getCPU())) {
+ case AMDGPU::GK_GFX9_GENERIC:
+ Version = GenericVersion::GFX9;
+ break;
+ case AMDGPU::GK_GFX10_1_GENERIC:
+ Version = GenericVersion::GFX10_1;
+ break;
+ case AMDGPU::GK_GFX10_3_GENERIC:
+ Version = GenericVersion::GFX10_3;
+ break;
+ case AMDGPU::GK_GFX11_GENERIC:
+ Version = GenericVersion::GFX11;
+ break;
+ default:
+ break;
+ }
+ }
// Versions start at 1.
if (Version) {
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index f24b9f0..ded252c 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -42,6 +42,17 @@ namespace AMDGPU {
struct IsaVersion;
+/// Generic target versions emitted by this version of LLVM.
+///
+/// These numbers are incremented every time a codegen breaking change occurs
+/// within a generic family.
+namespace GenericVersion {
+static constexpr unsigned GFX9 = 1;
+static constexpr unsigned GFX10_1 = 1;
+static constexpr unsigned GFX10_3 = 1;
+static constexpr unsigned GFX11 = 1;
+} // namespace GenericVersion
+
enum { AMDHSA_COV4 = 4, AMDHSA_COV5 = 5, AMDHSA_COV6 = 6 };
/// \returns True if \p STI is AMDHSA.
diff --git a/llvm/lib/TargetParser/TargetParser.cpp b/llvm/lib/TargetParser/TargetParser.cpp
index 20f3246..684d698 100644
--- a/llvm/lib/TargetParser/TargetParser.cpp
+++ b/llvm/lib/TargetParser/TargetParser.cpp
@@ -126,6 +126,11 @@ constexpr GPUInfo AMDGCNGPUs[] = {
{{"gfx1151"}, {"gfx1151"}, GK_GFX1151, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
{{"gfx1200"}, {"gfx1200"}, GK_GFX1200, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
{{"gfx1201"}, {"gfx1201"}, GK_GFX1201, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
+
+ {{"gfx9-generic"}, {"gfx9-generic"}, GK_GFX9_GENERIC, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
+ {{"gfx10.1-generic"}, {"gfx10.1-generic"}, GK_GFX10_1_GENERIC, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK|FEATURE_WGP},
+ {{"gfx10.3-generic"}, {"gfx10.3-generic"}, GK_GFX10_3_GENERIC, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
+ {{"gfx11-generic"}, {"gfx11-generic"}, GK_GFX11_GENERIC, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
// clang-format on
};
@@ -144,6 +149,22 @@ const GPUInfo *getArchEntry(AMDGPU::GPUKind AK, ArrayRef<GPUInfo> Table) {
} // namespace
+StringRef llvm::AMDGPU::getArchFamilyNameAMDGCN(GPUKind AK) {
+ switch (AK) {
+ case AMDGPU::GK_GFX9_GENERIC:
+ return "gfx9";
+ case AMDGPU::GK_GFX10_1_GENERIC:
+ case AMDGPU::GK_GFX10_3_GENERIC:
+ return "gfx10";
+ case AMDGPU::GK_GFX11_GENERIC:
+ return "gfx11";
+ default: {
+ StringRef ArchName = getArchNameAMDGCN(AK);
+ return ArchName.empty() ? "" : ArchName.drop_back(2);
+ }
+ }
+}
+
StringRef llvm::AMDGPU::getArchNameAMDGCN(GPUKind AK) {
if (const auto *Entry = getArchEntry(AK, AMDGCNGPUs))
return Entry->CanonicalName;
@@ -253,6 +274,24 @@ AMDGPU::IsaVersion AMDGPU::getIsaVersion(StringRef GPU) {
case GK_GFX1151: return {11, 5, 1};
case GK_GFX1200: return {12, 0, 0};
case GK_GFX1201: return {12, 0, 1};
+
+ // Generic targets return the lowest common denominator
+ // within their family. That is, the ISA that is the most
+ // restricted in terms of features.
+ //
+ // gfx9-generic is tricky because there is no lowest
+ // common denominator, so we return gfx900 which has mad-mix
+ // but this family doesn't have it.
+ //
+ // This API should never be used to check for a particular
+ // feature anyway.
+ //
+ // TODO: Split up this API depending on its caller so
+ // generic target handling is more obvious and less risky.
+ case GK_GFX9_GENERIC: return {9, 0, 0};
+ case GK_GFX10_1_GENERIC: return {10, 1, 0};
+ case GK_GFX10_3_GENERIC: return {10, 3, 0};
+ case GK_GFX11_GENERIC: return {11, 0, 3};
default: return {0, 0, 0};
}
// clang-format on
@@ -302,6 +341,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
case GK_GFX1102:
case GK_GFX1101:
case GK_GFX1100:
+ case GK_GFX11_GENERIC:
Features["ci-insts"] = true;
Features["dot5-insts"] = true;
Features["dot7-insts"] = true;
@@ -327,6 +367,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
case GK_GFX1032:
case GK_GFX1031:
case GK_GFX1030:
+ case GK_GFX10_3_GENERIC:
Features["ci-insts"] = true;
Features["dot1-insts"] = true;
Features["dot2-insts"] = true;
@@ -357,6 +398,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
[[fallthrough]];
case GK_GFX1013:
case GK_GFX1010:
+ case GK_GFX10_1_GENERIC:
Features["dl-insts"] = true;
Features["ci-insts"] = true;
Features["16-bit-insts"] = true;
@@ -424,6 +466,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
case GK_GFX904:
case GK_GFX902:
case GK_GFX900:
+ case GK_GFX9_GENERIC:
Features["gfx9-insts"] = true;
[[fallthrough]];
case GK_GFX810:
@@ -510,6 +553,9 @@ static bool isWave32Capable(StringRef GPU, const Triple &T) {
case GK_GFX1011:
case GK_GFX1013:
case GK_GFX1010:
+ case GK_GFX11_GENERIC:
+ case GK_GFX10_3_GENERIC:
+ case GK_GFX10_1_GENERIC:
IsWave32Capable = true;
break;
default:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll
index 155f4c9..9698a38 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll
@@ -1,11 +1,12 @@
-; RUN: llc -global-isel -mtriple=amdgcn-unknown-amdhsa -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,HSA,UNPACKED %s
-; RUN: llc -global-isel -mtriple=amdgcn-unknown-amdhsa -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,HSA,UNPACKED %s
-; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=hawaii -mattr=+flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,MESA,UNPACKED %s
-; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=tonga -mattr=+flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,MESA,UNPACKED %s
-; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mattr=+flat-for-global -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,MESA3D,UNPACKED %s
-; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,MESA3D,UNPACKED %s
-; RUN: llc -global-isel -mtriple=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,PACKED-TID %s
-; RUN: llc -global-isel -mtriple=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=ALL,PACKED-TID %s
+; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-unknown-amdhsa -verify-machineinstrs | FileCheck --check-prefixes=ALL,HSA,UNPACKED %s
+; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-unknown-amdhsa -verify-machineinstrs| FileCheck --check-prefixes=ALL,HSA,UNPACKED %s
+; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-- -mcpu=hawaii -mattr=+flat-for-global -verify-machineinstrs | FileCheck --check-prefixes=ALL,MESA,UNPACKED %s
+; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-- -mcpu=tonga -mattr=+flat-for-global -verify-machineinstrs | FileCheck --check-prefixes=ALL,MESA,UNPACKED %s
+; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mattr=+flat-for-global -mcpu=hawaii -verify-machineinstrs | FileCheck -check-prefixes=ALL,MESA3D,UNPACKED %s
+; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefixes=ALL,MESA3D,UNPACKED %s
+; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -verify-machineinstrs | FileCheck -check-prefixes=ALL,PACKED-TID %s
+; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-vopd=0 | FileCheck -check-prefixes=ALL,PACKED-TID %s
+; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -global-isel -mtriple=amdgcn -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=6 -mcpu=gfx11-generic -verify-machineinstrs -amdgpu-enable-vopd=0 | FileCheck -check-prefixes=ALL,PACKED-TID %s
declare i32 @llvm.amdgcn.workitem.id.x() #0
declare i32 @llvm.amdgcn.workitem.id.y() #0
@@ -200,4 +201,4 @@ attributes #1 = { nounwind }
!2 = !{i32 1, i32 1, i32 64}
!llvm.module.flags = !{!99}
-!99 = !{i32 1, !"amdgpu_code_object_version", i32 400}
+!99 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION}
diff --git a/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll b/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll
index 357fcf8..038219f 100644
--- a/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll
+++ b/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll
@@ -108,6 +108,13 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX1200 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1201 < %s | FileCheck --check-prefixes=GFX1201 %s
+; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx9-generic -mattr=-xnack < %s | FileCheck --check-prefixes=GFX9_GENERIC_NOXNACK %s
+; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx9-generic -mattr=+xnack < %s | FileCheck --check-prefixes=GFX9_GENERIC_XNACK %s
+; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10.1-generic -mattr=-xnack < %s | FileCheck --check-prefixes=GFX10_1_GENERIC_NOXNACK %s
+; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10.1-generic -mattr=+xnack < %s | FileCheck --check-prefixes=GFX10_1_GENERIC_XNACK %s
+; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10.3-generic < %s | FileCheck --check-prefixes=GFX10_3_GENERIC %s
+; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx11-generic < %s | FileCheck --check-prefixes=GFX11_GENERIC %s
+
; GFX600: .amdgcn_target "amdgcn-amd-amdhsa--gfx600"
; GFX601: .amdgcn_target "amdgcn-amd-amdhsa--gfx601"
; GFX602: .amdgcn_target "amdgcn-amd-amdhsa--gfx602"
@@ -196,6 +203,13 @@
; GFX1200: .amdgcn_target "amdgcn-amd-amdhsa--gfx1200"
; GFX1201: .amdgcn_target "amdgcn-amd-amdhsa--gfx1201"
+; GFX9_GENERIC_NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx9-generic:xnack-"
+; GFX9_GENERIC_XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx9-generic:xnack+"
+; GFX10_1_GENERIC_NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx10.1-generic:xnack-"
+; GFX10_1_GENERIC_XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx10.1-generic:xnack+"
+; GFX10_3_GENERIC: .amdgcn_target "amdgcn-amd-amdhsa--gfx10.3-generic"
+; GFX11_GENERIC: .amdgcn_target "amdgcn-amd-amdhsa--gfx11-generic"
+
define amdgpu_kernel void @directive_amdgcn_target() {
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll b/llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll
index 380439d..9ba8176 100644
--- a/llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll
+++ b/llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll
@@ -77,6 +77,11 @@
; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx1200 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1200 %s
; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx1201 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1201 %s
+; RUN: llc -filetype=obj --amdhsa-code-object-version=6 -mtriple=amdgcn -mcpu=gfx9-generic < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX9_GENERIC %s
+; RUN: llc -filetype=obj --amdhsa-code-object-version=6 -mtriple=amdgcn -mcpu=gfx10.1-generic < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX10_1_GENERIC %s
+; RUN: llc -filetype=obj --amdhsa-code-object-version=6 -mtriple=amdgcn -mcpu=gfx10.3-generic < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX10_3_GENERIC %s
+; RUN: llc -filetype=obj --amdhsa-code-object-version=6 -mtriple=amdgcn -mcpu=gfx11-generic < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX11_GENERIC %s
+
; FIXME: With the default attributes the eflags are not accurate for
; xnack and sramecc. Subsequent Target-ID patches will address this.
@@ -149,6 +154,11 @@
; GFX1151: EF_AMDGPU_MACH_AMDGCN_GFX1151 (0x4A)
; GFX1200: EF_AMDGPU_MACH_AMDGCN_GFX1200 (0x48)
; GFX1201: EF_AMDGPU_MACH_AMDGCN_GFX1201 (0x4E)
+
+; GFX9_GENERIC: EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC (0x51)
+; GFX10_1_GENERIC: EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC (0x52)
+; GFX10_3_GENERIC: EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC (0x53)
+; GFX11_GENERIC: EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC (0x54)
; ALL: ]
define amdgpu_kernel void @elf_header() {
diff --git a/llvm/test/CodeGen/AMDGPU/gds-allocation.ll b/llvm/test/CodeGen/AMDGPU/gds-allocation.ll
index dc6fea4..1a93347 100644
--- a/llvm/test/CodeGen/AMDGPU/gds-allocation.ll
+++ b/llvm/test/CodeGen/AMDGPU/gds-allocation.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -amdgpu-atomic-optimizer-strategy=None -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx9-generic --amdhsa-code-object-version=6 -amdgpu-atomic-optimizer-strategy=None -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
@gds0 = internal addrspace(2) global [4 x i32] undef, align 4
@lds0 = internal addrspace(3) global [4 x i32] undef, align 128
diff --git a/llvm/test/CodeGen/AMDGPU/gds-atomic.ll b/llvm/test/CodeGen/AMDGPU/gds-atomic.ll
index 3e4e693..8d44330 100644
--- a/llvm/test/CodeGen/AMDGPU/gds-atomic.ll
+++ b/llvm/test/CodeGen/AMDGPU/gds-atomic.ll
@@ -2,6 +2,7 @@
; RUN: llc -mtriple=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s
; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx9-generic --amdhsa-code-object-version=6 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s
; FUNC-LABEL: {{^}}atomic_add_ret_gds:
; GCN-DAG: v_mov_b32_e32 v[[OFF:[0-9]+]], s
diff --git a/llvm/test/CodeGen/AMDGPU/generic-targets-require-v6.ll b/llvm/test/CodeGen/AMDGPU/generic-targets-require-v6.ll
new file mode 100644
index 0000000..e3f4b14
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/generic-targets-require-v6.ll
@@ -0,0 +1,18 @@
+; RUN: not llc -march=amdgcn -mcpu=gfx9-generic --amdhsa-code-object-version=5 -o - %s 2>&1 | FileCheck --check-prefix=GFX9-V5 %s
+; RUN: not llc -march=amdgcn -mcpu=gfx10.1-generic --amdhsa-code-object-version=5 -o - %s 2>&1 | FileCheck --check-prefix=GFX101-V5 %s
+; RUN: not llc -march=amdgcn -mcpu=gfx10.3-generic --amdhsa-code-object-version=5 -o - %s 2>&1 | FileCheck --check-prefix=GFX103-V5 %s
+; RUN: not llc -march=amdgcn -mcpu=gfx11-generic --amdhsa-code-object-version=5 -o - %s 2>&1 | FileCheck --check-prefix=GFX11-V5 %s
+
+; RUN: llc -march=amdgcn -mcpu=gfx9-generic --amdhsa-code-object-version=6 -o - %s
+; RUN: llc -march=amdgcn -mcpu=gfx10.1-generic --amdhsa-code-object-version=6 -o - %s
+; RUN: llc -march=amdgcn -mcpu=gfx10.3-generic --amdhsa-code-object-version=6 -o - %s
+; RUN: llc -march=amdgcn -mcpu=gfx11-generic --amdhsa-code-object-version=6 -o - %s
+
+; GFX9-V5: gfx9-generic is only available on code object version 6 or better
+; GFX101-V5: gfx10.1-generic is only available on code object version 6 or better
+; GFX103-V5: gfx10.3-generic is only available on code object version 6 or better
+; GFX11-V5: gfx11-generic is only available on code object version 6 or better
+
+define void @foo() {
+ ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/hsa-generic-target-features.ll b/llvm/test/CodeGen/AMDGPU/hsa-generic-target-features.ll
new file mode 100644
index 0000000..4fee563
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/hsa-generic-target-features.ll
@@ -0,0 +1,31 @@
+; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10.1-generic -mattr=+cumode < %s | FileCheck -check-prefix=NOCU %s
+; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10.1-generic < %s | FileCheck -check-prefix=CU %s
+; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10.3-generic -mattr=+cumode < %s | FileCheck -check-prefix=NOCU %s
+; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10.3-generic < %s | FileCheck -check-prefix=CU %s
+; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx11-generic -mattr=+cumode < %s | FileCheck -check-prefix=NOCU %s
+; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx11-generic < %s | FileCheck -check-prefix=CU %s
+
+; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10.1-generic -mattr=+wavefrontsize32,-wavefrontsize64 < %s | FileCheck -check-prefix=W32 %s
+; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10.1-generic -mattr=-wavefrontsize32,+wavefrontsize64 < %s | FileCheck -check-prefix=W64 %s
+; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10.3-generic -mattr=+wavefrontsize32,-wavefrontsize64 < %s | FileCheck -check-prefix=W32 %s
+; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10.3-generic -mattr=-wavefrontsize32,+wavefrontsize64 < %s | FileCheck -check-prefix=W64 %s
+; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx11-generic -mattr=+wavefrontsize32,-wavefrontsize64 < %s | FileCheck -check-prefix=W32 %s
+; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx11-generic -mattr=-wavefrontsize32,+wavefrontsize64 < %s | FileCheck -check-prefix=W64 %s
+
+; Checks 10.1, 10.3 and 11 generic targets allow cumode/wave64.
+
+; NOCU: .amdhsa_workgroup_processor_mode 0
+; NOCU: .workgroup_processor_mode: 0
+; CU: .amdhsa_workgroup_processor_mode 1
+; CU: .workgroup_processor_mode: 1
+
+; W64: .amdhsa_wavefront_size32 0
+; W32: .amdhsa_wavefront_size32 1
+
+define amdgpu_kernel void @wavefrontsize() {
+entry:
+ ret void
+}
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 600}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.d16.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.d16.dim.ll
index 91284d3..cf324d6 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.d16.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.d16.dim.ll
@@ -1,8 +1,11 @@
; RUN: llc < %s -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefixes=GCN,UNPACKED %s
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck --check-prefix=GCN %s
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX9 %s
+; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx9-generic --amdhsa-code-object-version=6 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX9 %s
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX10 %s
+; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx10.1-generic --amdhsa-code-object-version=6 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX10 %s
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX10 %s
+; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx11-generic --amdhsa-code-object-version=6 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX10 %s
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX12 %s
; GCN-LABEL: {{^}}image_gather4_b_2d_v4f16:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll
index 7dc139e..10e1ae3 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll
@@ -1,8 +1,11 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=VERDE %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX6789 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx9-generic --amdhsa-code-object-version=6 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX6789 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx10.1-generic --amdhsa-code-object-version=6 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx11-generic --amdhsa-code-object-version=6 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12 %s
define amdgpu_ps <4 x float> @sample_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
diff --git a/llvm/test/CodeGen/AMDGPU/mad-mix.ll b/llvm/test/CodeGen/AMDGPU/mad-mix.ll
index e8b9526..b520dd1 100644
--- a/llvm/test/CodeGen/AMDGPU/mad-mix.ll
+++ b/llvm/test/CodeGen/AMDGPU/mad-mix.ll
@@ -2,12 +2,14 @@
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX1100,SDAG-GFX1100 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX900,SDAG-GFX900 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX906,SDAG-GFX906 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx9-generic -verify-machineinstrs --amdhsa-code-object-version=6 < %s | FileCheck -check-prefixes=GFX9GEN,SDAG-GFX9GEN %s
; RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=VI,SDAG-VI %s
; RUN: llc -mtriple=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=CI,SDAG-CI %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX1100,GISEL-GFX1100 %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX900,GISEL-GFX900 %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX906,GISEL-GFX906 %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx9-generic --amdhsa-code-object-version=6 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9GEN,GISEL-GFX9GEN %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=VI,GISEL-VI %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=CI,GISEL-CI %s
@@ -30,6 +32,15 @@ define float @v_mad_mix_f32_f16lo_f16lo_f16lo(half %src0, half %src1, half %src2
; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1]
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo:
+; GFX9GEN: ; %bb.0:
+; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v3, v0
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v2
+; GFX9GEN-NEXT: v_mac_f32_e32 v0, v3, v1
+; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -79,6 +90,15 @@ define float @v_mad_mix_f32_f16hi_f16hi_f16hi_int(i32 %src0, i32 %src1, i32 %src
; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GFX9GEN-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_int:
+; GFX9GEN: ; %bb.0:
+; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX9GEN-NEXT: v_mac_f32_e32 v0, v3, v1
+; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; VI-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_int:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -134,6 +154,15 @@ define float @v_mad_mix_f32_f16hi_f16hi_f16hi_elt(<2 x half> %src0, <2 x half> %
; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GFX9GEN-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_elt:
+; GFX9GEN: ; %bb.0:
+; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX9GEN-NEXT: v_mac_f32_e32 v0, v3, v1
+; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; VI-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_elt:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -193,6 +222,19 @@ define <2 x float> @v_mad_mix_v2f32(<2 x half> %src0, <2 x half> %src1, <2 x hal
; SDAG-GFX906-NEXT: v_mov_b32_e32 v1, v3
; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; SDAG-GFX9GEN-LABEL: v_mad_mix_v2f32:
+; SDAG-GFX9GEN: ; %bb.0:
+; SDAG-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v4, v0
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v6, v1
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v2
+; SDAG-GFX9GEN-NEXT: v_mac_f32_e32 v1, v3, v5
+; SDAG-GFX9GEN-NEXT: v_mac_f32_e32 v0, v4, v6
+; SDAG-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; SDAG-VI-LABEL: v_mad_mix_v2f32:
; SDAG-VI: ; %bb.0:
; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -242,6 +284,19 @@ define <2 x float> @v_mad_mix_v2f32(<2 x half> %src0, <2 x half> %src1, <2 x hal
; GISEL-GFX906-NEXT: v_mov_b32_e32 v0, v3
; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GISEL-GFX9GEN-LABEL: v_mad_mix_v2f32:
+; GISEL-GFX9GEN: ; %bb.0:
+; GISEL-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v3, v0
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v5, v1
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v2
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-GFX9GEN-NEXT: v_mac_f32_e32 v0, v3, v5
+; GISEL-GFX9GEN-NEXT: v_mac_f32_e32 v1, v4, v6
+; GISEL-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; GISEL-VI-LABEL: v_mad_mix_v2f32:
; GISEL-VI: ; %bb.0:
; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -300,6 +355,19 @@ define <2 x float> @v_mad_mix_v2f32_shuffle(<2 x half> %src0, <2 x half> %src1,
; GFX906-NEXT: v_mov_b32_e32 v0, v3
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GFX9GEN-LABEL: v_mad_mix_v2f32_shuffle:
+; GFX9GEN: ; %bb.0:
+; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v4, v0
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v1
+; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX9GEN-NEXT: v_mad_f32 v0, v3, v0, v2
+; GFX9GEN-NEXT: v_mac_f32_e32 v2, v4, v1
+; GFX9GEN-NEXT: v_mov_b32_e32 v1, v2
+; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; VI-LABEL: v_mad_mix_v2f32_shuffle:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -378,6 +446,15 @@ define float @v_mad_mix_f32_negf16lo_f16lo_f16lo(half %src0, half %src1, half %s
; GFX906-NEXT: v_fma_mix_f32 v0, -v0, v1, v2 op_sel_hi:[1,1,1]
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; SDAG-GFX9GEN-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo:
+; SDAG-GFX9GEN: ; %bb.0:
+; SDAG-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v2
+; SDAG-GFX9GEN-NEXT: v_mad_f32 v0, -v0, v1, v2
+; SDAG-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; SDAG-VI-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo:
; SDAG-VI: ; %bb.0:
; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -393,6 +470,15 @@ define float @v_mad_mix_f32_negf16lo_f16lo_f16lo(half %src0, half %src1, half %s
; SDAG-CI-NEXT: v_mad_f32 v0, -v0, v1, v2
; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
;
+; GISEL-GFX9GEN-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo:
+; GISEL-GFX9GEN: ; %bb.0:
+; GISEL-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e64 v3, -v0
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v2
+; GISEL-GFX9GEN-NEXT: v_mac_f32_e32 v0, v3, v1
+; GISEL-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; GISEL-VI-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo:
; GISEL-VI: ; %bb.0:
; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -437,6 +523,15 @@ define float @v_mad_mix_f32_absf16lo_f16lo_f16lo(half %src0, half %src1, half %s
; GFX906-NEXT: v_fma_mix_f32 v0, |v0|, v1, v2 op_sel_hi:[1,1,1]
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GFX9GEN-LABEL: v_mad_mix_f32_absf16lo_f16lo_f16lo:
+; GFX9GEN: ; %bb.0:
+; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX9GEN-NEXT: v_mad_f32 v0, |v0|, v1, v2
+; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; VI-LABEL: v_mad_mix_f32_absf16lo_f16lo_f16lo:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -487,6 +582,15 @@ define float @v_mad_mix_f32_negabsf16lo_f16lo_f16lo(half %src0, half %src1, half
; GFX906-NEXT: v_fma_mix_f32 v0, -|v0|, v1, v2 op_sel_hi:[1,1,1]
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GFX9GEN-LABEL: v_mad_mix_f32_negabsf16lo_f16lo_f16lo:
+; GFX9GEN: ; %bb.0:
+; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX9GEN-NEXT: v_mad_f32 v0, -|v0|, v1, v2
+; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; VI-LABEL: v_mad_mix_f32_negabsf16lo_f16lo_f16lo:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -538,6 +642,14 @@ define float @v_mad_mix_f32_f16lo_f16lo_f32(half %src0, half %src1, float %src2)
; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_f32:
+; GFX9GEN: ; %bb.0:
+; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX9GEN-NEXT: v_mad_f32 v0, v0, v1, v2
+; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -584,6 +696,14 @@ define float @v_mad_mix_f32_f16lo_f16lo_negf32(half %src0, half %src1, float %sr
; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, -v2 op_sel_hi:[1,1,0]
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_negf32:
+; GFX9GEN: ; %bb.0:
+; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX9GEN-NEXT: v_mad_f32 v0, v0, v1, -v2
+; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_negf32:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -631,6 +751,14 @@ define float @v_mad_mix_f32_f16lo_f16lo_absf32(half %src0, half %src1, float %sr
; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, |v2| op_sel_hi:[1,1,0]
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_absf32:
+; GFX9GEN: ; %bb.0:
+; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX9GEN-NEXT: v_mad_f32 v0, v0, v1, |v2|
+; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_absf32:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -678,6 +806,14 @@ define float @v_mad_mix_f32_f16lo_f16lo_negabsf32(half %src0, half %src1, float
; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, -|v2| op_sel_hi:[1,1,0]
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_negabsf32:
+; GFX9GEN: ; %bb.0:
+; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX9GEN-NEXT: v_mad_f32 v0, v0, v1, -|v2|
+; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_negabsf32:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -734,6 +870,14 @@ define float @v_mad_mix_f32_f16lo_f16lo_f32imm1(half %src0, half %src1) #0 {
; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1:
+; GFX9GEN: ; %bb.0:
+; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX9GEN-NEXT: v_mad_f32 v0, v0, v1, 1.0
+; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -806,6 +950,14 @@ define float @v_mad_mix_f32_f16lo_f16lo_f32imminv2pi(half %src0, half %src1) #0
; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi:
+; GFX9GEN: ; %bb.0:
+; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX9GEN-NEXT: v_mad_f32 v0, v0, v1, 0.15915494
+; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -885,6 +1037,14 @@ define float @v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi(half %src0, half %src1)
; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; SDAG-GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
+; SDAG-GFX9GEN: ; %bb.0:
+; SDAG-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SDAG-GFX9GEN-NEXT: v_madak_f32 v0, v0, v1, 0x3e230000
+; SDAG-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; SDAG-VI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
; SDAG-VI: ; %bb.0:
; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -921,6 +1081,15 @@ define float @v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi(half %src0, half %src1)
; GISEL-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GISEL-GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
+; GISEL-GFX9GEN: ; %bb.0:
+; GISEL-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v0
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-GFX9GEN-NEXT: v_mov_b32_e32 v0, 0x3e230000
+; GISEL-GFX9GEN-NEXT: v_mac_f32_e32 v0, v2, v1
+; GISEL-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; GISEL-VI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
; GISEL-VI: ; %bb.0:
; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -969,6 +1138,14 @@ define float @v_mad_mix_f32_f16lo_f16lo_cvtf16imm63(half %src0, half %src1) #0 {
; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; SDAG-GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
+; SDAG-GFX9GEN: ; %bb.0:
+; SDAG-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SDAG-GFX9GEN-NEXT: v_madak_f32 v0, v0, v1, 0x367c0000
+; SDAG-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; SDAG-VI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
; SDAG-VI: ; %bb.0:
; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1005,6 +1182,15 @@ define float @v_mad_mix_f32_f16lo_f16lo_cvtf16imm63(half %src0, half %src1) #0 {
; GISEL-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GISEL-GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
+; GISEL-GFX9GEN: ; %bb.0:
+; GISEL-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v0
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-GFX9GEN-NEXT: v_mov_b32_e32 v0, 0x367c0000
+; GISEL-GFX9GEN-NEXT: v_mac_f32_e32 v0, v2, v1
+; GISEL-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; GISEL-VI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
; GISEL-VI: ; %bb.0:
; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1058,6 +1244,17 @@ define <2 x float> @v_mad_mix_v2f32_f32imm1(<2 x half> %src0, <2 x half> %src1)
; SDAG-GFX906-NEXT: v_mov_b32_e32 v1, v2
; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; SDAG-GFX9GEN-LABEL: v_mad_mix_v2f32_f32imm1:
+; SDAG-GFX9GEN: ; %bb.0:
+; SDAG-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v3, v1
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-GFX9GEN-NEXT: v_mad_f32 v0, v0, v3, 1.0
+; SDAG-GFX9GEN-NEXT: v_mad_f32 v1, v2, v1, 1.0
+; SDAG-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; SDAG-VI-LABEL: v_mad_mix_v2f32_f32imm1:
; SDAG-VI: ; %bb.0:
; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1112,6 +1309,17 @@ define <2 x float> @v_mad_mix_v2f32_f32imm1(<2 x half> %src0, <2 x half> %src1)
; GISEL-GFX906-NEXT: v_mov_b32_e32 v0, v2
; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GISEL-GFX9GEN-LABEL: v_mad_mix_v2f32_f32imm1:
+; GISEL-GFX9GEN: ; %bb.0:
+; GISEL-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v0
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v1
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-GFX9GEN-NEXT: v_mad_f32 v0, v2, v0, 1.0
+; GISEL-GFX9GEN-NEXT: v_mad_f32 v1, v3, v1, 1.0
+; GISEL-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; GISEL-VI-LABEL: v_mad_mix_v2f32_f32imm1:
; GISEL-VI: ; %bb.0:
; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1168,6 +1376,18 @@ define <2 x float> @v_mad_mix_v2f32_cvtf16imminv2pi(<2 x half> %src0, <2 x half>
; SDAG-GFX906-NEXT: v_mov_b32_e32 v1, v2
; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; SDAG-GFX9GEN-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
+; SDAG-GFX9GEN: ; %bb.0:
+; SDAG-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v3, v1
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-GFX9GEN-NEXT: v_mov_b32_e32 v1, 0x3e230000
+; SDAG-GFX9GEN-NEXT: v_madak_f32 v0, v0, v3, 0x3e230000
+; SDAG-GFX9GEN-NEXT: v_mac_f32_e32 v1, v2, v4
+; SDAG-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; SDAG-VI-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
; SDAG-VI: ; %bb.0:
; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1224,6 +1444,18 @@ define <2 x float> @v_mad_mix_v2f32_cvtf16imminv2pi(<2 x half> %src0, <2 x half>
; GISEL-GFX906-NEXT: v_mov_b32_e32 v0, v2
; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GISEL-GFX9GEN-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
+; GISEL-GFX9GEN: ; %bb.0:
+; GISEL-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v0
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v1
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-GFX9GEN-NEXT: v_mov_b32_e32 v1, 0x3e230000
+; GISEL-GFX9GEN-NEXT: v_madak_f32 v0, v2, v0, 0x3e230000
+; GISEL-GFX9GEN-NEXT: v_mac_f32_e32 v1, v3, v4
+; GISEL-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; GISEL-VI-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
; GISEL-VI: ; %bb.0:
; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1283,6 +1515,17 @@ define <2 x float> @v_mad_mix_v2f32_f32imminv2pi(<2 x half> %src0, <2 x half> %s
; SDAG-GFX906-NEXT: v_mov_b32_e32 v1, v2
; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; SDAG-GFX9GEN-LABEL: v_mad_mix_v2f32_f32imminv2pi:
+; SDAG-GFX9GEN: ; %bb.0:
+; SDAG-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v3, v1
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-GFX9GEN-NEXT: v_mad_f32 v0, v0, v3, 0.15915494
+; SDAG-GFX9GEN-NEXT: v_mad_f32 v1, v2, v1, 0.15915494
+; SDAG-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; SDAG-VI-LABEL: v_mad_mix_v2f32_f32imminv2pi:
; SDAG-VI: ; %bb.0:
; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1338,6 +1581,17 @@ define <2 x float> @v_mad_mix_v2f32_f32imminv2pi(<2 x half> %src0, <2 x half> %s
; GISEL-GFX906-NEXT: v_mov_b32_e32 v0, v2
; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GISEL-GFX9GEN-LABEL: v_mad_mix_v2f32_f32imminv2pi:
+; GISEL-GFX9GEN: ; %bb.0:
+; GISEL-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v0
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v1
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-GFX9GEN-NEXT: v_mad_f32 v0, v2, v0, 0.15915494
+; GISEL-GFX9GEN-NEXT: v_mad_f32 v1, v3, v1, 0.15915494
+; GISEL-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; GISEL-VI-LABEL: v_mad_mix_v2f32_f32imminv2pi:
; GISEL-VI: ; %bb.0:
; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1386,6 +1640,15 @@ define float @v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt(<2 x half> %src0, <2 x h
; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GFX9GEN-LABEL: v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt:
+; GFX9GEN: ; %bb.0:
+; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX9GEN-NEXT: v_mad_f32 v0, v0, v1, v2 clamp
+; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; VI-LABEL: v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1440,6 +1703,12 @@ define float @no_mix_simple(float %src0, float %src1, float %src2) #0 {
; GFX906-NEXT: v_fma_f32 v0, v0, v1, v2
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GFX9GEN-LABEL: no_mix_simple:
+; GFX9GEN: ; %bb.0:
+; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9GEN-NEXT: v_mad_f32 v0, v0, v1, v2
+; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; VI-LABEL: no_mix_simple:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1474,6 +1743,12 @@ define float @no_mix_simple_fabs(float %src0, float %src1, float %src2) #0 {
; GFX906-NEXT: v_fma_f32 v0, |v0|, v1, v2
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GFX9GEN-LABEL: no_mix_simple_fabs:
+; GFX9GEN: ; %bb.0:
+; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9GEN-NEXT: v_mad_f32 v0, |v0|, v1, v2
+; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; VI-LABEL: no_mix_simple_fabs:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1516,6 +1791,15 @@ define float @v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals(half %src0, half %sr
; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1]
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals:
+; GFX9GEN: ; %bb.0:
+; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX9GEN-NEXT: v_fma_f32 v0, v0, v1, v2
+; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1568,6 +1852,14 @@ define float @v_mad_mix_f32_f16lo_f16lo_f32_denormals(half %src0, half %src1, fl
; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals:
+; GFX9GEN: ; %bb.0:
+; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX9GEN-NEXT: v_fma_f32 v0, v0, v1, v2
+; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1628,6 +1920,16 @@ define float @v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd(half %src0,
; GFX906-NEXT: v_add_f32_e32 v0, v0, v2
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd:
+; GFX9GEN: ; %bb.0:
+; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX9GEN-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX9GEN-NEXT: v_add_f32_e32 v0, v0, v2
+; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1691,6 +1993,15 @@ define float @v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd(half %src0, half
; GFX906-NEXT: v_add_f32_e32 v0, v0, v2
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd:
+; GFX9GEN: ; %bb.0:
+; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX9GEN-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX9GEN-NEXT: v_add_f32_e32 v0, v0, v2
+; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1741,6 +2052,15 @@ define float @v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd(half %src0, hal
; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1]
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd:
+; GFX9GEN: ; %bb.0:
+; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v3, v0
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v2
+; GFX9GEN-NEXT: v_mac_f32_e32 v0, v3, v1
+; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1791,6 +2111,14 @@ define float @v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd(half %src0, half %src
; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd:
+; GFX9GEN: ; %bb.0:
+; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX9GEN-NEXT: v_mad_f32 v0, v0, v1, v2
+; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1838,6 +2166,15 @@ define float @v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo(i32 %src0.arg, half %src1
; GFX906-NEXT: v_fma_mix_f32 v0, -v0, v1, v2 op_sel_hi:[1,1,1]
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; SDAG-GFX9GEN-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo:
+; SDAG-GFX9GEN: ; %bb.0:
+; SDAG-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v2
+; SDAG-GFX9GEN-NEXT: v_mad_f32 v0, -v0, v1, v2
+; SDAG-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; SDAG-VI-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo:
; SDAG-VI: ; %bb.0:
; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1854,6 +2191,15 @@ define float @v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo(i32 %src0.arg, half %src1
; SDAG-CI-NEXT: v_mad_f32 v0, -v0, v1, v2
; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
;
+; GISEL-GFX9GEN-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo:
+; GISEL-GFX9GEN: ; %bb.0:
+; GISEL-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e64 v3, -v0
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v2
+; GISEL-GFX9GEN-NEXT: v_mac_f32_e32 v0, v3, v1
+; GISEL-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; GISEL-VI-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo:
; GISEL-VI: ; %bb.0:
; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1910,6 +2256,15 @@ define float @v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo(i32 %src0.arg, half %
; GFX906-NEXT: v_fma_mix_f32 v0, |v0|, v1, v2 op_sel_hi:[1,1,1]
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GFX9GEN-LABEL: v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo:
+; GFX9GEN: ; %bb.0:
+; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v0, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX9GEN-NEXT: v_mad_f32 v0, |v0|, v1, v2
+; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; VI-LABEL: v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1966,6 +2321,15 @@ define float @v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo(i32 %src0.arg, half %src1
; GFX906-NEXT: v_fma_mix_f32 v0, |v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GFX9GEN-LABEL: v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo:
+; GFX9GEN: ; %bb.0:
+; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v3, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v2
+; GFX9GEN-NEXT: v_mac_f32_e32 v0, v3, v1
+; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; VI-LABEL: v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2021,6 +2385,15 @@ define float @v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo(i32 %src0.arg, half
; GFX906-NEXT: v_fma_mix_f32 v0, -v0, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; SDAG-GFX9GEN-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo:
+; SDAG-GFX9GEN: ; %bb.0:
+; SDAG-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v2
+; SDAG-GFX9GEN-NEXT: v_mad_f32 v0, -v0, v1, v2
+; SDAG-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; SDAG-VI-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo:
; SDAG-VI: ; %bb.0:
; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2038,6 +2411,16 @@ define float @v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo(i32 %src0.arg, half
; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2
; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
;
+; GISEL-GFX9GEN-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo:
+; GISEL-GFX9GEN: ; %bb.0:
+; GISEL-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX9GEN-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v2
+; GISEL-GFX9GEN-NEXT: v_mac_f32_e32 v0, v3, v1
+; GISEL-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; GISEL-VI-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo:
; GISEL-VI: ; %bb.0:
; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2087,6 +2470,15 @@ define float @v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo(i32 %src0.arg, half
; GFX906-NEXT: v_fma_mix_f32 v0, |v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; SDAG-GFX9GEN-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo:
+; SDAG-GFX9GEN: ; %bb.0:
+; SDAG-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v3, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v2
+; SDAG-GFX9GEN-NEXT: v_mac_f32_e32 v0, v3, v1
+; SDAG-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; SDAG-VI-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo:
; SDAG-VI: ; %bb.0:
; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2104,6 +2496,16 @@ define float @v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo(i32 %src0.arg, half
; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2
; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
;
+; GISEL-GFX9GEN-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo:
+; GISEL-GFX9GEN: ; %bb.0:
+; GISEL-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX9GEN-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v2
+; GISEL-GFX9GEN-NEXT: v_mac_f32_e32 v0, v3, v1
+; GISEL-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; GISEL-VI-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo:
; GISEL-VI: ; %bb.0:
; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2153,6 +2555,15 @@ define float @v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo(i32 %src0.arg,
; GFX906-NEXT: v_fma_mix_f32 v0, -|v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; SDAG-GFX9GEN-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo:
+; SDAG-GFX9GEN: ; %bb.0:
+; SDAG-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v0, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v2
+; SDAG-GFX9GEN-NEXT: v_mad_f32 v0, -v0, v1, v2
+; SDAG-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; SDAG-VI-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo:
; SDAG-VI: ; %bb.0:
; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2170,6 +2581,16 @@ define float @v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo(i32 %src0.arg,
; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2
; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
;
+; GISEL-GFX9GEN-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo:
+; GISEL-GFX9GEN: ; %bb.0:
+; GISEL-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX9GEN-NEXT: v_or_b32_e32 v0, 0x80008000, v0
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v2
+; GISEL-GFX9GEN-NEXT: v_mac_f32_e32 v0, v3, v1
+; GISEL-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; GISEL-VI-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo:
; GISEL-VI: ; %bb.0:
; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/unsupported-image-sample.ll b/llvm/test/CodeGen/AMDGPU/unsupported-image-sample.ll
index 0878fc6..b08586e 100644
--- a/llvm/test/CodeGen/AMDGPU/unsupported-image-sample.ll
+++ b/llvm/test/CodeGen/AMDGPU/unsupported-image-sample.ll
@@ -1,15 +1,13 @@
-; RUN: llc -O0 -mtriple=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX906 %s
-; RUN: llc -O0 -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX908 %s
+; RUN: llc -O0 -mtriple=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s
+; RUN: llc -O0 -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s
+; RUN: llc -O0 -mtriple=amdgcn -mcpu=gfx9-generic --amdhsa-code-object-version=6 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s
; RUN: not --crash llc -O0 -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefixes=GFX90A %s
; RUN: not --crash llc -O0 -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefixes=GFX940 %s
; RUN: llc -O0 -mtriple=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX1030 %s
; RUN: llc -O0 -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX1100 %s
-; GFX906-LABEL: image_sample_test:
-; GFX906: image_sample_lz
-
-; GFX908-LABEL: image_sample_test:
-; GFX908: image_sample_lz
+; GFX9-LABEL: image_sample_test:
+; GFX9: image_sample_lz
; GFX90A: LLVM ERROR: requested image instruction is not supported on this GPU
diff --git a/llvm/test/Object/AMDGPU/elf-header-flags-mach.yaml b/llvm/test/Object/AMDGPU/elf-header-flags-mach.yaml
index 7fb33ca..4c2b447 100644
--- a/llvm/test/Object/AMDGPU/elf-header-flags-mach.yaml
+++ b/llvm/test/Object/AMDGPU/elf-header-flags-mach.yaml
@@ -238,6 +238,23 @@
# RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX1201 | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX1201 %s
# RUN: obj2yaml %t.o.AMDGCN_GFX1201 | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX1201 %s
+# RUN: sed -e 's/<BITS>/64/' -e 's/<MACH>/AMDGCN_GFX9_GENERIC/' %s | yaml2obj -o %t.o.AMDGCN_GFX9_GENERIC
+# RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX9_GENERIC | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX9_GENERIC %s
+# RUN: obj2yaml %t.o.AMDGCN_GFX9_GENERIC | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX9_GENERIC %s
+
+# RUN: sed -e 's/<BITS>/64/' -e 's/<MACH>/AMDGCN_GFX10_1_GENERIC/' %s | yaml2obj -o %t.o.AMDGCN_GFX10_1_GENERIC
+# RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX10_1_GENERIC | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX10_1_GENERIC %s
+# RUN: obj2yaml %t.o.AMDGCN_GFX10_1_GENERIC | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX10_1_GENERIC %s
+
+# RUN: sed -e 's/<BITS>/64/' -e 's/<MACH>/AMDGCN_GFX10_3_GENERIC/' %s | yaml2obj -o %t.o.AMDGCN_GFX10_3_GENERIC
+# RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX10_3_GENERIC | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX10_3_GENERIC %s
+# RUN: obj2yaml %t.o.AMDGCN_GFX10_3_GENERIC | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX10_3_GENERIC %s
+
+# RUN: sed -e 's/<BITS>/64/' -e 's/<MACH>/AMDGCN_GFX11_GENERIC/' %s | yaml2obj -o %t.o.AMDGCN_GFX11_GENERIC
+# RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX11_GENERIC | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX11_GENERIC %s
+# RUN: obj2yaml %t.o.AMDGCN_GFX11_GENERIC | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX11_GENERIC %s
+
+
# ELF-R600-ALL: Format: elf32-amdgpu
# ELF-R600-ALL: Arch: r600
# ELF-R600-ALL: AddressSize: 32bit
@@ -435,6 +452,18 @@
# ELF-AMDGCN-GFX1201: EF_AMDGPU_MACH_AMDGCN_GFX1201 (0x4E)
# YAML-AMDGCN-GFX1201: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX1201 ]
+# ELF-AMDGCN-GFX9_GENERIC: EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC (0x51)
+# YAML-AMDGCN-GFX9_GENERIC: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC ]
+
+# ELF-AMDGCN-GFX10_1_GENERIC: EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC (0x52)
+# YAML-AMDGCN-GFX10_1_GENERIC: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC ]
+
+# ELF-AMDGCN-GFX10_3_GENERIC: EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC (0x53)
+# YAML-AMDGCN-GFX10_3_GENERIC: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC ]
+
+# ELF-AMDGCN-GFX11_GENERIC: EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC (0x54)
+# YAML-AMDGCN-GFX11_GENERIC: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC ]
+
# ELF-AMDGCN-ALL: ]
diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll
index e296d7f..ca136a6 100644
--- a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll
+++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll
@@ -18,6 +18,11 @@ define amdgpu_kernel void @test_kernel() {
; ----------------------------------GFX11--------------------------------------
;
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=6 -mcpu=gfx11-generic -filetype=obj -O0 -o %t.o %s
+; RUN: llvm-objdump -D --arch-name=amdgcn -mllvm --amdhsa-code-object-version=6 --mcpu=gfx11-generic %t.o > %t-specify.txt
+; RUN: llvm-objdump -D -mllvm --amdhsa-code-object-version=6 %t.o > %t-detect.txt
+; RUN: diff %t-specify.txt %t-detect.txt
+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1151 -filetype=obj -O0 -o %t.o %s
; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx1151 %t.o > %t-specify.txt
; RUN: llvm-objdump -D %t.o > %t-detect.txt
@@ -49,6 +54,11 @@ define amdgpu_kernel void @test_kernel() {
; RUN: diff %t-specify.txt %t-detect.txt
; ----------------------------------GFX10--------------------------------------
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=6 -mcpu=gfx10.3-generic -filetype=obj -O0 -o %t.o %s
+; RUN: llvm-objdump -D --arch-name=amdgcn -mllvm --amdhsa-code-object-version=6 --mcpu=gfx10.3-generic %t.o > %t-specify.txt
+; RUN: llvm-objdump -D -mllvm --amdhsa-code-object-version=6 %t.o > %t-detect.txt
+; RUN: diff %t-specify.txt %t-detect.txt
+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1036 -filetype=obj -O0 -o %t.o %s
; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx1036 %t.o > %t-specify.txt
; RUN: llvm-objdump -D %t.o > %t-detect.txt
@@ -84,6 +94,11 @@ define amdgpu_kernel void @test_kernel() {
; RUN: llvm-objdump -D %t.o > %t-detect.txt
; RUN: diff %t-specify.txt %t-detect.txt
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=6 -mcpu=gfx10.1-generic -filetype=obj -O0 -o %t.o %s
+; RUN: llvm-objdump -D --arch-name=amdgcn -mllvm --amdhsa-code-object-version=6 --mcpu=gfx10.1-generic %t.o > %t-specify.txt
+; RUN: llvm-objdump -D -mllvm --amdhsa-code-object-version=6 %t.o > %t-detect.txt
+; RUN: diff %t-specify.txt %t-detect.txt
+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1013 -filetype=obj -O0 -o %t.o %s
; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx1013 %t.o > %t-specify.txt
; RUN: llvm-objdump -D %t.o > %t-detect.txt
@@ -107,6 +122,11 @@ define amdgpu_kernel void @test_kernel() {
; ----------------------------------GFX9---------------------------------------
;
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=6 -mcpu=gfx9-generic -filetype=obj -O0 -o %t.o %s
+; RUN: llvm-objdump -D --arch-name=amdgcn -mllvm --amdhsa-code-object-version=6 --mcpu=gfx9-generic %t.o > %t-specify.txt
+; RUN: llvm-objdump -D -mllvm --amdhsa-code-object-version=6 %t.o > %t-detect.txt
+; RUN: diff %t-specify.txt %t-detect.txt
+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -filetype=obj -O0 -o %t.o %s
; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx942 %t.o > %t-specify.txt
; RUN: llvm-objdump -D %t.o > %t-detect.txt
diff --git a/llvm/test/tools/llvm-readobj/ELF/AMDGPU/elf-headers.test b/llvm/test/tools/llvm-readobj/ELF/AMDGPU/elf-headers.test
index e2266d8..7fbf4aa 100644
--- a/llvm/test/tools/llvm-readobj/ELF/AMDGPU/elf-headers.test
+++ b/llvm/test/tools/llvm-readobj/ELF/AMDGPU/elf-headers.test
@@ -253,6 +253,9 @@
# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1013
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1013 -DFLAG_VALUE=0x42
+# RUN: yaml2obj %s -o %t -DABI_VERSION=4 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=4 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC -DFLAG_VALUE=0x52
+
# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1013
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1013 -DFLAG_VALUE=0x42
@@ -322,6 +325,9 @@
# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1036
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1036 -DFLAG_VALUE=0x45
+# RUN: yaml2obj %s -o %t -DABI_VERSION=4 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=4 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC -DFLAG_VALUE=0x53
+
# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME="EF_AMDGPU_MACH_AMDGCN_GFX90A, EF_AMDGPU_FEATURE_XNACK_V3"
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,DOUBLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_0="EF_AMDGPU_FEATURE_XNACK_V3 (0x100)" -DFLAG_1="EF_AMDGPU_MACH_AMDGCN_GFX90A (0x3F)" -DFLAG_VALUE=0x13F
@@ -355,6 +361,9 @@
# RUN: yaml2obj %s -o %t -DABI_VERSION=16 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX90A
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,UNKNOWN-ABI-VERSION --match-full-lines -DABI_VERSION=16 -DFILE=%t -DFLAG_VALUE=0x3F
+# RUN: yaml2obj %s -o %t -DABI_VERSION=4 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=4 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC -DFLAG_VALUE=0x51
+
# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1100
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1100 -DFLAG_VALUE=0x41
@@ -391,6 +400,9 @@
# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1103
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1103 -DFLAG_VALUE=0x44
+# RUN: yaml2obj %s -o %t -DABI_VERSION=4 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=4 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC -DFLAG_VALUE=0x54
+
# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1150
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1150 -DFLAG_VALUE=0x43
diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp
index 82bb12f..8e68f08 100644
--- a/llvm/tools/llvm-readobj/ELFDumper.cpp
+++ b/llvm/tools/llvm-readobj/ELFDumper.cpp
@@ -1559,68 +1559,72 @@ const EnumEntry<unsigned> ElfHeaderMipsFlags[] = {
};
// clang-format off
-#define AMDGPU_MACH_ENUM_ENTS \
- ENUM_ENT(EF_AMDGPU_MACH_NONE, "none"), \
- ENUM_ENT(EF_AMDGPU_MACH_R600_R600, "r600"), \
- ENUM_ENT(EF_AMDGPU_MACH_R600_R630, "r630"), \
- ENUM_ENT(EF_AMDGPU_MACH_R600_RS880, "rs880"), \
- ENUM_ENT(EF_AMDGPU_MACH_R600_RV670, "rv670"), \
- ENUM_ENT(EF_AMDGPU_MACH_R600_RV710, "rv710"), \
- ENUM_ENT(EF_AMDGPU_MACH_R600_RV730, "rv730"), \
- ENUM_ENT(EF_AMDGPU_MACH_R600_RV770, "rv770"), \
- ENUM_ENT(EF_AMDGPU_MACH_R600_CEDAR, "cedar"), \
- ENUM_ENT(EF_AMDGPU_MACH_R600_CYPRESS, "cypress"), \
- ENUM_ENT(EF_AMDGPU_MACH_R600_JUNIPER, "juniper"), \
- ENUM_ENT(EF_AMDGPU_MACH_R600_REDWOOD, "redwood"), \
- ENUM_ENT(EF_AMDGPU_MACH_R600_SUMO, "sumo"), \
- ENUM_ENT(EF_AMDGPU_MACH_R600_BARTS, "barts"), \
- ENUM_ENT(EF_AMDGPU_MACH_R600_CAICOS, "caicos"), \
- ENUM_ENT(EF_AMDGPU_MACH_R600_CAYMAN, "cayman"), \
- ENUM_ENT(EF_AMDGPU_MACH_R600_TURKS, "turks"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX600, "gfx600"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX601, "gfx601"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX602, "gfx602"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX700, "gfx700"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX701, "gfx701"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX702, "gfx702"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX703, "gfx703"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX704, "gfx704"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX705, "gfx705"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX801, "gfx801"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX802, "gfx802"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX803, "gfx803"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX805, "gfx805"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX810, "gfx810"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX900, "gfx900"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX902, "gfx902"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX904, "gfx904"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX906, "gfx906"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX908, "gfx908"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX909, "gfx909"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX90A, "gfx90a"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX90C, "gfx90c"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX940, "gfx940"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX941, "gfx941"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX942, "gfx942"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1010, "gfx1010"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1011, "gfx1011"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1012, "gfx1012"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1013, "gfx1013"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1030, "gfx1030"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1031, "gfx1031"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1032, "gfx1032"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1033, "gfx1033"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1034, "gfx1034"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1035, "gfx1035"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1036, "gfx1036"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1100, "gfx1100"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1101, "gfx1101"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1102, "gfx1102"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1103, "gfx1103"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1150, "gfx1150"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1151, "gfx1151"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1200, "gfx1200"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1201, "gfx1201")
+#define AMDGPU_MACH_ENUM_ENTS \
+ ENUM_ENT(EF_AMDGPU_MACH_NONE, "none"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_R600, "r600"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_R630, "r630"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_RS880, "rs880"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_RV670, "rv670"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_RV710, "rv710"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_RV730, "rv730"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_RV770, "rv770"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_CEDAR, "cedar"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_CYPRESS, "cypress"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_JUNIPER, "juniper"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_REDWOOD, "redwood"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_SUMO, "sumo"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_BARTS, "barts"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_CAICOS, "caicos"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_CAYMAN, "cayman"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_TURKS, "turks"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX600, "gfx600"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX601, "gfx601"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX602, "gfx602"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX700, "gfx700"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX701, "gfx701"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX702, "gfx702"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX703, "gfx703"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX704, "gfx704"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX705, "gfx705"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX801, "gfx801"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX802, "gfx802"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX803, "gfx803"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX805, "gfx805"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX810, "gfx810"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX900, "gfx900"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX902, "gfx902"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX904, "gfx904"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX906, "gfx906"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX908, "gfx908"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX909, "gfx909"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX90A, "gfx90a"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX90C, "gfx90c"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX940, "gfx940"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX941, "gfx941"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX942, "gfx942"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1010, "gfx1010"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1011, "gfx1011"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1012, "gfx1012"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1013, "gfx1013"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1030, "gfx1030"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1031, "gfx1031"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1032, "gfx1032"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1033, "gfx1033"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1034, "gfx1034"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1035, "gfx1035"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1036, "gfx1036"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1100, "gfx1100"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1101, "gfx1101"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1102, "gfx1102"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1103, "gfx1103"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1150, "gfx1150"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1151, "gfx1151"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1200, "gfx1200"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1201, "gfx1201"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC, "gfx9-generic"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC, "gfx10.1-generic"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC, "gfx10.3-generic"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC, "gfx11-generic")
// clang-format on
const EnumEntry<unsigned> ElfHeaderAMDGPUFlagsABIVersion3[] = {