aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPierre van Houtryve <pierre.vanhoutryve@amd.com>2024-02-12 10:18:20 +0100
committerGitHub <noreply@github.com>2024-02-12 10:18:20 +0100
commitf93aa5157a3317b24cff660ac972814ee9ed4dbc (patch)
tree35612be67cf41bfc720363bfcc15bb6c3e753812
parentb221b9733688d149dc288339e304e48af609ad75 (diff)
downloadllvm-f93aa5157a3317b24cff660ac972814ee9ed4dbc.zip
llvm-f93aa5157a3317b24cff660ac972814ee9ed4dbc.tar.gz
llvm-f93aa5157a3317b24cff660ac972814ee9ed4dbc.tar.bz2
[AMDGPU] Introduce GFX9/10.1/10.3/11 Generic Targets (#76955)
These generic targets include multiple GPUs and will, in the future, provide a way to build once and run on multiple GPUs, at the cost of fewer optimization opportunities. Note that this only does the compiler side of things; device libs and runtimes/loader/etc. don't know about these targets yet, so none of them actually work in practice right now. This is just the initial commit to make LLVM aware of them. This contains the documentation changes for both this change and #76954 as well.
-rw-r--r--clang/lib/Basic/Targets/AMDGPU.cpp21
-rw-r--r--clang/test/Driver/amdgpu-macros.cl5
-rw-r--r--clang/test/Driver/amdgpu-mcpu.cl10
-rw-r--r--clang/test/Misc/target-invalid-cpu-note.c2
-rw-r--r--llvm/docs/AMDGPUUsage.rst304
-rw-r--r--llvm/include/llvm/BinaryFormat/ELF.h6
-rw-r--r--llvm/include/llvm/TargetParser/TargetParser.h10
-rw-r--r--llvm/lib/Object/ELFObjectFile.cpp10
-rw-r--r--llvm/lib/ObjectYAML/ELFYAML.cpp4
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPU.td87
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp7
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPURemoveIncompatibleFunctions.cpp6
-rw-r--r--llvm/lib/Target/AMDGPU/GCNProcessors.td22
-rw-r--r--llvm/lib/Target/AMDGPU/GCNSubtarget.h4
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp26
-rw-r--r--llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h11
-rw-r--r--llvm/lib/TargetParser/TargetParser.cpp46
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll19
-rw-r--r--llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll14
-rw-r--r--llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll10
-rw-r--r--llvm/test/CodeGen/AMDGPU/gds-allocation.ll1
-rw-r--r--llvm/test/CodeGen/AMDGPU/gds-atomic.ll1
-rw-r--r--llvm/test/CodeGen/AMDGPU/generic-targets-require-v6.ll18
-rw-r--r--llvm/test/CodeGen/AMDGPU/hsa-generic-target-features.ll31
-rw-r--r--llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.d16.dim.ll3
-rw-r--r--llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll3
-rw-r--r--llvm/test/CodeGen/AMDGPU/mad-mix.ll421
-rw-r--r--llvm/test/CodeGen/AMDGPU/unsupported-image-sample.ll12
-rw-r--r--llvm/test/Object/AMDGPU/elf-header-flags-mach.yaml29
-rw-r--r--llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll20
-rw-r--r--llvm/test/tools/llvm-readobj/ELF/AMDGPU/elf-headers.test12
-rw-r--r--llvm/tools/llvm-readobj/ELFDumper.cpp128
32 files changed, 1101 insertions, 202 deletions
diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp
index 141501e8..10cba6b 100644
--- a/clang/lib/Basic/Targets/AMDGPU.cpp
+++ b/clang/lib/Basic/Targets/AMDGPU.cpp
@@ -17,6 +17,7 @@
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/MacroBuilder.h"
#include "clang/Basic/TargetBuiltins.h"
+#include "llvm/ADT/SmallString.h"
using namespace clang;
using namespace clang::targets;
@@ -279,13 +280,25 @@ void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
if (GPUKind == llvm::AMDGPU::GK_NONE && !IsHIPHost)
return;
- StringRef CanonName = isAMDGCN(getTriple()) ? getArchNameAMDGCN(GPUKind)
- : getArchNameR600(GPUKind);
+ llvm::SmallString<16> CanonName =
+ (isAMDGCN(getTriple()) ? getArchNameAMDGCN(GPUKind)
+ : getArchNameR600(GPUKind));
+
+ // Sanitize the name of generic targets.
+ // e.g. gfx10.1-generic -> gfx10_1_generic
+ if (GPUKind >= llvm::AMDGPU::GK_AMDGCN_GENERIC_FIRST &&
+ GPUKind <= llvm::AMDGPU::GK_AMDGCN_GENERIC_LAST) {
+ std::replace(CanonName.begin(), CanonName.end(), '.', '_');
+ std::replace(CanonName.begin(), CanonName.end(), '-', '_');
+ }
+
Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
// Emit macros for gfx family e.g. gfx906 -> __GFX9__, gfx1030 -> __GFX10__
if (isAMDGCN(getTriple()) && !IsHIPHost) {
- assert(CanonName.starts_with("gfx") && "Invalid amdgcn canonical name");
- Builder.defineMacro(Twine("__") + Twine(CanonName.drop_back(2).upper()) +
+ assert(StringRef(CanonName).starts_with("gfx") &&
+ "Invalid amdgcn canonical name");
+ StringRef CanonFamilyName = getArchFamilyNameAMDGCN(GPUKind);
+ Builder.defineMacro(Twine("__") + Twine(CanonFamilyName.upper()) +
Twine("__"));
Builder.defineMacro("__amdgcn_processor__",
Twine("\"") + Twine(CanonName) + Twine("\""));
diff --git a/clang/test/Driver/amdgpu-macros.cl b/clang/test/Driver/amdgpu-macros.cl
index 81c22af..3b10444 100644
--- a/clang/test/Driver/amdgpu-macros.cl
+++ b/clang/test/Driver/amdgpu-macros.cl
@@ -131,6 +131,11 @@
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1200 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1200 -DFAMILY=GFX12
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1201 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1201 -DFAMILY=GFX12
+// RUN: %clang -E -dM -target amdgcn -mcpu=gfx9-generic %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx9_generic -DFAMILY=GFX9
+// RUN: %clang -E -dM -target amdgcn -mcpu=gfx10.1-generic %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx10_1_generic -DFAMILY=GFX10
+// RUN: %clang -E -dM -target amdgcn -mcpu=gfx10.3-generic %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx10_3_generic -DFAMILY=GFX10
+// RUN: %clang -E -dM -target amdgcn -mcpu=gfx11-generic %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx11_generic -DFAMILY=GFX11
+
// ARCH-GCN-DAG: #define FP_FAST_FMA 1
// FAST_FMAF-DAG: #define FP_FAST_FMAF 1
diff --git a/clang/test/Driver/amdgpu-mcpu.cl b/clang/test/Driver/amdgpu-mcpu.cl
index eeb16ae..6f18ea0 100644
--- a/clang/test/Driver/amdgpu-mcpu.cl
+++ b/clang/test/Driver/amdgpu-mcpu.cl
@@ -115,6 +115,11 @@
// RUN: %clang -### -target amdgcn -mcpu=gfx1200 %s 2>&1 | FileCheck --check-prefix=GFX1200 %s
// RUN: %clang -### -target amdgcn -mcpu=gfx1201 %s 2>&1 | FileCheck --check-prefix=GFX1201 %s
+// RUN: %clang -### -target amdgcn -mcpu=gfx9-generic %s 2>&1 | FileCheck --check-prefix=GFX9_GENERIC %s
+// RUN: %clang -### -target amdgcn -mcpu=gfx10.1-generic %s 2>&1 | FileCheck --check-prefix=GFX10_1_GENERIC %s
+// RUN: %clang -### -target amdgcn -mcpu=gfx10.3-generic %s 2>&1 | FileCheck --check-prefix=GFX10_3_GENERIC %s
+// RUN: %clang -### -target amdgcn -mcpu=gfx11-generic %s 2>&1 | FileCheck --check-prefix=GFX11_GENERIC %s
+
// GCNDEFAULT-NOT: -target-cpu
// GFX600: "-target-cpu" "gfx600"
// GFX601: "-target-cpu" "gfx601"
@@ -160,3 +165,8 @@
// GFX1151: "-target-cpu" "gfx1151"
// GFX1200: "-target-cpu" "gfx1200"
// GFX1201: "-target-cpu" "gfx1201"
+
+// GFX9_GENERIC: "-target-cpu" "gfx9-generic"
+// GFX10_1_GENERIC: "-target-cpu" "gfx10.1-generic"
+// GFX10_3_GENERIC: "-target-cpu" "gfx10.3-generic"
+// GFX11_GENERIC: "-target-cpu" "gfx11-generic"
diff --git a/clang/test/Misc/target-invalid-cpu-note.c b/clang/test/Misc/target-invalid-cpu-note.c
index 39ed02f..123b203 100644
--- a/clang/test/Misc/target-invalid-cpu-note.c
+++ b/clang/test/Misc/target-invalid-cpu-note.c
@@ -37,7 +37,7 @@
// RUN: not %clang_cc1 -triple amdgcn--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix AMDGCN
// AMDGCN: error: unknown target CPU 'not-a-cpu'
-// AMDGCN-NEXT: note: valid target CPU values are: gfx600, tahiti, gfx601, pitcairn, verde, gfx602, hainan, oland, gfx700, kaveri, gfx701, hawaii, gfx702, gfx703, kabini, mullins, gfx704, bonaire, gfx705, gfx801, carrizo, gfx802, iceland, tonga, gfx803, fiji, polaris10, polaris11, gfx805, tongapro, gfx810, stoney, gfx900, gfx902, gfx904, gfx906, gfx908, gfx909, gfx90a, gfx90c, gfx940, gfx941, gfx942, gfx1010, gfx1011, gfx1012, gfx1013, gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035, gfx1036, gfx1100, gfx1101, gfx1102, gfx1103, gfx1150, gfx1151, gfx1200, gfx1201{{$}}
+// AMDGCN-NEXT: note: valid target CPU values are: gfx600, tahiti, gfx601, pitcairn, verde, gfx602, hainan, oland, gfx700, kaveri, gfx701, hawaii, gfx702, gfx703, kabini, mullins, gfx704, bonaire, gfx705, gfx801, carrizo, gfx802, iceland, tonga, gfx803, fiji, polaris10, polaris11, gfx805, tongapro, gfx810, stoney, gfx900, gfx902, gfx904, gfx906, gfx908, gfx909, gfx90a, gfx90c, gfx940, gfx941, gfx942, gfx1010, gfx1011, gfx1012, gfx1013, gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035, gfx1036, gfx1100, gfx1101, gfx1102, gfx1103, gfx1150, gfx1151, gfx1200, gfx1201, gfx9-generic, gfx10.1-generic, gfx10.3-generic, gfx11-generic{{$}}
// RUN: not %clang_cc1 -triple wasm64--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix WEBASM
// WEBASM: error: unknown target CPU 'not-a-cpu'
diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index ebc7fda..970b5e2 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -520,6 +520,108 @@ Every processor supports every OS ABI (see :ref:`amdgpu-os`) with the following
=========== =============== ============ ===== ================= =============== =============== ======================
+Generic processors allow execution of a single code object on any of the processors that
+it supports. Such code objects may not perform as well as those for the non-generic processors.
+
+Generic processors are only available on code object V6 and above (see :ref:`amdgpu-elf-code-object`).
+
+Generic processor code objects are versioned (see :ref:`amdgpu-elf-header-e_flags-table-v6-onwards`) between 1 and 255.
+The version of non-generic code objects is always set to 0.
+
+For a generic code object, adding a new supported processor may require the code generated for the generic target to be changed
+so it can continue to execute on the previously supported processors as well as on the new one.
+When this happens, the generic code object version number is incremented at the same time as the generic target is updated.
+
+Each supported processor of a generic target is mapped to the version it was introduced in.
+A generic code object can execute on a supported processor if the version of the code object being loaded is
+greater than or equal to the version in which the processor was added to the generic target.
+
+ .. table:: AMDGPU Generic Processors
+ :name: amdgpu-generic-processor-table
+
+ ==================== ============== ================= ================== ================= =================================
+ Processor Target Supported Target Features Target Properties Target Restrictions
+ Triple Processors Supported
+ Architecture
+
+ ==================== ============== ================= ================== ================= =================================
+ ``gfx9-generic`` ``amdgcn`` - ``gfx900`` - xnack - Absolute flat - ``v_mad_mix`` instructions
+ - ``gfx902`` scratch are not available on
+ - ``gfx904`` ``gfx900``, ``gfx902``,
+ - ``gfx906`` ``gfx909``, ``gfx90c``
+ - ``gfx909`` - ``v_fma_mix`` instructions
+ - ``gfx90c`` are not available on ``gfx904``
+ - sramecc is not available on
+ ``gfx906``
+ - The following instructions
+ are not available on ``gfx906``:
+
+ - ``v_fmac_f32``
+ - ``v_xnor_b32``
+ - ``v_dot4_i32_i8``
+ - ``v_dot8_i32_i4``
+ - ``v_dot2_i32_i16``
+ - ``v_dot2_u32_u16``
+ - ``v_dot4_u32_u8``
+ - ``v_dot8_u32_u4``
+ - ``v_dot2_f32_f16``
+
+
+ ``gfx10.1-generic`` ``amdgcn`` - ``gfx1010`` - xnack - Absolute flat - The following instructions are
+ - ``gfx1011`` - wavefrontsize64 scratch not available on ``gfx1011``
+ - ``gfx1012`` - cumode and ``gfx1012``
+ - ``gfx1013``
+ - ``v_dot4_i32_i8``
+ - ``v_dot8_i32_i4``
+ - ``v_dot2_i32_i16``
+ - ``v_dot2_u32_u16``
+ - ``v_dot2c_f32_f16``
+ - ``v_dot4c_i32_i8``
+ - ``v_dot4_u32_u8``
+ - ``v_dot8_u32_u4``
+ - ``v_dot2_f32_f16``
+
+ - BVH Ray Tracing instructions
+ are not available on
+ ``gfx1013``
+
+
+ ``gfx10.3-generic`` ``amdgcn`` - ``gfx1030`` - wavefrontsize64 - Absolute flat No restrictions.
+ - ``gfx1031`` - cumode scratch
+ - ``gfx1032``
+ - ``gfx1033``
+ - ``gfx1034``
+ - ``gfx1035``
+ - ``gfx1036``
+
+
+ ``gfx11-generic`` ``amdgcn`` - ``gfx1100`` - wavefrontsize64 - Architected Various codegen pessimizations
+ - ``gfx1101`` - cumode flat scratch are applied to work around some
+ - ``gfx1102`` - Packed hazards specific to some targets
+ - ``gfx1103`` work-item within this family.
+ - ``gfx1150`` IDs
+ - ``gfx1151`` Not all VGPRs can be used on:
+
+ - ``gfx1100``
+ - ``gfx1101``
+ - ``gfx1151``
+
+ SALU floating point instructions
+ and single-use VGPR hint
+ instructions are not available
+ on:
+
+ - ``gfx1150``
+ - ``gfx1151``
+
+ SGPRs are not supported for src1
+ in dpp instructions for:
+
+ - ``gfx1150``
+ - ``gfx1151``
+ ==================== ============== ================= ================== ================= =================================
+
+
.. _amdgpu-target-features:
Target Features
@@ -533,7 +635,7 @@ generating the code. A mismatch of features may result in incorrect
execution, or a reduction in performance.
The target features supported by each processor are listed in
-:ref:`amdgpu-processor-table`.
+:ref:`amdgpu-processors`.
Target features are controlled by exactly one of the following Clang
options:
@@ -1443,6 +1545,7 @@ The AMDGPU backend uses the following ELF header:
- ``ELFABIVERSION_AMDGPU_HSA_V3``
- ``ELFABIVERSION_AMDGPU_HSA_V4``
- ``ELFABIVERSION_AMDGPU_HSA_V5``
+ - ``ELFABIVERSION_AMDGPU_HSA_V6``
- ``ELFABIVERSION_AMDGPU_PAL``
- ``ELFABIVERSION_AMDGPU_MESA3D``
``e_type`` - ``ET_REL``
@@ -1451,7 +1554,8 @@ The AMDGPU backend uses the following ELF header:
``e_entry`` 0
``e_flags`` See :ref:`amdgpu-elf-header-e_flags-v2-table`,
:ref:`amdgpu-elf-header-e_flags-table-v3`,
- and :ref:`amdgpu-elf-header-e_flags-table-v4-onwards`
+ :ref:`amdgpu-elf-header-e_flags-table-v4-v5`,
+ and :ref:`amdgpu-elf-header-e_flags-table-v6-onwards`
========================== ===============================
..
@@ -1471,6 +1575,7 @@ The AMDGPU backend uses the following ELF header:
``ELFABIVERSION_AMDGPU_HSA_V3`` 1
``ELFABIVERSION_AMDGPU_HSA_V4`` 2
``ELFABIVERSION_AMDGPU_HSA_V5`` 3
+ ``ELFABIVERSION_AMDGPU_HSA_V6`` 4
``ELFABIVERSION_AMDGPU_PAL`` 0
``ELFABIVERSION_AMDGPU_MESA3D`` 0
=============================== =====
@@ -1517,6 +1622,10 @@ The AMDGPU backend uses the following ELF header:
``-mcode-object-version=5``. This is the default code object
version if not specified.
+ * ``ELFABIVERSION_AMDGPU_HSA_V6`` is used to specify the version of AMD HSA
+ runtime ABI for code object V6. Specify using the Clang option
+ ``-mcode-object-version=6``.
+
* ``ELFABIVERSION_AMDGPU_PAL`` is used to specify the version of AMD PAL
runtime ABI.
@@ -1543,8 +1652,9 @@ The AMDGPU backend uses the following ELF header:
``NT_AMD_HSA_ISA_VERSION`` note record for code object V2 (see
:ref:`amdgpu-note-records-v2`) and in the ``EF_AMDGPU_MACH`` bit field of the
``e_flags`` for code object V3 and above (see
- :ref:`amdgpu-elf-header-e_flags-table-v3` and
- :ref:`amdgpu-elf-header-e_flags-table-v4-onwards`).
+ :ref:`amdgpu-elf-header-e_flags-table-v3`,
+ :ref:`amdgpu-elf-header-e_flags-table-v4-v5` and
+ :ref:`amdgpu-elf-header-e_flags-table-v6-onwards`).
``e_entry``
The entry point is 0 as the entry points for individual kernels must be
@@ -1615,8 +1725,8 @@ The AMDGPU backend uses the following ELF header:
:ref:`amdgpu-target-features`.
================================= ===== =============================
- .. table:: AMDGPU ELF Header ``e_flags`` for Code Object V4 and After
- :name: amdgpu-elf-header-e_flags-table-v4-onwards
+ .. table:: AMDGPU ELF Header ``e_flags`` for Code Object V4 and V5
+ :name: amdgpu-elf-header-e_flags-table-v4-v5
============================================ ===== ===================================
Name Value Description
@@ -1642,80 +1752,120 @@ The AMDGPU backend uses the following ELF header:
``EF_AMDGPU_FEATURE_SRAMECC_ON_V4`` 0xc00 SRAMECC enabled.
============================================ ===== ===================================
+ .. table:: AMDGPU ELF Header ``e_flags`` for Code Object V6 and After
+ :name: amdgpu-elf-header-e_flags-table-v6-onwards
+
+ ============================================ ========== =========================================
+ Name Value Description
+ ============================================ ========== =========================================
+ ``EF_AMDGPU_MACH`` 0x0ff AMDGPU processor selection
+ mask for
+ ``EF_AMDGPU_MACH_xxx`` values
+ defined in
+ :ref:`amdgpu-ef-amdgpu-mach-table`.
+ ``EF_AMDGPU_FEATURE_XNACK_V4`` 0x300 XNACK selection mask for
+ ``EF_AMDGPU_FEATURE_XNACK_*_V4``
+ values.
+ ``EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4`` 0x000 XNACK unsupported.
+ ``EF_AMDGPU_FEATURE_XNACK_ANY_V4`` 0x100 XNACK can have any value.
+ ``EF_AMDGPU_FEATURE_XNACK_OFF_V4`` 0x200 XNACK disabled.
+ ``EF_AMDGPU_FEATURE_XNACK_ON_V4`` 0x300 XNACK enabled.
+ ``EF_AMDGPU_FEATURE_SRAMECC_V4`` 0xc00 SRAMECC selection mask for
+ ``EF_AMDGPU_FEATURE_SRAMECC_*_V4``
+ values.
+ ``EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4`` 0x000 SRAMECC unsupported.
+ ``EF_AMDGPU_FEATURE_SRAMECC_ANY_V4`` 0x400 SRAMECC can have any value.
+ ``EF_AMDGPU_FEATURE_SRAMECC_OFF_V4`` 0x800 SRAMECC disabled.
+ ``EF_AMDGPU_FEATURE_SRAMECC_ON_V4`` 0xc00 SRAMECC enabled.
+ ``EF_AMDGPU_GENERIC_VERSION_V`` 0xff000000 Generic code object version selection
+ mask. This is a value between 1 and 255,
+ stored in the most significant byte
+ of EFLAGS.
+ See :ref:`amdgpu-generic-processor-table`
+ ============================================ ========== =========================================
+
.. table:: AMDGPU ``EF_AMDGPU_MACH`` Values
:name: amdgpu-ef-amdgpu-mach-table
- ==================================== ========== =============================
- Name Value Description (see
- :ref:`amdgpu-processor-table`)
- ==================================== ========== =============================
- ``EF_AMDGPU_MACH_NONE`` 0x000 *not specified*
- ``EF_AMDGPU_MACH_R600_R600`` 0x001 ``r600``
- ``EF_AMDGPU_MACH_R600_R630`` 0x002 ``r630``
- ``EF_AMDGPU_MACH_R600_RS880`` 0x003 ``rs880``
- ``EF_AMDGPU_MACH_R600_RV670`` 0x004 ``rv670``
- ``EF_AMDGPU_MACH_R600_RV710`` 0x005 ``rv710``
- ``EF_AMDGPU_MACH_R600_RV730`` 0x006 ``rv730``
- ``EF_AMDGPU_MACH_R600_RV770`` 0x007 ``rv770``
- ``EF_AMDGPU_MACH_R600_CEDAR`` 0x008 ``cedar``
- ``EF_AMDGPU_MACH_R600_CYPRESS`` 0x009 ``cypress``
- ``EF_AMDGPU_MACH_R600_JUNIPER`` 0x00a ``juniper``
- ``EF_AMDGPU_MACH_R600_REDWOOD`` 0x00b ``redwood``
- ``EF_AMDGPU_MACH_R600_SUMO`` 0x00c ``sumo``
- ``EF_AMDGPU_MACH_R600_BARTS`` 0x00d ``barts``
- ``EF_AMDGPU_MACH_R600_CAICOS`` 0x00e ``caicos``
- ``EF_AMDGPU_MACH_R600_CAYMAN`` 0x00f ``cayman``
- ``EF_AMDGPU_MACH_R600_TURKS`` 0x010 ``turks``
- *reserved* 0x011 - Reserved for ``r600``
- 0x01f architecture processors.
- ``EF_AMDGPU_MACH_AMDGCN_GFX600`` 0x020 ``gfx600``
- ``EF_AMDGPU_MACH_AMDGCN_GFX601`` 0x021 ``gfx601``
- ``EF_AMDGPU_MACH_AMDGCN_GFX700`` 0x022 ``gfx700``
- ``EF_AMDGPU_MACH_AMDGCN_GFX701`` 0x023 ``gfx701``
- ``EF_AMDGPU_MACH_AMDGCN_GFX702`` 0x024 ``gfx702``
- ``EF_AMDGPU_MACH_AMDGCN_GFX703`` 0x025 ``gfx703``
- ``EF_AMDGPU_MACH_AMDGCN_GFX704`` 0x026 ``gfx704``
- *reserved* 0x027 Reserved.
- ``EF_AMDGPU_MACH_AMDGCN_GFX801`` 0x028 ``gfx801``
- ``EF_AMDGPU_MACH_AMDGCN_GFX802`` 0x029 ``gfx802``
- ``EF_AMDGPU_MACH_AMDGCN_GFX803`` 0x02a ``gfx803``
- ``EF_AMDGPU_MACH_AMDGCN_GFX810`` 0x02b ``gfx810``
- ``EF_AMDGPU_MACH_AMDGCN_GFX900`` 0x02c ``gfx900``
- ``EF_AMDGPU_MACH_AMDGCN_GFX902`` 0x02d ``gfx902``
- ``EF_AMDGPU_MACH_AMDGCN_GFX904`` 0x02e ``gfx904``
- ``EF_AMDGPU_MACH_AMDGCN_GFX906`` 0x02f ``gfx906``
- ``EF_AMDGPU_MACH_AMDGCN_GFX908`` 0x030 ``gfx908``
- ``EF_AMDGPU_MACH_AMDGCN_GFX909`` 0x031 ``gfx909``
- ``EF_AMDGPU_MACH_AMDGCN_GFX90C`` 0x032 ``gfx90c``
- ``EF_AMDGPU_MACH_AMDGCN_GFX1010`` 0x033 ``gfx1010``
- ``EF_AMDGPU_MACH_AMDGCN_GFX1011`` 0x034 ``gfx1011``
- ``EF_AMDGPU_MACH_AMDGCN_GFX1012`` 0x035 ``gfx1012``
- ``EF_AMDGPU_MACH_AMDGCN_GFX1030`` 0x036 ``gfx1030``
- ``EF_AMDGPU_MACH_AMDGCN_GFX1031`` 0x037 ``gfx1031``
- ``EF_AMDGPU_MACH_AMDGCN_GFX1032`` 0x038 ``gfx1032``
- ``EF_AMDGPU_MACH_AMDGCN_GFX1033`` 0x039 ``gfx1033``
- ``EF_AMDGPU_MACH_AMDGCN_GFX602`` 0x03a ``gfx602``
- ``EF_AMDGPU_MACH_AMDGCN_GFX705`` 0x03b ``gfx705``
- ``EF_AMDGPU_MACH_AMDGCN_GFX805`` 0x03c ``gfx805``
- ``EF_AMDGPU_MACH_AMDGCN_GFX1035`` 0x03d ``gfx1035``
- ``EF_AMDGPU_MACH_AMDGCN_GFX1034`` 0x03e ``gfx1034``
- ``EF_AMDGPU_MACH_AMDGCN_GFX90A`` 0x03f ``gfx90a``
- ``EF_AMDGPU_MACH_AMDGCN_GFX940`` 0x040 ``gfx940``
- ``EF_AMDGPU_MACH_AMDGCN_GFX1100`` 0x041 ``gfx1100``
- ``EF_AMDGPU_MACH_AMDGCN_GFX1013`` 0x042 ``gfx1013``
- ``EF_AMDGPU_MACH_AMDGCN_GFX1150`` 0x043 ``gfx1150``
- ``EF_AMDGPU_MACH_AMDGCN_GFX1103`` 0x044 ``gfx1103``
- ``EF_AMDGPU_MACH_AMDGCN_GFX1036`` 0x045 ``gfx1036``
- ``EF_AMDGPU_MACH_AMDGCN_GFX1101`` 0x046 ``gfx1101``
- ``EF_AMDGPU_MACH_AMDGCN_GFX1102`` 0x047 ``gfx1102``
- ``EF_AMDGPU_MACH_AMDGCN_GFX1200`` 0x048 ``gfx1200``
- *reserved* 0x049 Reserved.
- ``EF_AMDGPU_MACH_AMDGCN_GFX1151`` 0x04a ``gfx1151``
- ``EF_AMDGPU_MACH_AMDGCN_GFX941`` 0x04b ``gfx941``
- ``EF_AMDGPU_MACH_AMDGCN_GFX942`` 0x04c ``gfx942``
- *reserved* 0x04d Reserved.
- ``EF_AMDGPU_MACH_AMDGCN_GFX1201`` 0x04e ``gfx1201``
- ==================================== ========== =============================
+ ========================================== ========== =============================
+ Name Value Description (see
+ :ref:`amdgpu-processor-table`)
+ ========================================== ========== =============================
+ ``EF_AMDGPU_MACH_NONE`` 0x000 *not specified*
+ ``EF_AMDGPU_MACH_R600_R600`` 0x001 ``r600``
+ ``EF_AMDGPU_MACH_R600_R630`` 0x002 ``r630``
+ ``EF_AMDGPU_MACH_R600_RS880`` 0x003 ``rs880``
+ ``EF_AMDGPU_MACH_R600_RV670`` 0x004 ``rv670``
+ ``EF_AMDGPU_MACH_R600_RV710`` 0x005 ``rv710``
+ ``EF_AMDGPU_MACH_R600_RV730`` 0x006 ``rv730``
+ ``EF_AMDGPU_MACH_R600_RV770`` 0x007 ``rv770``
+ ``EF_AMDGPU_MACH_R600_CEDAR`` 0x008 ``cedar``
+ ``EF_AMDGPU_MACH_R600_CYPRESS`` 0x009 ``cypress``
+ ``EF_AMDGPU_MACH_R600_JUNIPER`` 0x00a ``juniper``
+ ``EF_AMDGPU_MACH_R600_REDWOOD`` 0x00b ``redwood``
+ ``EF_AMDGPU_MACH_R600_SUMO`` 0x00c ``sumo``
+ ``EF_AMDGPU_MACH_R600_BARTS`` 0x00d ``barts``
+ ``EF_AMDGPU_MACH_R600_CAICOS`` 0x00e ``caicos``
+ ``EF_AMDGPU_MACH_R600_CAYMAN`` 0x00f ``cayman``
+ ``EF_AMDGPU_MACH_R600_TURKS`` 0x010 ``turks``
+ *reserved* 0x011 - Reserved for ``r600``
+ 0x01f architecture processors.
+ ``EF_AMDGPU_MACH_AMDGCN_GFX600`` 0x020 ``gfx600``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX601`` 0x021 ``gfx601``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX700`` 0x022 ``gfx700``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX701`` 0x023 ``gfx701``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX702`` 0x024 ``gfx702``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX703`` 0x025 ``gfx703``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX704`` 0x026 ``gfx704``
+ *reserved* 0x027 Reserved.
+ ``EF_AMDGPU_MACH_AMDGCN_GFX801`` 0x028 ``gfx801``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX802`` 0x029 ``gfx802``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX803`` 0x02a ``gfx803``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX810`` 0x02b ``gfx810``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX900`` 0x02c ``gfx900``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX902`` 0x02d ``gfx902``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX904`` 0x02e ``gfx904``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX906`` 0x02f ``gfx906``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX908`` 0x030 ``gfx908``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX909`` 0x031 ``gfx909``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX90C`` 0x032 ``gfx90c``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1010`` 0x033 ``gfx1010``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1011`` 0x034 ``gfx1011``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1012`` 0x035 ``gfx1012``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1030`` 0x036 ``gfx1030``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1031`` 0x037 ``gfx1031``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1032`` 0x038 ``gfx1032``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1033`` 0x039 ``gfx1033``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX602`` 0x03a ``gfx602``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX705`` 0x03b ``gfx705``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX805`` 0x03c ``gfx805``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1035`` 0x03d ``gfx1035``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1034`` 0x03e ``gfx1034``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX90A`` 0x03f ``gfx90a``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX940`` 0x040 ``gfx940``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1100`` 0x041 ``gfx1100``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1013`` 0x042 ``gfx1013``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1150`` 0x043 ``gfx1150``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1103`` 0x044 ``gfx1103``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1036`` 0x045 ``gfx1036``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1101`` 0x046 ``gfx1101``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1102`` 0x047 ``gfx1102``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1200`` 0x048 ``gfx1200``
+ *reserved* 0x049 Reserved.
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1151`` 0x04a ``gfx1151``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX941`` 0x04b ``gfx941``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX942`` 0x04c ``gfx942``
+ *reserved* 0x04d Reserved.
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1201`` 0x04e ``gfx1201``
+ *reserved* 0x04f Reserved.
+ *reserved* 0x050 Reserved.
+ ``EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC`` 0x051 ``gfx9-generic``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC`` 0x052 ``gfx10.1-generic``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC`` 0x053 ``gfx10.3-generic``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC`` 0x054 ``gfx11-generic``
+ ========================================== ========== =============================
Sections
--------
diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h
index efd41f9..3eddaee 100644
--- a/llvm/include/llvm/BinaryFormat/ELF.h
+++ b/llvm/include/llvm/BinaryFormat/ELF.h
@@ -790,11 +790,15 @@ enum : unsigned {
EF_AMDGPU_MACH_AMDGCN_GFX1201 = 0x04e,
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X4F = 0x04f,
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X50 = 0x050,
+ EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC = 0x051,
+ EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC = 0x052,
+ EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC = 0x053,
+ EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC = 0x054,
// clang-format on
// First/last AMDGCN-based processors.
EF_AMDGPU_MACH_AMDGCN_FIRST = EF_AMDGPU_MACH_AMDGCN_GFX600,
- EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX1201,
+ EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC,
// Indicates if the "xnack" target feature is enabled for all code contained
// in the object.
diff --git a/llvm/include/llvm/TargetParser/TargetParser.h b/llvm/include/llvm/TargetParser/TargetParser.h
index 6464285..7da1170 100644
--- a/llvm/include/llvm/TargetParser/TargetParser.h
+++ b/llvm/include/llvm/TargetParser/TargetParser.h
@@ -111,6 +111,14 @@ enum GPUKind : uint32_t {
GK_AMDGCN_FIRST = GK_GFX600,
GK_AMDGCN_LAST = GK_GFX1201,
+
+ GK_GFX9_GENERIC = 192,
+ GK_GFX10_1_GENERIC = 193,
+ GK_GFX10_3_GENERIC = 194,
+ GK_GFX11_GENERIC = 195,
+
+ GK_AMDGCN_GENERIC_FIRST = GK_GFX9_GENERIC,
+ GK_AMDGCN_GENERIC_LAST = GK_GFX11_GENERIC,
};
/// Instruction set architecture version.
@@ -147,6 +155,8 @@ enum ArchFeatureKind : uint32_t {
FEATURE_WGP = 1 << 9,
};
+StringRef getArchFamilyNameAMDGCN(GPUKind AK);
+
StringRef getArchNameAMDGCN(GPUKind AK);
StringRef getArchNameR600(GPUKind AK);
StringRef getCanonicalArchName(const Triple &T, StringRef Arch);
diff --git a/llvm/lib/Object/ELFObjectFile.cpp b/llvm/lib/Object/ELFObjectFile.cpp
index 38a9e0e..01949c6 100644
--- a/llvm/lib/Object/ELFObjectFile.cpp
+++ b/llvm/lib/Object/ELFObjectFile.cpp
@@ -514,6 +514,16 @@ StringRef ELFObjectFileBase::getAMDGPUCPUName() const {
return "gfx1200";
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1201:
return "gfx1201";
+
+ // Generic AMDGCN targets
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC:
+ return "gfx9-generic";
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC:
+ return "gfx10.1-generic";
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC:
+ return "gfx10.3-generic";
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC:
+ return "gfx11-generic";
default:
llvm_unreachable("Unknown EF_AMDGPU_MACH value");
}
diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp
index 1436e92..de1ef24 100644
--- a/llvm/lib/ObjectYAML/ELFYAML.cpp
+++ b/llvm/lib/ObjectYAML/ELFYAML.cpp
@@ -612,6 +612,10 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO,
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1151, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1200, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1201, EF_AMDGPU_MACH);
+ BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC, EF_AMDGPU_MACH);
+ BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC, EF_AMDGPU_MACH);
+ BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC, EF_AMDGPU_MACH);
+ BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC, EF_AMDGPU_MACH);
switch (Object->Header.ABIVersion) {
default:
// ELFOSABI_AMDGPU_PAL, ELFOSABI_AMDGPU_MESA3D support *_V3 flags.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 55dbc1a..4ab2b12 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -1002,6 +1002,12 @@ def FeatureGWS : SubtargetFeature<"gws",
"Has Global Wave Sync"
>;
+def FeatureRequiresCOV6 : SubtargetFeature<"requires-cov6",
+ "RequiresCOV6",
+ "true",
+ "Target Requires Code Object V6"
+>;
+
// Dummy feature used to disable assembler instructions.
def FeatureDisable : SubtargetFeature<"",
"FeatureDisable","true",
@@ -1212,6 +1218,17 @@ def FeatureISAVersion9_0_Common : FeatureSet<
FeatureImageInsts,
FeatureMadMacF32Insts]>;
+def FeatureISAVersion9_0_Consumer_Common : FeatureSet<
+ !listconcat(FeatureISAVersion9_0_Common.Features,
+ [FeatureImageGather4D16Bug,
+ FeatureDsSrc2Insts,
+ FeatureExtendedImageInsts,
+ FeatureGDS])>;
+
+def FeatureISAVersion9_Generic : FeatureSet<
+ !listconcat(FeatureISAVersion9_0_Consumer_Common.Features,
+ [FeatureRequiresCOV6])>;
+
def FeatureISAVersion9_0_MI_Common : FeatureSet<
!listconcat(FeatureISAVersion9_0_Common.Features,
[FeatureFmaMixInsts,
@@ -1230,43 +1247,27 @@ def FeatureISAVersion9_0_MI_Common : FeatureSet<
FeatureSupportsSRAMECC])>;
def FeatureISAVersion9_0_0 : FeatureSet<
- !listconcat(FeatureISAVersion9_0_Common.Features,
- [FeatureGDS,
- FeatureMadMixInsts,
- FeatureDsSrc2Insts,
- FeatureExtendedImageInsts,
- FeatureImageGather4D16Bug])>;
+ !listconcat(FeatureISAVersion9_0_Consumer_Common.Features,
+ [FeatureMadMixInsts])>;
def FeatureISAVersion9_0_2 : FeatureSet<
- !listconcat(FeatureISAVersion9_0_Common.Features,
- [FeatureGDS,
- FeatureMadMixInsts,
- FeatureDsSrc2Insts,
- FeatureExtendedImageInsts,
- FeatureImageGather4D16Bug])>;
+ !listconcat(FeatureISAVersion9_0_Consumer_Common.Features,
+ [FeatureMadMixInsts])>;
def FeatureISAVersion9_0_4 : FeatureSet<
- !listconcat(FeatureISAVersion9_0_Common.Features,
- [FeatureGDS,
- FeatureDsSrc2Insts,
- FeatureExtendedImageInsts,
- FeatureFmaMixInsts,
- FeatureImageGather4D16Bug])>;
+ !listconcat(FeatureISAVersion9_0_Consumer_Common.Features,
+ [FeatureFmaMixInsts])>;
def FeatureISAVersion9_0_6 : FeatureSet<
- !listconcat(FeatureISAVersion9_0_Common.Features,
- [FeatureGDS,
- HalfRate64Ops,
+ !listconcat(FeatureISAVersion9_0_Consumer_Common.Features,
+ [HalfRate64Ops,
FeatureFmaMixInsts,
- FeatureDsSrc2Insts,
- FeatureExtendedImageInsts,
FeatureDLInsts,
FeatureDot1Insts,
FeatureDot2Insts,
FeatureDot7Insts,
FeatureDot10Insts,
- FeatureSupportsSRAMECC,
- FeatureImageGather4D16Bug])>;
+ FeatureSupportsSRAMECC])>;
def FeatureISAVersion9_0_8 : FeatureSet<
!listconcat(FeatureISAVersion9_0_MI_Common.Features,
@@ -1279,13 +1280,9 @@ def FeatureISAVersion9_0_8 : FeatureSet<
FeatureImageGather4D16Bug])>;
def FeatureISAVersion9_0_9 : FeatureSet<
- !listconcat(FeatureISAVersion9_0_Common.Features,
- [FeatureGDS,
- FeatureMadMixInsts,
- FeatureDsSrc2Insts,
- FeatureExtendedImageInsts,
- FeatureImageInsts,
- FeatureImageGather4D16Bug])>;
+ !listconcat(FeatureISAVersion9_0_Consumer_Common.Features,
+ [FeatureMadMixInsts,
+ FeatureImageInsts])>;
def FeatureISAVersion9_0_A : FeatureSet<
!listconcat(FeatureISAVersion9_0_MI_Common.Features,
@@ -1301,12 +1298,8 @@ def FeatureISAVersion9_0_A : FeatureSet<
FeatureKernargPreload])>;
def FeatureISAVersion9_0_C : FeatureSet<
- !listconcat(FeatureISAVersion9_0_Common.Features,
- [FeatureGDS,
- FeatureMadMixInsts,
- FeatureDsSrc2Insts,
- FeatureExtendedImageInsts,
- FeatureImageGather4D16Bug])>;
+ !listconcat(FeatureISAVersion9_0_Consumer_Common.Features,
+ [FeatureMadMixInsts])>;
def FeatureISAVersion9_4_Common : FeatureSet<
[FeatureGFX9,
@@ -1387,6 +1380,10 @@ def FeatureISAVersion10_1_Common : FeatureSet<
FeatureFlatSegmentOffsetBug,
FeatureNegativeUnalignedScratchOffsetBug])>;
+def FeatureISAVersion10_1_Generic : FeatureSet<
+ !listconcat(FeatureISAVersion10_1_Common.Features,
+ [FeatureRequiresCOV6])>;
+
def FeatureISAVersion10_1_0 : FeatureSet<
!listconcat(FeatureISAVersion10_1_Common.Features,
[])>;
@@ -1426,6 +1423,10 @@ def FeatureISAVersion10_3_0 : FeatureSet<
FeatureDot10Insts,
FeatureShaderCyclesRegister])>;
+def FeatureISAVersion10_3_Generic: FeatureSet<
+ !listconcat(FeatureISAVersion10_3_0.Features,
+ [FeatureRequiresCOV6])>;
+
def FeatureISAVersion11_Common : FeatureSet<
[FeatureGFX11,
FeatureLDSBankCount32,
@@ -1447,6 +1448,16 @@ def FeatureISAVersion11_Common : FeatureSet<
FeaturePackedTID,
FeatureVcmpxPermlaneHazard]>;
+// There are a few workarounds that need to be
+// added to all targets. This pessimizes codegen
+// a bit on the generic GFX11 target.
+def FeatureISAVersion11_Generic: FeatureSet<
+ !listconcat(FeatureISAVersion11_Common.Features,
+ [FeatureMSAALoadDstSelBug,
+ FeatureVALUTransUseHazard,
+ FeatureUserSGPRInit16Bug,
+ FeatureRequiresCOV6])>;
+
def FeatureISAVersion11_0_Common : FeatureSet<
!listconcat(FeatureISAVersion11_Common.Features,
[FeatureMSAALoadDstSelBug,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index db81e1e..5777a7c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -156,6 +156,13 @@ void AMDGPUAsmPrinter::emitFunctionBodyStart() {
const GCNSubtarget &STM = MF->getSubtarget<GCNSubtarget>();
const Function &F = MF->getFunction();
+ // TODO: We're checking this late; it would be nice to check it earlier.
+ if (STM.requiresCodeObjectV6() && CodeObjectVersion < AMDGPU::AMDHSA_COV6) {
+ report_fatal_error(
+ STM.getCPU() + " is only available on code object version 6 or better",
+ /*gen_crash_diag*/ false);
+ }
+
// TODO: Which one is called first, emitStartOfAsmFile or
// emitFunctionBodyStart?
if (!getTargetStreamer()->getTargetID())
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURemoveIncompatibleFunctions.cpp b/llvm/lib/Target/AMDGPU/AMDGPURemoveIncompatibleFunctions.cpp
index 6f1236f..9d44b65 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURemoveIncompatibleFunctions.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURemoveIncompatibleFunctions.cpp
@@ -139,10 +139,10 @@ bool AMDGPURemoveIncompatibleFunctions::checkFunction(Function &F) {
const GCNSubtarget *ST =
static_cast<const GCNSubtarget *>(TM->getSubtargetImpl(F));
- // Check the GPU isn't generic. Generic is used for testing only
- // and we don't want this pass to interfere with it.
+ // Check the GPU isn't generic or generic-hsa. Generic is used for testing
+ // only and we don't want this pass to interfere with it.
StringRef GPUName = ST->getCPU();
- if (GPUName.empty() || GPUName.contains("generic"))
+ if (GPUName.empty() || GPUName.starts_with("generic"))
return false;
// Try to fetch the GPU's info. If we can't, it's likely an unknown processor
diff --git a/llvm/lib/Target/AMDGPU/GCNProcessors.td b/llvm/lib/Target/AMDGPU/GCNProcessors.td
index 96af1a6..4671e03 100644
--- a/llvm/lib/Target/AMDGPU/GCNProcessors.td
+++ b/llvm/lib/Target/AMDGPU/GCNProcessors.td
@@ -204,6 +204,11 @@ def : ProcessorModel<"gfx942", SIDPGFX940FullSpeedModel,
FeatureISAVersion9_4_2.Features
>;
+// [gfx900, gfx902, gfx904, gfx906, gfx909, gfx90c]
+def : ProcessorModel<"gfx9-generic", SIQuarterSpeedModel,
+ FeatureISAVersion9_Generic.Features
+>;
+
//===----------------------------------------------------------------------===//
// GCN GFX10.
//===----------------------------------------------------------------------===//
@@ -252,6 +257,16 @@ def : ProcessorModel<"gfx1036", GFX10SpeedModel,
FeatureISAVersion10_3_0.Features
>;
+// [gfx1010, gfx1011, gfx1012, gfx1013]
+def : ProcessorModel<"gfx10.1-generic", GFX10SpeedModel,
+ FeatureISAVersion10_1_Generic.Features
+>;
+
+// [gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035, gfx1036]
+def : ProcessorModel<"gfx10.3-generic", GFX10SpeedModel,
+ FeatureISAVersion10_3_Generic.Features
+>;
+
//===----------------------------------------------------------------------===//
// GCN GFX11.
//===----------------------------------------------------------------------===//
@@ -280,10 +295,17 @@ def : ProcessorModel<"gfx1151", GFX11SpeedModel,
FeatureISAVersion11_5_1.Features
>;
+// [gfx1100, gfx1101, gfx1102, gfx1103, gfx1150, gfx1151]
+def : ProcessorModel<"gfx11-generic", GFX11SpeedModel,
+ FeatureISAVersion11_Generic.Features
+>;
+
//===----------------------------------------------------------------------===//
// GCN GFX12.
//===----------------------------------------------------------------------===//
+// TODO: gfx12-generic ?
+
def : ProcessorModel<"gfx1200", GFX12SpeedModel,
FeatureISAVersion12.Features
>;
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 4f8eeaa..b13b4f7 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -224,6 +224,8 @@ protected:
bool HasVALUTransUseHazard = false;
bool HasForceStoreSC0SC1 = false;
+ bool RequiresCOV6 = false;
+
// Dummy feature to use for assembler in tablegen.
bool FeatureDisable = false;
@@ -1165,6 +1167,8 @@ public:
bool hasForceStoreSC0SC1() const { return HasForceStoreSC0SC1; }
+ bool requiresCodeObjectV6() const { return RequiresCOV6; }
+
bool hasVALUMaskWriteHazard() const { return getGeneration() == GFX11; }
/// Return if operations acting on VGPR tuples require even alignment.
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index 5e9b167..a25622c 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -115,6 +115,10 @@ StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) {
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1151: AK = GK_GFX1151; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1200: AK = GK_GFX1200; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1201: AK = GK_GFX1201; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC: AK = GK_GFX9_GENERIC; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC: AK = GK_GFX10_1_GENERIC; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC: AK = GK_GFX10_3_GENERIC; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC: AK = GK_GFX11_GENERIC; break;
case ELF::EF_AMDGPU_MACH_NONE: AK = GK_NONE; break;
default: AK = GK_NONE; break;
}
@@ -193,6 +197,10 @@ unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) {
case GK_GFX1151: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1151;
case GK_GFX1200: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1200;
case GK_GFX1201: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1201;
+ case GK_GFX9_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC;
+ case GK_GFX10_1_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC;
+ case GK_GFX10_3_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC;
+ case GK_GFX11_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC;
case GK_NONE: return ELF::EF_AMDGPU_MACH_NONE;
}
// clang-format on
@@ -659,6 +667,24 @@ unsigned AMDGPUTargetELFStreamer::getEFlagsV6() {
unsigned Flags = getEFlagsV4();
unsigned Version = ForceGenericVersion;
+ if (!Version) {
+ switch (parseArchAMDGCN(STI.getCPU())) {
+ case AMDGPU::GK_GFX9_GENERIC:
+ Version = GenericVersion::GFX9;
+ break;
+ case AMDGPU::GK_GFX10_1_GENERIC:
+ Version = GenericVersion::GFX10_1;
+ break;
+ case AMDGPU::GK_GFX10_3_GENERIC:
+ Version = GenericVersion::GFX10_3;
+ break;
+ case AMDGPU::GK_GFX11_GENERIC:
+ Version = GenericVersion::GFX11;
+ break;
+ default:
+ break;
+ }
+ }
// Versions start at 1.
if (Version) {
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index f24b9f0..ded252c 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -42,6 +42,17 @@ namespace AMDGPU {
struct IsaVersion;
+/// Generic target versions emitted by this version of LLVM.
+///
+/// These numbers are incremented every time a codegen-breaking change occurs
+/// within a generic family.
+namespace GenericVersion {
+static constexpr unsigned GFX9 = 1;
+static constexpr unsigned GFX10_1 = 1;
+static constexpr unsigned GFX10_3 = 1;
+static constexpr unsigned GFX11 = 1;
+} // namespace GenericVersion
+
enum { AMDHSA_COV4 = 4, AMDHSA_COV5 = 5, AMDHSA_COV6 = 6 };
/// \returns True if \p STI is AMDHSA.
diff --git a/llvm/lib/TargetParser/TargetParser.cpp b/llvm/lib/TargetParser/TargetParser.cpp
index 20f3246..684d698 100644
--- a/llvm/lib/TargetParser/TargetParser.cpp
+++ b/llvm/lib/TargetParser/TargetParser.cpp
@@ -126,6 +126,11 @@ constexpr GPUInfo AMDGCNGPUs[] = {
{{"gfx1151"}, {"gfx1151"}, GK_GFX1151, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
{{"gfx1200"}, {"gfx1200"}, GK_GFX1200, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
{{"gfx1201"}, {"gfx1201"}, GK_GFX1201, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
+
+ {{"gfx9-generic"}, {"gfx9-generic"}, GK_GFX9_GENERIC, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
+ {{"gfx10.1-generic"}, {"gfx10.1-generic"}, GK_GFX10_1_GENERIC, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK|FEATURE_WGP},
+ {{"gfx10.3-generic"}, {"gfx10.3-generic"}, GK_GFX10_3_GENERIC, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
+ {{"gfx11-generic"}, {"gfx11-generic"}, GK_GFX11_GENERIC, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
// clang-format on
};
@@ -144,6 +149,22 @@ const GPUInfo *getArchEntry(AMDGPU::GPUKind AK, ArrayRef<GPUInfo> Table) {
} // namespace
+StringRef llvm::AMDGPU::getArchFamilyNameAMDGCN(GPUKind AK) {
+ switch (AK) {
+ case AMDGPU::GK_GFX9_GENERIC:
+ return "gfx9";
+ case AMDGPU::GK_GFX10_1_GENERIC:
+ case AMDGPU::GK_GFX10_3_GENERIC:
+ return "gfx10";
+ case AMDGPU::GK_GFX11_GENERIC:
+ return "gfx11";
+ default: {
+ StringRef ArchName = getArchNameAMDGCN(AK);
+ return ArchName.empty() ? "" : ArchName.drop_back(2);
+ }
+ }
+}
+
StringRef llvm::AMDGPU::getArchNameAMDGCN(GPUKind AK) {
if (const auto *Entry = getArchEntry(AK, AMDGCNGPUs))
return Entry->CanonicalName;
@@ -253,6 +274,24 @@ AMDGPU::IsaVersion AMDGPU::getIsaVersion(StringRef GPU) {
case GK_GFX1151: return {11, 5, 1};
case GK_GFX1200: return {12, 0, 0};
case GK_GFX1201: return {12, 0, 1};
+
+ // Generic targets return the lowest common denominator
+ // within their family. That is, the ISA that is the most
+ // restricted in terms of features.
+ //
+ // gfx9-generic is tricky because there is no lowest
+ // common denominator, so we return gfx900, which has mad-mix
+ // even though this generic family does not.
+ //
+ // This API should never be used to check for a particular
+ // feature anyway.
+ //
+ // TODO: Split up this API depending on its caller so
+ // generic target handling is more obvious and less risky.
+ case GK_GFX9_GENERIC: return {9, 0, 0};
+ case GK_GFX10_1_GENERIC: return {10, 1, 0};
+ case GK_GFX10_3_GENERIC: return {10, 3, 0};
+ case GK_GFX11_GENERIC: return {11, 0, 3};
default: return {0, 0, 0};
}
// clang-format on
@@ -302,6 +341,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
case GK_GFX1102:
case GK_GFX1101:
case GK_GFX1100:
+ case GK_GFX11_GENERIC:
Features["ci-insts"] = true;
Features["dot5-insts"] = true;
Features["dot7-insts"] = true;
@@ -327,6 +367,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
case GK_GFX1032:
case GK_GFX1031:
case GK_GFX1030:
+ case GK_GFX10_3_GENERIC:
Features["ci-insts"] = true;
Features["dot1-insts"] = true;
Features["dot2-insts"] = true;
@@ -357,6 +398,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
[[fallthrough]];
case GK_GFX1013:
case GK_GFX1010:
+ case GK_GFX10_1_GENERIC:
Features["dl-insts"] = true;
Features["ci-insts"] = true;
Features["16-bit-insts"] = true;
@@ -424,6 +466,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
case GK_GFX904:
case GK_GFX902:
case GK_GFX900:
+ case GK_GFX9_GENERIC:
Features["gfx9-insts"] = true;
[[fallthrough]];
case GK_GFX810:
@@ -510,6 +553,9 @@ static bool isWave32Capable(StringRef GPU, const Triple &T) {
case GK_GFX1011:
case GK_GFX1013:
case GK_GFX1010:
+ case GK_GFX11_GENERIC:
+ case GK_GFX10_3_GENERIC:
+ case GK_GFX10_1_GENERIC:
IsWave32Capable = true;
break;
default:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll
index 155f4c9..9698a38 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll
@@ -1,11 +1,12 @@
-; RUN: llc -global-isel -mtriple=amdgcn-unknown-amdhsa -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,HSA,UNPACKED %s
-; RUN: llc -global-isel -mtriple=amdgcn-unknown-amdhsa -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,HSA,UNPACKED %s
-; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=hawaii -mattr=+flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,MESA,UNPACKED %s
-; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=tonga -mattr=+flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,MESA,UNPACKED %s
-; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mattr=+flat-for-global -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,MESA3D,UNPACKED %s
-; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,MESA3D,UNPACKED %s
-; RUN: llc -global-isel -mtriple=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,PACKED-TID %s
-; RUN: llc -global-isel -mtriple=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=ALL,PACKED-TID %s
+; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-unknown-amdhsa -verify-machineinstrs | FileCheck --check-prefixes=ALL,HSA,UNPACKED %s
+; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-unknown-amdhsa -verify-machineinstrs| FileCheck --check-prefixes=ALL,HSA,UNPACKED %s
+; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-- -mcpu=hawaii -mattr=+flat-for-global -verify-machineinstrs | FileCheck --check-prefixes=ALL,MESA,UNPACKED %s
+; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-- -mcpu=tonga -mattr=+flat-for-global -verify-machineinstrs | FileCheck --check-prefixes=ALL,MESA,UNPACKED %s
+; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mattr=+flat-for-global -mcpu=hawaii -verify-machineinstrs | FileCheck -check-prefixes=ALL,MESA3D,UNPACKED %s
+; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefixes=ALL,MESA3D,UNPACKED %s
+; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -verify-machineinstrs | FileCheck -check-prefixes=ALL,PACKED-TID %s
+; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-vopd=0 | FileCheck -check-prefixes=ALL,PACKED-TID %s
+; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -global-isel -mtriple=amdgcn -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=6 -mcpu=gfx11-generic -verify-machineinstrs -amdgpu-enable-vopd=0 | FileCheck -check-prefixes=ALL,PACKED-TID %s
declare i32 @llvm.amdgcn.workitem.id.x() #0
declare i32 @llvm.amdgcn.workitem.id.y() #0
@@ -200,4 +201,4 @@ attributes #1 = { nounwind }
!2 = !{i32 1, i32 1, i32 64}
!llvm.module.flags = !{!99}
-!99 = !{i32 1, !"amdgpu_code_object_version", i32 400}
+!99 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION}
diff --git a/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll b/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll
index 357fcf8..038219f 100644
--- a/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll
+++ b/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll
@@ -108,6 +108,13 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX1200 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1201 < %s | FileCheck --check-prefixes=GFX1201 %s
+; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx9-generic -mattr=-xnack < %s | FileCheck --check-prefixes=GFX9_GENERIC_NOXNACK %s
+; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx9-generic -mattr=+xnack < %s | FileCheck --check-prefixes=GFX9_GENERIC_XNACK %s
+; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10.1-generic -mattr=-xnack < %s | FileCheck --check-prefixes=GFX10_1_GENERIC_NOXNACK %s
+; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10.1-generic -mattr=+xnack < %s | FileCheck --check-prefixes=GFX10_1_GENERIC_XNACK %s
+; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10.3-generic < %s | FileCheck --check-prefixes=GFX10_3_GENERIC %s
+; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx11-generic < %s | FileCheck --check-prefixes=GFX11_GENERIC %s
+
; GFX600: .amdgcn_target "amdgcn-amd-amdhsa--gfx600"
; GFX601: .amdgcn_target "amdgcn-amd-amdhsa--gfx601"
; GFX602: .amdgcn_target "amdgcn-amd-amdhsa--gfx602"
@@ -196,6 +203,13 @@
; GFX1200: .amdgcn_target "amdgcn-amd-amdhsa--gfx1200"
; GFX1201: .amdgcn_target "amdgcn-amd-amdhsa--gfx1201"
+; GFX9_GENERIC_NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx9-generic:xnack-"
+; GFX9_GENERIC_XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx9-generic:xnack+"
+; GFX10_1_GENERIC_NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx10.1-generic:xnack-"
+; GFX10_1_GENERIC_XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx10.1-generic:xnack+"
+; GFX10_3_GENERIC: .amdgcn_target "amdgcn-amd-amdhsa--gfx10.3-generic"
+; GFX11_GENERIC: .amdgcn_target "amdgcn-amd-amdhsa--gfx11-generic"
+
define amdgpu_kernel void @directive_amdgcn_target() {
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll b/llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll
index 380439d..9ba8176 100644
--- a/llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll
+++ b/llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll
@@ -77,6 +77,11 @@
; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx1200 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1200 %s
; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx1201 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1201 %s
+; RUN: llc -filetype=obj --amdhsa-code-object-version=6 -mtriple=amdgcn -mcpu=gfx9-generic < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX9_GENERIC %s
+; RUN: llc -filetype=obj --amdhsa-code-object-version=6 -mtriple=amdgcn -mcpu=gfx10.1-generic < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX10_1_GENERIC %s
+; RUN: llc -filetype=obj --amdhsa-code-object-version=6 -mtriple=amdgcn -mcpu=gfx10.3-generic < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX10_3_GENERIC %s
+; RUN: llc -filetype=obj --amdhsa-code-object-version=6 -mtriple=amdgcn -mcpu=gfx11-generic < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX11_GENERIC %s
+
; FIXME: With the default attributes the eflags are not accurate for
; xnack and sramecc. Subsequent Target-ID patches will address this.
@@ -149,6 +154,11 @@
; GFX1151: EF_AMDGPU_MACH_AMDGCN_GFX1151 (0x4A)
; GFX1200: EF_AMDGPU_MACH_AMDGCN_GFX1200 (0x48)
; GFX1201: EF_AMDGPU_MACH_AMDGCN_GFX1201 (0x4E)
+
+; GFX9_GENERIC: EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC (0x51)
+; GFX10_1_GENERIC: EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC (0x52)
+; GFX10_3_GENERIC: EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC (0x53)
+; GFX11_GENERIC: EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC (0x54)
; ALL: ]
define amdgpu_kernel void @elf_header() {
diff --git a/llvm/test/CodeGen/AMDGPU/gds-allocation.ll b/llvm/test/CodeGen/AMDGPU/gds-allocation.ll
index dc6fea4..1a93347 100644
--- a/llvm/test/CodeGen/AMDGPU/gds-allocation.ll
+++ b/llvm/test/CodeGen/AMDGPU/gds-allocation.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -amdgpu-atomic-optimizer-strategy=None -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx9-generic --amdhsa-code-object-version=6 -amdgpu-atomic-optimizer-strategy=None -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
@gds0 = internal addrspace(2) global [4 x i32] undef, align 4
@lds0 = internal addrspace(3) global [4 x i32] undef, align 128
diff --git a/llvm/test/CodeGen/AMDGPU/gds-atomic.ll b/llvm/test/CodeGen/AMDGPU/gds-atomic.ll
index 3e4e693..8d44330 100644
--- a/llvm/test/CodeGen/AMDGPU/gds-atomic.ll
+++ b/llvm/test/CodeGen/AMDGPU/gds-atomic.ll
@@ -2,6 +2,7 @@
; RUN: llc -mtriple=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s
; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx9-generic --amdhsa-code-object-version=6 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s
; FUNC-LABEL: {{^}}atomic_add_ret_gds:
; GCN-DAG: v_mov_b32_e32 v[[OFF:[0-9]+]], s
diff --git a/llvm/test/CodeGen/AMDGPU/generic-targets-require-v6.ll b/llvm/test/CodeGen/AMDGPU/generic-targets-require-v6.ll
new file mode 100644
index 0000000..e3f4b14
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/generic-targets-require-v6.ll
@@ -0,0 +1,18 @@
+; RUN: not llc -march=amdgcn -mcpu=gfx9-generic --amdhsa-code-object-version=5 -o - %s 2>&1 | FileCheck --check-prefix=GFX9-V5 %s
+; RUN: not llc -march=amdgcn -mcpu=gfx10.1-generic --amdhsa-code-object-version=5 -o - %s 2>&1 | FileCheck --check-prefix=GFX101-V5 %s
+; RUN: not llc -march=amdgcn -mcpu=gfx10.3-generic --amdhsa-code-object-version=5 -o - %s 2>&1 | FileCheck --check-prefix=GFX103-V5 %s
+; RUN: not llc -march=amdgcn -mcpu=gfx11-generic --amdhsa-code-object-version=5 -o - %s 2>&1 | FileCheck --check-prefix=GFX11-V5 %s
+
+; RUN: llc -march=amdgcn -mcpu=gfx9-generic --amdhsa-code-object-version=6 -o - %s
+; RUN: llc -march=amdgcn -mcpu=gfx10.1-generic --amdhsa-code-object-version=6 -o - %s
+; RUN: llc -march=amdgcn -mcpu=gfx10.3-generic --amdhsa-code-object-version=6 -o - %s
+; RUN: llc -march=amdgcn -mcpu=gfx11-generic --amdhsa-code-object-version=6 -o - %s
+
+; GFX9-V5: gfx9-generic is only available on code object version 6 or better
+; GFX101-V5: gfx10.1-generic is only available on code object version 6 or better
+; GFX103-V5: gfx10.3-generic is only available on code object version 6 or better
+; GFX11-V5: gfx11-generic is only available on code object version 6 or better
+
+define void @foo() {
+ ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/hsa-generic-target-features.ll b/llvm/test/CodeGen/AMDGPU/hsa-generic-target-features.ll
new file mode 100644
index 0000000..4fee563
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/hsa-generic-target-features.ll
@@ -0,0 +1,31 @@
+; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10.1-generic -mattr=+cumode < %s | FileCheck -check-prefix=NOCU %s
+; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10.1-generic < %s | FileCheck -check-prefix=CU %s
+; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10.3-generic -mattr=+cumode < %s | FileCheck -check-prefix=NOCU %s
+; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10.3-generic < %s | FileCheck -check-prefix=CU %s
+; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx11-generic -mattr=+cumode < %s | FileCheck -check-prefix=NOCU %s
+; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx11-generic < %s | FileCheck -check-prefix=CU %s
+
+; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10.1-generic -mattr=+wavefrontsize32,-wavefrontsize64 < %s | FileCheck -check-prefix=W32 %s
+; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10.1-generic -mattr=-wavefrontsize32,+wavefrontsize64 < %s | FileCheck -check-prefix=W64 %s
+; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10.3-generic -mattr=+wavefrontsize32,-wavefrontsize64 < %s | FileCheck -check-prefix=W32 %s
+; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10.3-generic -mattr=-wavefrontsize32,+wavefrontsize64 < %s | FileCheck -check-prefix=W64 %s
+; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx11-generic -mattr=+wavefrontsize32,-wavefrontsize64 < %s | FileCheck -check-prefix=W32 %s
+; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx11-generic -mattr=-wavefrontsize32,+wavefrontsize64 < %s | FileCheck -check-prefix=W64 %s
+
+; Check that the 10.1, 10.3 and 11 generic targets allow cumode/wave64.
+
+; NOCU: .amdhsa_workgroup_processor_mode 0
+; NOCU: .workgroup_processor_mode: 0
+; CU: .amdhsa_workgroup_processor_mode 1
+; CU: .workgroup_processor_mode: 1
+
+; W64: .amdhsa_wavefront_size32 0
+; W32: .amdhsa_wavefront_size32 1
+
+define amdgpu_kernel void @wavefrontsize() {
+entry:
+ ret void
+}
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 600}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.d16.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.d16.dim.ll
index 91284d3..cf324d6 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.d16.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.d16.dim.ll
@@ -1,8 +1,11 @@
; RUN: llc < %s -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefixes=GCN,UNPACKED %s
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck --check-prefix=GCN %s
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX9 %s
+; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx9-generic --amdhsa-code-object-version=6 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX9 %s
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX10 %s
+; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx10.1-generic --amdhsa-code-object-version=6 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX10 %s
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX10 %s
+; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx11-generic --amdhsa-code-object-version=6 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX10 %s
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX12 %s
; GCN-LABEL: {{^}}image_gather4_b_2d_v4f16:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll
index 7dc139e..10e1ae3 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll
@@ -1,8 +1,11 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=VERDE %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX6789 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx9-generic --amdhsa-code-object-version=6 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX6789 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx10.1-generic --amdhsa-code-object-version=6 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx11-generic --amdhsa-code-object-version=6 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12 %s
define amdgpu_ps <4 x float> @sample_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
diff --git a/llvm/test/CodeGen/AMDGPU/mad-mix.ll b/llvm/test/CodeGen/AMDGPU/mad-mix.ll
index e8b9526..b520dd1 100644
--- a/llvm/test/CodeGen/AMDGPU/mad-mix.ll
+++ b/llvm/test/CodeGen/AMDGPU/mad-mix.ll
@@ -2,12 +2,14 @@
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX1100,SDAG-GFX1100 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX900,SDAG-GFX900 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX906,SDAG-GFX906 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx9-generic -verify-machineinstrs --amdhsa-code-object-version=6 < %s | FileCheck -check-prefixes=GFX9GEN,SDAG-GFX9GEN %s
; RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=VI,SDAG-VI %s
; RUN: llc -mtriple=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=CI,SDAG-CI %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX1100,GISEL-GFX1100 %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX900,GISEL-GFX900 %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX906,GISEL-GFX906 %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx9-generic --amdhsa-code-object-version=6 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9GEN,GISEL-GFX9GEN %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=VI,GISEL-VI %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=CI,GISEL-CI %s
@@ -30,6 +32,15 @@ define float @v_mad_mix_f32_f16lo_f16lo_f16lo(half %src0, half %src1, half %src2
; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1]
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo:
+; GFX9GEN: ; %bb.0:
+; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v3, v0
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v2
+; GFX9GEN-NEXT: v_mac_f32_e32 v0, v3, v1
+; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -79,6 +90,15 @@ define float @v_mad_mix_f32_f16hi_f16hi_f16hi_int(i32 %src0, i32 %src1, i32 %src
; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GFX9GEN-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_int:
+; GFX9GEN: ; %bb.0:
+; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX9GEN-NEXT: v_mac_f32_e32 v0, v3, v1
+; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; VI-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_int:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -134,6 +154,15 @@ define float @v_mad_mix_f32_f16hi_f16hi_f16hi_elt(<2 x half> %src0, <2 x half> %
; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GFX9GEN-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_elt:
+; GFX9GEN: ; %bb.0:
+; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX9GEN-NEXT: v_mac_f32_e32 v0, v3, v1
+; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; VI-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_elt:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -193,6 +222,19 @@ define <2 x float> @v_mad_mix_v2f32(<2 x half> %src0, <2 x half> %src1, <2 x hal
; SDAG-GFX906-NEXT: v_mov_b32_e32 v1, v3
; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; SDAG-GFX9GEN-LABEL: v_mad_mix_v2f32:
+; SDAG-GFX9GEN: ; %bb.0:
+; SDAG-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v4, v0
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v6, v1
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v2
+; SDAG-GFX9GEN-NEXT: v_mac_f32_e32 v1, v3, v5
+; SDAG-GFX9GEN-NEXT: v_mac_f32_e32 v0, v4, v6
+; SDAG-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; SDAG-VI-LABEL: v_mad_mix_v2f32:
; SDAG-VI: ; %bb.0:
; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -242,6 +284,19 @@ define <2 x float> @v_mad_mix_v2f32(<2 x half> %src0, <2 x half> %src1, <2 x hal
; GISEL-GFX906-NEXT: v_mov_b32_e32 v0, v3
; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GISEL-GFX9GEN-LABEL: v_mad_mix_v2f32:
+; GISEL-GFX9GEN: ; %bb.0:
+; GISEL-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v3, v0
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v5, v1
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v2
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-GFX9GEN-NEXT: v_mac_f32_e32 v0, v3, v5
+; GISEL-GFX9GEN-NEXT: v_mac_f32_e32 v1, v4, v6
+; GISEL-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; GISEL-VI-LABEL: v_mad_mix_v2f32:
; GISEL-VI: ; %bb.0:
; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -300,6 +355,19 @@ define <2 x float> @v_mad_mix_v2f32_shuffle(<2 x half> %src0, <2 x half> %src1,
; GFX906-NEXT: v_mov_b32_e32 v0, v3
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GFX9GEN-LABEL: v_mad_mix_v2f32_shuffle:
+; GFX9GEN: ; %bb.0:
+; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v4, v0
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v1
+; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX9GEN-NEXT: v_mad_f32 v0, v3, v0, v2
+; GFX9GEN-NEXT: v_mac_f32_e32 v2, v4, v1
+; GFX9GEN-NEXT: v_mov_b32_e32 v1, v2
+; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; VI-LABEL: v_mad_mix_v2f32_shuffle:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -378,6 +446,15 @@ define float @v_mad_mix_f32_negf16lo_f16lo_f16lo(half %src0, half %src1, half %s
; GFX906-NEXT: v_fma_mix_f32 v0, -v0, v1, v2 op_sel_hi:[1,1,1]
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; SDAG-GFX9GEN-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo:
+; SDAG-GFX9GEN: ; %bb.0:
+; SDAG-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v2
+; SDAG-GFX9GEN-NEXT: v_mad_f32 v0, -v0, v1, v2
+; SDAG-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; SDAG-VI-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo:
; SDAG-VI: ; %bb.0:
; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -393,6 +470,15 @@ define float @v_mad_mix_f32_negf16lo_f16lo_f16lo(half %src0, half %src1, half %s
; SDAG-CI-NEXT: v_mad_f32 v0, -v0, v1, v2
; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
;
+; GISEL-GFX9GEN-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo:
+; GISEL-GFX9GEN: ; %bb.0:
+; GISEL-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e64 v3, -v0
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v2
+; GISEL-GFX9GEN-NEXT: v_mac_f32_e32 v0, v3, v1
+; GISEL-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; GISEL-VI-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo:
; GISEL-VI: ; %bb.0:
; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -437,6 +523,15 @@ define float @v_mad_mix_f32_absf16lo_f16lo_f16lo(half %src0, half %src1, half %s
; GFX906-NEXT: v_fma_mix_f32 v0, |v0|, v1, v2 op_sel_hi:[1,1,1]
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GFX9GEN-LABEL: v_mad_mix_f32_absf16lo_f16lo_f16lo:
+; GFX9GEN: ; %bb.0:
+; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX9GEN-NEXT: v_mad_f32 v0, |v0|, v1, v2
+; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; VI-LABEL: v_mad_mix_f32_absf16lo_f16lo_f16lo:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -487,6 +582,15 @@ define float @v_mad_mix_f32_negabsf16lo_f16lo_f16lo(half %src0, half %src1, half
; GFX906-NEXT: v_fma_mix_f32 v0, -|v0|, v1, v2 op_sel_hi:[1,1,1]
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GFX9GEN-LABEL: v_mad_mix_f32_negabsf16lo_f16lo_f16lo:
+; GFX9GEN: ; %bb.0:
+; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX9GEN-NEXT: v_mad_f32 v0, -|v0|, v1, v2
+; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; VI-LABEL: v_mad_mix_f32_negabsf16lo_f16lo_f16lo:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -538,6 +642,14 @@ define float @v_mad_mix_f32_f16lo_f16lo_f32(half %src0, half %src1, float %src2)
; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_f32:
+; GFX9GEN: ; %bb.0:
+; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX9GEN-NEXT: v_mad_f32 v0, v0, v1, v2
+; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -584,6 +696,14 @@ define float @v_mad_mix_f32_f16lo_f16lo_negf32(half %src0, half %src1, float %sr
; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, -v2 op_sel_hi:[1,1,0]
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_negf32:
+; GFX9GEN: ; %bb.0:
+; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX9GEN-NEXT: v_mad_f32 v0, v0, v1, -v2
+; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_negf32:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -631,6 +751,14 @@ define float @v_mad_mix_f32_f16lo_f16lo_absf32(half %src0, half %src1, float %sr
; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, |v2| op_sel_hi:[1,1,0]
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_absf32:
+; GFX9GEN: ; %bb.0:
+; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX9GEN-NEXT: v_mad_f32 v0, v0, v1, |v2|
+; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_absf32:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -678,6 +806,14 @@ define float @v_mad_mix_f32_f16lo_f16lo_negabsf32(half %src0, half %src1, float
; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, -|v2| op_sel_hi:[1,1,0]
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_negabsf32:
+; GFX9GEN: ; %bb.0:
+; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX9GEN-NEXT: v_mad_f32 v0, v0, v1, -|v2|
+; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_negabsf32:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -734,6 +870,14 @@ define float @v_mad_mix_f32_f16lo_f16lo_f32imm1(half %src0, half %src1) #0 {
; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1:
+; GFX9GEN: ; %bb.0:
+; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX9GEN-NEXT: v_mad_f32 v0, v0, v1, 1.0
+; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -806,6 +950,14 @@ define float @v_mad_mix_f32_f16lo_f16lo_f32imminv2pi(half %src0, half %src1) #0
; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi:
+; GFX9GEN: ; %bb.0:
+; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX9GEN-NEXT: v_mad_f32 v0, v0, v1, 0.15915494
+; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -885,6 +1037,14 @@ define float @v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi(half %src0, half %src1)
; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; SDAG-GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
+; SDAG-GFX9GEN: ; %bb.0:
+; SDAG-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SDAG-GFX9GEN-NEXT: v_madak_f32 v0, v0, v1, 0x3e230000
+; SDAG-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; SDAG-VI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
; SDAG-VI: ; %bb.0:
; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -921,6 +1081,15 @@ define float @v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi(half %src0, half %src1)
; GISEL-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GISEL-GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
+; GISEL-GFX9GEN: ; %bb.0:
+; GISEL-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v0
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-GFX9GEN-NEXT: v_mov_b32_e32 v0, 0x3e230000
+; GISEL-GFX9GEN-NEXT: v_mac_f32_e32 v0, v2, v1
+; GISEL-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; GISEL-VI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
; GISEL-VI: ; %bb.0:
; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -969,6 +1138,14 @@ define float @v_mad_mix_f32_f16lo_f16lo_cvtf16imm63(half %src0, half %src1) #0 {
; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; SDAG-GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
+; SDAG-GFX9GEN: ; %bb.0:
+; SDAG-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SDAG-GFX9GEN-NEXT: v_madak_f32 v0, v0, v1, 0x367c0000
+; SDAG-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; SDAG-VI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
; SDAG-VI: ; %bb.0:
; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1005,6 +1182,15 @@ define float @v_mad_mix_f32_f16lo_f16lo_cvtf16imm63(half %src0, half %src1) #0 {
; GISEL-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GISEL-GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
+; GISEL-GFX9GEN: ; %bb.0:
+; GISEL-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v0
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-GFX9GEN-NEXT: v_mov_b32_e32 v0, 0x367c0000
+; GISEL-GFX9GEN-NEXT: v_mac_f32_e32 v0, v2, v1
+; GISEL-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; GISEL-VI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
; GISEL-VI: ; %bb.0:
; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1058,6 +1244,17 @@ define <2 x float> @v_mad_mix_v2f32_f32imm1(<2 x half> %src0, <2 x half> %src1)
; SDAG-GFX906-NEXT: v_mov_b32_e32 v1, v2
; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; SDAG-GFX9GEN-LABEL: v_mad_mix_v2f32_f32imm1:
+; SDAG-GFX9GEN: ; %bb.0:
+; SDAG-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v3, v1
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-GFX9GEN-NEXT: v_mad_f32 v0, v0, v3, 1.0
+; SDAG-GFX9GEN-NEXT: v_mad_f32 v1, v2, v1, 1.0
+; SDAG-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; SDAG-VI-LABEL: v_mad_mix_v2f32_f32imm1:
; SDAG-VI: ; %bb.0:
; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1112,6 +1309,17 @@ define <2 x float> @v_mad_mix_v2f32_f32imm1(<2 x half> %src0, <2 x half> %src1)
; GISEL-GFX906-NEXT: v_mov_b32_e32 v0, v2
; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GISEL-GFX9GEN-LABEL: v_mad_mix_v2f32_f32imm1:
+; GISEL-GFX9GEN: ; %bb.0:
+; GISEL-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v0
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v1
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-GFX9GEN-NEXT: v_mad_f32 v0, v2, v0, 1.0
+; GISEL-GFX9GEN-NEXT: v_mad_f32 v1, v3, v1, 1.0
+; GISEL-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; GISEL-VI-LABEL: v_mad_mix_v2f32_f32imm1:
; GISEL-VI: ; %bb.0:
; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1168,6 +1376,18 @@ define <2 x float> @v_mad_mix_v2f32_cvtf16imminv2pi(<2 x half> %src0, <2 x half>
; SDAG-GFX906-NEXT: v_mov_b32_e32 v1, v2
; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; SDAG-GFX9GEN-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
+; SDAG-GFX9GEN: ; %bb.0:
+; SDAG-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v3, v1
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-GFX9GEN-NEXT: v_mov_b32_e32 v1, 0x3e230000
+; SDAG-GFX9GEN-NEXT: v_madak_f32 v0, v0, v3, 0x3e230000
+; SDAG-GFX9GEN-NEXT: v_mac_f32_e32 v1, v2, v4
+; SDAG-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; SDAG-VI-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
; SDAG-VI: ; %bb.0:
; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1224,6 +1444,18 @@ define <2 x float> @v_mad_mix_v2f32_cvtf16imminv2pi(<2 x half> %src0, <2 x half>
; GISEL-GFX906-NEXT: v_mov_b32_e32 v0, v2
; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GISEL-GFX9GEN-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
+; GISEL-GFX9GEN: ; %bb.0:
+; GISEL-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v0
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v1
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-GFX9GEN-NEXT: v_mov_b32_e32 v1, 0x3e230000
+; GISEL-GFX9GEN-NEXT: v_madak_f32 v0, v2, v0, 0x3e230000
+; GISEL-GFX9GEN-NEXT: v_mac_f32_e32 v1, v3, v4
+; GISEL-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; GISEL-VI-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
; GISEL-VI: ; %bb.0:
; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1283,6 +1515,17 @@ define <2 x float> @v_mad_mix_v2f32_f32imminv2pi(<2 x half> %src0, <2 x half> %s
; SDAG-GFX906-NEXT: v_mov_b32_e32 v1, v2
; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; SDAG-GFX9GEN-LABEL: v_mad_mix_v2f32_f32imminv2pi:
+; SDAG-GFX9GEN: ; %bb.0:
+; SDAG-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v3, v1
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-GFX9GEN-NEXT: v_mad_f32 v0, v0, v3, 0.15915494
+; SDAG-GFX9GEN-NEXT: v_mad_f32 v1, v2, v1, 0.15915494
+; SDAG-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; SDAG-VI-LABEL: v_mad_mix_v2f32_f32imminv2pi:
; SDAG-VI: ; %bb.0:
; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1338,6 +1581,17 @@ define <2 x float> @v_mad_mix_v2f32_f32imminv2pi(<2 x half> %src0, <2 x half> %s
; GISEL-GFX906-NEXT: v_mov_b32_e32 v0, v2
; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GISEL-GFX9GEN-LABEL: v_mad_mix_v2f32_f32imminv2pi:
+; GISEL-GFX9GEN: ; %bb.0:
+; GISEL-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v0
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v1
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-GFX9GEN-NEXT: v_mad_f32 v0, v2, v0, 0.15915494
+; GISEL-GFX9GEN-NEXT: v_mad_f32 v1, v3, v1, 0.15915494
+; GISEL-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; GISEL-VI-LABEL: v_mad_mix_v2f32_f32imminv2pi:
; GISEL-VI: ; %bb.0:
; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1386,6 +1640,15 @@ define float @v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt(<2 x half> %src0, <2 x h
; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GFX9GEN-LABEL: v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt:
+; GFX9GEN: ; %bb.0:
+; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX9GEN-NEXT: v_mad_f32 v0, v0, v1, v2 clamp
+; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; VI-LABEL: v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1440,6 +1703,12 @@ define float @no_mix_simple(float %src0, float %src1, float %src2) #0 {
; GFX906-NEXT: v_fma_f32 v0, v0, v1, v2
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GFX9GEN-LABEL: no_mix_simple:
+; GFX9GEN: ; %bb.0:
+; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9GEN-NEXT: v_mad_f32 v0, v0, v1, v2
+; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; VI-LABEL: no_mix_simple:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1474,6 +1743,12 @@ define float @no_mix_simple_fabs(float %src0, float %src1, float %src2) #0 {
; GFX906-NEXT: v_fma_f32 v0, |v0|, v1, v2
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GFX9GEN-LABEL: no_mix_simple_fabs:
+; GFX9GEN: ; %bb.0:
+; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9GEN-NEXT: v_mad_f32 v0, |v0|, v1, v2
+; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; VI-LABEL: no_mix_simple_fabs:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1516,6 +1791,15 @@ define float @v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals(half %src0, half %sr
; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1]
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals:
+; GFX9GEN: ; %bb.0:
+; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX9GEN-NEXT: v_fma_f32 v0, v0, v1, v2
+; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1568,6 +1852,14 @@ define float @v_mad_mix_f32_f16lo_f16lo_f32_denormals(half %src0, half %src1, fl
; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals:
+; GFX9GEN: ; %bb.0:
+; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX9GEN-NEXT: v_fma_f32 v0, v0, v1, v2
+; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1628,6 +1920,16 @@ define float @v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd(half %src0,
; GFX906-NEXT: v_add_f32_e32 v0, v0, v2
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd:
+; GFX9GEN: ; %bb.0:
+; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX9GEN-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX9GEN-NEXT: v_add_f32_e32 v0, v0, v2
+; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1691,6 +1993,15 @@ define float @v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd(half %src0, half
; GFX906-NEXT: v_add_f32_e32 v0, v0, v2
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd:
+; GFX9GEN: ; %bb.0:
+; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX9GEN-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX9GEN-NEXT: v_add_f32_e32 v0, v0, v2
+; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1741,6 +2052,15 @@ define float @v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd(half %src0, hal
; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1]
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd:
+; GFX9GEN: ; %bb.0:
+; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v3, v0
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v2
+; GFX9GEN-NEXT: v_mac_f32_e32 v0, v3, v1
+; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1791,6 +2111,14 @@ define float @v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd(half %src0, half %src
; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd:
+; GFX9GEN: ; %bb.0:
+; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX9GEN-NEXT: v_mad_f32 v0, v0, v1, v2
+; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1838,6 +2166,15 @@ define float @v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo(i32 %src0.arg, half %src1
; GFX906-NEXT: v_fma_mix_f32 v0, -v0, v1, v2 op_sel_hi:[1,1,1]
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; SDAG-GFX9GEN-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo:
+; SDAG-GFX9GEN: ; %bb.0:
+; SDAG-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v2
+; SDAG-GFX9GEN-NEXT: v_mad_f32 v0, -v0, v1, v2
+; SDAG-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; SDAG-VI-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo:
; SDAG-VI: ; %bb.0:
; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1854,6 +2191,15 @@ define float @v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo(i32 %src0.arg, half %src1
; SDAG-CI-NEXT: v_mad_f32 v0, -v0, v1, v2
; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
;
+; GISEL-GFX9GEN-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo:
+; GISEL-GFX9GEN: ; %bb.0:
+; GISEL-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e64 v3, -v0
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v2
+; GISEL-GFX9GEN-NEXT: v_mac_f32_e32 v0, v3, v1
+; GISEL-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; GISEL-VI-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo:
; GISEL-VI: ; %bb.0:
; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1910,6 +2256,15 @@ define float @v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo(i32 %src0.arg, half %
; GFX906-NEXT: v_fma_mix_f32 v0, |v0|, v1, v2 op_sel_hi:[1,1,1]
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GFX9GEN-LABEL: v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo:
+; GFX9GEN: ; %bb.0:
+; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v0, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX9GEN-NEXT: v_mad_f32 v0, |v0|, v1, v2
+; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; VI-LABEL: v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1966,6 +2321,15 @@ define float @v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo(i32 %src0.arg, half %src1
; GFX906-NEXT: v_fma_mix_f32 v0, |v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GFX9GEN-LABEL: v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo:
+; GFX9GEN: ; %bb.0:
+; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v3, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v2
+; GFX9GEN-NEXT: v_mac_f32_e32 v0, v3, v1
+; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; VI-LABEL: v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2021,6 +2385,15 @@ define float @v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo(i32 %src0.arg, half
; GFX906-NEXT: v_fma_mix_f32 v0, -v0, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; SDAG-GFX9GEN-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo:
+; SDAG-GFX9GEN: ; %bb.0:
+; SDAG-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v2
+; SDAG-GFX9GEN-NEXT: v_mad_f32 v0, -v0, v1, v2
+; SDAG-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; SDAG-VI-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo:
; SDAG-VI: ; %bb.0:
; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2038,6 +2411,16 @@ define float @v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo(i32 %src0.arg, half
; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2
; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
;
+; GISEL-GFX9GEN-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo:
+; GISEL-GFX9GEN: ; %bb.0:
+; GISEL-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX9GEN-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v2
+; GISEL-GFX9GEN-NEXT: v_mac_f32_e32 v0, v3, v1
+; GISEL-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; GISEL-VI-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo:
; GISEL-VI: ; %bb.0:
; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2087,6 +2470,15 @@ define float @v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo(i32 %src0.arg, half
; GFX906-NEXT: v_fma_mix_f32 v0, |v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; SDAG-GFX9GEN-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo:
+; SDAG-GFX9GEN: ; %bb.0:
+; SDAG-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v3, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v2
+; SDAG-GFX9GEN-NEXT: v_mac_f32_e32 v0, v3, v1
+; SDAG-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; SDAG-VI-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo:
; SDAG-VI: ; %bb.0:
; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2104,6 +2496,16 @@ define float @v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo(i32 %src0.arg, half
; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2
; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
;
+; GISEL-GFX9GEN-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo:
+; GISEL-GFX9GEN: ; %bb.0:
+; GISEL-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX9GEN-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v2
+; GISEL-GFX9GEN-NEXT: v_mac_f32_e32 v0, v3, v1
+; GISEL-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; GISEL-VI-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo:
; GISEL-VI: ; %bb.0:
; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2153,6 +2555,15 @@ define float @v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo(i32 %src0.arg,
; GFX906-NEXT: v_fma_mix_f32 v0, -|v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; SDAG-GFX9GEN-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo:
+; SDAG-GFX9GEN: ; %bb.0:
+; SDAG-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v0, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v2
+; SDAG-GFX9GEN-NEXT: v_mad_f32 v0, -v0, v1, v2
+; SDAG-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; SDAG-VI-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo:
; SDAG-VI: ; %bb.0:
; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2170,6 +2581,16 @@ define float @v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo(i32 %src0.arg,
; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2
; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
;
+; GISEL-GFX9GEN-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo:
+; GISEL-GFX9GEN: ; %bb.0:
+; GISEL-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX9GEN-NEXT: v_or_b32_e32 v0, 0x80008000, v0
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v2
+; GISEL-GFX9GEN-NEXT: v_mac_f32_e32 v0, v3, v1
+; GISEL-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; GISEL-VI-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo:
; GISEL-VI: ; %bb.0:
; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/unsupported-image-sample.ll b/llvm/test/CodeGen/AMDGPU/unsupported-image-sample.ll
index 0878fc6..b08586e 100644
--- a/llvm/test/CodeGen/AMDGPU/unsupported-image-sample.ll
+++ b/llvm/test/CodeGen/AMDGPU/unsupported-image-sample.ll
@@ -1,15 +1,13 @@
-; RUN: llc -O0 -mtriple=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX906 %s
-; RUN: llc -O0 -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX908 %s
+; RUN: llc -O0 -mtriple=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s
+; RUN: llc -O0 -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s
+; RUN: llc -O0 -mtriple=amdgcn -mcpu=gfx9-generic --amdhsa-code-object-version=6 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s
; RUN: not --crash llc -O0 -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefixes=GFX90A %s
; RUN: not --crash llc -O0 -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefixes=GFX940 %s
; RUN: llc -O0 -mtriple=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX1030 %s
; RUN: llc -O0 -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX1100 %s
-; GFX906-LABEL: image_sample_test:
-; GFX906: image_sample_lz
-
-; GFX908-LABEL: image_sample_test:
-; GFX908: image_sample_lz
+; GFX9-LABEL: image_sample_test:
+; GFX9: image_sample_lz
; GFX90A: LLVM ERROR: requested image instruction is not supported on this GPU
diff --git a/llvm/test/Object/AMDGPU/elf-header-flags-mach.yaml b/llvm/test/Object/AMDGPU/elf-header-flags-mach.yaml
index 7fb33ca..4c2b447 100644
--- a/llvm/test/Object/AMDGPU/elf-header-flags-mach.yaml
+++ b/llvm/test/Object/AMDGPU/elf-header-flags-mach.yaml
@@ -238,6 +238,23 @@
# RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX1201 | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX1201 %s
# RUN: obj2yaml %t.o.AMDGCN_GFX1201 | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX1201 %s
+# RUN: sed -e 's/<BITS>/64/' -e 's/<MACH>/AMDGCN_GFX9_GENERIC/' %s | yaml2obj -o %t.o.AMDGCN_GFX9_GENERIC
+# RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX9_GENERIC | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX9_GENERIC %s
+# RUN: obj2yaml %t.o.AMDGCN_GFX9_GENERIC | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX9_GENERIC %s
+
+# RUN: sed -e 's/<BITS>/64/' -e 's/<MACH>/AMDGCN_GFX10_1_GENERIC/' %s | yaml2obj -o %t.o.AMDGCN_GFX10_1_GENERIC
+# RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX10_1_GENERIC | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX10_1_GENERIC %s
+# RUN: obj2yaml %t.o.AMDGCN_GFX10_1_GENERIC | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX10_1_GENERIC %s
+
+# RUN: sed -e 's/<BITS>/64/' -e 's/<MACH>/AMDGCN_GFX10_3_GENERIC/' %s | yaml2obj -o %t.o.AMDGCN_GFX10_3_GENERIC
+# RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX10_3_GENERIC | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX10_3_GENERIC %s
+# RUN: obj2yaml %t.o.AMDGCN_GFX10_3_GENERIC | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX10_3_GENERIC %s
+
+# RUN: sed -e 's/<BITS>/64/' -e 's/<MACH>/AMDGCN_GFX11_GENERIC/' %s | yaml2obj -o %t.o.AMDGCN_GFX11_GENERIC
+# RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX11_GENERIC | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX11_GENERIC %s
+# RUN: obj2yaml %t.o.AMDGCN_GFX11_GENERIC | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX11_GENERIC %s
+
+
# ELF-R600-ALL: Format: elf32-amdgpu
# ELF-R600-ALL: Arch: r600
# ELF-R600-ALL: AddressSize: 32bit
@@ -435,6 +452,18 @@
# ELF-AMDGCN-GFX1201: EF_AMDGPU_MACH_AMDGCN_GFX1201 (0x4E)
# YAML-AMDGCN-GFX1201: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX1201 ]
+# ELF-AMDGCN-GFX9_GENERIC: EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC (0x51)
+# YAML-AMDGCN-GFX9_GENERIC: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC ]
+
+# ELF-AMDGCN-GFX10_1_GENERIC: EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC (0x52)
+# YAML-AMDGCN-GFX10_1_GENERIC: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC ]
+
+# ELF-AMDGCN-GFX10_3_GENERIC: EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC (0x53)
+# YAML-AMDGCN-GFX10_3_GENERIC: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC ]
+
+# ELF-AMDGCN-GFX11_GENERIC: EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC (0x54)
+# YAML-AMDGCN-GFX11_GENERIC: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC ]
+
# ELF-AMDGCN-ALL: ]
diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll
index e296d7f..ca136a6 100644
--- a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll
+++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll
@@ -18,6 +18,11 @@ define amdgpu_kernel void @test_kernel() {
; ----------------------------------GFX11--------------------------------------
;
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=6 -mcpu=gfx11-generic -filetype=obj -O0 -o %t.o %s
+; RUN: llvm-objdump -D --arch-name=amdgcn -mllvm --amdhsa-code-object-version=6 --mcpu=gfx11-generic %t.o > %t-specify.txt
+; RUN: llvm-objdump -D -mllvm --amdhsa-code-object-version=6 %t.o > %t-detect.txt
+; RUN: diff %t-specify.txt %t-detect.txt
+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1151 -filetype=obj -O0 -o %t.o %s
; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx1151 %t.o > %t-specify.txt
; RUN: llvm-objdump -D %t.o > %t-detect.txt
@@ -49,6 +54,11 @@ define amdgpu_kernel void @test_kernel() {
; RUN: diff %t-specify.txt %t-detect.txt
; ----------------------------------GFX10--------------------------------------
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=6 -mcpu=gfx10.3-generic -filetype=obj -O0 -o %t.o %s
+; RUN: llvm-objdump -D --arch-name=amdgcn -mllvm --amdhsa-code-object-version=6 --mcpu=gfx10.3-generic %t.o > %t-specify.txt
+; RUN: llvm-objdump -D -mllvm --amdhsa-code-object-version=6 %t.o > %t-detect.txt
+; RUN: diff %t-specify.txt %t-detect.txt
+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1036 -filetype=obj -O0 -o %t.o %s
; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx1036 %t.o > %t-specify.txt
; RUN: llvm-objdump -D %t.o > %t-detect.txt
@@ -84,6 +94,11 @@ define amdgpu_kernel void @test_kernel() {
; RUN: llvm-objdump -D %t.o > %t-detect.txt
; RUN: diff %t-specify.txt %t-detect.txt
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=6 -mcpu=gfx10.1-generic -filetype=obj -O0 -o %t.o %s
+; RUN: llvm-objdump -D --arch-name=amdgcn -mllvm --amdhsa-code-object-version=6 --mcpu=gfx10.1-generic %t.o > %t-specify.txt
+; RUN: llvm-objdump -D -mllvm --amdhsa-code-object-version=6 %t.o > %t-detect.txt
+; RUN: diff %t-specify.txt %t-detect.txt
+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1013 -filetype=obj -O0 -o %t.o %s
; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx1013 %t.o > %t-specify.txt
; RUN: llvm-objdump -D %t.o > %t-detect.txt
@@ -107,6 +122,11 @@ define amdgpu_kernel void @test_kernel() {
; ----------------------------------GFX9---------------------------------------
;
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=6 -mcpu=gfx9-generic -filetype=obj -O0 -o %t.o %s
+; RUN: llvm-objdump -D --arch-name=amdgcn -mllvm --amdhsa-code-object-version=6 --mcpu=gfx9-generic %t.o > %t-specify.txt
+; RUN: llvm-objdump -D -mllvm --amdhsa-code-object-version=6 %t.o > %t-detect.txt
+; RUN: diff %t-specify.txt %t-detect.txt
+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -filetype=obj -O0 -o %t.o %s
; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx942 %t.o > %t-specify.txt
; RUN: llvm-objdump -D %t.o > %t-detect.txt
diff --git a/llvm/test/tools/llvm-readobj/ELF/AMDGPU/elf-headers.test b/llvm/test/tools/llvm-readobj/ELF/AMDGPU/elf-headers.test
index e2266d8..7fbf4aa 100644
--- a/llvm/test/tools/llvm-readobj/ELF/AMDGPU/elf-headers.test
+++ b/llvm/test/tools/llvm-readobj/ELF/AMDGPU/elf-headers.test
@@ -253,6 +253,9 @@
# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1013
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1013 -DFLAG_VALUE=0x42
+# RUN: yaml2obj %s -o %t -DABI_VERSION=4 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=4 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC -DFLAG_VALUE=0x52
+
# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1013
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1013 -DFLAG_VALUE=0x42
@@ -322,6 +325,9 @@
# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1036
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1036 -DFLAG_VALUE=0x45
+# RUN: yaml2obj %s -o %t -DABI_VERSION=4 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=4 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC -DFLAG_VALUE=0x53
+
# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME="EF_AMDGPU_MACH_AMDGCN_GFX90A, EF_AMDGPU_FEATURE_XNACK_V3"
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,DOUBLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_0="EF_AMDGPU_FEATURE_XNACK_V3 (0x100)" -DFLAG_1="EF_AMDGPU_MACH_AMDGCN_GFX90A (0x3F)" -DFLAG_VALUE=0x13F
@@ -355,6 +361,9 @@
# RUN: yaml2obj %s -o %t -DABI_VERSION=16 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX90A
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,UNKNOWN-ABI-VERSION --match-full-lines -DABI_VERSION=16 -DFILE=%t -DFLAG_VALUE=0x3F
+# RUN: yaml2obj %s -o %t -DABI_VERSION=4 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=4 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC -DFLAG_VALUE=0x51
+
# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1100
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1100 -DFLAG_VALUE=0x41
@@ -391,6 +400,9 @@
# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1103
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1103 -DFLAG_VALUE=0x44
+# RUN: yaml2obj %s -o %t -DABI_VERSION=4 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=4 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC -DFLAG_VALUE=0x54
+
# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1150
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1150 -DFLAG_VALUE=0x43
diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp
index 82bb12f..8e68f08 100644
--- a/llvm/tools/llvm-readobj/ELFDumper.cpp
+++ b/llvm/tools/llvm-readobj/ELFDumper.cpp
@@ -1559,68 +1559,72 @@ const EnumEntry<unsigned> ElfHeaderMipsFlags[] = {
};
// clang-format off
-#define AMDGPU_MACH_ENUM_ENTS \
- ENUM_ENT(EF_AMDGPU_MACH_NONE, "none"), \
- ENUM_ENT(EF_AMDGPU_MACH_R600_R600, "r600"), \
- ENUM_ENT(EF_AMDGPU_MACH_R600_R630, "r630"), \
- ENUM_ENT(EF_AMDGPU_MACH_R600_RS880, "rs880"), \
- ENUM_ENT(EF_AMDGPU_MACH_R600_RV670, "rv670"), \
- ENUM_ENT(EF_AMDGPU_MACH_R600_RV710, "rv710"), \
- ENUM_ENT(EF_AMDGPU_MACH_R600_RV730, "rv730"), \
- ENUM_ENT(EF_AMDGPU_MACH_R600_RV770, "rv770"), \
- ENUM_ENT(EF_AMDGPU_MACH_R600_CEDAR, "cedar"), \
- ENUM_ENT(EF_AMDGPU_MACH_R600_CYPRESS, "cypress"), \
- ENUM_ENT(EF_AMDGPU_MACH_R600_JUNIPER, "juniper"), \
- ENUM_ENT(EF_AMDGPU_MACH_R600_REDWOOD, "redwood"), \
- ENUM_ENT(EF_AMDGPU_MACH_R600_SUMO, "sumo"), \
- ENUM_ENT(EF_AMDGPU_MACH_R600_BARTS, "barts"), \
- ENUM_ENT(EF_AMDGPU_MACH_R600_CAICOS, "caicos"), \
- ENUM_ENT(EF_AMDGPU_MACH_R600_CAYMAN, "cayman"), \
- ENUM_ENT(EF_AMDGPU_MACH_R600_TURKS, "turks"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX600, "gfx600"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX601, "gfx601"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX602, "gfx602"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX700, "gfx700"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX701, "gfx701"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX702, "gfx702"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX703, "gfx703"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX704, "gfx704"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX705, "gfx705"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX801, "gfx801"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX802, "gfx802"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX803, "gfx803"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX805, "gfx805"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX810, "gfx810"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX900, "gfx900"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX902, "gfx902"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX904, "gfx904"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX906, "gfx906"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX908, "gfx908"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX909, "gfx909"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX90A, "gfx90a"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX90C, "gfx90c"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX940, "gfx940"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX941, "gfx941"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX942, "gfx942"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1010, "gfx1010"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1011, "gfx1011"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1012, "gfx1012"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1013, "gfx1013"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1030, "gfx1030"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1031, "gfx1031"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1032, "gfx1032"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1033, "gfx1033"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1034, "gfx1034"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1035, "gfx1035"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1036, "gfx1036"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1100, "gfx1100"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1101, "gfx1101"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1102, "gfx1102"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1103, "gfx1103"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1150, "gfx1150"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1151, "gfx1151"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1200, "gfx1200"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1201, "gfx1201")
+#define AMDGPU_MACH_ENUM_ENTS \
+ ENUM_ENT(EF_AMDGPU_MACH_NONE, "none"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_R600, "r600"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_R630, "r630"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_RS880, "rs880"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_RV670, "rv670"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_RV710, "rv710"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_RV730, "rv730"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_RV770, "rv770"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_CEDAR, "cedar"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_CYPRESS, "cypress"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_JUNIPER, "juniper"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_REDWOOD, "redwood"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_SUMO, "sumo"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_BARTS, "barts"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_CAICOS, "caicos"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_CAYMAN, "cayman"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_TURKS, "turks"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX600, "gfx600"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX601, "gfx601"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX602, "gfx602"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX700, "gfx700"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX701, "gfx701"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX702, "gfx702"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX703, "gfx703"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX704, "gfx704"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX705, "gfx705"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX801, "gfx801"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX802, "gfx802"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX803, "gfx803"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX805, "gfx805"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX810, "gfx810"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX900, "gfx900"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX902, "gfx902"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX904, "gfx904"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX906, "gfx906"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX908, "gfx908"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX909, "gfx909"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX90A, "gfx90a"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX90C, "gfx90c"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX940, "gfx940"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX941, "gfx941"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX942, "gfx942"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1010, "gfx1010"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1011, "gfx1011"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1012, "gfx1012"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1013, "gfx1013"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1030, "gfx1030"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1031, "gfx1031"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1032, "gfx1032"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1033, "gfx1033"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1034, "gfx1034"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1035, "gfx1035"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1036, "gfx1036"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1100, "gfx1100"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1101, "gfx1101"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1102, "gfx1102"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1103, "gfx1103"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1150, "gfx1150"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1151, "gfx1151"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1200, "gfx1200"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1201, "gfx1201"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC, "gfx9-generic"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC, "gfx10.1-generic"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC, "gfx10.3-generic"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC, "gfx11-generic")
// clang-format on
const EnumEntry<unsigned> ElfHeaderAMDGPUFlagsABIVersion3[] = {