aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--gcc/config.gcc2
-rw-r--r--gcc/config/gcn/gcn-opts.h28
-rw-r--r--gcc/config/gcn/gcn.cc60
-rw-r--r--gcc/config/gcn/gcn.h4
-rw-r--r--gcc/config/gcn/gcn.md8
-rw-r--r--gcc/config/gcn/gcn.opt3
-rw-r--r--gcc/config/gcn/mkoffload.cc4
-rw-r--r--gcc/config/gcn/t-gcn-hsa4
-rw-r--r--gcc/config/gcn/t-omp-device2
-rw-r--r--libgomp/plugin/plugin-gcn.c9
10 files changed, 100 insertions, 24 deletions
diff --git a/gcc/config.gcc b/gcc/config.gcc
index 600ac35..cdbefb5 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -4522,7 +4522,7 @@ case "${target}" in
for which in arch tune; do
eval "val=\$with_$which"
case ${val} in
- "" | fiji | gfx900 | gfx906 )
+ "" | fiji | gfx900 | gfx906 | gfx908 | gfx90a)
# OK
;;
*)
diff --git a/gcc/config/gcn/gcn-opts.h b/gcc/config/gcn/gcn-opts.h
index c080524..b62dfb4 100644
--- a/gcc/config/gcn/gcn-opts.h
+++ b/gcc/config/gcn/gcn-opts.h
@@ -23,16 +23,30 @@ enum processor_type
PROCESSOR_FIJI, // gfx803
PROCESSOR_VEGA10, // gfx900
PROCESSOR_VEGA20, // gfx906
- PROCESSOR_GFX908 // as yet unnamed
+ PROCESSOR_GFX908,
+ PROCESSOR_GFX90a
};
/* Set in gcn_option_override. */
-extern int gcn_isa;
-
-#define TARGET_GCN3 (gcn_isa == 3)
-#define TARGET_GCN3_PLUS (gcn_isa >= 3)
-#define TARGET_GCN5 (gcn_isa == 5)
-#define TARGET_GCN5_PLUS (gcn_isa >= 5)
+extern enum gcn_isa {
+ ISA_UNKNOWN,
+ ISA_GCN3,
+ ISA_GCN5,
+ ISA_CDNA1,
+ ISA_CDNA2
+} gcn_isa;
+
+#define TARGET_GCN3 (gcn_isa == ISA_GCN3)
+#define TARGET_GCN3_PLUS (gcn_isa >= ISA_GCN3)
+#define TARGET_GCN5 (gcn_isa == ISA_GCN5)
+#define TARGET_GCN5_PLUS (gcn_isa >= ISA_GCN5)
+#define TARGET_CDNA1 (gcn_isa == ISA_CDNA1)
+#define TARGET_CDNA1_PLUS (gcn_isa >= ISA_CDNA1)
+#define TARGET_CDNA2 (gcn_isa == ISA_CDNA2)
+#define TARGET_CDNA2_PLUS (gcn_isa >= ISA_CDNA2)
+
+#define TARGET_M0_LDS_LIMIT (TARGET_GCN3)
+#define TARGET_PACKED_WORK_ITEMS (TARGET_CDNA2_PLUS)
enum sram_ecc_type
{
diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc
index 39a7a96..5e75a1b 100644
--- a/gcc/config/gcn/gcn.cc
+++ b/gcc/config/gcn/gcn.cc
@@ -66,7 +66,7 @@ static bool ext_gcn_constants_init = 0;
/* Holds the ISA variant, derived from the command line parameters. */
-int gcn_isa = 3; /* Default to GCN3. */
+enum gcn_isa gcn_isa = ISA_GCN3; /* Default to GCN3. */
/* Reserve this much space for LDS (for propagating variables from
worker-single mode to worker-partitioned mode), per workgroup. Global
@@ -129,7 +129,13 @@ gcn_option_override (void)
if (!flag_pic)
flag_pic = flag_pie;
- gcn_isa = gcn_arch == PROCESSOR_FIJI ? 3 : 5;
+ gcn_isa = (gcn_arch == PROCESSOR_FIJI ? ISA_GCN3
+ : gcn_arch == PROCESSOR_VEGA10 ? ISA_GCN5
+ : gcn_arch == PROCESSOR_VEGA20 ? ISA_GCN5
+ : gcn_arch == PROCESSOR_GFX908 ? ISA_CDNA1
+ : gcn_arch == PROCESSOR_GFX90a ? ISA_CDNA2
+ : ISA_UNKNOWN);
+ gcc_assert (gcn_isa != ISA_UNKNOWN);
/* The default stack size needs to be small for offload kernels because
there may be many, many threads. Also, a smaller stack gives a
@@ -2642,6 +2648,8 @@ gcn_omp_device_kind_arch_isa (enum omp_device_kind_arch_isa trait,
return gcn_arch == PROCESSOR_VEGA20;
if (strcmp (name, "gfx908") == 0)
return gcn_arch == PROCESSOR_GFX908;
+ if (strcmp (name, "gfx90a") == 0)
+ return gcn_arch == PROCESSOR_GFX90a;
return 0;
default:
gcc_unreachable ();
@@ -3081,13 +3089,35 @@ gcn_expand_prologue ()
/* Ensure that the scheduler doesn't do anything unexpected. */
emit_insn (gen_blockage ());
- /* m0 is initialized for the usual LDS DS and FLAT memory case.
- The low-part is the address of the topmost addressable byte, which is
- size-1. The high-part is an offset and should be zero. */
- emit_move_insn (gen_rtx_REG (SImode, M0_REG),
- gen_int_mode (LDS_SIZE, SImode));
+ if (TARGET_M0_LDS_LIMIT)
+ {
+ /* m0 is initialized for the usual LDS DS and FLAT memory case.
+ The low-part is the address of the topmost addressable byte, which is
+ size-1. The high-part is an offset and should be zero. */
+ emit_move_insn (gen_rtx_REG (SImode, M0_REG),
+ gen_int_mode (LDS_SIZE, SImode));
+
+ emit_insn (gen_prologue_use (gen_rtx_REG (SImode, M0_REG)));
+ }
- emit_insn (gen_prologue_use (gen_rtx_REG (SImode, M0_REG)));
+ if (TARGET_PACKED_WORK_ITEMS
+ && cfun && cfun->machine && !cfun->machine->normal_function)
+ {
+ /* v0 contains the X, Y and Z dimensions all in one.
+ Expand them out for ABI compatibility. */
+ /* TODO: implement and use zero_extract. */
+ rtx v1 = gen_rtx_REG (V64SImode, VGPR_REGNO (1));
+ emit_insn (gen_andv64si3 (v1, gen_rtx_REG (V64SImode, VGPR_REGNO (0)),
+ gen_rtx_CONST_INT (VOIDmode, 0x3FF << 10)));
+ emit_insn (gen_lshrv64si3 (v1, v1, gen_rtx_CONST_INT (VOIDmode, 10)));
+ emit_insn (gen_prologue_use (v1));
+
+ rtx v2 = gen_rtx_REG (V64SImode, VGPR_REGNO (2));
+ emit_insn (gen_andv64si3 (v2, gen_rtx_REG (V64SImode, VGPR_REGNO (0)),
+ gen_rtx_CONST_INT (VOIDmode, 0x3FF << 20)));
+ emit_insn (gen_lshrv64si3 (v2, v2, gen_rtx_CONST_INT (VOIDmode, 20)));
+ emit_insn (gen_prologue_use (v2));
+ }
if (cfun && cfun->machine && !cfun->machine->normal_function && flag_openmp)
{
@@ -5243,6 +5273,9 @@ output_file_start (void)
case PROCESSOR_GFX908:
cpu = "gfx908";
break;
+ case PROCESSOR_GFX90a:
+ cpu = "gfx90a";
+ break;
default: gcc_unreachable ();
}
@@ -5296,6 +5329,10 @@ gcn_hsa_declare_function_name (FILE *file, const char *name, tree)
sgpr = MAX_NORMAL_SGPR_COUNT;
}
+ /* The gfx90a accum_offset field can't represent 0 registers. */
+ if (gcn_arch == PROCESSOR_GFX90a && vgpr < 4)
+ vgpr = 4;
+
fputs ("\t.rodata\n"
"\t.p2align\t6\n"
"\t.amdhsa_kernel\t", file);
@@ -5364,6 +5401,11 @@ gcn_hsa_declare_function_name (FILE *file, const char *name, tree)
one 64th the wave-front stack size. */
stack_size_opt / 64,
LDS_SIZE);
+ if (gcn_arch == PROCESSOR_GFX90a)
+ fprintf (file,
+ "\t .amdhsa_accum_offset\t%i\n"
+ "\t .amdhsa_tg_split\t0\n",
+ (vgpr+3)&~3); // I think this means the AGPRs come after the VGPRs
fputs ("\t.end_amdhsa_kernel\n", file);
#if 1
@@ -5392,6 +5434,8 @@ gcn_hsa_declare_function_name (FILE *file, const char *name, tree)
LDS_SIZE,
stack_size_opt / 64,
sgpr, vgpr);
+ if (gcn_arch == PROCESSOR_GFX90a)
+ fprintf (file, " .agpr_count: 0\n"); // AGPRs are not used, yet
fputs (" .end_amdgpu_metadata\n", file);
#endif
diff --git a/gcc/config/gcn/gcn.h b/gcc/config/gcn/gcn.h
index 9ae8919..a129760 100644
--- a/gcc/config/gcn/gcn.h
+++ b/gcc/config/gcn/gcn.h
@@ -24,6 +24,10 @@
builtin_define ("__GCN3__"); \
else if (TARGET_GCN5) \
builtin_define ("__GCN5__"); \
+ else if (TARGET_CDNA1) \
+ builtin_define ("__CDNA1__"); \
+ else if (TARGET_CDNA2) \
+ builtin_define ("__CDNA2__"); \
} \
while(0)
diff --git a/gcc/config/gcn/gcn.md b/gcc/config/gcn/gcn.md
index 21a7476..53e846e 100644
--- a/gcc/config/gcn/gcn.md
+++ b/gcc/config/gcn/gcn.md
@@ -1410,7 +1410,7 @@
""
{
if (can_create_pseudo_p ()
- && !TARGET_GCN5
+ && !TARGET_GCN5_PLUS
&& !gcn_inline_immediate_operand (operands[2], SImode))
operands[2] = force_reg (SImode, operands[2]);
@@ -1451,7 +1451,7 @@
(match_operand:SI 1 "register_operand" "Sg,Sg,v"))
(match_operand:DI 2 "gcn_32bit_immediate_operand" "A, B,A"))
(const_int 32))))]
- "TARGET_GCN5 || gcn_inline_immediate_operand (operands[2], SImode)"
+ "TARGET_GCN5_PLUS || gcn_inline_immediate_operand (operands[2], SImode)"
"@
s_mul_hi<sgnsuffix>0\t%0, %1, %2
s_mul_hi<sgnsuffix>0\t%0, %1, %2
@@ -1469,7 +1469,7 @@
""
{
if (can_create_pseudo_p ()
- && !TARGET_GCN5
+ && !TARGET_GCN5_PLUS
&& !gcn_inline_immediate_operand (operands[2], SImode))
operands[2] = force_reg (SImode, operands[2]);
@@ -1506,7 +1506,7 @@
(match_operand:SI 1 "register_operand" "Sg, Sg, v"))
(match_operand:DI 2 "gcn_32bit_immediate_operand"
"A, B, A")))]
- "TARGET_GCN5 || gcn_inline_immediate_operand (operands[2], SImode)"
+ "TARGET_GCN5_PLUS || gcn_inline_immediate_operand (operands[2], SImode)"
"#"
"&& reload_completed"
[(const_int 0)]
diff --git a/gcc/config/gcn/gcn.opt b/gcc/config/gcn/gcn.opt
index 54da11f..9606aaf 100644
--- a/gcc/config/gcn/gcn.opt
+++ b/gcc/config/gcn/gcn.opt
@@ -37,6 +37,9 @@ Enum(gpu_type) String(gfx906) Value(PROCESSOR_VEGA20)
EnumValue
Enum(gpu_type) String(gfx908) Value(PROCESSOR_GFX908)
+EnumValue
+Enum(gpu_type) String(gfx90a) Value(PROCESSOR_GFX90a)
+
march=
Target RejectNegative Joined ToLower Enum(gpu_type) Var(gcn_arch) Init(PROCESSOR_FIJI)
Specify the name of the target GPU.
diff --git a/gcc/config/gcn/mkoffload.cc b/gcc/config/gcn/mkoffload.cc
index e98277c4..ed93ae8 100644
--- a/gcc/config/gcn/mkoffload.cc
+++ b/gcc/config/gcn/mkoffload.cc
@@ -55,6 +55,8 @@
#define EF_AMDGPU_MACH_AMDGCN_GFX906 0x2f
#undef EF_AMDGPU_MACH_AMDGCN_GFX908
#define EF_AMDGPU_MACH_AMDGCN_GFX908 0x30
+#undef EF_AMDGPU_MACH_AMDGCN_GFX90a
+#define EF_AMDGPU_MACH_AMDGCN_GFX90a 0x3f
#define EF_AMDGPU_FEATURE_XNACK_V4 0x300 /* Mask. */
#define EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4 0x000
@@ -904,6 +906,8 @@ main (int argc, char **argv)
elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX906;
else if (strcmp (argv[i], "-march=gfx908") == 0)
elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX908;
+ else if (strcmp (argv[i], "-march=gfx90a") == 0)
+ elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX90a;
}
if (!(fopenacc ^ fopenmp))
diff --git a/gcc/config/gcn/t-gcn-hsa b/gcc/config/gcn/t-gcn-hsa
index 10e31f3..9e03ec8 100644
--- a/gcc/config/gcn/t-gcn-hsa
+++ b/gcc/config/gcn/t-gcn-hsa
@@ -42,8 +42,8 @@ ALL_HOST_OBJS += gcn-run.o
gcn-run$(exeext): gcn-run.o
+$(LINKER) $(ALL_LINKERFLAGS) $(LDFLAGS) -o $@ $< -ldl
-MULTILIB_OPTIONS = march=gfx900/march=gfx906/march=gfx908
-MULTILIB_DIRNAMES = gfx900 gfx906 gfx908
+MULTILIB_OPTIONS = march=gfx900/march=gfx906/march=gfx908/march=gfx90a
+MULTILIB_DIRNAMES = gfx900 gfx906 gfx908 gfx90a
gcn-tree.o: $(srcdir)/config/gcn/gcn-tree.cc
$(COMPILE) $<
diff --git a/gcc/config/gcn/t-omp-device b/gcc/config/gcn/t-omp-device
index e1d9e0d..27d36db 100644
--- a/gcc/config/gcn/t-omp-device
+++ b/gcc/config/gcn/t-omp-device
@@ -1,4 +1,4 @@
omp-device-properties-gcn: $(srcdir)/config/gcn/gcn.cc
echo kind: gpu > $@
echo arch: amdgcn gcn >> $@
- echo isa: fiji gfx900 gfx906 gfx908 >> $@
+ echo isa: fiji gfx900 gfx906 gfx908 gfx90a >> $@
diff --git a/libgomp/plugin/plugin-gcn.c b/libgomp/plugin/plugin-gcn.c
index 2b32f53..1c04368 100644
--- a/libgomp/plugin/plugin-gcn.c
+++ b/libgomp/plugin/plugin-gcn.c
@@ -402,7 +402,8 @@ typedef enum {
EF_AMDGPU_MACH_AMDGCN_GFX803 = 0x02a,
EF_AMDGPU_MACH_AMDGCN_GFX900 = 0x02c,
EF_AMDGPU_MACH_AMDGCN_GFX906 = 0x02f,
- EF_AMDGPU_MACH_AMDGCN_GFX908 = 0x030
+ EF_AMDGPU_MACH_AMDGCN_GFX908 = 0x030,
+ EF_AMDGPU_MACH_AMDGCN_GFX90a = 0x03f
} EF_AMDGPU_MACH;
const static int EF_AMDGPU_MACH_MASK = 0x000000ff;
@@ -1628,6 +1629,7 @@ const static char *gcn_gfx803_s = "gfx803";
const static char *gcn_gfx900_s = "gfx900";
const static char *gcn_gfx906_s = "gfx906";
const static char *gcn_gfx908_s = "gfx908";
+const static char *gcn_gfx90a_s = "gfx90a";
const static int gcn_isa_name_len = 6;
/* Returns the name that the HSA runtime uses for the ISA or NULL if we do not
@@ -1645,6 +1647,8 @@ isa_hsa_name (int isa) {
return gcn_gfx906_s;
case EF_AMDGPU_MACH_AMDGCN_GFX908:
return gcn_gfx908_s;
+ case EF_AMDGPU_MACH_AMDGCN_GFX90a:
+ return gcn_gfx90a_s;
}
return NULL;
}
@@ -1681,6 +1685,9 @@ isa_code(const char *isa) {
if (!strncmp (isa, gcn_gfx908_s, gcn_isa_name_len))
return EF_AMDGPU_MACH_AMDGCN_GFX908;
+ if (!strncmp (isa, gcn_gfx90a_s, gcn_isa_name_len))
+ return EF_AMDGPU_MACH_AMDGCN_GFX90a;
+
return -1;
}