aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorTobias Burnus <tobias@codesourcery.com>2022-09-29 16:37:52 +0200
committerTobias Burnus <tobias@codesourcery.com>2022-09-29 16:37:52 +0200
commitc455181c13a7b00ee09777287bcf0c8b9de9d1fe (patch)
tree000e41c9c680e245b67649b5e7276e570ecba12b /gcc
parentd21bfef98674abccd204dd2de5159cb3a19ea771 (diff)
parent85adc2ec2b0736d07c0df35ad9a450f97ff59a7c (diff)
downloadgcc-c455181c13a7b00ee09777287bcf0c8b9de9d1fe.zip
gcc-c455181c13a7b00ee09777287bcf0c8b9de9d1fe.tar.gz
gcc-c455181c13a7b00ee09777287bcf0c8b9de9d1fe.tar.bz2
Merge branch 'releases/gcc-12' into devel/omp/gcc-12
Merged up to r12-8794-g85adc2ec2b0736d07c0df35ad9a450f97ff59a7c (29th Sept 2022). This includes r12-8793-gafea1ae84f0 (cherry-picked from r13-2868-gd3df98807b5) "OpenACC: Fix reduction tree-sharing issue [PR106982]". However, due to omp-low.cc changes, it neither applies cleanly nor is it required to make the testcases pass. This merge adds the testcases - but due to conflicts under a different filename: gcc/testsuite/c-c++-common/goacc/reduction-7.c added as ...-9.c and ...-8.c added as ...-10.c.
Diffstat (limited to 'gcc')
-rw-r--r--gcc/ChangeLog24
-rw-r--r--gcc/DATESTAMP2
-rw-r--r--gcc/config/aarch64/aarch64-cores.def3
-rw-r--r--gcc/config/aarch64/aarch64-tune.md2
-rw-r--r--gcc/config/aarch64/aarch64.cc40
-rw-r--r--gcc/doc/invoke.texi2
-rw-r--r--gcc/omp-low.cc3
-rw-r--r--gcc/testsuite/c-c++-common/goacc/reduction-10.c12
-rw-r--r--gcc/testsuite/c-c++-common/goacc/reduction-9.c22
9 files changed, 85 insertions, 25 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index dabbe18..cb12da9 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,27 @@
+2022-09-28 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
+
+ Backported from master:
+ 2022-09-23 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
+
+ * config/aarch64/aarch64-cores.def (neoverse-v2): New entry.
+ (demeter): Update tunings to neoversev2.
+ * config/aarch64/aarch64-tune.md: Regenerate.
+ * config/aarch64/aarch64.cc (demeter_addrcost_table): Rename to
+ neoversev2_addrcost_table.
+ (demeter_regmove_cost): Rename to neoversev2_regmove_cost.
+ (demeter_advsimd_vector_cost): Rename to neoversev2_advsimd_vector_cost.
+ (demeter_sve_vector_cost): Rename to neoversev2_sve_vector_cost.
+ (demeter_scalar_issue_info): Rename to neoversev2_scalar_issue_info.
+ (demeter_advsimd_issue_info): Rename to neoversev2_advsimd_issue_info.
+ (demeter_sve_issue_info): Rename to neoversev2_sve_issue_info.
+ (demeter_vec_issue_info): Rename to neoversev2_vec_issue_info.
+ Update references to above.
+ (demeter_vector_cost): Rename to neoversev2_vector_cost.
+ (demeter_tunings): Rename to neoversev2_tunings.
+ (aarch64_vec_op_count::rename_cycles_per_iter): Use
+ neoversev2_sve_issue_info instead of demeter_sve_issue_info.
+ * doc/invoke.texi (AArch64 Options): Document neoverse-v2.
+
2022-09-21 Richard Sandiford <richard.sandiford@arm.com>
Backported from master:
diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP
index 8830fa8..ea4b4d1 100644
--- a/gcc/DATESTAMP
+++ b/gcc/DATESTAMP
@@ -1 +1 @@
-20220928
+20220929
diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
index 41d9535..0402bfb 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -171,6 +171,7 @@ AARCH64_CORE("cortex-x2", cortexx2, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARC
AARCH64_CORE("neoverse-n2", neoversen2, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversen2, 0x41, 0xd49, -1)
-AARCH64_CORE("demeter", demeter, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, demeter, 0x41, 0xd4f, -1)
+AARCH64_CORE("demeter", demeter, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1)
+AARCH64_CORE("neoverse-v2", neoversev2, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1)
#undef AARCH64_CORE
diff --git a/gcc/config/aarch64/aarch64-tune.md b/gcc/config/aarch64/aarch64-tune.md
index 27da961..84e9bbf 100644
--- a/gcc/config/aarch64/aarch64-tune.md
+++ b/gcc/config/aarch64/aarch64-tune.md
@@ -1,5 +1,5 @@
;; -*- buffer-read-only: t -*-
;; Generated automatically by gentune.sh from aarch64-cores.def
(define_attr "tune"
- "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,thunderx3t110,zeus,neoversev1,neoverse512tvb,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexa510,cortexa710,cortexx2,neoversen2,demeter"
+ "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,thunderx3t110,zeus,neoversev1,neoverse512tvb,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexa510,cortexa710,cortexx2,neoversen2,demeter,neoversev2"
(const (symbol_ref "((enum attr_tune) aarch64_tune)")))
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index f85bf2f..0946864 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -537,7 +537,7 @@ static const struct cpu_addrcost_table neoversen2_addrcost_table =
0 /* imm_offset */
};
-static const struct cpu_addrcost_table demeter_addrcost_table =
+static const struct cpu_addrcost_table neoversev2_addrcost_table =
{
{
1, /* hi */
@@ -680,7 +680,7 @@ static const struct cpu_regmove_cost neoversev1_regmove_cost =
2 /* FP2FP */
};
-static const struct cpu_regmove_cost demeter_regmove_cost =
+static const struct cpu_regmove_cost neoversev2_regmove_cost =
{
1, /* GP2GP */
/* Spilling to int<->fp instead of memory is recommended so set
@@ -2429,7 +2429,7 @@ static const struct tune_params neoversen2_tunings =
&generic_prefetch_tune
};
-static const advsimd_vec_cost demeter_advsimd_vector_cost =
+static const advsimd_vec_cost neoversev2_advsimd_vector_cost =
{
2, /* int_stmt_cost */
2, /* fp_stmt_cost */
@@ -2460,7 +2460,7 @@ static const advsimd_vec_cost demeter_advsimd_vector_cost =
1 /* store_cost */
};
-static const sve_vec_cost demeter_sve_vector_cost =
+static const sve_vec_cost neoversev2_sve_vector_cost =
{
{
2, /* int_stmt_cost */
@@ -2517,7 +2517,7 @@ static const sve_vec_cost demeter_sve_vector_cost =
3 /* scatter_store_elt_cost */
};
-static const aarch64_scalar_vec_issue_info demeter_scalar_issue_info =
+static const aarch64_scalar_vec_issue_info neoversev2_scalar_issue_info =
{
3, /* loads_stores_per_cycle */
2, /* stores_per_cycle */
@@ -2526,7 +2526,7 @@ static const aarch64_scalar_vec_issue_info demeter_scalar_issue_info =
1 /* fp_simd_store_general_ops */
};
-static const aarch64_advsimd_vec_issue_info demeter_advsimd_issue_info =
+static const aarch64_advsimd_vec_issue_info neoversev2_advsimd_issue_info =
{
{
3, /* loads_stores_per_cycle */
@@ -2540,7 +2540,7 @@ static const aarch64_advsimd_vec_issue_info demeter_advsimd_issue_info =
3 /* ld4_st4_general_ops */
};
-static const aarch64_sve_vec_issue_info demeter_sve_issue_info =
+static const aarch64_sve_vec_issue_info neoversev2_sve_issue_info =
{
{
{
@@ -2562,15 +2562,15 @@ static const aarch64_sve_vec_issue_info demeter_sve_issue_info =
1 /* gather_scatter_pair_pred_ops */
};
-static const aarch64_vec_issue_info demeter_vec_issue_info =
+static const aarch64_vec_issue_info neoversev2_vec_issue_info =
{
- &demeter_scalar_issue_info,
- &demeter_advsimd_issue_info,
- &demeter_sve_issue_info
+ &neoversev2_scalar_issue_info,
+ &neoversev2_advsimd_issue_info,
+ &neoversev2_sve_issue_info
};
/* Demeter costs for vector insn classes. */
-static const struct cpu_vector_cost demeter_vector_cost =
+static const struct cpu_vector_cost neoversev2_vector_cost =
{
1, /* scalar_int_stmt_cost */
2, /* scalar_fp_stmt_cost */
@@ -2578,17 +2578,17 @@ static const struct cpu_vector_cost demeter_vector_cost =
1, /* scalar_store_cost */
1, /* cond_taken_branch_cost */
1, /* cond_not_taken_branch_cost */
- &demeter_advsimd_vector_cost, /* advsimd */
- &demeter_sve_vector_cost, /* sve */
- &demeter_vec_issue_info /* issue_info */
+ &neoversev2_advsimd_vector_cost, /* advsimd */
+ &neoversev2_sve_vector_cost, /* sve */
+ &neoversev2_vec_issue_info /* issue_info */
};
-static const struct tune_params demeter_tunings =
+static const struct tune_params neoversev2_tunings =
{
&cortexa76_extra_costs,
- &demeter_addrcost_table,
- &demeter_regmove_cost,
- &demeter_vector_cost,
+ &neoversev2_addrcost_table,
+ &neoversev2_regmove_cost,
+ &neoversev2_vector_cost,
&generic_branch_cost,
&generic_approx_modes,
SVE_128, /* sve_width */
@@ -15532,7 +15532,7 @@ aarch64_vec_op_count::rename_cycles_per_iter () const
{
if (sve_issue_info () == &neoverse512tvb_sve_issue_info
|| sve_issue_info () == &neoversen2_sve_issue_info
- || sve_issue_info () == &demeter_sve_issue_info)
+ || sve_issue_info () == &neoversev2_sve_issue_info)
/* + 1 for an addition. We've already counted a general op for each
store, so we don't need to account for stores separately. The branch
reads no registers and so does not need to be counted either.
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 281cafc..52ba1c8 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -19260,7 +19260,7 @@ performance of the code. Permissible values for this option are:
@samp{cortex-a78}, @samp{cortex-a78ae}, @samp{cortex-a78c},
@samp{ares}, @samp{exynos-m1}, @samp{emag}, @samp{falkor},
@samp{neoverse-512tvb}, @samp{neoverse-e1}, @samp{neoverse-n1},
-@samp{neoverse-n2}, @samp{neoverse-v1}, @samp{qdf24xx},
+@samp{neoverse-n2}, @samp{neoverse-v1}, @samp{neoverse-v2}, @samp{qdf24xx},
@samp{saphira}, @samp{phecda}, @samp{xgene1}, @samp{vulcan},
@samp{octeontx}, @samp{octeontx81}, @samp{octeontx83},
@samp{octeontx2}, @samp{octeontx2t98}, @samp{octeontx2t96}
diff --git a/gcc/omp-low.cc b/gcc/omp-low.cc
index 85a462a..d726eea 100644
--- a/gcc/omp-low.cc
+++ b/gcc/omp-low.cc
@@ -8197,7 +8197,8 @@ lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner,
= build_call_expr_internal_loc (loc, IFN_GOACC_REDUCTION,
TREE_TYPE (var), 6, setup_code,
unshare_expr (ref_to_res),
- incoming, level, op, off);
+ unshare_expr (incoming),
+ level, op, off);
tree init_call
= build_call_expr_internal_loc (loc, IFN_GOACC_REDUCTION,
TREE_TYPE (var), 6, init_code,
diff --git a/gcc/testsuite/c-c++-common/goacc/reduction-10.c b/gcc/testsuite/c-c++-common/goacc/reduction-10.c
new file mode 100644
index 0000000..2c3ed49
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/goacc/reduction-10.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+
+/* PR middle-end/106982 */
+
+void test1(double *c)
+{
+ double reduced[5];
+#pragma acc parallel loop gang private(reduced)
+ for (int x = 0; x < 5; ++x)
+#pragma acc loop worker reduction(*:reduced)
+ for (int y = 0; y < 5; ++y) { }
+}
diff --git a/gcc/testsuite/c-c++-common/goacc/reduction-9.c b/gcc/testsuite/c-c++-common/goacc/reduction-9.c
new file mode 100644
index 0000000..482b0ab
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/goacc/reduction-9.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+
+/* PR middle-end/106982 */
+
+long long n = 100;
+int multiplicitive_n = 128;
+
+void test1(double *rand, double *a, double *b, double *c)
+{
+#pragma acc data copyin(a[0:10*multiplicitive_n], b[0:10*multiplicitive_n]) copyout(c[0:10])
+ {
+#pragma acc parallel loop
+ for (int i = 0; i < 10; ++i)
+ {
+ double temp = 1.0;
+#pragma acc loop vector reduction(*:temp)
+ for (int j = 0; j < multiplicitive_n; ++j)
+ temp *= a[(i * multiplicitive_n) + j] + b[(i * multiplicitive_n) + j];
+ c[i] = temp;
+ }
+ }
+}