diff options
author | Tamar Christina <tamar.christina@arm.com> | 2023-11-21 13:20:39 +0000 |
---|---|---|
committer | Tamar Christina <tamar.christina@arm.com> | 2023-11-21 13:25:10 +0000 |
commit | 33c2b70dbabc02788caabcbc66b7baeafeb95bcf (patch) | |
tree | e0750c6d8af0977722a8dd2236369d2d8358d31c | |
parent | e5678468e550e99944fca6bae364696714ffb445 (diff) | |
download | gcc-33c2b70dbabc02788caabcbc66b7baeafeb95bcf.zip gcc-33c2b70dbabc02788caabcbc66b7baeafeb95bcf.tar.gz gcc-33c2b70dbabc02788caabcbc66b7baeafeb95bcf.tar.bz2 |
AArch64: Add new generic-armv8-a CPU and make it the default.
This patch adds a new generic scheduling model "generic-armv8-a" and makes it
the default for all Armv8 architectures.
-mcpu=generic and -mtune=generic are kept around for those that really want the
previous cost model.
On SPECCPU 2017 this shows the following results:
generic: SPECINT 1.0% improvement in geomean, SPECFP -0.6%. The SPECFP
regression is due to fotonik3d_r, where we vectorize an FP calculation that
only ever needs one lane of the result. I believe this is a generic costing
bug, but at the moment we can't change the costs of FP and INT independently.
So I will defer updating that cost to stage3, after Richard's other
costing updates land.
generic SVE: SPECINT 1.1% improvement in geomean, SPECFP 0.7% improvement.
gcc/ChangeLog:
PR target/111370
* config/aarch64/aarch64-arches.def (armv8-r, armv8-a, armv8.1-a,
armv8.2-a, armv8.3-a, armv8.4-a, armv8.5-a, armv8.6-a, armv8.7-a,
armv8.8-a): Update to generic_armv8_a.
* config/aarch64/aarch64-cores.def (generic-armv8-a): New.
* config/aarch64/aarch64-tune.md: Regenerate.
* config/aarch64/aarch64.cc: Include generic_armv8_a.h.
* config/aarch64/aarch64.h (TARGET_CPU_DEFAULT): Change to
TARGET_CPU_generic_armv8_a.
* config/aarch64/tuning_models/generic_armv8_a.h: New file.
gcc/testsuite/ChangeLog:
PR target/111370
* gcc.target/aarch64/sve/cond_asrd_1.c: Updated.
* gcc.target/aarch64/sve/cond_cnot_4.c: Likewise.
* gcc.target/aarch64/sve/cond_unary_5.c: Likewise.
* gcc.target/aarch64/sve/cond_uxt_5.c: Likewise.
* gcc.target/aarch64/target_attr_13.c: Likewise.
* gcc.target/aarch64/target_attr_15.c: Likewise.
-rw-r--r-- | gcc/config/aarch64/aarch64-arches.def | 28 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64-cores.def | 1 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64-tune.md | 2 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64.cc | 1 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64.h | 2 | ||||
-rw-r--r-- | gcc/config/aarch64/tuning_models/generic_armv8_a.h | 191 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/sve/cond_asrd_1.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_4.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/sve/cond_unary_5.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/sve/cond_uxt_5.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/target_attr_13.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/target_attr_15.c | 2 |
12 files changed, 215 insertions, 22 deletions
diff --git a/gcc/config/aarch64/aarch64-arches.def b/gcc/config/aarch64/aarch64-arches.def index 7ae92aa..f89e4ea 100644 --- a/gcc/config/aarch64/aarch64-arches.def +++ b/gcc/config/aarch64/aarch64-arches.def @@ -30,19 +30,19 @@ Due to the assumptions about the positions of these fields in config.gcc, NAME should be kept as the first argument. */ -AARCH64_ARCH("armv8-a", generic, V8A, 8, (SIMD)) -AARCH64_ARCH("armv8.1-a", generic, V8_1A, 8, (V8A, LSE, CRC, RDMA)) -AARCH64_ARCH("armv8.2-a", generic, V8_2A, 8, (V8_1A)) -AARCH64_ARCH("armv8.3-a", generic, V8_3A, 8, (V8_2A, PAUTH, RCPC)) -AARCH64_ARCH("armv8.4-a", generic, V8_4A, 8, (V8_3A, F16FML, DOTPROD, FLAGM)) -AARCH64_ARCH("armv8.5-a", generic, V8_5A, 8, (V8_4A, SB, SSBS, PREDRES)) -AARCH64_ARCH("armv8.6-a", generic, V8_6A, 8, (V8_5A, I8MM, BF16)) -AARCH64_ARCH("armv8.7-a", generic, V8_7A, 8, (V8_6A, LS64)) -AARCH64_ARCH("armv8.8-a", generic, V8_8A, 8, (V8_7A, MOPS)) -AARCH64_ARCH("armv8-r", generic, V8R , 8, (V8_4A)) -AARCH64_ARCH("armv9-a", generic, V9A , 9, (V8_5A, SVE2)) -AARCH64_ARCH("armv9.1-a", generic, V9_1A, 9, (V8_6A, V9A)) -AARCH64_ARCH("armv9.2-a", generic, V9_2A, 9, (V8_7A, V9_1A)) -AARCH64_ARCH("armv9.3-a", generic, V9_3A, 9, (V8_8A, V9_2A)) +AARCH64_ARCH("armv8-a", generic_armv8_a, V8A, 8, (SIMD)) +AARCH64_ARCH("armv8.1-a", generic_armv8_a, V8_1A, 8, (V8A, LSE, CRC, RDMA)) +AARCH64_ARCH("armv8.2-a", generic_armv8_a, V8_2A, 8, (V8_1A)) +AARCH64_ARCH("armv8.3-a", generic_armv8_a, V8_3A, 8, (V8_2A, PAUTH, RCPC)) +AARCH64_ARCH("armv8.4-a", generic_armv8_a, V8_4A, 8, (V8_3A, F16FML, DOTPROD, FLAGM)) +AARCH64_ARCH("armv8.5-a", generic_armv8_a, V8_5A, 8, (V8_4A, SB, SSBS, PREDRES)) +AARCH64_ARCH("armv8.6-a", generic_armv8_a, V8_6A, 8, (V8_5A, I8MM, BF16)) +AARCH64_ARCH("armv8.7-a", generic_armv8_a, V8_7A, 8, (V8_6A, LS64)) +AARCH64_ARCH("armv8.8-a", generic_armv8_a, V8_8A, 8, (V8_7A, MOPS)) +AARCH64_ARCH("armv8-r", generic_armv8_a, V8R , 8, (V8_4A)) +AARCH64_ARCH("armv9-a", generic, V9A , 9, (V8_5A, 
SVE2)) +AARCH64_ARCH("armv9.1-a", generic, V9_1A, 9, (V8_6A, V9A)) +AARCH64_ARCH("armv9.2-a", generic, V9_2A, 9, (V8_7A, V9_1A)) +AARCH64_ARCH("armv9.3-a", generic, V9_3A, 9, (V8_8A, V9_2A)) #undef AARCH64_ARCH diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def index 3e363bd..30f4dd0 100644 --- a/gcc/config/aarch64/aarch64-cores.def +++ b/gcc/config/aarch64/aarch64-cores.def @@ -191,5 +191,6 @@ AARCH64_CORE("demeter", demeter, cortexa57, V9A, (I8MM, BF16, SVE2_BITPERM, RNG, /* Generic Architecture Processors. */ AARCH64_CORE("generic", generic, cortexa53, V8A, (), generic, 0x0, 0x0, -1) +AARCH64_CORE("generic-armv8-a", generic_armv8_a, cortexa53, V8A, (), generic_armv8_a, 0x0, 0x0, -1) #undef AARCH64_CORE diff --git a/gcc/config/aarch64/aarch64-tune.md b/gcc/config/aarch64/aarch64-tune.md index cd5d79e..0a32056 100644 --- a/gcc/config/aarch64/aarch64-tune.md +++ b/gcc/config/aarch64/aarch64-tune.md @@ -1,5 +1,5 @@ ;; -*- buffer-read-only: t -*- ;; Generated automatically by gentune.sh from aarch64-cores.def (define_attr "tune" - "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,cortexx1c,neoversen1,ares,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,thunderx3t110,neoversev1,zeus,neoverse512tvb,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexa510,cortexa520,cortexa710,cortexa715,cortexa720,cortexx2,cortexx3,cortexx4,neoversen2,neoversev2,demeter,generic" + 
"cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,cortexx1c,neoversen1,ares,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,thunderx3t110,neoversev1,zeus,neoverse512tvb,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexa510,cortexa520,cortexa710,cortexa715,cortexa720,cortexx2,cortexx3,cortexx4,neoversen2,neoversev2,demeter,generic,generic_armv8_a" (const (symbol_ref "((enum attr_tune) aarch64_tune)"))) diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index d1dd65f..3ae8e6d 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -355,6 +355,7 @@ static const struct aarch64_flag_desc aarch64_tuning_flags[] = /* Tuning parameters. */ #include "tuning_models/generic.h" +#include "tuning_models/generic_armv8_a.h" #include "tuning_models/cortexa35.h" #include "tuning_models/cortexa53.h" #include "tuning_models/cortexa57.h" diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h index 145bf53..1ac2989 100644 --- a/gcc/config/aarch64/aarch64.h +++ b/gcc/config/aarch64/aarch64.h @@ -724,7 +724,7 @@ enum target_cpus /* If there is no CPU defined at configure, use generic as default. 
*/ #ifndef TARGET_CPU_DEFAULT -# define TARGET_CPU_DEFAULT TARGET_CPU_generic +# define TARGET_CPU_DEFAULT TARGET_CPU_generic_armv8_a #endif /* If inserting NOP before a mult-accumulate insn remember to adjust the diff --git a/gcc/config/aarch64/tuning_models/generic_armv8_a.h b/gcc/config/aarch64/tuning_models/generic_armv8_a.h new file mode 100644 index 0000000..82abe17 --- /dev/null +++ b/gcc/config/aarch64/tuning_models/generic_armv8_a.h @@ -0,0 +1,191 @@ +/* Tuning model description for AArch64 architecture. + Copyright (C) 2023 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +#ifndef GCC_AARCH64_H_GENERIC_ARMV8_A +#define GCC_AARCH64_H_GENERIC_ARMV8_A + +#include "generic.h" + +static const struct cpu_addrcost_table generic_armv8_a_addrcost_table = +{ + { + 1, /* hi */ + 0, /* si */ + 0, /* di */ + 1, /* ti */ + }, + 0, /* pre_modify */ + 0, /* post_modify */ + 0, /* post_modify_ld3_st3 */ + 0, /* post_modify_ld4_st4 */ + 0, /* register_offset */ + 0, /* register_sextend */ + 0, /* register_zextend */ + 0 /* imm_offset */ +}; + +static const struct cpu_regmove_cost generic_armv8_a_regmove_cost = +{ + 1, /* GP2GP */ + /* Avoid the use of slow int<->fp moves for spilling by setting + their cost higher than memmov_cost. */ + 5, /* GP2FP */ + 5, /* FP2GP */ + 2 /* FP2FP */ +}; + +/* Generic costs for Advanced SIMD vector operations. 
*/ +static const advsimd_vec_cost generic_armv8_a_advsimd_vector_cost = +{ + 1, /* int_stmt_cost */ + 1, /* fp_stmt_cost */ + 0, /* ld2_st2_permute_cost */ + 0, /* ld3_st3_permute_cost */ + 0, /* ld4_st4_permute_cost */ + 2, /* permute_cost */ + 2, /* reduc_i8_cost */ + 2, /* reduc_i16_cost */ + 2, /* reduc_i32_cost */ + 2, /* reduc_i64_cost */ + 2, /* reduc_f16_cost */ + 2, /* reduc_f32_cost */ + 2, /* reduc_f64_cost */ + 2, /* store_elt_extra_cost */ + 2, /* vec_to_scalar_cost */ + 1, /* scalar_to_vec_cost */ + 1, /* align_load_cost */ + 1, /* unalign_load_cost */ + 1, /* unalign_store_cost */ + 1 /* store_cost */ +}; + +/* Generic costs for SVE vector operations. */ +static const sve_vec_cost generic_armv8_a_sve_vector_cost = +{ + { + 1, /* int_stmt_cost */ + 1, /* fp_stmt_cost */ + 0, /* ld2_st2_permute_cost */ + 0, /* ld3_st3_permute_cost */ + 0, /* ld4_st4_permute_cost */ + 2, /* permute_cost */ + 2, /* reduc_i8_cost */ + 2, /* reduc_i16_cost */ + 2, /* reduc_i32_cost */ + 2, /* reduc_i64_cost */ + 2, /* reduc_f16_cost */ + 2, /* reduc_f32_cost */ + 2, /* reduc_f64_cost */ + 2, /* store_elt_extra_cost */ + 2, /* vec_to_scalar_cost */ + 1, /* scalar_to_vec_cost */ + 1, /* align_load_cost */ + 1, /* unalign_load_cost */ + 1, /* unalign_store_cost */ + 1 /* store_cost */ + }, + 2, /* clast_cost */ + 2, /* fadda_f16_cost */ + 2, /* fadda_f32_cost */ + 2, /* fadda_f64_cost */ + 4, /* gather_load_x32_cost */ + 2, /* gather_load_x64_cost */ + 1 /* scatter_store_elt_cost */ +}; + +/* Generic costs for vector insn classes. */ +static const struct cpu_vector_cost generic_armv8_a_vector_cost = +{ + 1, /* scalar_int_stmt_cost */ + 1, /* scalar_fp_stmt_cost */ + 1, /* scalar_load_cost */ + 1, /* scalar_store_cost */ + 3, /* cond_taken_branch_cost */ + 1, /* cond_not_taken_branch_cost */ + &generic_armv8_a_advsimd_vector_cost, /* advsimd */ + &generic_armv8_a_sve_vector_cost, /* sve */ + nullptr /* issue_info */ +}; + +/* Generic costs for branch instructions. 
*/ +static const struct cpu_branch_cost generic_armv8_a_branch_cost = +{ + 1, /* Predictable. */ + 3 /* Unpredictable. */ +}; + +/* Generic approximation modes. */ +static const cpu_approx_modes generic_armv8_a_approx_modes = +{ + AARCH64_APPROX_NONE, /* division */ + AARCH64_APPROX_NONE, /* sqrt */ + AARCH64_APPROX_NONE /* recip_sqrt */ +}; + +/* Generic prefetch settings (which disable prefetch). */ +static const cpu_prefetch_tune generic_armv8_a_prefetch_tune = +{ + 0, /* num_slots */ + -1, /* l1_cache_size */ + -1, /* l1_cache_line_size */ + -1, /* l2_cache_size */ + true, /* prefetch_dynamic_strides */ + -1, /* minimum_stride */ + -1 /* default_opt_level */ +}; + +static const struct tune_params generic_armv8_a_tunings = +{ + &cortexa76_extra_costs, + &generic_armv8_a_addrcost_table, + &generic_armv8_a_regmove_cost, + &generic_armv8_a_vector_cost, + &generic_armv8_a_branch_cost, + &generic_armv8_a_approx_modes, + SVE_NOT_IMPLEMENTED, /* sve_width */ + { 4, /* load_int. */ + 2, /* store_int. */ + 5, /* load_fp. */ + 2, /* store_fp. */ + 4, /* load_pred. */ + 4 /* store_pred. */ + }, /* memmov_cost. */ + 3, /* issue_rate */ + (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_CMP_BRANCH), /* fusible_ops */ + "32:16", /* function_align. */ + "4", /* jump_align. */ + "32:16", /* loop_align. */ + 2, /* int_reassoc_width. */ + 4, /* fp_reassoc_width. */ + 1, /* fma_reassoc_width. */ + 2, /* vec_reassoc_width. */ + 2, /* min_div_recip_mul_sf. */ + 2, /* min_div_recip_mul_df. */ + 0, /* max_case_values. */ + tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */ + (AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND + | AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS + | AARCH64_EXTRA_TUNE_USE_NEW_VECTOR_COSTS + | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT), /* tune_flags. */ + &generic_prefetch_tune, + AARCH64_LDP_STP_POLICY_ALWAYS, /* ldp_policy_model. */ + AARCH64_LDP_STP_POLICY_ALWAYS /* stp_policy_model. */ +}; + +#endif /* GCC_AARCH64_H_GENERIC_ARMV8_A. 
*/ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_asrd_1.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_asrd_1.c index aac06bd..96e9935 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/cond_asrd_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_asrd_1.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=256" } */ +/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=256 --param=aarch64-autovec-preference=2" } */ #include <stdint.h> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_4.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_4.c index f627891..6f969a8 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_4.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_4.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=256" } */ +/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=256 --param=aarch64-autovec-preference=2" } */ #include <stdint.h> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_5.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_5.c index 03a6636..e6ec515 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_5.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_5.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=256" } */ +/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=256 --param=aarch64-autovec-preference=2" } */ #include <stdint.h> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_uxt_5.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_uxt_5.c index 9a2bd8f..7ed3592 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/cond_uxt_5.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_uxt_5.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=256" } */ +/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=256 --param=aarch64-autovec-preference=2" } */ #include <stdint.h> diff 
--git a/gcc/testsuite/gcc.target/aarch64/target_attr_13.c b/gcc/testsuite/gcc.target/aarch64/target_attr_13.c index d5bee3a..4bdb167 100644 --- a/gcc/testsuite/gcc.target/aarch64/target_attr_13.c +++ b/gcc/testsuite/gcc.target/aarch64/target_attr_13.c @@ -1,5 +1,5 @@ /* { dg-do assemble } */ -/* { dg-options "-O2 -march=armv8-a+crc+crypto -mcpu=generic" } */ +/* { dg-options "-O2 -mcpu=generic+crypto" } */ #include "arm_acle.h" diff --git a/gcc/testsuite/gcc.target/aarch64/target_attr_15.c b/gcc/testsuite/gcc.target/aarch64/target_attr_15.c index 069a001..e6f31ba 100644 --- a/gcc/testsuite/gcc.target/aarch64/target_attr_15.c +++ b/gcc/testsuite/gcc.target/aarch64/target_attr_15.c @@ -1,5 +1,5 @@ /* { dg-do assemble } */ -/* { dg-options "-march=armv8-a+crypto -mcpu=generic -save-temps" } */ +/* { dg-options "-mcpu=generic+crypto -save-temps" } */ /* Check that "+nothing" clears the ISA flags. */ |