Diffstat (limited to 'gcc/config')
-rw-r--r--  gcc/config/aarch64/aarch64-option-extensions.def | 12
-rw-r--r--  gcc/config/aarch64/aarch64-protos.h | 1
-rw-r--r--  gcc/config/aarch64/aarch64-sme.md | 12
-rw-r--r--  gcc/config/aarch64/aarch64-sve-builtins-sme.def | 3
-rw-r--r--  gcc/config/aarch64/aarch64-sve-builtins.cc | 3
-rw-r--r--  gcc/config/aarch64/aarch64-sve.md | 276
-rw-r--r--  gcc/config/aarch64/aarch64.cc | 90
-rw-r--r--  gcc/config/aarch64/tuning_models/generic_armv9_a.h | 2
-rw-r--r--  gcc/config/avr/avr-passes.cc | 139
-rw-r--r--  gcc/config/avr/avr-passes.def | 8
-rw-r--r--  gcc/config/avr/avr-protos.h | 1
-rw-r--r--  gcc/config/avr/avr.cc | 26
-rw-r--r--  gcc/config/avr/avr.opt | 4
-rw-r--r--  gcc/config/avr/avr.opt.urls | 3
-rw-r--r--  gcc/config/gcn/gcn.cc | 18
-rw-r--r--  gcc/config/i386/i386-features.cc | 2
-rw-r--r--  gcc/config/i386/i386-modes.def | 2
-rw-r--r--  gcc/config/i386/i386-options.cc | 35
-rw-r--r--  gcc/config/i386/i386.cc | 70
-rw-r--r--  gcc/config/i386/i386.md | 3
-rw-r--r--  gcc/config/i386/sse.md | 13
-rwxr-xr-x  gcc/config/riscv/arch-canonicalize | 2
-rw-r--r--  gcc/config/riscv/gen-riscv-mcpu-texi.cc | 43
-rw-r--r--  gcc/config/riscv/gen-riscv-mtune-texi.cc | 41
-rw-r--r--  gcc/config/riscv/riscv-vector-costs.cc | 16
-rw-r--r--  gcc/config/riscv/t-riscv | 37
-rw-r--r--  gcc/config/rs6000/rs6000.cc | 25
-rw-r--r--  gcc/config/rs6000/rs6000.md | 2
-rw-r--r--  gcc/config/s390/s390-protos.h | 2
-rw-r--r--  gcc/config/s390/s390.cc | 161
-rw-r--r--  gcc/config/s390/s390.md | 21
31 files changed, 851 insertions, 222 deletions
diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def
index 1c3e697..db88df0 100644
--- a/gcc/config/aarch64/aarch64-option-extensions.def
+++ b/gcc/config/aarch64/aarch64-option-extensions.def
@@ -128,7 +128,9 @@ AARCH64_OPT_FMV_EXTENSION("sha2", SHA2, (SIMD), (), (), "sha1 sha2")
AARCH64_FMV_FEATURE("sha3", SHA3, (SHA3))
-AARCH64_OPT_FMV_EXTENSION("aes", AES, (SIMD), (), (), "aes")
+AARCH64_OPT_EXTENSION("aes", AES, (SIMD), (), (), "aes")
+
+AARCH64_FMV_FEATURE("aes", PMULL, (AES))
/* +nocrypto disables AES, SHA2 and SM4, and anything that depends on them
(such as SHA3 and the SVE2 crypto extensions). */
@@ -171,8 +173,6 @@ AARCH64_OPT_FMV_EXTENSION("i8mm", I8MM, (SIMD), (), (), "i8mm")
instructions. */
AARCH64_OPT_FMV_EXTENSION("bf16", BF16, (FP), (SIMD), (), "bf16")
-AARCH64_FMV_FEATURE("rpres", RPRES, ())
-
AARCH64_OPT_FMV_EXTENSION("sve", SVE, (SIMD, F16, FCMA), (), (), "sve")
/* This specifically does not imply +sve. */
@@ -190,7 +190,7 @@ AARCH64_OPT_FMV_EXTENSION("sve2", SVE2, (SVE), (), (), "sve2")
AARCH64_OPT_EXTENSION("sve2-aes", SVE2_AES, (SVE2, AES), (), (), "sveaes")
-AARCH64_FMV_FEATURE("sve2-aes", SVE_AES, (SVE2_AES))
+AARCH64_FMV_FEATURE("sve2-aes", SVE_PMULL128, (SVE2_AES))
AARCH64_OPT_EXTENSION("sve2-bitperm", SVE2_BITPERM, (SVE2), (), (),
"svebitperm")
@@ -245,9 +245,9 @@ AARCH64_OPT_EXTENSION("sme-b16b16", SME_B16B16, (SME2, SVE_B16B16), (), (), "sme
AARCH64_OPT_EXTENSION("sme-f16f16", SME_F16F16, (SME2), (), (), "smef16f16")
-AARCH64_OPT_EXTENSION("mops", MOPS, (), (), (), "mops")
+AARCH64_OPT_FMV_EXTENSION("mops", MOPS, (), (), (), "mops")
-AARCH64_OPT_EXTENSION("cssc", CSSC, (), (), (), "cssc")
+AARCH64_OPT_FMV_EXTENSION("cssc", CSSC, (), (), (), "cssc")
AARCH64_OPT_EXTENSION("cmpbr", CMPBR, (), (), (), "cmpbr")
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index e946e8d..38c307c 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -1031,6 +1031,7 @@ rtx aarch64_pfalse_reg (machine_mode);
bool aarch64_sve_same_pred_for_ptest_p (rtx *, rtx *);
rtx aarch64_sve_packed_pred (machine_mode);
rtx aarch64_sve_fp_pred (machine_mode, rtx *);
+rtx aarch64_sve_emit_masked_fp_pred (machine_mode, rtx);
void aarch64_emit_load_store_through_mode (rtx, rtx, machine_mode);
bool aarch64_expand_maskloadstore (rtx *, machine_mode);
void aarch64_emit_sve_pred_move (rtx, rtx, rtx);
diff --git a/gcc/config/aarch64/aarch64-sme.md b/gcc/config/aarch64/aarch64-sme.md
index 6b3f439..6b1a747 100644
--- a/gcc/config/aarch64/aarch64-sme.md
+++ b/gcc/config/aarch64/aarch64-sme.md
@@ -62,6 +62,10 @@
;; (b) they are sometimes used conditionally, particularly in streaming-
;; compatible code.
;;
+;; To prevent the latter from upsetting the assembler, we emit the literal
+;; encodings of "SMSTART SM" and "SMSTOP SM" when compiling without
+;; TARGET_SME.
+;;
;; =========================================================================
;; -------------------------------------------------------------------------
@@ -161,7 +165,9 @@
(clobber (reg:VNx16BI P14_REGNUM))
(clobber (reg:VNx16BI P15_REGNUM))]
""
- "smstart\tsm"
+ {
+ return TARGET_SME ? "smstart\tsm" : ".inst 0xd503437f // smstart sm";
+ }
)
;; Turn off streaming mode. This clobbers all SVE state.
@@ -196,7 +202,9 @@
(clobber (reg:VNx16BI P14_REGNUM))
(clobber (reg:VNx16BI P15_REGNUM))]
""
- "smstop\tsm"
+ {
+ return TARGET_SME ? "smstop\tsm" : ".inst 0xd503427f // smstop sm";
+ }
)
;; -------------------------------------------------------------------------
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sme.def b/gcc/config/aarch64/aarch64-sve-builtins-sme.def
index 8e6aadc..117b70e 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-sme.def
+++ b/gcc/config/aarch64/aarch64-sve-builtins-sme.def
@@ -92,7 +92,8 @@ DEF_SME_FUNCTION (svstr_zt, str_zt, none, none)
DEF_SME_FUNCTION (svzero_zt, inherent_zt, none, none)
#undef REQUIRED_EXTENSIONS
-#define REQUIRED_EXTENSIONS streaming_only (AARCH64_FL_SME2 && AARCH64_FL_FAMINMAX)
+#define REQUIRED_EXTENSIONS streaming_only (AARCH64_FL_SME2 \
+ | AARCH64_FL_FAMINMAX)
DEF_SME_FUNCTION_GS (svamin, binary_opt_single_n, all_float, x24, none)
DEF_SME_FUNCTION_GS (svamax, binary_opt_single_n, all_float, x24, none)
#undef REQUIRED_EXTENSIONS
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc
index 2b627a9..01833a8 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
@@ -4004,7 +4004,8 @@ rtx
function_expander::get_reg_target ()
{
machine_mode target_mode = result_mode ();
- if (!possible_target || GET_MODE (possible_target) != target_mode)
+ if (!possible_target
+ || !register_operand (possible_target, target_mode))
possible_target = gen_reg_rtx (target_mode);
return possible_target;
}
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index b252eef..80a3288 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -5605,18 +5605,21 @@
;; Predicated floating-point operations with merging.
(define_expand "@cond_<optab><mode>"
- [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
- (unspec:SVE_FULL_F_B16B16
+ [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+ (unspec:SVE_F_B16B16
[(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F_B16B16
+ (unspec:SVE_F_B16B16
[(match_dup 1)
(const_int SVE_STRICT_GP)
- (match_operand:SVE_FULL_F_B16B16 2 "<sve_pred_fp_rhs1_operand>")
- (match_operand:SVE_FULL_F_B16B16 3 "<sve_pred_fp_rhs2_operand>")]
+ (match_operand:SVE_F_B16B16 2 "<sve_pred_fp_rhs1_operand>")
+ (match_operand:SVE_F_B16B16 3 "<sve_pred_fp_rhs2_operand>")]
SVE_COND_FP_BINARY)
- (match_operand:SVE_FULL_F_B16B16 4 "aarch64_simd_reg_or_zero")]
+ (match_operand:SVE_F_B16B16 4 "aarch64_simd_reg_or_zero")]
UNSPEC_SEL))]
"TARGET_SVE && (<supports_bf16> || !<is_bf16>)"
+ {
+ operands[1] = aarch64_sve_emit_masked_fp_pred (<MODE>mode, operands[1]);
+ }
)
;; Predicated floating-point operations, merging with the first input.
@@ -5644,14 +5647,14 @@
)
(define_insn "*cond_<optab><mode>_2_strict"
- [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
- (unspec:SVE_FULL_F_B16B16
- [(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F_B16B16
+ [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+ (unspec:SVE_F_B16B16
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+ (unspec:SVE_F_B16B16
[(match_dup 1)
(const_int SVE_STRICT_GP)
- (match_operand:SVE_FULL_F_B16B16 2 "register_operand")
- (match_operand:SVE_FULL_F_B16B16 3 "register_operand")]
+ (match_operand:SVE_F_B16B16 2 "register_operand")
+ (match_operand:SVE_F_B16B16 3 "register_operand")]
SVE_COND_FP_BINARY)
(match_dup 2)]
UNSPEC_SEL))]
@@ -5687,14 +5690,14 @@
)
(define_insn "*cond_<optab><mode>_2_const_strict"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
- [(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+ (unspec:SVE_F
[(match_dup 1)
(const_int SVE_STRICT_GP)
- (match_operand:SVE_FULL_F 2 "register_operand")
- (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")]
+ (match_operand:SVE_F 2 "register_operand")
+ (match_operand:SVE_F 3 "<sve_pred_fp_rhs2_immediate>")]
SVE_COND_FP_BINARY_I1)
(match_dup 2)]
UNSPEC_SEL))]
@@ -5730,14 +5733,14 @@
)
(define_insn "*cond_<optab><mode>_3_strict"
- [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
- (unspec:SVE_FULL_F_B16B16
- [(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F_B16B16
+ [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+ (unspec:SVE_F_B16B16
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+ (unspec:SVE_F_B16B16
[(match_dup 1)
(const_int SVE_STRICT_GP)
- (match_operand:SVE_FULL_F_B16B16 2 "register_operand")
- (match_operand:SVE_FULL_F_B16B16 3 "register_operand")]
+ (match_operand:SVE_F_B16B16 2 "register_operand")
+ (match_operand:SVE_F_B16B16 3 "register_operand")]
SVE_COND_FP_BINARY)
(match_dup 3)]
UNSPEC_SEL))]
@@ -5794,16 +5797,16 @@
)
(define_insn_and_rewrite "*cond_<optab><mode>_any_strict"
- [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
- (unspec:SVE_FULL_F_B16B16
- [(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F_B16B16
+ [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+ (unspec:SVE_F_B16B16
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+ (unspec:SVE_F_B16B16
[(match_dup 1)
(const_int SVE_STRICT_GP)
- (match_operand:SVE_FULL_F_B16B16 2 "register_operand")
- (match_operand:SVE_FULL_F_B16B16 3 "register_operand")]
+ (match_operand:SVE_F_B16B16 2 "register_operand")
+ (match_operand:SVE_F_B16B16 3 "register_operand")]
SVE_COND_FP_BINARY)
- (match_operand:SVE_FULL_F_B16B16 4 "aarch64_simd_reg_or_zero")]
+ (match_operand:SVE_F_B16B16 4 "aarch64_simd_reg_or_zero")]
UNSPEC_SEL))]
"TARGET_SVE
&& (<supports_bf16> || !<is_bf16>)
@@ -5868,16 +5871,16 @@
)
(define_insn_and_rewrite "*cond_<optab><mode>_any_const_strict"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
- [(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+ (unspec:SVE_F
[(match_dup 1)
(const_int SVE_STRICT_GP)
- (match_operand:SVE_FULL_F 2 "register_operand")
- (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")]
+ (match_operand:SVE_F 2 "register_operand")
+ (match_operand:SVE_F 3 "<sve_pred_fp_rhs2_immediate>")]
SVE_COND_FP_BINARY_I1)
- (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
+ (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")]
UNSPEC_SEL))]
"TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
{@ [ cons: =0 , 1 , 2 , 4 ]
@@ -5953,14 +5956,14 @@
)
(define_insn "*cond_add<mode>_2_const_strict"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
- [(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+ (unspec:SVE_F
[(match_dup 1)
(const_int SVE_STRICT_GP)
- (match_operand:SVE_FULL_F 2 "register_operand")
- (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate")]
+ (match_operand:SVE_F 2 "register_operand")
+ (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_immediate")]
UNSPEC_COND_FADD)
(match_dup 2)]
UNSPEC_SEL))]
@@ -6015,16 +6018,16 @@
)
(define_insn_and_rewrite "*cond_add<mode>_any_const_strict"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
- [(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+ (unspec:SVE_F
[(match_dup 1)
(const_int SVE_STRICT_GP)
- (match_operand:SVE_FULL_F 2 "register_operand")
- (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate")]
+ (match_operand:SVE_F 2 "register_operand")
+ (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_immediate")]
UNSPEC_COND_FADD)
- (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
+ (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")]
UNSPEC_SEL))]
"TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
{@ [ cons: =0 , 1 , 2 , 3 , 4 ]
@@ -6266,14 +6269,14 @@
)
(define_insn "*cond_sub<mode>_3_const_strict"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
- [(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+ (unspec:SVE_F
[(match_dup 1)
(const_int SVE_STRICT_GP)
- (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate")
- (match_operand:SVE_FULL_F 3 "register_operand")]
+ (match_operand:SVE_F 2 "aarch64_sve_float_arith_immediate")
+ (match_operand:SVE_F 3 "register_operand")]
UNSPEC_COND_FSUB)
(match_dup 3)]
UNSPEC_SEL))]
@@ -6323,16 +6326,16 @@
)
(define_insn_and_rewrite "*cond_sub<mode>_const_strict"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
- [(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+ (unspec:SVE_F
[(match_dup 1)
(const_int SVE_STRICT_GP)
- (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate")
- (match_operand:SVE_FULL_F 3 "register_operand")]
+ (match_operand:SVE_F 2 "aarch64_sve_float_arith_immediate")
+ (match_operand:SVE_F 3 "register_operand")]
UNSPEC_COND_FSUB)
- (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
+ (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")]
UNSPEC_SEL))]
"TARGET_SVE && !rtx_equal_p (operands[3], operands[4])"
{@ [ cons: =0 , 1 , 3 , 4 ]
@@ -6913,7 +6916,7 @@
;; Predicate AND. We can reuse one of the inputs as the GP.
;; Doubling the second operand is the preferred implementation
;; of the MOV alias, so we use that instead of %1/z, %1, %2.
-(define_insn "and<mode>3"
+(define_insn "@and<mode>3"
[(set (match_operand:PRED_ALL 0 "register_operand")
(and:PRED_ALL (match_operand:PRED_ALL 1 "register_operand")
(match_operand:PRED_ALL 2 "register_operand")))]
@@ -7595,29 +7598,29 @@
;; Unpredicated floating-point ternary operations.
(define_expand "<optab><mode>4"
- [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
- (unspec:SVE_FULL_F_B16B16
+ [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+ (unspec:SVE_F_B16B16
[(match_dup 4)
- (const_int SVE_RELAXED_GP)
- (match_operand:SVE_FULL_F_B16B16 1 "register_operand")
- (match_operand:SVE_FULL_F_B16B16 2 "register_operand")
- (match_operand:SVE_FULL_F_B16B16 3 "register_operand")]
+ (match_dup 5)
+ (match_operand:SVE_F_B16B16 1 "register_operand")
+ (match_operand:SVE_F_B16B16 2 "register_operand")
+ (match_operand:SVE_F_B16B16 3 "register_operand")]
SVE_COND_FP_TERNARY))]
"TARGET_SVE && (<supports_bf16> || !<is_bf16>)"
{
- operands[4] = aarch64_ptrue_reg (<VPRED>mode);
+ operands[4] = aarch64_sve_fp_pred (<MODE>mode, &operands[5]);
}
)
;; Predicated floating-point ternary operations.
(define_insn "@aarch64_pred_<optab><mode>"
- [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
- (unspec:SVE_FULL_F_B16B16
- [(match_operand:<VPRED> 1 "register_operand")
+ [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+ (unspec:SVE_F_B16B16
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
(match_operand:SI 5 "aarch64_sve_gp_strictness")
- (match_operand:SVE_FULL_F_B16B16 2 "register_operand")
- (match_operand:SVE_FULL_F_B16B16 3 "register_operand")
- (match_operand:SVE_FULL_F_B16B16 4 "register_operand")]
+ (match_operand:SVE_F_B16B16 2 "register_operand")
+ (match_operand:SVE_F_B16B16 3 "register_operand")
+ (match_operand:SVE_F_B16B16 4 "register_operand")]
SVE_COND_FP_TERNARY))]
"TARGET_SVE && (<supports_bf16> || !<is_bf16>)"
{@ [ cons: =0 , 1 , %2 , 3 , 4 ; attrs: movprfx , is_rev ]
@@ -7631,17 +7634,17 @@
;; Predicated floating-point ternary operations with merging.
(define_expand "@cond_<optab><mode>"
- [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
- (unspec:SVE_FULL_F_B16B16
+ [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+ (unspec:SVE_F_B16B16
[(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F_B16B16
+ (unspec:SVE_F_B16B16
[(match_dup 1)
(const_int SVE_STRICT_GP)
- (match_operand:SVE_FULL_F_B16B16 2 "register_operand")
- (match_operand:SVE_FULL_F_B16B16 3 "register_operand")
- (match_operand:SVE_FULL_F_B16B16 4 "register_operand")]
+ (match_operand:SVE_F_B16B16 2 "register_operand")
+ (match_operand:SVE_F_B16B16 3 "register_operand")
+ (match_operand:SVE_F_B16B16 4 "register_operand")]
SVE_COND_FP_TERNARY)
- (match_operand:SVE_FULL_F_B16B16 5 "aarch64_simd_reg_or_zero")]
+ (match_operand:SVE_F_B16B16 5 "aarch64_simd_reg_or_zero")]
UNSPEC_SEL))]
"TARGET_SVE && (<supports_bf16> || !<is_bf16>)"
{
@@ -7649,20 +7652,22 @@
second of the two. */
if (rtx_equal_p (operands[3], operands[5]))
std::swap (operands[2], operands[3]);
+
+ operands[1] = aarch64_sve_emit_masked_fp_pred (<MODE>mode, operands[1]);
})
;; Predicated floating-point ternary operations, merging with the
;; first input.
(define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
[(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F
+ (unspec:SVE_F
[(match_operand 5)
(const_int SVE_RELAXED_GP)
- (match_operand:SVE_FULL_F 2 "register_operand")
- (match_operand:SVE_FULL_F 3 "register_operand")
- (match_operand:SVE_FULL_F 4 "register_operand")]
+ (match_operand:SVE_F 2 "register_operand")
+ (match_operand:SVE_F 3 "register_operand")
+ (match_operand:SVE_F 4 "register_operand")]
SVE_COND_FP_TERNARY)
(match_dup 2)]
UNSPEC_SEL))]
@@ -7678,15 +7683,15 @@
)
(define_insn "*cond_<optab><mode>_2_strict"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
- [(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+ (unspec:SVE_F
[(match_dup 1)
(const_int SVE_STRICT_GP)
- (match_operand:SVE_FULL_F 2 "register_operand")
- (match_operand:SVE_FULL_F 3 "register_operand")
- (match_operand:SVE_FULL_F 4 "register_operand")]
+ (match_operand:SVE_F 2 "register_operand")
+ (match_operand:SVE_F 3 "register_operand")
+ (match_operand:SVE_F 4 "register_operand")]
SVE_COND_FP_TERNARY)
(match_dup 2)]
UNSPEC_SEL))]
@@ -7700,15 +7705,15 @@
;; Predicated floating-point ternary operations, merging with the
;; third input.
(define_insn_and_rewrite "*cond_<optab><mode>_4_relaxed"
- [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
- (unspec:SVE_FULL_F_B16B16
+ [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+ (unspec:SVE_F_B16B16
[(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F_B16B16
+ (unspec:SVE_F_B16B16
[(match_operand 5)
(const_int SVE_RELAXED_GP)
- (match_operand:SVE_FULL_F_B16B16 2 "register_operand")
- (match_operand:SVE_FULL_F_B16B16 3 "register_operand")
- (match_operand:SVE_FULL_F_B16B16 4 "register_operand")]
+ (match_operand:SVE_F_B16B16 2 "register_operand")
+ (match_operand:SVE_F_B16B16 3 "register_operand")
+ (match_operand:SVE_F_B16B16 4 "register_operand")]
SVE_COND_FP_TERNARY)
(match_dup 4)]
UNSPEC_SEL))]
@@ -7724,15 +7729,15 @@
)
(define_insn "*cond_<optab><mode>_4_strict"
- [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
- (unspec:SVE_FULL_F_B16B16
- [(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F_B16B16
+ [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+ (unspec:SVE_F_B16B16
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+ (unspec:SVE_F_B16B16
[(match_dup 1)
(const_int SVE_STRICT_GP)
- (match_operand:SVE_FULL_F_B16B16 2 "register_operand")
- (match_operand:SVE_FULL_F_B16B16 3 "register_operand")
- (match_operand:SVE_FULL_F_B16B16 4 "register_operand")]
+ (match_operand:SVE_F_B16B16 2 "register_operand")
+ (match_operand:SVE_F_B16B16 3 "register_operand")
+ (match_operand:SVE_F_B16B16 4 "register_operand")]
SVE_COND_FP_TERNARY)
(match_dup 4)]
UNSPEC_SEL))]
@@ -7746,17 +7751,17 @@
;; Predicated floating-point ternary operations, merging with an
;; independent value.
(define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed"
- [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
- (unspec:SVE_FULL_F_B16B16
+ [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+ (unspec:SVE_F_B16B16
[(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F_B16B16
+ (unspec:SVE_F_B16B16
[(match_operand 6)
(const_int SVE_RELAXED_GP)
- (match_operand:SVE_FULL_F_B16B16 2 "register_operand")
- (match_operand:SVE_FULL_F_B16B16 3 "register_operand")
- (match_operand:SVE_FULL_F_B16B16 4 "register_operand")]
+ (match_operand:SVE_F_B16B16 2 "register_operand")
+ (match_operand:SVE_F_B16B16 3 "register_operand")
+ (match_operand:SVE_F_B16B16 4 "register_operand")]
SVE_COND_FP_TERNARY)
- (match_operand:SVE_FULL_F_B16B16 5 "aarch64_simd_reg_or_zero")]
+ (match_operand:SVE_F_B16B16 5 "aarch64_simd_reg_or_zero")]
UNSPEC_SEL))]
"TARGET_SVE
&& (<supports_bf16> || !<is_bf16>)
@@ -7792,17 +7797,17 @@
)
(define_insn_and_rewrite "*cond_<optab><mode>_any_strict"
- [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
- (unspec:SVE_FULL_F_B16B16
- [(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F_B16B16
+ [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+ (unspec:SVE_F_B16B16
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+ (unspec:SVE_F_B16B16
[(match_dup 1)
(const_int SVE_STRICT_GP)
- (match_operand:SVE_FULL_F_B16B16 2 "register_operand")
- (match_operand:SVE_FULL_F_B16B16 3 "register_operand")
- (match_operand:SVE_FULL_F_B16B16 4 "register_operand")]
+ (match_operand:SVE_F_B16B16 2 "register_operand")
+ (match_operand:SVE_F_B16B16 3 "register_operand")
+ (match_operand:SVE_F_B16B16 4 "register_operand")]
SVE_COND_FP_TERNARY)
- (match_operand:SVE_FULL_F_B16B16 5 "aarch64_simd_reg_or_zero")]
+ (match_operand:SVE_F_B16B16 5 "aarch64_simd_reg_or_zero")]
UNSPEC_SEL))]
"TARGET_SVE
&& (<supports_bf16> || !<is_bf16>)
@@ -8201,20 +8206,23 @@
;;
;; For unpacked vectors, it doesn't really matter whether SEL uses
;; the container size or the element size. If SEL used the container size,
-;; it would ignore undefined bits of the predicate but would copy the
-;; upper (undefined) bits of each container along with the defined bits.
-;; If SEL used the element size, it would use undefined bits of the predicate
-;; to select between undefined elements in each input vector. Thus the only
-;; difference is whether the undefined bits in a container always come from
-;; the same input as the defined bits, or whether the choice can vary
-;; independently of the defined bits.
+;; it would copy the upper (undefined) bits of each container along
+;; with the corresponding defined bits. If SEL used the element size,
+;; it would use separate predicate bits to select between the undefined
+;; elements in each input vector; these separate predicate bits might
+;; themselves be undefined, depending on the mode of the predicate.
+;;
+;; Thus the only difference is whether the undefined bits in a container
+;; always come from the same input as the defined bits, or whether the
+;; choice can vary independently of the defined bits.
;;
;; For the other instructions, using the element size is more natural,
;; so we do that for SEL as well.
+;;
(define_insn "*vcond_mask_<mode><vpred>"
[(set (match_operand:SVE_ALL 0 "register_operand")
(unspec:SVE_ALL
- [(match_operand:<VPRED> 3 "register_operand")
+ [(match_operand:<VPRED> 3 "aarch64_predicate_operand")
(match_operand:SVE_ALL 1 "aarch64_sve_reg_or_dup_imm")
(match_operand:SVE_ALL 2 "aarch64_simd_reg_or_zero")]
UNSPEC_SEL))]
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index cb1699a..f4a2062 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -3933,6 +3933,33 @@ aarch64_sve_fp_pred (machine_mode data_mode, rtx *strictness)
return aarch64_ptrue_reg (aarch64_sve_pred_mode (data_mode));
}
+/* PRED is a predicate that governs an operation on DATA_MODE. If DATA_MODE
+ is a partial vector mode, and if exceptions must be suppressed for its
+ undefined elements, convert PRED from a container-level predicate to
+ an element-level predicate and ensure that the undefined elements
+ are inactive. Make no changes otherwise.
+
+ Return the resultant predicate. */
+rtx
+aarch64_sve_emit_masked_fp_pred (machine_mode data_mode, rtx pred)
+{
+ unsigned int vec_flags = aarch64_classify_vector_mode (data_mode);
+ if (flag_trapping_math && (vec_flags & VEC_PARTIAL))
+ {
+ /* Generate an element-level mask. */
+ rtx mask = aarch64_sve_packed_pred (data_mode);
+ machine_mode pmode = GET_MODE (mask);
+
+ /* Apply the existing predicate. */
+ rtx dst = gen_reg_rtx (pmode);
+ emit_insn (gen_and3 (pmode, dst, mask,
+ gen_lowpart (pmode, pred)));
+ return dst;
+ }
+
+ return pred;
+}
+
/* Emit a comparison CMP between OP0 and OP1, both of which have mode
DATA_MODE, and return the result in a predicate of mode PRED_MODE.
Use TARGET as the target register if nonnull and convenient. */
@@ -17166,8 +17193,8 @@ aarch64_ld234_st234_vectors (vect_cost_for_stmt kind, stmt_vec_info stmt_info,
&& STMT_VINFO_DATA_REF (stmt_info))
{
stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
- if (stmt_info
- && vect_mem_access_type (stmt_info, node) == VMAT_LOAD_STORE_LANES)
+ if (node
+ && SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_LOAD_STORE_LANES)
return DR_GROUP_SIZE (stmt_info);
}
return 0;
@@ -17438,8 +17465,9 @@ aarch64_detect_vector_stmt_subtype (vec_info *vinfo, vect_cost_for_stmt kind,
for each element. We therefore need to divide the full-instruction
cost by the number of elements in the vector. */
if (kind == scalar_load
+ && node
&& sve_costs
- && vect_mem_access_type (stmt_info, node) == VMAT_GATHER_SCATTER)
+ && SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_GATHER_SCATTER)
{
unsigned int nunits = vect_nunits_for_cost (vectype);
/* Test for VNx2 modes, which have 64-bit containers. */
@@ -17451,8 +17479,9 @@ aarch64_detect_vector_stmt_subtype (vec_info *vinfo, vect_cost_for_stmt kind,
/* Detect cases in which a scalar_store is really storing one element
in a scatter operation. */
if (kind == scalar_store
+ && node
&& sve_costs
- && vect_mem_access_type (stmt_info, node) == VMAT_GATHER_SCATTER)
+ && SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_GATHER_SCATTER)
return sve_costs->scatter_store_elt_cost;
/* Detect cases in which vec_to_scalar represents an in-loop reduction. */
@@ -17708,7 +17737,7 @@ aarch64_vector_costs::count_ops (unsigned int count, vect_cost_for_stmt kind,
if (stmt_info
&& kind == vec_to_scalar
&& (m_vec_flags & VEC_ADVSIMD)
- && vect_mem_access_type (stmt_info, node) == VMAT_GATHER_SCATTER)
+ && SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_GATHER_SCATTER)
{
auto dr = STMT_VINFO_DATA_REF (stmt_info);
tree dr_ref = DR_REF (dr);
@@ -17823,7 +17852,7 @@ aarch64_vector_costs::count_ops (unsigned int count, vect_cost_for_stmt kind,
if (stmt_info
&& sve_issue
&& (kind == scalar_load || kind == scalar_store)
- && vect_mem_access_type (stmt_info, node) == VMAT_GATHER_SCATTER)
+ && SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_GATHER_SCATTER)
{
unsigned int pairs = CEIL (count, 2);
ops->pred_ops += sve_issue->gather_scatter_pair_pred_ops * pairs;
@@ -17978,9 +18007,10 @@ aarch64_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
/* Check if we've seen an SVE gather/scatter operation and which size. */
if (kind == scalar_load
+ && node
&& vectype
&& aarch64_sve_mode_p (TYPE_MODE (vectype))
- && vect_mem_access_type (stmt_info, node) == VMAT_GATHER_SCATTER)
+ && SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_GATHER_SCATTER)
{
const sve_vec_cost *sve_costs = aarch64_tune_params.vec_costs->sve;
if (sve_costs)
@@ -20482,6 +20512,8 @@ aarch64_compare_version_priority (tree decl1, tree decl2)
unsigned long _size; // Size of the struct, so it can grow.
unsigned long _hwcap;
unsigned long _hwcap2;
+ unsigned long _hwcap3;
+ unsigned long _hwcap4;
}
*/
@@ -20498,14 +20530,24 @@ build_ifunc_arg_type ()
tree field3 = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
get_identifier ("_hwcap2"),
long_unsigned_type_node);
+ tree field4 = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
+ get_identifier ("_hwcap3"),
+ long_unsigned_type_node);
+ tree field5 = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
+ get_identifier ("_hwcap4"),
+ long_unsigned_type_node);
DECL_FIELD_CONTEXT (field1) = ifunc_arg_type;
DECL_FIELD_CONTEXT (field2) = ifunc_arg_type;
DECL_FIELD_CONTEXT (field3) = ifunc_arg_type;
+ DECL_FIELD_CONTEXT (field4) = ifunc_arg_type;
+ DECL_FIELD_CONTEXT (field5) = ifunc_arg_type;
TYPE_FIELDS (ifunc_arg_type) = field1;
DECL_CHAIN (field1) = field2;
DECL_CHAIN (field2) = field3;
+ DECL_CHAIN (field3) = field4;
+ DECL_CHAIN (field4) = field5;
layout_type (ifunc_arg_type);
@@ -31964,9 +32006,43 @@ aarch64_test_sysreg_encoding_clashes (void)
static void
aarch64_test_sve_folding ()
{
+ aarch64_target_switcher switcher (AARCH64_FL_SVE);
+
tree res = fold_unary (BIT_NOT_EXPR, ssizetype,
ssize_int (poly_int64 (1, 1)));
ASSERT_TRUE (operand_equal_p (res, ssize_int (poly_int64 (-2, -1))));
+
+ auto build_v16bi = [](bool a, bool b)
+ {
+ rtx_vector_builder builder (VNx16BImode, 2, 1);
+ builder.quick_push (a ? const1_rtx : const0_rtx);
+ builder.quick_push (b ? const1_rtx : const0_rtx);
+ return builder.build ();
+ };
+ rtx v16bi_10 = build_v16bi (1, 0);
+ rtx v16bi_01 = build_v16bi (0, 1);
+
+ for (auto mode : { VNx8BImode, VNx4BImode, VNx2BImode })
+ {
+ rtx reg = gen_rtx_REG (mode, LAST_VIRTUAL_REGISTER + 1);
+ rtx subreg = lowpart_subreg (VNx16BImode, reg, mode);
+ rtx and1 = simplify_gen_binary (AND, VNx16BImode, subreg, v16bi_10);
+ ASSERT_EQ (lowpart_subreg (mode, and1, VNx16BImode), reg);
+ rtx and0 = simplify_gen_binary (AND, VNx16BImode, subreg, v16bi_01);
+ ASSERT_EQ (lowpart_subreg (mode, and0, VNx16BImode), CONST0_RTX (mode));
+
+ rtx ior1 = simplify_gen_binary (IOR, VNx16BImode, subreg, v16bi_10);
+ ASSERT_EQ (lowpart_subreg (mode, ior1, VNx16BImode), CONSTM1_RTX (mode));
+ rtx ior0 = simplify_gen_binary (IOR, VNx16BImode, subreg, v16bi_01);
+ ASSERT_EQ (lowpart_subreg (mode, ior0, VNx16BImode), reg);
+
+ rtx xor1 = simplify_gen_binary (XOR, VNx16BImode, subreg, v16bi_10);
+ ASSERT_RTX_EQ (lowpart_subreg (mode, xor1, VNx16BImode),
+ lowpart_subreg (mode, gen_rtx_NOT (VNx16BImode, subreg),
+ VNx16BImode));
+ rtx xor0 = simplify_gen_binary (XOR, VNx16BImode, subreg, v16bi_01);
+ ASSERT_EQ (lowpart_subreg (mode, xor0, VNx16BImode), reg);
+ }
}
/* Run all target-specific selftests. */
diff --git a/gcc/config/aarch64/tuning_models/generic_armv9_a.h b/gcc/config/aarch64/tuning_models/generic_armv9_a.h
index f76a250..9eb1a20 100644
--- a/gcc/config/aarch64/tuning_models/generic_armv9_a.h
+++ b/gcc/config/aarch64/tuning_models/generic_armv9_a.h
@@ -26,7 +26,7 @@
static const struct cpu_addrcost_table generic_armv9_a_addrcost_table =
{
{
- 1, /* hi */
+ 0, /* hi */
0, /* si */
0, /* di */
1, /* ti */
diff --git a/gcc/config/avr/avr-passes.cc b/gcc/config/avr/avr-passes.cc
index 6a88a27..69df6d2 100644
--- a/gcc/config/avr/avr-passes.cc
+++ b/gcc/config/avr/avr-passes.cc
@@ -4843,6 +4843,137 @@ avr_pass_fuse_add::execute1 (function *func)
//////////////////////////////////////////////////////////////////////////////
+// Fuse 2 move insns after combine.
+
+static const pass_data avr_pass_data_2moves =
+{
+ RTL_PASS, // type
+ "", // name (will be patched)
+ OPTGROUP_NONE, // optinfo_flags
+ TV_DF_SCAN, // tv_id
+ 0, // properties_required
+ 0, // properties_provided
+ 0, // properties_destroyed
+ 0, // todo_flags_start
+ 0 // todo_flags_finish
+};
+
+class avr_pass_2moves : public rtl_opt_pass
+{
+public:
+ avr_pass_2moves (gcc::context *ctxt, const char *name)
+ : rtl_opt_pass (avr_pass_data_2moves, ctxt)
+ {
+ this->name = name;
+ }
+
+ unsigned int execute (function *func) final override
+ {
+ if (optimize && avropt_fuse_move2)
+ {
+ bool changed = false;
+ basic_block bb;
+
+ FOR_EACH_BB_FN (bb, func)
+ {
+ changed |= optimize_2moves_bb (bb);
+ }
+
+ if (changed)
+ {
+ df_note_add_problem ();
+ df_analyze ();
+ }
+ }
+
+ return 0;
+ }
+
+ bool optimize_2moves (rtx_insn *, rtx_insn *);
+ bool optimize_2moves_bb (basic_block);
+}; // avr_pass_2moves
+
+bool
+avr_pass_2moves::optimize_2moves_bb (basic_block bb)
+{
+ bool changed = false;
+ rtx_insn *insn1 = nullptr;
+ rtx_insn *insn2 = nullptr;
+ rtx_insn *curr;
+
+ FOR_BB_INSNS (bb, curr)
+ {
+ if (insn1 && INSN_P (insn1)
+ && insn2 && INSN_P (insn2))
+ changed |= optimize_2moves (insn1, insn2);
+
+ insn1 = insn2;
+ insn2 = curr;
+ }
+
+ return changed;
+}
+
+bool
+avr_pass_2moves::optimize_2moves (rtx_insn *insn1, rtx_insn *insn2)
+{
+ bool good = false;
+ bool bad = false;
+ rtx set1, dest1, src1;
+ rtx set2, dest2, src2;
+
+ if ((set1 = single_set (insn1))
+ && (set2 = single_set (insn2))
+ && (src1 = SET_SRC (set1))
+ && REG_P (src2 = SET_SRC (set2))
+ && REG_P (dest1 = SET_DEST (set1))
+ && REG_P (dest2 = SET_DEST (set2))
+ && rtx_equal_p (dest1, src2)
+ // Now we have:
+ // insn1: dest1 = src1
+ // insn2: dest2 = dest1
+ && REGNO (dest1) >= FIRST_PSEUDO_REGISTER
+ // Paranoia.
+ && GET_CODE (PATTERN (insn1)) != PARALLEL
+ && GET_CODE (PATTERN (insn2)) != PARALLEL
+ && (rtx_equal_p (dest2, src1)
+ || !reg_overlap_mentioned_p (dest2, src1)))
+ {
+ avr_dump ("\n;; Found 2moves:\n%r\n%r\n", insn1, insn2);
+ avr_dump (";; reg %d: insn uses uids:", REGNO (dest1));
+
+ // Go check that dest1 is used exactly once, namely by insn2.
+
+ df_ref use = DF_REG_USE_CHAIN (REGNO (dest1));
+ for (; use; use = DF_REF_NEXT_REG (use))
+ {
+ rtx_insn *user = DF_REF_INSN (use);
+ avr_dump (" %d", INSN_UID (user));
+ good |= INSN_UID (user) == INSN_UID (insn2);
+ bad |= INSN_UID (user) != INSN_UID (insn2);
+ }
+ avr_dump (".\n");
+
+ if (good && !bad
+ // Propagate src1 to insn2:
+ // insn1: # Deleted
+ // insn2: dest2 = src1
+ && validate_change (insn2, &SET_SRC (set2), src1, false))
+ {
+ SET_INSN_DELETED (insn1);
+ return true;
+ }
+ }
+
+ if (good && !bad)
+ avr_dump (";; Failed\n");
+
+ return false;
+}
+
+
+
+//////////////////////////////////////////////////////////////////////////////
// Split insns with nonzero_bits() after combine.
static const pass_data avr_pass_data_split_nzb =
@@ -5704,6 +5835,14 @@ make_avr_pass_casesi (gcc::context *ctxt)
return new avr_pass_casesi (ctxt, "avr-casesi");
}
+// Optimize 2 consecutive moves after combine.
+
+rtl_opt_pass *
+make_avr_pass_2moves (gcc::context *ctxt)
+{
+ return new avr_pass_2moves (ctxt, "avr-2moves");
+}
+
rtl_opt_pass *
make_avr_pass_split_nzb (gcc::context *ctxt)
{
diff --git a/gcc/config/avr/avr-passes.def b/gcc/config/avr/avr-passes.def
index eb60a93..d668c7f 100644
--- a/gcc/config/avr/avr-passes.def
+++ b/gcc/config/avr/avr-passes.def
@@ -74,6 +74,14 @@ INSERT_PASS_BEFORE (pass_free_cfg, 1, avr_pass_recompute_notes);
INSERT_PASS_AFTER (pass_expand, 1, avr_pass_casesi);
+/* Insn combine may come up with superfluous reg-reg moves, where the combine
+ people say that these are no problem since reg-alloc is supposed to optimize
+ them. The issue is that the lower-subreg pass sitting between combine and
+ reg-alloc may split such moves, coming up with a zoo of subregs which are
+ only handled poorly by the register allocator. */
+
+INSERT_PASS_AFTER (pass_combine, 1, avr_pass_2moves);
+
/* Some combine insns have nonzero_bits() in their condition, though insns
should not use such stuff in their condition. Therefore, we split such
insn into something without nonzero_bits() in their condition right after
diff --git a/gcc/config/avr/avr-protos.h b/gcc/config/avr/avr-protos.h
index ca30136..37911e7 100644
--- a/gcc/config/avr/avr-protos.h
+++ b/gcc/config/avr/avr-protos.h
@@ -208,6 +208,7 @@ extern rtl_opt_pass *make_avr_pass_casesi (gcc::context *);
extern rtl_opt_pass *make_avr_pass_ifelse (gcc::context *);
extern rtl_opt_pass *make_avr_pass_split_nzb (gcc::context *);
extern rtl_opt_pass *make_avr_pass_split_after_peephole2 (gcc::context *);
+extern rtl_opt_pass *make_avr_pass_2moves (gcc::context *);
#ifdef RTX_CODE
extern bool avr_casei_sequence_check_operands (rtx *xop);
extern bool avr_split_fake_addressing_move (rtx_insn *insn, rtx *operands);
diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc
index c469297..1fb59b6 100644
--- a/gcc/config/avr/avr.cc
+++ b/gcc/config/avr/avr.cc
@@ -14418,6 +14418,13 @@ avr_output_addr_vec (rtx_insn *labl, rtx table)
// Output the label that precedes the table.
ASM_OUTPUT_ALIGN (stream, 1);
+
+ char s_labl[40];
+ targetm.asm_out.generate_internal_label (s_labl, "L",
+ CODE_LABEL_NUMBER (labl));
+ ASM_OUTPUT_TYPE_DIRECTIVE (stream, s_labl,
+ AVR_HAVE_JMP_CALL ? "object" : "function");
+
targetm.asm_out.internal_label (stream, "L", CODE_LABEL_NUMBER (labl));
// Output the table's content.
@@ -14984,10 +14991,11 @@ avr_addr_space_convert (rtx src, tree type_old, tree type_new)
/* Linearize memory: RAM has bit 23 set. When as_new = __flashx then
this is basically UB since __flashx mistreats RAM addresses, but there
- is no way to bail out. (Though -Waddr-space-convert will tell.) */
+ is no way to bail out. (Though -Waddr-space-convert will tell.)
+ ...but PR121277 is confusing, in particular when NULL is coming in. */
int msb = ADDR_SPACE_GENERIC_P (as_old)
- ? 0x80
+ ? as_new == ADDR_SPACE_MEMX ? 0x80 : 0x00
: avr_addrspace[as_old].segment;
src = force_reg (Pmode, src);
@@ -15085,10 +15093,16 @@ avr_convert_to_type (tree type, tree expr)
const char *name_old = avr_addrspace[as_old].name;
const char *name_new = avr_addrspace[as_new].name;
- warning (OPT_Waddr_space_convert,
- "conversion from address space %qs to address space %qs",
- ADDR_SPACE_GENERIC_P (as_old) ? "generic" : name_old,
- ADDR_SPACE_GENERIC_P (as_new) ? "generic" : name_new);
+ // Be relaxed when NULL is used, and when 0x0 stands for
+ // address 0x0.
+ bool nowarn = (expr == null_pointer_node
+ && (as_new == ADDR_SPACE_FLASHX
+ || as_new == ADDR_SPACE_FLASH));
+ if (!nowarn)
+ warning (OPT_Waddr_space_convert,
+ "conversion from address space %qs to address space %qs",
+ ADDR_SPACE_GENERIC_P (as_old) ? "generic" : name_old,
+ ADDR_SPACE_GENERIC_P (as_new) ? "generic" : name_new);
return fold_build1_loc (loc, ADDR_SPACE_CONVERT_EXPR, type, expr);
}
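
Illustration (assumption, not from this patch): with the relaxed check, a plain null-pointer initialization like the one below no longer triggers -Waddr-space-convert, while converting an actual generic-space (RAM) address to __flashx still warns.

    #include <stddef.h>

    const __flashx char *msg = NULL;   /* previously warned, now silent */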
diff --git a/gcc/config/avr/avr.opt b/gcc/config/avr/avr.opt
index 9883119..7f6f18c 100644
--- a/gcc/config/avr/avr.opt
+++ b/gcc/config/avr/avr.opt
@@ -164,6 +164,10 @@ mfuse-move=
Target Joined RejectNegative UInteger Var(avropt_fuse_move) Init(0) Optimization IntegerRange(0, 23)
-mfuse-move=<0,23> Optimization. Run a post-reload pass that tweaks move instructions.
+mfuse-move2
+Target Var(avropt_fuse_move2) Init(0) Optimization
+Optimization. Fuse some move insns after insn combine.
+
mabsdata
Target Mask(ABSDATA)
Assume that all data in static storage can be accessed by LDS / STS instructions. This option is only useful for reduced Tiny devices like ATtiny40.
diff --git a/gcc/config/avr/avr.opt.urls b/gcc/config/avr/avr.opt.urls
index 662fdee..87c26b2 100644
--- a/gcc/config/avr/avr.opt.urls
+++ b/gcc/config/avr/avr.opt.urls
@@ -92,6 +92,9 @@ UrlSuffix(gcc/AVR-Options.html#index-mfuse-move)
mfuse-move=
UrlSuffix(gcc/AVR-Options.html#index-mfuse-move)
+mfuse-move2
+UrlSuffix(gcc/AVR-Options.html#index-mfuse-move2)
+
mabsdata
UrlSuffix(gcc/AVR-Options.html#index-mabsdata)
diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc
index 557568c..5ffeb23 100644
--- a/gcc/config/gcn/gcn.cc
+++ b/gcc/config/gcn/gcn.cc
@@ -54,6 +54,7 @@
#include "gimple.h"
#include "cgraph.h"
#include "case-cfn-macros.h"
+#include "opts.h"
/* This file should be included last. */
#include "target-def.h"
@@ -183,6 +184,11 @@ gcn_option_override (void)
if (flag_sram_ecc == HSACO_ATTR_DEFAULT)
flag_sram_ecc = gcn_devices[gcn_arch].sramecc_default;
+
+ /* TODO: This seems to produce tighter loops, but the testsuite expects it
+ to be set to '2', so I'll leave it default for now.
+ SET_OPTION_IF_UNSET (&global_options, &global_options_set,
+ param_vect_partial_vector_usage, 1); */
}
/* }}} */
@@ -5789,6 +5795,16 @@ gcn_libc_has_function (enum function_class fn_class,
return bsd_libc_has_function (fn_class, type);
}
+/* Implement TARGET_VECTORIZE_PREFER_GATHER_SCATTER. */
+
+static bool
+gcn_prefer_gather_scatter (machine_mode ARG_UNUSED (mode),
+ int ARG_UNUSED (scale),
+ unsigned int ARG_UNUSED (group_size))
+{
+ return true;
+}
+
/* }}} */
/* {{{ md_reorg pass. */
@@ -8140,6 +8156,8 @@ gcn_dwarf_register_span (rtx rtl)
gcn_vectorize_builtin_vectorized_function
#undef TARGET_VECTORIZE_GET_MASK_MODE
#define TARGET_VECTORIZE_GET_MASK_MODE gcn_vectorize_get_mask_mode
+#undef TARGET_VECTORIZE_PREFER_GATHER_SCATTER
+#define TARGET_VECTORIZE_PREFER_GATHER_SCATTER gcn_prefer_gather_scatter
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE gcn_vectorize_preferred_simd_mode
#undef TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT
diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc
index c131577..53e86c8 100644
--- a/gcc/config/i386/i386-features.cc
+++ b/gcc/config/i386/i386-features.cc
@@ -3226,7 +3226,7 @@ remove_partial_avx_dependency (void)
break;
}
- /* Only hanlde conversion here. */
+ /* Only handle conversion here. */
machine_mode src_mode
= convert_p ? GET_MODE (XEXP (src, 0)) : VOIDmode;
switch (src_mode)
diff --git a/gcc/config/i386/i386-modes.def b/gcc/config/i386/i386-modes.def
index 2fedbeb..c2db305 100644
--- a/gcc/config/i386/i386-modes.def
+++ b/gcc/config/i386/i386-modes.def
@@ -91,7 +91,6 @@ VECTOR_MODES (FLOAT, 16); /* V8HF V4SF V2DF */
VECTOR_MODES (FLOAT, 32); /* V16HF V8SF V4DF V2TF */
VECTOR_MODES (FLOAT, 64); /* V32HF V16SF V8DF V4TF */
VECTOR_MODES (FLOAT, 128); /* V64HF V32SF V16DF V8TF */
-VECTOR_MODES (FLOAT, 256); /* V128HF V64SF V32DF V16TF */
VECTOR_MODE (FLOAT, HF, 2); /* V2HF */
VECTOR_MODE (FLOAT, BF, 2); /* V2BF */
VECTOR_MODE (FLOAT, HF, 6); /* V6HF */
@@ -102,7 +101,6 @@ VECTOR_MODE (INT, QI, 2); /* V2QI */
VECTOR_MODE (INT, QI, 12); /* V12QI */
VECTOR_MODE (INT, QI, 14); /* V14QI */
VECTOR_MODE (INT, HI, 6); /* V6HI */
-VECTOR_MODE (INT, SI, 64); /* V64SI */
INT_MODE (OI, 32);
INT_MODE (XI, 64);
diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
index ca6bb83..09a35ef 100644
--- a/gcc/config/i386/i386-options.cc
+++ b/gcc/config/i386/i386-options.cc
@@ -3615,6 +3615,18 @@ ix86_handle_cconv_attribute (tree *node, tree name, tree args, int,
return NULL_TREE;
}
+ if (TARGET_64BIT)
+ {
+ /* Do not warn when emulating the MS ABI. */
+ if ((TREE_CODE (*node) != FUNCTION_TYPE
+ && TREE_CODE (*node) != METHOD_TYPE)
+ || ix86_function_type_abi (*node) != MS_ABI)
+ warning (OPT_Wattributes, "%qE attribute ignored",
+ name);
+ *no_add_attrs = true;
+ return NULL_TREE;
+ }
+
/* Can combine regparm with all attributes but fastcall, and thiscall. */
if (is_attribute_p ("regparm", name))
{
@@ -3627,7 +3639,7 @@ ix86_handle_cconv_attribute (tree *node, tree name, tree args, int,
if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
{
- error ("regparam and thiscall attributes are not compatible");
+ error ("regparm and thiscall attributes are not compatible");
}
cst = TREE_VALUE (args);
@@ -3648,19 +3660,7 @@ ix86_handle_cconv_attribute (tree *node, tree name, tree args, int,
return NULL_TREE;
}
- if (TARGET_64BIT)
- {
- /* Do not warn when emulating the MS ABI. */
- if ((TREE_CODE (*node) != FUNCTION_TYPE
- && TREE_CODE (*node) != METHOD_TYPE)
- || ix86_function_type_abi (*node) != MS_ABI)
- warning (OPT_Wattributes, "%qE attribute ignored",
- name);
- *no_add_attrs = true;
- return NULL_TREE;
- }
-
- /* Can combine fastcall with stdcall (redundant) and sseregparm. */
+ /* Can combine fastcall with sseregparm. */
if (is_attribute_p ("fastcall", name))
{
if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
@@ -3681,8 +3681,7 @@ ix86_handle_cconv_attribute (tree *node, tree name, tree args, int,
}
}
- /* Can combine stdcall with fastcall (redundant), regparm and
- sseregparm. */
+ /* Can combine stdcall with regparm and sseregparm. */
else if (is_attribute_p ("stdcall", name))
{
if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
@@ -3732,6 +3731,10 @@ ix86_handle_cconv_attribute (tree *node, tree name, tree args, int,
{
error ("cdecl and thiscall attributes are not compatible");
}
+ if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
+ {
+ error ("regparm and thiscall attributes are not compatible");
+ }
}
/* Can combine sseregparm with all attributes. */
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 590cdf1..65e04d3 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -12442,6 +12442,28 @@ static GTY(()) rtx ix86_tls_symbol;
static rtx
ix86_tls_get_addr (void)
{
+ if (cfun->machine->call_saved_registers
+ == TYPE_NO_CALLER_SAVED_REGISTERS)
+ {
+ /* __tls_get_addr doesn't preserve vector registers. When a
+ function with no_caller_saved_registers attribute calls
+ __tls_get_addr, YMM and ZMM registers will be clobbered.
+ Issue an error and suggest -mtls-dialect=gnu2 in this case. */
+ if (cfun->machine->func_type == TYPE_NORMAL)
+ error (G_("%<-mtls-dialect=gnu2%> must be used with a function"
+ " with the %<no_caller_saved_registers%> attribute"));
+ else
+ error (cfun->machine->func_type == TYPE_EXCEPTION
+ ? G_("%<-mtls-dialect=gnu2%> must be used with an"
+ " exception service routine")
+ : G_("%<-mtls-dialect=gnu2%> must be used with an"
+ " interrupt service routine"));
+ /* Don't issue the same error twice. */
+ cfun->machine->func_type = TYPE_NORMAL;
+ cfun->machine->call_saved_registers
+ = TYPE_DEFAULT_CALL_SAVED_REGISTERS;
+ }
+
if (!ix86_tls_symbol)
{
const char *sym
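
Illustration (assumption, not from this patch): the new diagnostic fires when a function that must preserve all registers reaches a TLS access that expands to a __tls_get_addr call, e.g. when compiling the following with -fPIC and the default -mtls-dialect=gnu; building with -mtls-dialect=gnu2 (TLS descriptors) avoids the call, as the error message suggests.

    __thread int counter;

    __attribute__ ((no_caller_saved_registers))
    void bump (void)
    {
      counter += 1;
    }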
@@ -20007,7 +20029,7 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
tree utype, ures, vce;
utype = unsigned_type_for (TREE_TYPE (arg0));
/* PABSB/W/D/Q store the unsigned result in dst, use ABSU_EXPR
- instead of ABS_EXPR to hanlde overflow case(TYPE_MIN). */
+ instead of ABS_EXPR to handle overflow case(TYPE_MIN). */
ures = gimple_build (&stmts, ABSU_EXPR, utype, arg0);
gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
loc = gimple_location (stmt);
@@ -21491,8 +21513,7 @@ ix86_hard_regno_nregs (unsigned int regno, machine_mode mode)
/* Register pair for mask registers. */
if (mode == P2QImode || mode == P2HImode)
return 2;
- if (mode == V64SFmode || mode == V64SImode)
- return 4;
+
return 1;
}
@@ -23132,7 +23153,7 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
So current solution is make constant disp as cheap as possible. */
if (GET_CODE (addr) == PLUS
&& x86_64_immediate_operand (XEXP (addr, 1), Pmode)
- /* Only hanlde (reg + disp) since other forms of addr are mostly LEA,
+ /* Only handle (reg + disp) since other forms of addr are mostly LEA,
there's no additional cost for the plus of disp. */
&& register_operand (XEXP (addr, 0), Pmode))
{
@@ -25211,20 +25232,14 @@ asm_preferred_eh_data_format (int code, int global)
return DW_EH_PE_absptr;
}
-/* Implement targetm.vectorize.builtin_vectorization_cost. */
+/* Worker for ix86_builtin_vectorization_cost and the fallback calls
+ from ix86_vector_costs::add_stmt_cost. */
static int
-ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
- tree vectype, int)
+ix86_default_vector_cost (enum vect_cost_for_stmt type_of_cost,
+ machine_mode mode)
{
- bool fp = false;
- machine_mode mode = TImode;
+ bool fp = FLOAT_MODE_P (mode);
int index;
- if (vectype != NULL)
- {
- fp = FLOAT_TYPE_P (vectype);
- mode = TYPE_MODE (vectype);
- }
-
switch (type_of_cost)
{
case scalar_stmt:
@@ -25283,14 +25298,14 @@ ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
COSTS_N_INSNS
(ix86_cost->gather_static
+ ix86_cost->gather_per_elt
- * TYPE_VECTOR_SUBPARTS (vectype)) / 2);
+ * GET_MODE_NUNITS (mode)) / 2);
case vector_scatter_store:
return ix86_vec_cost (mode,
COSTS_N_INSNS
(ix86_cost->scatter_static
+ ix86_cost->scatter_per_elt
- * TYPE_VECTOR_SUBPARTS (vectype)) / 2);
+ * GET_MODE_NUNITS (mode)) / 2);
case cond_branch_taken:
return ix86_cost->cond_taken_branch_cost;
@@ -25308,7 +25323,7 @@ ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
case vec_construct:
{
- int n = TYPE_VECTOR_SUBPARTS (vectype);
+ int n = GET_MODE_NUNITS (mode);
/* N - 1 element inserts into an SSE vector, the possible
GPR -> XMM move is accounted for in add_stmt_cost. */
if (GET_MODE_BITSIZE (mode) <= 128)
@@ -25336,6 +25351,17 @@ ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
}
}
+/* Implement targetm.vectorize.builtin_vectorization_cost. */
+static int
+ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
+ tree vectype, int)
+{
+ machine_mode mode = TImode;
+ if (vectype != NULL)
+ mode = TYPE_MODE (vectype);
+ return ix86_default_vector_cost (type_of_cost, mode);
+}
+
/* This function returns the calling abi specific va_list type node.
It returns the FNDECL specific va_list type. */
@@ -25789,7 +25815,7 @@ ix86_vectorize_create_costs (vec_info *vinfo, bool costing_for_scalar)
unsigned
ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
stmt_vec_info stmt_info, slp_tree node,
- tree vectype, int misalign,
+ tree vectype, int,
vect_cost_model_location where)
{
unsigned retval = 0;
@@ -26138,14 +26164,14 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
|| (SLP_TREE_MEMORY_ACCESS_TYPE (node)
== VMAT_GATHER_SCATTER)))))
{
- stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
+ stmt_cost = ix86_default_vector_cost (kind, mode);
stmt_cost *= (TYPE_VECTOR_SUBPARTS (vectype) + 1);
}
else if ((kind == vec_construct || kind == scalar_to_vec)
&& node
&& SLP_TREE_DEF_TYPE (node) == vect_external_def)
{
- stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
+ stmt_cost = ix86_default_vector_cost (kind, mode);
unsigned i;
tree op;
FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
@@ -26209,7 +26235,7 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
TREE_VISITED (op) = 0;
}
if (stmt_cost == -1)
- stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
+ stmt_cost = ix86_default_vector_cost (kind, mode);
if (kind == vec_perm && vectype
&& GET_MODE_SIZE (TYPE_MODE (vectype)) == 32)
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index eb52699..a50475b 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -2968,7 +2968,8 @@
(match_operand:SWI248 1 "const_int_operand"))]
"optimize_insn_for_size_p () && optimize_size > 1
&& operands[1] != const0_rtx
- && operands[1] != constm1_rtx
+ && (operands[1] != constm1_rtx
+ || (<MODE>mode == DImode && LEGACY_INT_REG_P (operands[0])))
&& IN_RANGE (INTVAL (operands[1]), -128, 127)
&& !ix86_red_zone_used
&& REGNO (operands[0]) != SP_REG"
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index d88c3d6..ec74f93 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -21729,6 +21729,19 @@
(const_string "orig")))
(set_attr "mode" "TI,TI,TI,TI,TI,TI,V4SF,V2SF,V2SF")])
+;; Eliminate redundancy caused by
+;; /* Special case TImode to 128-bit vector conversions via V2DI. */
+;; in ix86_expand_vector_move
+
+(define_split
+ [(set (match_operand:V2DI 0 "register_operand")
+ (vec_concat:V2DI
+ (subreg:DI (match_operand:TI 1 "register_operand") 0)
+ (subreg:DI (match_dup 1) 8)))]
+ "TARGET_SSE2 && ix86_pre_reload_split ()"
+ [(set (match_dup 0)
+ (subreg:V2DI (match_dup 1) 0))])
+
(define_insn "*vec_concatv2di_0"
[(set (match_operand:V2DI 0 "register_operand" "=v,v ,x")
(vec_concat:V2DI
diff --git a/gcc/config/riscv/arch-canonicalize b/gcc/config/riscv/arch-canonicalize
index fd55255..34dad45 100755
--- a/gcc/config/riscv/arch-canonicalize
+++ b/gcc/config/riscv/arch-canonicalize
@@ -32,7 +32,7 @@ import itertools
from functools import reduce
SUPPORTED_ISA_SPEC = ["2.2", "20190608", "20191213"]
-CANONICAL_ORDER = "imafdgqlcbkjtpvn"
+CANONICAL_ORDER = "imafdqlcbkjtpvnh"
LONG_EXT_PREFIXES = ['z', 's', 'h', 'x']
#
diff --git a/gcc/config/riscv/gen-riscv-mcpu-texi.cc b/gcc/config/riscv/gen-riscv-mcpu-texi.cc
new file mode 100644
index 0000000..9681438
--- /dev/null
+++ b/gcc/config/riscv/gen-riscv-mcpu-texi.cc
@@ -0,0 +1,43 @@
+#include <string>
+#include <vector>
+#include <stdio.h>
+
+int
+main ()
+{
+ puts ("@c Copyright (C) 2025 Free Software Foundation, Inc.");
+ puts ("@c This is part of the GCC manual.");
+ puts ("@c For copying conditions, see the file gcc/doc/include/fdl.texi.");
+ puts ("");
+ puts ("@c This file is generated automatically using");
+ puts ("@c gcc/config/riscv/gen-riscv-mcpu-texi.cc from:");
+ puts ("@c gcc/config/riscv/riscv-cores.def");
+ puts ("");
+ puts ("@c Please *DO NOT* edit manually.");
+ puts ("");
+ puts ("@samp{Core Name}");
+ puts ("");
+ puts ("@opindex mcpu");
+ puts ("@item -mcpu=@var{processor-string}");
+ puts ("Use architecture of and optimize the output for the given processor, specified");
+ puts ("by particular CPU name. Permissible values for this option are:");
+ puts ("");
+ puts ("");
+
+ std::vector<std::string> coreNames;
+
+#define RISCV_CORE(CORE_NAME, ARCH, MICRO_ARCH) \
+ coreNames.push_back (CORE_NAME);
+#include "riscv-cores.def"
+#undef RISCV_CORE
+
+ for (size_t i = 0; i < coreNames.size(); ++i) {
+ if (i == coreNames.size() - 1) {
+ printf("@samp{%s}.\n", coreNames[i].c_str());
+ } else {
+ printf("@samp{%s},\n\n", coreNames[i].c_str());
+ }
+ }
+
+ return 0;
+}
diff --git a/gcc/config/riscv/gen-riscv-mtune-texi.cc b/gcc/config/riscv/gen-riscv-mtune-texi.cc
new file mode 100644
index 0000000..1bdfe2a
--- /dev/null
+++ b/gcc/config/riscv/gen-riscv-mtune-texi.cc
@@ -0,0 +1,41 @@
+#include <string>
+#include <vector>
+#include <stdio.h>
+
+int
+main ()
+{
+ puts ("@c Copyright (C) 2025 Free Software Foundation, Inc.");
+ puts ("@c This is part of the GCC manual.");
+ puts ("@c For copying conditions, see the file gcc/doc/include/fdl.texi.");
+ puts ("");
+ puts ("@c This file is generated automatically using");
+ puts ("@c gcc/config/riscv/gen-riscv-mtune-texi.cc from:");
+ puts ("@c gcc/config/riscv/riscv-cores.def");
+ puts ("");
+ puts ("@c Please *DO NOT* edit manually.");
+ puts ("");
+ puts ("@samp{Tune Name}");
+ puts ("");
+ puts ("@opindex mtune");
+ puts ("@item -mtune=@var{processor-string}");
+ puts ("Optimize the output for the given processor, specified by microarchitecture or");
+ puts ("particular CPU name. Permissible values for this option are:");
+ puts ("");
+ puts ("");
+
+ std::vector<std::string> tuneNames;
+
+#define RISCV_TUNE(TUNE_NAME, PIPELINE_MODEL, TUNE_INFO) \
+ tuneNames.push_back (TUNE_NAME);
+#include "riscv-cores.def"
+#undef RISCV_TUNE
+
+ for (size_t i = 0; i < tuneNames.size(); ++i) {
+ printf("@samp{%s},\n\n", tuneNames[i].c_str());
+ }
+
+ puts ("and all valid options for @option{-mcpu=}.");
+
+ return 0;
+}
diff --git a/gcc/config/riscv/riscv-vector-costs.cc b/gcc/config/riscv/riscv-vector-costs.cc
index 1c6bc25..44ef44a 100644
--- a/gcc/config/riscv/riscv-vector-costs.cc
+++ b/gcc/config/riscv/riscv-vector-costs.cc
@@ -400,7 +400,7 @@ costs::compute_local_live_ranges (
pair &live_range
= live_ranges->get_or_insert (lhs, &existed_p);
gcc_assert (!existed_p);
- if (STMT_VINFO_MEMORY_ACCESS_TYPE (program_point.stmt_info)
+ if (SLP_TREE_MEMORY_ACCESS_TYPE (*node)
== VMAT_LOAD_STORE_LANES)
point = get_first_lane_point (program_points,
program_point.stmt_info);
@@ -418,8 +418,7 @@ costs::compute_local_live_ranges (
bool existed_p = false;
pair &live_range
= live_ranges->get_or_insert (var, &existed_p);
- if (STMT_VINFO_MEMORY_ACCESS_TYPE (
- program_point.stmt_info)
+ if (SLP_TREE_MEMORY_ACCESS_TYPE (*node)
== VMAT_LOAD_STORE_LANES)
point = get_last_lane_point (program_points,
program_point.stmt_info);
@@ -608,7 +607,7 @@ costs::need_additional_vector_vars_p (stmt_vec_info stmt_info,
if (type == load_vec_info_type || type == store_vec_info_type)
{
if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)
- && STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_GATHER_SCATTER)
+ && SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_GATHER_SCATTER)
return true;
machine_mode mode = TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info));
@@ -1086,7 +1085,7 @@ costs::better_main_loop_than_p (const vector_costs *uncast_other) const
load/store. */
static int
segment_loadstore_group_size (enum vect_cost_for_stmt kind,
- stmt_vec_info stmt_info)
+ stmt_vec_info stmt_info, slp_tree node)
{
if (stmt_info
&& (kind == vector_load || kind == vector_store)
@@ -1094,7 +1093,7 @@ segment_loadstore_group_size (enum vect_cost_for_stmt kind,
{
stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
if (stmt_info
- && STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_LOAD_STORE_LANES)
+ && SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_LOAD_STORE_LANES)
return DR_GROUP_SIZE (stmt_info);
}
return 0;
@@ -1108,7 +1107,7 @@ segment_loadstore_group_size (enum vect_cost_for_stmt kind,
unsigned
costs::adjust_stmt_cost (enum vect_cost_for_stmt kind, loop_vec_info loop,
stmt_vec_info stmt_info,
- slp_tree, tree vectype, int stmt_cost)
+ slp_tree node, tree vectype, int stmt_cost)
{
const cpu_vector_cost *costs = get_vector_costs ();
switch (kind)
@@ -1131,7 +1130,8 @@ costs::adjust_stmt_cost (enum vect_cost_for_stmt kind, loop_vec_info loop,
each vector in the group. Here we additionally add permute
costs for each. */
/* TODO: Indexed and ordered/unordered cost. */
- int group_size = segment_loadstore_group_size (kind, stmt_info);
+ int group_size = segment_loadstore_group_size (kind, stmt_info,
+ node);
if (group_size > 1)
{
switch (group_size)
diff --git a/gcc/config/riscv/t-riscv b/gcc/config/riscv/t-riscv
index 7aac56a..a7eaa8b 100644
--- a/gcc/config/riscv/t-riscv
+++ b/gcc/config/riscv/t-riscv
@@ -229,8 +229,41 @@ s-riscv-ext.texi: build/gen-riscv-ext-texi$(build_exeext)
$(SHELL) $(srcdir)/../move-if-change tmp-riscv-ext.texi $(srcdir)/doc/riscv-ext.texi
$(STAMP) s-riscv-ext.texi
-# Run `riscv-regen' after you changed or added anything from riscv-ext*.def
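+# The Texinfo fragments documenting -mcpu= and -mtune= are generated from
+# riscv-cores.def.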
+RISCV_CORES_DEFS = \
+ $(srcdir)/config/riscv/riscv-cores.def
+
+build/gen-riscv-mtune-texi.o: $(srcdir)/config/riscv/gen-riscv-mtune-texi.cc \
+ $(RISCV_CORES_DEFS)
+ $(CXX_FOR_BUILD) $(CXXFLAGS_FOR_BUILD) -c $< -o $@
+
+build/gen-riscv-mcpu-texi.o: $(srcdir)/config/riscv/gen-riscv-mcpu-texi.cc \
+ $(RISCV_CORES_DEFS)
+ $(CXX_FOR_BUILD) $(CXXFLAGS_FOR_BUILD) -c $< -o $@
+
+build/gen-riscv-mtune-texi$(build_exeext): build/gen-riscv-mtune-texi.o
+ $(LINKER_FOR_BUILD) $(BUILD_LINKERFLAGS) $(BUILD_LDFLAGS) -o $@ $<
+
+build/gen-riscv-mcpu-texi$(build_exeext): build/gen-riscv-mcpu-texi.o
+ $(LINKER_FOR_BUILD) $(BUILD_LINKERFLAGS) $(BUILD_LDFLAGS) -o $@ $<
+
+$(srcdir)/doc/riscv-mtune.texi: $(RISCV_CORES_DEFS)
+$(srcdir)/doc/riscv-mtune.texi: s-riscv-mtune.texi ; @true
+
+$(srcdir)/doc/riscv-mcpu.texi: $(RISCV_CORES_DEFS)
+$(srcdir)/doc/riscv-mcpu.texi: s-riscv-mcpu.texi ; @true
+
+s-riscv-mtune.texi: build/gen-riscv-mtune-texi$(build_exeext)
+ $(RUN_GEN) build/gen-riscv-mtune-texi$(build_exeext) > tmp-riscv-mtune.texi
+ $(SHELL) $(srcdir)/../move-if-change tmp-riscv-mtune.texi $(srcdir)/doc/riscv-mtune.texi
+ $(STAMP) s-riscv-mtune.texi
+
+s-riscv-mcpu.texi: build/gen-riscv-mcpu-texi$(build_exeext)
+ $(RUN_GEN) build/gen-riscv-mcpu-texi$(build_exeext) > tmp-riscv-mcpu.texi
+ $(SHELL) $(srcdir)/../move-if-change tmp-riscv-mcpu.texi $(srcdir)/doc/riscv-mcpu.texi
+ $(STAMP) s-riscv-mcpu.texi
+
+# Run `riscv-regen' after you have changed or added anything in riscv-ext*.def or riscv-cores*.def
.PHONY: riscv-regen
-riscv-regen: s-riscv-ext.texi s-riscv-ext.opt
+riscv-regen: s-riscv-ext.texi s-riscv-ext.opt s-riscv-mtune.texi s-riscv-mcpu.texi
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 1c60695..764b499 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -10320,15 +10320,18 @@ can_be_rotated_to_negative_lis (HOST_WIDE_INT c, int *rot)
/* case b. xx0..01..1xx: some of 15 x's (and some of 16 0's) are
rotated over the highest bit. */
- int pos_one = clz_hwi ((c << 16) >> 16);
- middle_zeros = ctz_hwi (c >> (HOST_BITS_PER_WIDE_INT - pos_one));
- int middle_ones = clz_hwi (~(c << pos_one));
- if (middle_zeros >= 16 && middle_ones >= 33)
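+  /* Do the left shifts on an unsigned copy of C so that a negative value does
+     not lead to undefined behaviour, and only attempt the rotation when
+     POS_ONE is non-zero so the right shift below stays narrower than the
+     word size.  */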
+ unsigned HOST_WIDE_INT uc = c;
+ int pos_one = clz_hwi ((HOST_WIDE_INT) (uc << 16) >> 16);
+ if (pos_one != 0)
{
- *rot = pos_one;
- return true;
+ middle_zeros = ctz_hwi (c >> (HOST_BITS_PER_WIDE_INT - pos_one));
+ int middle_ones = clz_hwi (~(uc << pos_one));
+ if (middle_zeros >= 16 && middle_ones >= 33)
+ {
+ *rot = pos_one;
+ return true;
+ }
}
-
return false;
}
@@ -10445,7 +10448,8 @@ can_be_built_by_li_and_rldic (HOST_WIDE_INT c, int *shift, HOST_WIDE_INT *mask)
if (lz >= HOST_BITS_PER_WIDE_INT)
return false;
- int middle_ones = clz_hwi (~(c << lz));
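+  /* Left-shift an unsigned copy of C so that a negative value does not lead
+     to undefined behaviour.  */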
+ unsigned HOST_WIDE_INT uc = c;
+ int middle_ones = clz_hwi (~(uc << lz));
if (tz + lz + middle_ones >= ones
&& (tz - lz) < HOST_BITS_PER_WIDE_INT
&& tz < HOST_BITS_PER_WIDE_INT)
@@ -10479,7 +10483,7 @@ can_be_built_by_li_and_rldic (HOST_WIDE_INT c, int *shift, HOST_WIDE_INT *mask)
if (!IN_RANGE (pos_first_1, 1, HOST_BITS_PER_WIDE_INT-1))
return false;
- middle_ones = clz_hwi (~c << pos_first_1);
+ middle_ones = clz_hwi ((~(unsigned HOST_WIDE_INT) c) << pos_first_1);
middle_zeros = ctz_hwi (c >> (HOST_BITS_PER_WIDE_INT - pos_first_1));
if (pos_first_1 < HOST_BITS_PER_WIDE_INT
&& middle_ones + middle_zeros < HOST_BITS_PER_WIDE_INT
@@ -10581,7 +10585,8 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c, int *num_insns)
{
/* li/lis; rldicX */
unsigned HOST_WIDE_INT imm = (c | ~mask);
- imm = (imm >> shift) | (imm << (HOST_BITS_PER_WIDE_INT - shift));
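+	  /* Rotate only for a non-zero SHIFT; for SHIFT == 0 the rotate count
+	     HOST_BITS_PER_WIDE_INT - SHIFT would equal the word size.  */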
+ if (shift != 0)
+ imm = (imm >> shift) | (imm << (HOST_BITS_PER_WIDE_INT - shift));
count_or_emit_insn (temp, GEN_INT (imm));
if (shift != 0)
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 9c718ca..e31ee40 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -1969,7 +1969,7 @@
[(set (match_dup 0) (plus:GPR (match_dup 1) (match_dup 3)))
(set (match_dup 0) (plus:GPR (match_dup 0) (match_dup 4)))]
{
- HOST_WIDE_INT val = INTVAL (operands[2]);
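+  /* Use the unsigned value so that computing VAL - LOW below cannot overflow
+     a signed HOST_WIDE_INT.  */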
+ unsigned HOST_WIDE_INT val = UINTVAL (operands[2]);
HOST_WIDE_INT low = sext_hwi (val, 16);
HOST_WIDE_INT rest = trunc_int_for_mode (val - low, <MODE>mode);
diff --git a/gcc/config/s390/s390-protos.h b/gcc/config/s390/s390-protos.h
index d760a7e..6becad1 100644
--- a/gcc/config/s390/s390-protos.h
+++ b/gcc/config/s390/s390-protos.h
@@ -128,6 +128,8 @@ extern void s390_expand_vcond (rtx, rtx, rtx, enum rtx_code, rtx, rtx);
extern void s390_expand_vec_init (rtx, rtx);
extern rtx s390_expand_merge_perm_const (machine_mode, bool);
extern void s390_expand_merge (rtx, rtx, rtx, bool);
+extern void s390_expand_int_spaceship (rtx, rtx, rtx, rtx);
+extern void s390_expand_fp_spaceship (rtx, rtx, rtx, rtx);
extern rtx s390_build_signbit_mask (machine_mode);
extern rtx s390_return_addr_rtx (int, rtx);
extern rtx s390_back_chain_rtx (void);
diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
index abe551c..012b6db 100644
--- a/gcc/config/s390/s390.cc
+++ b/gcc/config/s390/s390.cc
@@ -8213,6 +8213,167 @@ s390_expand_atomic (machine_mode mode, enum rtx_code code,
NULL_RTX, 1, OPTAB_DIRECT), 1);
}
+/* Expand integer op0 = op1 <=> op2, i.e.,
+ op0 = op1 == op2 ? 0 : op1 < op2 ? -1 : 1.
+
+ Signedness is specified by op3. If op3 equals 1, then perform an unsigned
+ comparison, and if op3 equals -1, then perform a signed comparison.
+
+ For integer comparisons we strive for a sequence like
+ CR[L] ; LHI ; LOCHIL ; LOCHIH
+ where the first three instructions fit into a group. */
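+
+/* In C-like pseudo code the intended expansion is roughly:
+
+     cc = compare (op1, op2);          // CR or CRL
+     op0 = 0;                          // LHI
+     if (cc says "less")    op0 = -1;  // LOCHIL
+     if (cc says "greater") op0 = 1;   // LOCHIH  */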
+
+void
+s390_expand_int_spaceship (rtx op0, rtx op1, rtx op2, rtx op3)
+{
+ gcc_assert (op3 == const1_rtx || op3 == constm1_rtx);
+
+ rtx cc, cond_lt, cond_gt;
+ machine_mode cc_mode;
+ machine_mode mode = GET_MODE (op1);
+
+  /* Prior to VXE3, emulate a 128-bit comparison by breaking it up into three
+     comparisons:  First test the high halves.  In case they are equal, then
+     test the low halves.  Finally, test for equality.  Depending on the
+     results make use of LOCs.  */
+ if (mode == TImode && !TARGET_VXE3)
+ {
+ gcc_assert (TARGET_VX);
+ op1
+ = force_reg (V2DImode, simplify_gen_subreg (V2DImode, op1, TImode, 0));
+ op2
+ = force_reg (V2DImode, simplify_gen_subreg (V2DImode, op2, TImode, 0));
+ rtx lab = gen_label_rtx ();
+ rtx ccz = gen_rtx_REG (CCZmode, CC_REGNUM);
+      /* Compare high halves for equality.
+ VEC[L]G op1, op2 sets
+ CC1 if high(op1) < high(op2)
+ and
+ CC2 if high(op1) > high(op2). */
+ machine_mode cc_mode = op3 == const1_rtx ? CCUmode : CCSmode;
+ rtx lane0 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
+ emit_insn (gen_rtx_SET (
+ gen_rtx_REG (cc_mode, CC_REGNUM),
+ gen_rtx_COMPARE (cc_mode,
+ gen_rtx_VEC_SELECT (DImode, op1, lane0),
+ gen_rtx_VEC_SELECT (DImode, op2, lane0))));
+ s390_emit_jump (lab, gen_rtx_NE (CCZmode, ccz, const0_rtx));
+      /* At this point we know that the high halves are equal.
+         VCHLGS op2, op1 sets CC1 if low(op1) < low(op2).  */
+ emit_insn (gen_rtx_PARALLEL (
+ VOIDmode,
+ gen_rtvec (2,
+ gen_rtx_SET (gen_rtx_REG (CCVIHUmode, CC_REGNUM),
+ gen_rtx_COMPARE (CCVIHUmode, op2, op1)),
+ gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (V2DImode)))));
+ emit_label (lab);
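+      /* Start from 1 (greater) and overwrite it with -1 below if CC1 (less
+         than) was set by either of the two comparisons above.  */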
+ emit_insn (gen_rtx_SET (op0, const1_rtx));
+ emit_insn (
+ gen_movsicc (op0,
+ gen_rtx_LTU (CCUmode, gen_rtx_REG (CCUmode, CC_REGNUM),
+ const0_rtx),
+ constm1_rtx, op0));
+      /* Deal with the case where both halves are equal.  */
+ emit_insn (gen_rtx_PARALLEL (
+ VOIDmode,
+ gen_rtvec (2,
+ gen_rtx_SET (gen_rtx_REG (CCVEQmode, CC_REGNUM),
+ gen_rtx_COMPARE (CCVEQmode, op1, op2)),
+ gen_rtx_SET (gen_reg_rtx (V2DImode),
+ gen_rtx_EQ (V2DImode, op1, op2)))));
+ emit_insn (gen_movsicc (op0, gen_rtx_EQ (CCZmode, ccz, const0_rtx),
+ const0_rtx, op0));
+ return;
+ }
+
+ if (mode == QImode || mode == HImode)
+ {
+ rtx_code extend = op3 == const1_rtx ? ZERO_EXTEND : SIGN_EXTEND;
+ op1 = simplify_gen_unary (extend, SImode, op1, mode);
+ op1 = force_reg (SImode, op1);
+ op2 = simplify_gen_unary (extend, SImode, op2, mode);
+ op2 = force_reg (SImode, op2);
+ mode = SImode;
+ }
+
+ if (op3 == const1_rtx)
+ {
+ cc_mode = CCUmode;
+ cc = gen_rtx_REG (cc_mode, CC_REGNUM);
+ cond_lt = gen_rtx_LTU (mode, cc, const0_rtx);
+ cond_gt = gen_rtx_GTU (mode, cc, const0_rtx);
+ }
+ else
+ {
+ cc_mode = CCSmode;
+ cc = gen_rtx_REG (cc_mode, CC_REGNUM);
+ cond_lt = gen_rtx_LT (mode, cc, const0_rtx);
+ cond_gt = gen_rtx_GT (mode, cc, const0_rtx);
+ }
+
+ emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (cc_mode, op1, op2)));
+ emit_move_insn (op0, const0_rtx);
+ emit_insn (gen_movsicc (op0, cond_lt, constm1_rtx, op0));
+ emit_insn (gen_movsicc (op0, cond_gt, const1_rtx, op0));
+}
+
+/* Expand floating-point op0 = op1 <=> op2, i.e.,
+ op0 = op1 == op2 ? 0 : op1 < op2 ? -1 : op1 > op2 ? 1 : 2.
+
+ If op3 equals const0_rtx, then we are interested in the compare only (see
+   test spaceship-fp-4.c).  Otherwise, op3 is a CONST_INT other than
+   const1_rtx and constm1_rtx, and is used to set op0 in the unordered case.
+
+ Emit a branch-only solution, i.e., let if-convert fold the branches into
+ LOCs if applicable. This has the benefit that the solution is also
+ applicable if we are only interested in the compare, i.e., if op3 equals
+ const0_rtx.
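+
+   Sketch of the emitted control flow (before any if-conversion):
+
+     compare (op1, op2);                   LTGT compare
+     if (unordered) goto l_unordered;      omitted for -ffinite-math-only
+     if (equal)     goto l_eq;
+     if (greater)   goto l_gt;
+     op0 = -1; goto l_end;
+     l_eq:        op0 = 0; goto l_end;
+     l_gt:        op0 = 1; goto l_end;
+     l_unordered: op0 = (op3 == const0_rtx ? 2 : op3);
+     l_end: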
+ */
+
+void
+s390_expand_fp_spaceship (rtx op0, rtx op1, rtx op2, rtx op3)
+{
+ gcc_assert (op3 != const1_rtx && op3 != constm1_rtx);
+
+ machine_mode mode = GET_MODE (op1);
+ machine_mode cc_mode = s390_select_ccmode (LTGT, op1, op2);
+ rtx cc_reg = gen_rtx_REG (cc_mode, CC_REGNUM);
+ rtx cond_unordered = gen_rtx_UNORDERED (mode, cc_reg, const0_rtx);
+ rtx cond_eq = gen_rtx_EQ (mode, cc_reg, const0_rtx);
+ rtx cond_gt = gen_rtx_GT (mode, cc_reg, const0_rtx);
+ rtx_insn *insn;
+ rtx l_unordered = gen_label_rtx ();
+ rtx l_eq = gen_label_rtx ();
+ rtx l_gt = gen_label_rtx ();
+ rtx l_end = gen_label_rtx ();
+
+ s390_emit_compare (VOIDmode, LTGT, op1, op2);
+ if (!flag_finite_math_only)
+ {
+ insn = s390_emit_jump (l_unordered, cond_unordered);
+ add_reg_br_prob_note (insn, profile_probability::very_unlikely ());
+ }
+ insn = s390_emit_jump (l_eq, cond_eq);
+ add_reg_br_prob_note (insn, profile_probability::unlikely ());
+ insn = s390_emit_jump (l_gt, cond_gt);
+ add_reg_br_prob_note (insn, profile_probability::even ());
+ emit_move_insn (op0, constm1_rtx);
+ emit_jump (l_end);
+ emit_label (l_eq);
+ emit_move_insn (op0, const0_rtx);
+ emit_jump (l_end);
+ emit_label (l_gt);
+ emit_move_insn (op0, const1_rtx);
+ if (!flag_finite_math_only)
+ {
+ emit_jump (l_end);
+ emit_label (l_unordered);
+ rtx unord_val = op3 == const0_rtx ? const2_rtx : op3;
+ emit_move_insn (op0, unord_val);
+ }
+ emit_label (l_end);
+}
+
/* This is called from dwarf2out.cc via TARGET_ASM_OUTPUT_DWARF_DTPREL.
We need to emit DTP-relative relocations. */
diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
index 1edbfde..8cc48b0 100644
--- a/gcc/config/s390/s390.md
+++ b/gcc/config/s390/s390.md
@@ -1527,6 +1527,27 @@
operands[0] = SET_DEST (PATTERN (curr_insn));
})
+; Restrict the spaceship optab to z13 or later, since from z13 on
+; LOAD HALFWORD IMMEDIATE ON CONDITION is available.
+
+(define_mode_iterator SPACESHIP_INT [(TI "TARGET_VX") DI SI HI QI])
+(define_expand "spaceship<mode>4"
+ [(match_operand:SI 0 "register_operand")
+ (match_operand:SPACESHIP_INT 1 "register_operand")
+ (match_operand:SPACESHIP_INT 2 "register_operand")
+ (match_operand:SI 3 "const_int_operand")]
+ "TARGET_Z13 && TARGET_64BIT"
+ "s390_expand_int_spaceship (operands[0], operands[1], operands[2], operands[3]); DONE;")
+
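+; Operand 3 of the integer variant selects signedness (1 for unsigned, -1 for
+; signed); for the floating-point variant it selects the value returned for an
+; unordered result, or is zero if only the comparison outcome is of interest.
+; See s390_expand_int_spaceship and s390_expand_fp_spaceship.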
+(define_mode_iterator SPACESHIP_BFP [TF DF SF])
+(define_expand "spaceship<mode>4"
+ [(match_operand:SI 0 "register_operand")
+ (match_operand:SPACESHIP_BFP 1 "register_operand")
+ (match_operand:SPACESHIP_BFP 2 "register_operand")
+ (match_operand:SI 3 "const_int_operand")]
+ "TARGET_Z13 && TARGET_64BIT && TARGET_HARD_FLOAT"
+ "s390_expand_fp_spaceship (operands[0], operands[1], operands[2], operands[3]); DONE;")
+
; (TF|DF|SF|TD|DD|SD) instructions