Diffstat (limited to 'gcc/config')
-rw-r--r--  gcc/config/aarch64/aarch64-cost-tables.h | 54
-rw-r--r--  gcc/config/aarch64/aarch64-simd.md | 5
-rw-r--r--  gcc/config/aarch64/aarch64-sve.md | 395
-rw-r--r--  gcc/config/aarch64/aarch64-sve2.md | 10
-rw-r--r--  gcc/config/aarch64/aarch64.cc | 34
-rw-r--r--  gcc/config/aarch64/cortex-a57-fma-steering.cc | 5
-rw-r--r--  gcc/config/aarch64/iterators.md | 23
-rw-r--r--  gcc/config/arm/aarch-cost-tables.h | 36
-rw-r--r--  gcc/config/avr/avr-passes.cc | 6
-rw-r--r--  gcc/config/cris/cris.cc | 6
-rw-r--r--  gcc/config/gcn/gcn-valu.md | 262
-rw-r--r--  gcc/config/gcn/gcn.cc | 106
-rw-r--r--  gcc/config/i386/i386.cc | 6
-rw-r--r--  gcc/config/pru/pru-pragma.cc | 13
-rw-r--r--  gcc/config/pru/pru-protos.h | 8
-rw-r--r--  gcc/config/pru/pru.cc | 8
-rw-r--r--  gcc/config/riscv/autovec-opt.md | 122
-rw-r--r--  gcc/config/riscv/autovec.md | 13
-rw-r--r--  gcc/config/riscv/generic-vector-ooo.md | 85
-rw-r--r--  gcc/config/riscv/mips-p8700.md | 2
-rw-r--r--  gcc/config/riscv/predicates.md | 13
-rw-r--r--  gcc/config/riscv/riscv-protos.h | 16
-rw-r--r--  gcc/config/riscv/riscv-string.cc | 6
-rw-r--r--  gcc/config/riscv/riscv-v.cc | 300
-rw-r--r--  gcc/config/riscv/riscv-vector-builtins-bases.cc | 3
-rw-r--r--  gcc/config/riscv/riscv-vector-builtins.cc | 41
-rw-r--r--  gcc/config/riscv/riscv-vector-builtins.h | 1
-rw-r--r--  gcc/config/riscv/riscv.cc | 40
-rw-r--r--  gcc/config/riscv/vector-iterators.md | 16
-rw-r--r--  gcc/config/riscv/vector.md | 410
-rw-r--r--  gcc/config/riscv/xiangshan.md | 3
-rw-r--r--  gcc/config/s390/s390.cc | 11
-rw-r--r--  gcc/config/xtensa/xtensa.cc | 44
-rw-r--r--  gcc/config/xtensa/xtensa.md | 16
34 files changed, 1531 insertions, 588 deletions
diff --git a/gcc/config/aarch64/aarch64-cost-tables.h b/gcc/config/aarch64/aarch64-cost-tables.h
index c49ff7f..e7926eb 100644
--- a/gcc/config/aarch64/aarch64-cost-tables.h
+++ b/gcc/config/aarch64/aarch64-cost-tables.h
@@ -125,9 +125,9 @@ const struct cpu_cost_table qdf24xx_extra_costs =
{
COSTS_N_INSNS (1), /* alu. */
COSTS_N_INSNS (4), /* mult. */
- COSTS_N_INSNS (1), /* movi. */
- COSTS_N_INSNS (2), /* dup. */
- COSTS_N_INSNS (2) /* extract. */
+ COSTS_N_INSNS (0), /* movi. */
+ COSTS_N_INSNS (1), /* dup. */
+ COSTS_N_INSNS (1) /* extract. */
}
};
@@ -233,9 +233,9 @@ const struct cpu_cost_table thunderx_extra_costs =
{
COSTS_N_INSNS (1), /* Alu. */
COSTS_N_INSNS (4), /* mult. */
- COSTS_N_INSNS (1), /* movi. */
- COSTS_N_INSNS (2), /* dup. */
- COSTS_N_INSNS (2) /* extract. */
+ COSTS_N_INSNS (0), /* movi. */
+ COSTS_N_INSNS (1), /* dup. */
+ COSTS_N_INSNS (1) /* extract. */
}
};
@@ -340,9 +340,9 @@ const struct cpu_cost_table thunderx2t99_extra_costs =
{
COSTS_N_INSNS (1), /* Alu. */
COSTS_N_INSNS (4), /* Mult. */
- COSTS_N_INSNS (1), /* movi. */
- COSTS_N_INSNS (2), /* dup. */
- COSTS_N_INSNS (2) /* extract. */
+ COSTS_N_INSNS (0), /* movi. */
+ COSTS_N_INSNS (1), /* dup. */
+ COSTS_N_INSNS (1) /* extract. */
}
};
@@ -447,9 +447,9 @@ const struct cpu_cost_table thunderx3t110_extra_costs =
{
COSTS_N_INSNS (1), /* Alu. */
COSTS_N_INSNS (4), /* Mult. */
- COSTS_N_INSNS (1), /* movi. */
- COSTS_N_INSNS (2), /* dup. */
- COSTS_N_INSNS (2) /* extract. */
+ COSTS_N_INSNS (0), /* movi. */
+ COSTS_N_INSNS (1), /* dup. */
+ COSTS_N_INSNS (1) /* extract. */
}
};
@@ -555,9 +555,9 @@ const struct cpu_cost_table tsv110_extra_costs =
{
COSTS_N_INSNS (1), /* alu. */
COSTS_N_INSNS (4), /* mult. */
- COSTS_N_INSNS (1), /* movi. */
- COSTS_N_INSNS (2), /* dup. */
- COSTS_N_INSNS (2) /* extract. */
+ COSTS_N_INSNS (0), /* movi. */
+ COSTS_N_INSNS (1), /* dup. */
+ COSTS_N_INSNS (1) /* extract. */
}
};
@@ -662,9 +662,9 @@ const struct cpu_cost_table a64fx_extra_costs =
{
COSTS_N_INSNS (1), /* alu. */
COSTS_N_INSNS (4), /* mult. */
- COSTS_N_INSNS (1), /* movi. */
- COSTS_N_INSNS (2), /* dup. */
- COSTS_N_INSNS (2) /* extract. */
+ COSTS_N_INSNS (0), /* movi. */
+ COSTS_N_INSNS (1), /* dup. */
+ COSTS_N_INSNS (1) /* extract. */
}
};
@@ -769,9 +769,9 @@ const struct cpu_cost_table ampere1_extra_costs =
{
COSTS_N_INSNS (3), /* alu. */
COSTS_N_INSNS (3), /* mult. */
- COSTS_N_INSNS (2), /* movi. */
- COSTS_N_INSNS (2), /* dup. */
- COSTS_N_INSNS (2) /* extract. */
+ COSTS_N_INSNS (1), /* movi. */
+ COSTS_N_INSNS (1), /* dup. */
+ COSTS_N_INSNS (1) /* extract. */
}
};
@@ -876,9 +876,9 @@ const struct cpu_cost_table ampere1a_extra_costs =
{
COSTS_N_INSNS (3), /* alu. */
COSTS_N_INSNS (3), /* mult. */
- COSTS_N_INSNS (2), /* movi. */
- COSTS_N_INSNS (2), /* dup. */
- COSTS_N_INSNS (2) /* extract. */
+ COSTS_N_INSNS (1), /* movi. */
+ COSTS_N_INSNS (1), /* dup. */
+ COSTS_N_INSNS (1) /* extract. */
}
};
@@ -983,9 +983,9 @@ const struct cpu_cost_table ampere1b_extra_costs =
{
COSTS_N_INSNS (1), /* alu. */
COSTS_N_INSNS (2), /* mult. */
- COSTS_N_INSNS (1), /* movi. */
- COSTS_N_INSNS (1), /* dup. */
- COSTS_N_INSNS (1) /* extract. */
+ COSTS_N_INSNS (0), /* movi. */
+ COSTS_N_INSNS (0), /* dup. */
+ COSTS_N_INSNS (0) /* extract. */
}
};
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 270cb2f..8b75c3d 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1190,13 +1190,16 @@
[(set_attr "type" "neon_ins<q>, neon_from_gp<q>, neon_load1_one_lane<q>")]
)
+;; Inserting from the zero register into a vector lane is treated as an
+;; expensive GP->FP move on all CPUs. Avoid it when optimizing for speed.
(define_insn "aarch64_simd_vec_set_zero<mode>"
[(set (match_operand:VALL_F16 0 "register_operand" "=w")
(vec_merge:VALL_F16
(match_operand:VALL_F16 1 "register_operand" "0")
(match_operand:VALL_F16 3 "aarch64_simd_imm_zero" "")
(match_operand:SI 2 "immediate_operand" "i")))]
- "TARGET_SIMD && aarch64_exact_log2_inverse (<nunits>, operands[2]) >= 0"
+ "TARGET_SIMD && aarch64_exact_log2_inverse (<nunits>, operands[2]) >= 0
+ && optimize_function_for_size_p (cfun)"
{
int elt = ENDIAN_LANE_N (<nunits>,
aarch64_exact_log2_inverse (<nunits>,
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index 10aecf1..b252eef 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -3752,9 +3752,9 @@
;; Unpredicated floating-point unary operations.
(define_insn "@aarch64_sve_<optab><mode>"
- [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
- (unspec:SVE_FULL_F
- [(match_operand:SVE_FULL_F 1 "register_operand" "w")]
+ [(set (match_operand:SVE_F 0 "register_operand" "=w")
+ (unspec:SVE_F
+ [(match_operand:SVE_F 1 "register_operand" "w")]
SVE_FP_UNARY))]
"TARGET_SVE"
"<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>"
@@ -3762,25 +3762,41 @@
;; Unpredicated floating-point unary operations.
(define_expand "<optab><mode>2"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
[(match_dup 2)
- (const_int SVE_RELAXED_GP)
- (match_operand:SVE_FULL_F 1 "register_operand")]
+ (match_dup 3)
+ (match_operand:SVE_F 1 "register_operand")]
SVE_COND_FP_UNARY_OPTAB))]
"TARGET_SVE"
{
+ operands[2] = aarch64_sve_fp_pred (<MODE>mode, &operands[3]);
+ }
+)
+
+;; FABS and FNEG are non-trapping, so we can always expand with a <VPRED>
+;; predicate. It doesn't matter whether the padding bits of a partial
+;; vector mode are active or inactive.
+(define_expand "<optab><mode>2"
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_dup 2)
+ (const_int SVE_RELAXED_GP)
+ (match_operand:SVE_F 1 "register_operand")]
+ SVE_COND_FP_UNARY_BITWISE))]
+ "TARGET_SVE"
+ {
operands[2] = aarch64_ptrue_reg (<VPRED>mode);
}
)
;; Predicated floating-point unary operations.
(define_insn "@aarch64_pred_<optab><mode>"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
- [(match_operand:<VPRED> 1 "register_operand")
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
(match_operand:SI 3 "aarch64_sve_gp_strictness")
- (match_operand:SVE_FULL_F 2 "register_operand")]
+ (match_operand:SVE_F 2 "register_operand")]
SVE_COND_FP_UNARY))]
"TARGET_SVE"
{@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
@@ -3806,13 +3822,13 @@
;; Predicated floating-point unary arithmetic, merging with the first input.
(define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
[(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F
+ (unspec:SVE_F
[(match_operand 3)
(const_int SVE_RELAXED_GP)
- (match_operand:SVE_FULL_F 2 "register_operand")]
+ (match_operand:SVE_F 2 "register_operand")]
SVE_COND_FP_UNARY)
(match_dup 2)]
UNSPEC_SEL))]
@@ -3854,15 +3870,15 @@
;; as earlyclobber helps to make the instruction more regular to the
;; register allocator.
(define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
[(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F
+ (unspec:SVE_F
[(match_operand 4)
(const_int SVE_RELAXED_GP)
- (match_operand:SVE_FULL_F 2 "register_operand")]
+ (match_operand:SVE_F 2 "register_operand")]
SVE_COND_FP_UNARY)
- (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")]
+ (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero")]
UNSPEC_SEL))]
"TARGET_SVE && !rtx_equal_p (operands[2], operands[3])"
{@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
@@ -5495,27 +5511,25 @@
;; Split a predicated instruction whose predicate is unused into an
;; unpredicated instruction.
(define_split
- [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
- (unspec:SVE_FULL_F_B16B16
+ [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+ (unspec:SVE_F_B16B16
[(match_operand:<VPRED> 1 "register_operand")
- (match_operand:SI 4 "aarch64_sve_gp_strictness")
- (match_operand:SVE_FULL_F_B16B16 2 "register_operand")
- (match_operand:SVE_FULL_F_B16B16 3 "register_operand")]
+ (const_int SVE_RELAXED_GP)
+ (match_operand:SVE_F_B16B16 2 "register_operand")
+ (match_operand:SVE_F_B16B16 3 "register_operand")]
<SVE_COND_FP>))]
- "TARGET_SVE
- && reload_completed
- && INTVAL (operands[4]) == SVE_RELAXED_GP"
+ "TARGET_SVE && reload_completed"
[(set (match_dup 0)
- (SVE_UNPRED_FP_BINARY:SVE_FULL_F_B16B16 (match_dup 2) (match_dup 3)))]
+ (SVE_UNPRED_FP_BINARY:SVE_F_B16B16 (match_dup 2) (match_dup 3)))]
)
;; Unpredicated floating-point binary operations (post-RA only).
;; These are generated by the split above.
(define_insn "*post_ra_<sve_fp_op><mode>3"
- [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand" "=w")
- (SVE_UNPRED_FP_BINARY:SVE_FULL_F_B16B16
- (match_operand:SVE_FULL_F_B16B16 1 "register_operand" "w")
- (match_operand:SVE_FULL_F_B16B16 2 "register_operand" "w")))]
+ [(set (match_operand:SVE_F_B16B16 0 "register_operand" "=w")
+ (SVE_UNPRED_FP_BINARY:SVE_F_B16B16
+ (match_operand:SVE_F_B16B16 1 "register_operand" "w")
+ (match_operand:SVE_F_B16B16 2 "register_operand" "w")))]
"TARGET_SVE && reload_completed"
"<b><sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>")
@@ -5547,10 +5561,10 @@
;; Unpredicated floating-point binary operations.
(define_insn "@aarch64_sve_<optab><mode>"
- [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
- (unspec:SVE_FULL_F
- [(match_operand:SVE_FULL_F 1 "register_operand" "w")
- (match_operand:SVE_FULL_F 2 "register_operand" "w")]
+ [(set (match_operand:SVE_F 0 "register_operand" "=w")
+ (unspec:SVE_F
+ [(match_operand:SVE_F 1 "register_operand" "w")
+ (match_operand:SVE_F 2 "register_operand" "w")]
SVE_FP_BINARY))]
"TARGET_SVE"
"<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
@@ -5559,27 +5573,27 @@
;; Unpredicated floating-point binary operations that need to be predicated
;; for SVE.
(define_expand "<optab><mode>3"
- [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
- (unspec:SVE_FULL_F_B16B16
+ [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+ (unspec:SVE_F_B16B16
[(match_dup 3)
- (const_int SVE_RELAXED_GP)
- (match_operand:SVE_FULL_F_B16B16 1 "<sve_pred_fp_rhs1_operand>")
- (match_operand:SVE_FULL_F_B16B16 2 "<sve_pred_fp_rhs2_operand>")]
+ (match_dup 4)
+ (match_operand:SVE_F_B16B16 1 "<sve_pred_fp_rhs1_operand>")
+ (match_operand:SVE_F_B16B16 2 "<sve_pred_fp_rhs2_operand>")]
SVE_COND_FP_BINARY_OPTAB))]
"TARGET_SVE && (<supports_bf16> || !<is_bf16>)"
{
- operands[3] = aarch64_ptrue_reg (<VPRED>mode);
+ operands[3] = aarch64_sve_fp_pred (<MODE>mode, &operands[4]);
}
)
;; Predicated floating-point binary operations that have no immediate forms.
(define_insn "@aarch64_pred_<optab><mode>"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
- [(match_operand:<VPRED> 1 "register_operand")
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
(match_operand:SI 4 "aarch64_sve_gp_strictness")
- (match_operand:SVE_FULL_F 2 "register_operand")
- (match_operand:SVE_FULL_F 3 "register_operand")]
+ (match_operand:SVE_F 2 "register_operand")
+ (match_operand:SVE_F 3 "register_operand")]
SVE_COND_FP_BINARY_REG))]
"TARGET_SVE"
{@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
@@ -5607,14 +5621,14 @@
;; Predicated floating-point operations, merging with the first input.
(define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed"
- [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
- (unspec:SVE_FULL_F_B16B16
+ [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+ (unspec:SVE_F_B16B16
[(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F_B16B16
+ (unspec:SVE_F_B16B16
[(match_operand 4)
(const_int SVE_RELAXED_GP)
- (match_operand:SVE_FULL_F_B16B16 2 "register_operand")
- (match_operand:SVE_FULL_F_B16B16 3 "register_operand")]
+ (match_operand:SVE_F_B16B16 2 "register_operand")
+ (match_operand:SVE_F_B16B16 3 "register_operand")]
SVE_COND_FP_BINARY)
(match_dup 2)]
UNSPEC_SEL))]
@@ -5650,14 +5664,14 @@
;; Same for operations that take a 1-bit constant.
(define_insn_and_rewrite "*cond_<optab><mode>_2_const_relaxed"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
[(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F
+ (unspec:SVE_F
[(match_operand 4)
(const_int SVE_RELAXED_GP)
- (match_operand:SVE_FULL_F 2 "register_operand")
- (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")]
+ (match_operand:SVE_F 2 "register_operand")
+ (match_operand:SVE_F 3 "<sve_pred_fp_rhs2_immediate>")]
SVE_COND_FP_BINARY_I1)
(match_dup 2)]
UNSPEC_SEL))]
@@ -5693,14 +5707,14 @@
;; Predicated floating-point operations, merging with the second input.
(define_insn_and_rewrite "*cond_<optab><mode>_3_relaxed"
- [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
- (unspec:SVE_FULL_F_B16B16
+ [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+ (unspec:SVE_F_B16B16
[(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F_B16B16
+ (unspec:SVE_F_B16B16
[(match_operand 4)
(const_int SVE_RELAXED_GP)
- (match_operand:SVE_FULL_F_B16B16 2 "register_operand")
- (match_operand:SVE_FULL_F_B16B16 3 "register_operand")]
+ (match_operand:SVE_F_B16B16 2 "register_operand")
+ (match_operand:SVE_F_B16B16 3 "register_operand")]
SVE_COND_FP_BINARY)
(match_dup 3)]
UNSPEC_SEL))]
@@ -5736,16 +5750,16 @@
;; Predicated floating-point operations, merging with an independent value.
(define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed"
- [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
- (unspec:SVE_FULL_F_B16B16
+ [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+ (unspec:SVE_F_B16B16
[(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F_B16B16
+ (unspec:SVE_F_B16B16
[(match_operand 5)
(const_int SVE_RELAXED_GP)
- (match_operand:SVE_FULL_F_B16B16 2 "register_operand")
- (match_operand:SVE_FULL_F_B16B16 3 "register_operand")]
+ (match_operand:SVE_F_B16B16 2 "register_operand")
+ (match_operand:SVE_F_B16B16 3 "register_operand")]
SVE_COND_FP_BINARY)
- (match_operand:SVE_FULL_F_B16B16 4 "aarch64_simd_reg_or_zero")]
+ (match_operand:SVE_F_B16B16 4 "aarch64_simd_reg_or_zero")]
UNSPEC_SEL))]
"TARGET_SVE
&& (<supports_bf16> || !<is_bf16>)
@@ -5818,16 +5832,16 @@
;; Same for operations that take a 1-bit constant.
(define_insn_and_rewrite "*cond_<optab><mode>_any_const_relaxed"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
[(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F
+ (unspec:SVE_F
[(match_operand 5)
(const_int SVE_RELAXED_GP)
- (match_operand:SVE_FULL_F 2 "register_operand")
- (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")]
+ (match_operand:SVE_F 2 "register_operand")
+ (match_operand:SVE_F 3 "<sve_pred_fp_rhs2_immediate>")]
SVE_COND_FP_BINARY_I1)
- (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
+ (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")]
UNSPEC_SEL))]
"TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
{@ [ cons: =0 , 1 , 2 , 4 ]
@@ -5892,12 +5906,12 @@
;; Predicated floating-point addition.
(define_insn "@aarch64_pred_<optab><mode>"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
- [(match_operand:<VPRED> 1 "register_operand")
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
(match_operand:SI 4 "aarch64_sve_gp_strictness")
- (match_operand:SVE_FULL_F 2 "register_operand")
- (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_operand")]
+ (match_operand:SVE_F 2 "register_operand")
+ (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_operand")]
SVE_COND_FP_ADD))]
"TARGET_SVE"
{@ [ cons: =0 , 1 , %2 , 3 , 4 ; attrs: movprfx ]
@@ -5914,14 +5928,14 @@
;; Predicated floating-point addition of a constant, merging with the
;; first input.
(define_insn_and_rewrite "*cond_add<mode>_2_const_relaxed"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
[(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F
+ (unspec:SVE_F
[(match_operand 4)
(const_int SVE_RELAXED_GP)
- (match_operand:SVE_FULL_F 2 "register_operand")
- (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate")]
+ (match_operand:SVE_F 2 "register_operand")
+ (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_immediate")]
UNSPEC_COND_FADD)
(match_dup 2)]
UNSPEC_SEL))]
@@ -5962,16 +5976,16 @@
;; Predicated floating-point addition of a constant, merging with an
;; independent value.
(define_insn_and_rewrite "*cond_add<mode>_any_const_relaxed"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
[(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F
+ (unspec:SVE_F
[(match_operand 5)
(const_int SVE_RELAXED_GP)
- (match_operand:SVE_FULL_F 2 "register_operand")
- (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate")]
+ (match_operand:SVE_F 2 "register_operand")
+ (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_immediate")]
UNSPEC_COND_FADD)
- (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
+ (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")]
UNSPEC_SEL))]
"TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
{@ [ cons: =0 , 1 , 2 , 3 , 4 ]
@@ -6208,12 +6222,12 @@
;; Predicated floating-point subtraction.
(define_insn "@aarch64_pred_<optab><mode>"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
- [(match_operand:<VPRED> 1 "register_operand")
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
(match_operand:SI 4 "aarch64_sve_gp_strictness")
- (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_operand")
- (match_operand:SVE_FULL_F 3 "register_operand")]
+ (match_operand:SVE_F 2 "aarch64_sve_float_arith_operand")
+ (match_operand:SVE_F 3 "register_operand")]
SVE_COND_FP_SUB))]
"TARGET_SVE"
{@ [ cons: =0 , 1 , 2 , 3 , 4 ; attrs: movprfx ]
@@ -6229,14 +6243,14 @@
;; Predicated floating-point subtraction from a constant, merging with the
;; second input.
(define_insn_and_rewrite "*cond_sub<mode>_3_const_relaxed"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
[(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F
+ (unspec:SVE_F
[(match_operand 4)
(const_int SVE_RELAXED_GP)
- (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate")
- (match_operand:SVE_FULL_F 3 "register_operand")]
+ (match_operand:SVE_F 2 "aarch64_sve_float_arith_immediate")
+ (match_operand:SVE_F 3 "register_operand")]
UNSPEC_COND_FSUB)
(match_dup 3)]
UNSPEC_SEL))]
@@ -6273,16 +6287,16 @@
;; Predicated floating-point subtraction from a constant, merging with an
;; independent value.
(define_insn_and_rewrite "*cond_sub<mode>_const_relaxed"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
[(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F
+ (unspec:SVE_F
[(match_operand 5)
(const_int SVE_RELAXED_GP)
- (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate")
- (match_operand:SVE_FULL_F 3 "register_operand")]
+ (match_operand:SVE_F 2 "aarch64_sve_float_arith_immediate")
+ (match_operand:SVE_F 3 "register_operand")]
UNSPEC_COND_FSUB)
- (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
+ (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")]
UNSPEC_SEL))]
"TARGET_SVE && !rtx_equal_p (operands[3], operands[4])"
{@ [ cons: =0 , 1 , 3 , 4 ]
@@ -6631,12 +6645,12 @@
;; Predicated floating-point multiplication.
(define_insn "@aarch64_pred_<optab><mode>"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
- [(match_operand:<VPRED> 1 "register_operand")
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
(match_operand:SI 4 "aarch64_sve_gp_strictness")
- (match_operand:SVE_FULL_F 2 "register_operand")
- (match_operand:SVE_FULL_F 3 "aarch64_sve_float_mul_operand")]
+ (match_operand:SVE_F 2 "register_operand")
+ (match_operand:SVE_F 3 "aarch64_sve_float_mul_operand")]
SVE_COND_FP_MUL))]
"TARGET_SVE"
{@ [ cons: =0 , 1 , %2 , 3 , 4 ; attrs: movprfx ]
@@ -6671,12 +6685,12 @@
;; -------------------------------------------------------------------------
(define_expand "div<mode>3"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
[(match_dup 3)
- (const_int SVE_RELAXED_GP)
- (match_operand:SVE_FULL_F 1 "nonmemory_operand")
- (match_operand:SVE_FULL_F 2 "register_operand")]
+ (match_dup 4)
+ (match_operand:SVE_F 1 "nonmemory_operand")
+ (match_operand:SVE_F 2 "register_operand")]
UNSPEC_COND_FDIV))]
"TARGET_SVE"
{
@@ -6684,23 +6698,23 @@
DONE;
operands[1] = force_reg (<MODE>mode, operands[1]);
- operands[3] = aarch64_ptrue_reg (<VPRED>mode);
+ operands[3] = aarch64_sve_fp_pred (<MODE>mode, &operands[4]);
}
)
(define_expand "@aarch64_frecpe<mode>"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
- [(match_operand:SVE_FULL_F 1 "register_operand")]
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_operand:SVE_F 1 "register_operand")]
UNSPEC_FRECPE))]
"TARGET_SVE"
)
(define_expand "@aarch64_frecps<mode>"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
- [(match_operand:SVE_FULL_F 1 "register_operand")
- (match_operand:SVE_FULL_F 2 "register_operand")]
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_operand:SVE_F 1 "register_operand")
+ (match_operand:SVE_F 2 "register_operand")]
UNSPEC_FRECPS))]
"TARGET_SVE"
)
@@ -6865,12 +6879,12 @@
;; Predicated floating-point maximum/minimum.
(define_insn "@aarch64_pred_<optab><mode>"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
- [(match_operand:<VPRED> 1 "register_operand")
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
(match_operand:SI 4 "aarch64_sve_gp_strictness")
- (match_operand:SVE_FULL_F 2 "register_operand")
- (match_operand:SVE_FULL_F 3 "aarch64_sve_float_maxmin_operand")]
+ (match_operand:SVE_F 2 "register_operand")
+ (match_operand:SVE_F 3 "aarch64_sve_float_maxmin_operand")]
SVE_COND_FP_MAXMIN))]
"TARGET_SVE"
{@ [ cons: =0 , 1 , %2 , 3 ; attrs: movprfx ]
@@ -9653,6 +9667,31 @@
}
)
+;; As above, for pairs that are used by the auto-vectorizer only.
+(define_insn_and_rewrite "*cond_<optab>_nontrunc<SVE_PARTIAL_F:mode><SVE_HSDI:mode>_relaxed"
+ [(set (match_operand:SVE_HSDI 0 "register_operand")
+ (unspec:SVE_HSDI
+ [(match_operand:<SVE_HSDI:VPRED> 1 "register_operand")
+ (unspec:SVE_HSDI
+ [(match_operand 4)
+ (const_int SVE_RELAXED_GP)
+ (match_operand:SVE_PARTIAL_F 2 "register_operand")]
+ SVE_COND_FCVTI)
+ (match_operand:SVE_HSDI 3 "aarch64_simd_reg_or_zero")]
+ UNSPEC_SEL))]
+ "TARGET_SVE
+ && (~(<SVE_HSDI:self_mask> | <SVE_HSDI:narrower_mask>) & <SVE_PARTIAL_F:self_mask>) == 0"
+ {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
+ [ &w , Upl , w , 0 ; * ] fcvtz<su>\t%0.<SVE_HSDI:Vetype>, %1/m, %2.<SVE_PARTIAL_F:Vetype>
+ [ &w , Upl , w , Dz ; yes ] movprfx\t%0.<SVE_HSDI:Vetype>, %1/z, %2.<SVE_HSDI:Vetype>\;fcvtz<su>\t%0.<SVE_HSDI:Vetype>, %1/m, %2.<SVE_PARTIAL_F:Vetype>
+ [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;fcvtz<su>\t%0.<SVE_HSDI:Vetype>, %1/m, %2.<SVE_PARTIAL_F:Vetype>
+ }
+ "&& !rtx_equal_p (operands[1], operands[4])"
+ {
+ operands[4] = copy_rtx (operands[1]);
+ }
+)
+
(define_insn "*cond_<optab>_nontrunc<SVE_FULL_F:mode><SVE_FULL_HSDI:mode>_strict"
[(set (match_operand:SVE_FULL_HSDI 0 "register_operand")
(unspec:SVE_FULL_HSDI
@@ -9706,6 +9745,29 @@
}
)
+(define_insn_and_rewrite "*cond_<optab>_trunc<VNx2DF_ONLY:mode><VNx2SI_ONLY:mode>_relaxed"
+ [(set (match_operand:VNx2SI_ONLY 0 "register_operand")
+ (unspec:VNx2SI_ONLY
+ [(match_operand:VNx2BI 1 "register_operand")
+ (unspec:VNx2SI_ONLY
+ [(match_operand 4)
+ (const_int SVE_RELAXED_GP)
+ (match_operand:VNx2DF_ONLY 2 "register_operand")]
+ SVE_COND_FCVTI)
+ (match_operand:VNx2SI_ONLY 3 "aarch64_simd_reg_or_zero")]
+ UNSPEC_SEL))]
+ "TARGET_SVE"
+ {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
+ [ &w , Upl , w , 0 ; * ] fcvtz<su>\t%0.<VNx2SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>
+ [ &w , Upl , w , Dz ; yes ] movprfx\t%0.<VNx2DF_ONLY:Vetype>, %1/z, %2.<VNx2DF_ONLY:Vetype>\;fcvtz<su>\t%0.<VNx2SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>
+ [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;fcvtz<su>\t%0.<VNx2SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>
+ }
+ "&& !rtx_equal_p (operands[1], operands[4])"
+ {
+ operands[4] = copy_rtx (operands[1]);
+ }
+)
+
;; -------------------------------------------------------------------------
;; ---- [INT<-FP] Packs
;; -------------------------------------------------------------------------
@@ -9857,6 +9919,31 @@
}
)
+;; As above, for pairs that are used by the auto-vectorizer only.
+(define_insn_and_rewrite "*cond_<optab>_nonextend<SVE_HSDI:mode><SVE_PARTIAL_F:mode>_relaxed"
+ [(set (match_operand:SVE_PARTIAL_F 0 "register_operand")
+ (unspec:SVE_PARTIAL_F
+ [(match_operand:<SVE_HSDI:VPRED> 1 "register_operand")
+ (unspec:SVE_PARTIAL_F
+ [(match_operand 4)
+ (const_int SVE_RELAXED_GP)
+ (match_operand:SVE_HSDI 2 "register_operand")]
+ SVE_COND_ICVTF)
+ (match_operand:SVE_PARTIAL_F 3 "aarch64_simd_reg_or_zero")]
+ UNSPEC_SEL))]
+ "TARGET_SVE
+ && (~(<SVE_HSDI:self_mask> | <SVE_HSDI:narrower_mask>) & <SVE_PARTIAL_F:self_mask>) == 0"
+ {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
+ [ &w , Upl , w , 0 ; * ] <su>cvtf\t%0.<SVE_PARTIAL_F:Vetype>, %1/m, %2.<SVE_HSDI:Vetype>
+ [ &w , Upl , w , Dz ; yes ] movprfx\t%0.<SVE_HSDI:Vetype>, %1/z, %2.<SVE_HSDI:Vetype>\;<su>cvtf\t%0.<SVE_PARTIAL_F:Vetype>, %1/m, %2.<SVE_HSDI:Vetype>
+ [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;<su>cvtf\t%0.<SVE_PARTIAL_F:Vetype>, %1/m, %2.<SVE_HSDI:Vetype>
+ }
+ "&& !rtx_equal_p (operands[1], operands[4])"
+ {
+ operands[4] = copy_rtx (operands[1]);
+ }
+)
+
(define_insn "*cond_<optab>_nonextend<SVE_FULL_HSDI:mode><SVE_FULL_F:mode>_strict"
[(set (match_operand:SVE_FULL_F 0 "register_operand")
(unspec:SVE_FULL_F
@@ -10066,6 +10153,30 @@
}
)
+;; As above, for pairs that are used by the auto-vectorizer only.
+(define_insn_and_rewrite "*cond_<optab>_trunc<SVE_SDF:mode><SVE_PARTIAL_HSF:mode>"
+ [(set (match_operand:SVE_PARTIAL_HSF 0 "register_operand")
+ (unspec:SVE_PARTIAL_HSF
+ [(match_operand:<SVE_SDF:VPRED> 1 "register_operand")
+ (unspec:SVE_PARTIAL_HSF
+ [(match_operand 4)
+ (const_int SVE_RELAXED_GP)
+ (match_operand:SVE_SDF 2 "register_operand")]
+ SVE_COND_FCVT)
+ (match_operand:SVE_PARTIAL_HSF 3 "aarch64_simd_reg_or_zero")]
+ UNSPEC_SEL))]
+ "TARGET_SVE && (~<SVE_SDF:narrower_mask> & <SVE_PARTIAL_HSF:self_mask>) == 0"
+ {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
+ [ w , Upl , w , 0 ; * ] fcvt\t%0.<SVE_PARTIAL_HSF:Vetype>, %1/m, %2.<SVE_SDF:Vetype>
+ [ ?&w , Upl , w , Dz ; yes ] movprfx\t%0.<SVE_SDF:Vetype>, %1/z, %2.<SVE_SDF:Vetype>\;fcvt\t%0.<SVE_PARTIAL_HSF:Vetype>, %1/m, %2.<SVE_SDF:Vetype>
+ [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;fcvt\t%0.<SVE_PARTIAL_HSF:Vetype>, %1/m, %2.<SVE_SDF:Vetype>
+ }
+ "&& !rtx_equal_p (operands[1], operands[4])"
+ {
+ operands[4] = copy_rtx (operands[1]);
+ }
+)
+
;; -------------------------------------------------------------------------
;; ---- [FP<-FP] Packs (bfloat16)
;; -------------------------------------------------------------------------
@@ -10259,6 +10370,30 @@
}
)
+;; As above, for pairs that are used by the auto-vectorizer only.
+(define_insn_and_rewrite "*cond_<optab>_nontrunc<SVE_PARTIAL_HSF:mode><SVE_SDF:mode>_relaxed"
+ [(set (match_operand:SVE_SDF 0 "register_operand")
+ (unspec:SVE_SDF
+ [(match_operand:<SVE_SDF:VPRED> 1 "register_operand")
+ (unspec:SVE_SDF
+ [(match_operand 4)
+ (const_int SVE_RELAXED_GP)
+ (match_operand:SVE_PARTIAL_HSF 2 "register_operand")]
+ SVE_COND_FCVT)
+ (match_operand:SVE_SDF 3 "aarch64_simd_reg_or_zero")]
+ UNSPEC_SEL))]
+ "TARGET_SVE && (~<SVE_SDF:narrower_mask> & <SVE_PARTIAL_HSF:self_mask>) == 0"
+ {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
+ [ w , Upl , w , 0 ; * ] fcvt\t%0.<SVE_SDF:Vetype>, %1/m, %2.<SVE_PARTIAL_HSF:Vetype>
+ [ ?&w , Upl , w , Dz ; yes ] movprfx\t%0.<SVE_SDF:Vetype>, %1/z, %2.<SVE_SDF:Vetype>\;fcvt\t%0.<SVE_SDF:Vetype>, %1/m, %2.<SVE_PARTIAL_HSF:Vetype>
+ [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;fcvt\t%0.<SVE_SDF:Vetype>, %1/m, %2.<SVE_PARTIAL_HSF:Vetype>
+ }
+ "&& !rtx_equal_p (operands[1], operands[4])"
+ {
+ operands[4] = copy_rtx (operands[1]);
+ }
+)
+
;; -------------------------------------------------------------------------
;; ---- [PRED<-PRED] Packs
;; -------------------------------------------------------------------------
diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md
index 8c03e28..31bdd85 100644
--- a/gcc/config/aarch64/aarch64-sve2.md
+++ b/gcc/config/aarch64/aarch64-sve2.md
@@ -1346,12 +1346,12 @@
;; Predicated B16B16 binary operations.
(define_insn "@aarch64_pred_<optab><mode>"
- [(set (match_operand:VNx8BF_ONLY 0 "register_operand")
- (unspec:VNx8BF_ONLY
- [(match_operand:<VPRED> 1 "register_operand")
+ [(set (match_operand:SVE_BF 0 "register_operand")
+ (unspec:SVE_BF
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
(match_operand:SI 4 "aarch64_sve_gp_strictness")
- (match_operand:VNx8BF_ONLY 2 "register_operand")
- (match_operand:VNx8BF_ONLY 3 "register_operand")]
+ (match_operand:SVE_BF 2 "register_operand")
+ (match_operand:SVE_BF 3 "register_operand")]
SVE_COND_FP_BINARY_OPTAB))]
"TARGET_SSVE_B16B16 && <supports_bf16>"
{@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx , is_rev ]
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 0485f69..9e4a37b 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -15854,11 +15854,14 @@ cost_plus:
break;
case CONST_VECTOR:
{
- /* Load using MOVI/MVNI. */
- if (aarch64_simd_valid_mov_imm (x))
- *cost = extra_cost->vect.movi;
- else /* Load using constant pool. */
- *cost = extra_cost->ldst.load;
+ if (speed)
+ {
+ /* Load using MOVI/MVNI. */
+ if (aarch64_simd_valid_mov_imm (x))
+ *cost += extra_cost->vect.movi;
+ else /* Load using constant pool. */
+ *cost += extra_cost->ldst.load;
+ }
break;
}
case VEC_CONCAT:
@@ -15867,7 +15870,8 @@ cost_plus:
break;
case VEC_DUPLICATE:
/* Load using a DUP. */
- *cost = extra_cost->vect.dup;
+ if (speed)
+ *cost += extra_cost->vect.dup;
return false;
case VEC_SELECT:
{
@@ -15875,13 +15879,16 @@ cost_plus:
*cost = rtx_cost (op0, GET_MODE (op0), VEC_SELECT, 0, speed);
/* cost subreg of 0 as free, otherwise as DUP */
- rtx op1 = XEXP (x, 1);
- if (vec_series_lowpart_p (mode, GET_MODE (op1), op1))
- ;
- else if (vec_series_highpart_p (mode, GET_MODE (op1), op1))
- *cost = extra_cost->vect.dup;
- else
- *cost = extra_cost->vect.extract;
+ if (speed)
+ {
+ rtx op1 = XEXP (x, 1);
+ if (vec_series_lowpart_p (mode, GET_MODE (op1), op1))
+ ;
+ else if (vec_series_highpart_p (mode, GET_MODE (op1), op1))
+ *cost += extra_cost->vect.dup;
+ else
+ *cost += extra_cost->vect.extract;
+ }
return true;
}
default:
@@ -17969,6 +17976,7 @@ aarch64_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
/* Check if we've seen an SVE gather/scatter operation and which size. */
if (kind == scalar_load
+ && vectype
&& aarch64_sve_mode_p (TYPE_MODE (vectype))
&& vect_mem_access_type (stmt_info, node) == VMAT_GATHER_SCATTER)
{
diff --git a/gcc/config/aarch64/cortex-a57-fma-steering.cc b/gcc/config/aarch64/cortex-a57-fma-steering.cc
index fd6da66..f7675be 100644
--- a/gcc/config/aarch64/cortex-a57-fma-steering.cc
+++ b/gcc/config/aarch64/cortex-a57-fma-steering.cc
@@ -948,6 +948,11 @@ func_fma_steering::analyze ()
/* Search the chain where this instruction is (one of) the root. */
dest_op_info = insn_rr[INSN_UID (insn)].op_info;
+
+ /* Register rename could fail. */
+ if (!dest_op_info)
+ continue;
+
dest_regno = REGNO (SET_DEST (PATTERN (insn)));
for (i = 0; i < dest_op_info->n_chains; i++)
{
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index c59fcd6..8533912 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -463,6 +463,7 @@
(define_mode_iterator VNx8SI_ONLY [VNx8SI])
(define_mode_iterator VNx8SF_ONLY [VNx8SF])
(define_mode_iterator VNx8DI_ONLY [VNx8DI])
+(define_mode_iterator VNx2SI_ONLY [VNx2SI])
(define_mode_iterator VNx4SI_ONLY [VNx4SI])
(define_mode_iterator VNx4SF_ONLY [VNx4SF])
(define_mode_iterator VNx2DI_ONLY [VNx2DI])
@@ -3366,6 +3367,10 @@
(define_int_iterator SVE_INT_UNARY [UNSPEC_REVB
UNSPEC_REVH UNSPEC_REVW])
+;; This iterator is currently only used for estimation instructions,
+;; which are never generated automatically when -ftrapping-math is true.
+;; The iterator is therefore applied unconditionally to partial FP modes.
+;; This might need to be revisited if new operations are added in future.
(define_int_iterator SVE_FP_UNARY [UNSPEC_FRECPE UNSPEC_RSQRTE])
(define_int_iterator SVE_FP_UNARY_INT [(UNSPEC_FEXPA "TARGET_NON_STREAMING")])
@@ -3378,6 +3383,10 @@
(define_int_iterator SVE_INT_BINARY_MULTI [UNSPEC_SQDMULH
UNSPEC_SRSHL UNSPEC_URSHL])
+;; This iterator is currently only used for estimation instructions,
+;; which are never generated automatically when -ftrapping-math is true.
+;; The iterator is therefore applied unconditionally to partial FP modes.
+;; This might need to be revisited if new operations are added in future.
(define_int_iterator SVE_FP_BINARY [UNSPEC_FRECPS UNSPEC_RSQRTS])
(define_int_iterator SVE_FP_BINARY_INT [UNSPEC_FTSMUL UNSPEC_FTSSEL])
@@ -3429,9 +3438,10 @@
UNSPEC_FMINQV
UNSPEC_FMINNMQV])
-(define_int_iterator SVE_COND_FP_UNARY [UNSPEC_COND_FABS
- UNSPEC_COND_FNEG
- UNSPEC_COND_FRECPX
+(define_int_iterator SVE_COND_FP_UNARY_BITWISE [UNSPEC_COND_FABS
+ UNSPEC_COND_FNEG])
+
+(define_int_iterator SVE_COND_FP_UNARY [UNSPEC_COND_FRECPX
UNSPEC_COND_FRINTA
UNSPEC_COND_FRINTI
UNSPEC_COND_FRINTM
@@ -3439,13 +3449,12 @@
UNSPEC_COND_FRINTP
UNSPEC_COND_FRINTX
UNSPEC_COND_FRINTZ
- UNSPEC_COND_FSQRT])
+ UNSPEC_COND_FSQRT
+ SVE_COND_FP_UNARY_BITWISE])
;; Same as SVE_COND_FP_UNARY, but without codes that have a dedicated
;; <optab><mode>2 expander.
-(define_int_iterator SVE_COND_FP_UNARY_OPTAB [UNSPEC_COND_FABS
- UNSPEC_COND_FNEG
- UNSPEC_COND_FRECPX
+(define_int_iterator SVE_COND_FP_UNARY_OPTAB [UNSPEC_COND_FRECPX
UNSPEC_COND_FRINTA
UNSPEC_COND_FRINTI
UNSPEC_COND_FRINTM
diff --git a/gcc/config/arm/aarch-cost-tables.h b/gcc/config/arm/aarch-cost-tables.h
index c7a14b3..0600e59 100644
--- a/gcc/config/arm/aarch-cost-tables.h
+++ b/gcc/config/arm/aarch-cost-tables.h
@@ -123,9 +123,9 @@ const struct cpu_cost_table generic_extra_costs =
{
COSTS_N_INSNS (1), /* alu. */
COSTS_N_INSNS (4), /* mult. */
- COSTS_N_INSNS (1), /* movi. */
- COSTS_N_INSNS (2), /* dup. */
- COSTS_N_INSNS (2) /* extract. */
+ COSTS_N_INSNS (0), /* movi. */
+ COSTS_N_INSNS (1), /* dup. */
+ COSTS_N_INSNS (1) /* extract. */
}
};
@@ -230,9 +230,9 @@ const struct cpu_cost_table cortexa53_extra_costs =
{
COSTS_N_INSNS (1), /* alu. */
COSTS_N_INSNS (4), /* mult. */
- COSTS_N_INSNS (1), /* movi. */
- COSTS_N_INSNS (2), /* dup. */
- COSTS_N_INSNS (2) /* extract. */
+ COSTS_N_INSNS (0), /* movi. */
+ COSTS_N_INSNS (1), /* dup. */
+ COSTS_N_INSNS (1) /* extract. */
}
};
@@ -337,9 +337,9 @@ const struct cpu_cost_table cortexa57_extra_costs =
{
COSTS_N_INSNS (1), /* alu. */
COSTS_N_INSNS (4), /* mult. */
- COSTS_N_INSNS (1), /* movi. */
- COSTS_N_INSNS (2), /* dup. */
- COSTS_N_INSNS (2) /* extract. */
+ COSTS_N_INSNS (0), /* movi. */
+ COSTS_N_INSNS (1), /* dup. */
+ COSTS_N_INSNS (1) /* extract. */
}
};
@@ -444,9 +444,9 @@ const struct cpu_cost_table cortexa76_extra_costs =
{
COSTS_N_INSNS (1), /* alu. */
COSTS_N_INSNS (4), /* mult. */
- COSTS_N_INSNS (1), /* movi. */
- COSTS_N_INSNS (2), /* dup. */
- COSTS_N_INSNS (2) /* extract. */
+ COSTS_N_INSNS (0), /* movi. */
+ COSTS_N_INSNS (1), /* dup. */
+ COSTS_N_INSNS (1) /* extract. */
}
};
@@ -551,9 +551,9 @@ const struct cpu_cost_table exynosm1_extra_costs =
{
COSTS_N_INSNS (0), /* alu. */
COSTS_N_INSNS (4), /* mult. */
- COSTS_N_INSNS (1), /* movi. */
- COSTS_N_INSNS (2), /* dup. */
- COSTS_N_INSNS (2) /* extract. */
+ COSTS_N_INSNS (0), /* movi. */
+ COSTS_N_INSNS (1), /* dup. */
+ COSTS_N_INSNS (1) /* extract. */
}
};
@@ -658,9 +658,9 @@ const struct cpu_cost_table xgene1_extra_costs =
{
COSTS_N_INSNS (2), /* alu. */
COSTS_N_INSNS (8), /* mult. */
- COSTS_N_INSNS (1), /* movi. */
- COSTS_N_INSNS (2), /* dup. */
- COSTS_N_INSNS (2) /* extract. */
+ COSTS_N_INSNS (0), /* movi. */
+ COSTS_N_INSNS (1), /* dup. */
+ COSTS_N_INSNS (1) /* extract. */
}
};
diff --git a/gcc/config/avr/avr-passes.cc b/gcc/config/avr/avr-passes.cc
index 284f49d..6a88a27 100644
--- a/gcc/config/avr/avr-passes.cc
+++ b/gcc/config/avr/avr-passes.cc
@@ -4120,9 +4120,8 @@ avr_optimize_casesi (rtx_insn *insns[5], rtx *xop)
JUMP_LABEL (cbranch) = xop[4];
++LABEL_NUSES (xop[4]);
- rtx_insn *seq1 = get_insns ();
rtx_insn *last1 = get_last_insn ();
- end_sequence ();
+ rtx_insn *seq1 = end_sequence ();
emit_insn_after (seq1, insns[2]);
@@ -4141,9 +4140,8 @@ avr_optimize_casesi (rtx_insn *insns[5], rtx *xop)
emit_insn (pat_4);
- rtx_insn *seq2 = get_insns ();
rtx_insn *last2 = get_last_insn ();
- end_sequence ();
+ rtx_insn *seq2 = end_sequence ();
emit_insn_after (seq2, insns[3]);
diff --git a/gcc/config/cris/cris.cc b/gcc/config/cris/cris.cc
index a34c9e9..4acdd1d 100644
--- a/gcc/config/cris/cris.cc
+++ b/gcc/config/cris/cris.cc
@@ -3711,9 +3711,11 @@ cris_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &inputs,
/* Determine if the source using MOF. If it is, automatically
clobbering MOF would cause it to have impossible constraints. */
- /* Look for a use of the MOF constraint letter: h. */
+ /* Look for a use of the MOF constraint letter h or a hard register
+ constraint. */
for (unsigned i = 0, n = constraints.length(); i < n; ++i)
- if (strchr (constraints[i], 'h') != NULL)
+ if (strchr (constraints[i], 'h') != NULL
+ || strstr (constraints[i], "{mof}") != NULL)
return NULL;
/* Look for an output or an input that touches MOF. */
diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md
index 7c4dde1..3899117 100644
--- a/gcc/config/gcn/gcn-valu.md
+++ b/gcc/config/gcn/gcn-valu.md
@@ -1133,6 +1133,23 @@
DONE;
})
+(define_expand "gather_load<mode><vndi>"
+ [(match_operand:V_MOV 0 "register_operand")
+ (match_operand:DI 1 "register_operand")
+ (match_operand:<VnDI> 2 "register_operand")
+ (match_operand 3 "immediate_operand")
+ (match_operand:SI 4 "gcn_alu_operand")]
+ ""
+ {
+ rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[1],
+ operands[2], operands[4],
+ INTVAL (operands[3]), NULL);
+
+ emit_insn (gen_gather<mode>_insn_1offset (operands[0], addr, const0_rtx,
+ const0_rtx, const0_rtx));
+ DONE;
+ })
+
; Allow any address expression
(define_expand "gather<mode>_expr<exec>"
[(set (match_operand:V_MOV 0 "register_operand")
@@ -1259,6 +1276,23 @@
DONE;
})
+(define_expand "scatter_store<mode><vndi>"
+ [(match_operand:DI 0 "register_operand")
+ (match_operand:<VnDI> 1 "register_operand")
+ (match_operand 2 "immediate_operand")
+ (match_operand:SI 3 "gcn_alu_operand")
+ (match_operand:V_MOV 4 "register_operand")]
+ ""
+ {
+ rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[0],
+ operands[1], operands[3],
+ INTVAL (operands[2]), NULL);
+
+ emit_insn (gen_scatter<mode>_insn_1offset (addr, const0_rtx, operands[4],
+ const0_rtx, const0_rtx));
+ DONE;
+ })
+
; Allow any address expression
(define_expand "scatter<mode>_expr<exec_scatter>"
[(set (mem:BLK (scratch))
@@ -1645,6 +1679,39 @@
[(set_attr "type" "vmult")
(set_attr "length" "8")])
+(define_insn_and_split "add<mode>3_dup"
+ [(set (match_operand:V_DI 0 "register_operand" "= v")
+ (plus:V_DI
+ (vec_duplicate:V_DI
+ (match_operand:DI 1 "register_operand" "SvB"))
+ (match_operand:V_DI 2 "gcn_alu_operand" "vDb")))
+ (clobber (reg:DI VCC_REG))
+ (clobber (match_scratch:<VnSI> 3 "=&v"))]
+ ""
+ "#"
+ "gcn_can_split_p (<MODE>mode, operands[0])
+ && gcn_can_split_p (<MODE>mode, operands[1])
+ && gcn_can_split_p (<MODE>mode, operands[2])"
+ [(const_int 0)]
+ {
+ rtx vcc = gen_rtx_REG (DImode, VCC_REG);
+ emit_insn (gen_add<vnsi>3_vcc_dup
+ (gcn_operand_part (<MODE>mode, operands[0], 0),
+ gcn_operand_part (DImode, operands[1], 0),
+ gcn_operand_part (<MODE>mode, operands[2], 0),
+ vcc));
+ emit_insn (gen_vec_duplicate<vnsi> (operands[3],
+ gcn_operand_part (DImode, operands[1], 1)));
+ emit_insn (gen_addc<vnsi>3
+ (gcn_operand_part (<MODE>mode, operands[0], 1),
+ operands[3],
+ gcn_operand_part (<MODE>mode, operands[2], 1),
+ vcc, vcc));
+ DONE;
+ }
+ [(set_attr "type" "vmult")
+ (set_attr "length" "8")])
+
(define_insn_and_split "add<mode>3_exec"
[(set (match_operand:V_DI 0 "register_operand" "= v")
(vec_merge:V_DI
@@ -1682,6 +1749,49 @@
[(set_attr "type" "vmult")
(set_attr "length" "8")])
+(define_insn_and_split "add<mode>3_dup_exec"
+ [(set (match_operand:V_DI 0 "register_operand" "= v")
+ (vec_merge:V_DI
+ (plus:V_DI
+ (vec_duplicate:V_DI
+ (match_operand:DI 1 "register_operand" "SvB"))
+ (match_operand:V_DI 2 "gcn_alu_operand" "vDb"))
+ (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0")
+ (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
+ (clobber (reg:DI VCC_REG))
+ (clobber (match_scratch:<VnSI> 5 "=&v"))]
+ ""
+ "#"
+ "gcn_can_split_p (<MODE>mode, operands[0])
+ && gcn_can_split_p (<MODE>mode, operands[1])
+ && gcn_can_split_p (<MODE>mode, operands[2])
+ && gcn_can_split_p (<MODE>mode, operands[4])"
+ [(const_int 0)]
+ {
+ rtx vcc = gen_rtx_REG (DImode, VCC_REG);
+ emit_insn (gen_add<vnsi>3_vcc_dup_exec
+ (gcn_operand_part (<MODE>mode, operands[0], 0),
+ gcn_operand_part (DImode, operands[1], 0),
+ gcn_operand_part (<MODE>mode, operands[2], 0),
+ vcc,
+ gcn_operand_part (<MODE>mode, operands[3], 0),
+ operands[4]));
+ emit_insn (gen_vec_duplicate<vnsi>_exec (operands[5],
+ gcn_operand_part (DImode, operands[1], 1),
+ gcn_gen_undef (<VnSI>mode),
+ operands[4]));
+ emit_insn (gen_addc<vnsi>3_exec
+ (gcn_operand_part (<MODE>mode, operands[0], 1),
+ operands[5],
+ gcn_operand_part (<MODE>mode, operands[2], 1),
+ vcc, vcc,
+ gcn_operand_part (<MODE>mode, operands[3], 1),
+ operands[4]));
+ DONE;
+ }
+ [(set_attr "type" "vmult")
+ (set_attr "length" "8")])
+
(define_insn_and_split "sub<mode>3"
[(set (match_operand:V_DI 0 "register_operand" "= v, v")
(minus:V_DI
@@ -2187,6 +2297,22 @@
[(set_attr "type" "vop3a")
(set_attr "length" "8")])
+(define_insn "<su>mul<mode>3_highpart_dup<exec>"
+ [(set (match_operand:V_SI 0 "register_operand" "= v")
+ (truncate:V_SI
+ (lshiftrt:<VnDI>
+ (mult:<VnDI>
+ (any_extend:<VnDI>
+ (vec_duplicate:V_SI
+ (match_operand:SI 1 "gcn_alu_operand" "SvA")))
+ (any_extend:<VnDI>
+ (match_operand:V_SI 2 "gcn_alu_operand" " vA")))
+ (const_int 32))))]
+ ""
+ "v_mul_hi<sgnsuffix>0\t%0, %2, %1"
+ [(set_attr "type" "vop3a")
+ (set_attr "length" "8")])
+
(define_insn "mul<mode>3<exec>"
[(set (match_operand:V_INT_1REG 0 "register_operand" "= v")
(mult:V_INT_1REG
@@ -2198,11 +2324,11 @@
(set_attr "length" "8")])
(define_insn "mul<mode>3_dup<exec>"
- [(set (match_operand:V_INT_1REG 0 "register_operand" "= v")
+ [(set (match_operand:V_INT_1REG 0 "register_operand" "= v")
(mult:V_INT_1REG
- (match_operand:V_INT_1REG 1 "gcn_alu_operand" "%vSvA")
(vec_duplicate:V_INT_1REG
- (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand" " SvA"))))]
+ (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand" "SvA"))
+ (match_operand:V_INT_1REG 2 "gcn_alu_operand" " vA")))]
""
"v_mul_lo_u32\t%0, %1, %2"
[(set_attr "type" "vop3a")
@@ -2238,6 +2364,37 @@
DONE;
})
+(define_insn_and_split "mul<mode>3_dup"
+ [(set (match_operand:V_DI 0 "register_operand" "=&v")
+ (mult:V_DI
+ (vec_duplicate:V_DI
+ (match_operand:DI 1 "gcn_alu_operand" " Sv"))
+ (match_operand:V_DI 2 "gcn_alu_operand" "vDA")))
+ (clobber (match_scratch:<VnSI> 3 "=&v"))]
+ ""
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+ {
+ rtx out_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
+ rtx out_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
+ rtx left_lo = gcn_operand_part (DImode, operands[1], 0);
+ rtx left_hi = gcn_operand_part (DImode, operands[1], 1);
+ rtx right_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
+ rtx right_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
+ rtx tmp = operands[3];
+
+ emit_insn (gen_mul<vnsi>3_dup (out_lo, left_lo, right_lo));
+ emit_insn (gen_umul<vnsi>3_highpart_dup (out_hi, left_lo, right_lo));
+ emit_insn (gen_mul<vnsi>3_dup (tmp, left_hi, right_lo));
+ emit_insn (gen_add<vnsi>3 (out_hi, out_hi, tmp));
+ emit_insn (gen_mul<vnsi>3_dup (tmp, left_lo, right_hi));
+ emit_insn (gen_add<vnsi>3 (out_hi, out_hi, tmp));
+ emit_insn (gen_mul<vnsi>3_dup (tmp, left_hi, right_hi));
+ emit_insn (gen_add<vnsi>3 (out_hi, out_hi, tmp));
+ DONE;
+ })
+
(define_insn_and_split "mul<mode>3_exec"
[(set (match_operand:V_DI 0 "register_operand" "=&v")
(vec_merge:V_DI
@@ -2286,6 +2443,56 @@
DONE;
})
+(define_insn_and_split "mul<mode>3_dup_exec"
+ [(set (match_operand:V_DI 0 "register_operand" "=&v")
+ (vec_merge:V_DI
+ (mult:V_DI
+ (vec_duplicate:V_DI
+ (match_operand:DI 1 "gcn_alu_operand" " Sv"))
+ (match_operand:V_DI 2 "gcn_alu_operand" "vDA"))
+ (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0")
+ (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
+ (clobber (match_scratch:<VnSI> 5 "=&v"))]
+ ""
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+ {
+ rtx out_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
+ rtx out_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
+ rtx left_lo = gcn_operand_part (DImode, operands[1], 0);
+ rtx left_hi = gcn_operand_part (DImode, operands[1], 1);
+ rtx right_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
+ rtx right_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
+ rtx exec = operands[4];
+ rtx tmp = operands[5];
+
+ rtx old_lo, old_hi;
+ if (GET_CODE (operands[3]) == UNSPEC)
+ {
+ old_lo = old_hi = gcn_gen_undef (<VnSI>mode);
+ }
+ else
+ {
+ old_lo = gcn_operand_part (<MODE>mode, operands[3], 0);
+ old_hi = gcn_operand_part (<MODE>mode, operands[3], 1);
+ }
+
+ rtx undef = gcn_gen_undef (<VnSI>mode);
+
+ emit_insn (gen_mul<vnsi>3_dup_exec (out_lo, left_lo, right_lo, old_lo,
+ exec));
+ emit_insn (gen_umul<vnsi>3_highpart_dup_exec (out_hi, left_lo, right_lo,
+ old_hi, exec));
+ emit_insn (gen_mul<vnsi>3_dup_exec (tmp, left_hi, right_lo, undef, exec));
+ emit_insn (gen_add<vnsi>3_exec (out_hi, out_hi, tmp, out_hi, exec));
+ emit_insn (gen_mul<vnsi>3_dup_exec (tmp, left_lo, right_hi, undef, exec));
+ emit_insn (gen_add<vnsi>3_exec (out_hi, out_hi, tmp, out_hi, exec));
+ emit_insn (gen_mul<vnsi>3_dup_exec (tmp, left_hi, right_hi, undef, exec));
+ emit_insn (gen_add<vnsi>3_exec (out_hi, out_hi, tmp, out_hi, exec));
+ DONE;
+ })
+
(define_insn_and_split "mul<mode>3_zext"
[(set (match_operand:V_DI 0 "register_operand" "=&v")
(mult:V_DI
@@ -4049,6 +4256,32 @@
DONE;
})
+(define_expand "mask_gather_load<mode><vndi>"
+ [(set:V_MOV (match_operand:V_MOV 0 "register_operand")
+ (unspec:V_MOV
+ [(match_operand:DI 1 "register_operand")
+ (match_operand:<VnDI> 2 "register_operand")
+ (match_operand 3 "immediate_operand")
+ (match_operand:SI 4 "gcn_alu_operand")
+ (match_operand:DI 5 "")
+ (match_operand:V_MOV 6 "maskload_else_operand")]
+ UNSPEC_GATHER))]
+ ""
+ {
+ rtx exec = force_reg (DImode, operands[5]);
+
+ rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[1],
+ operands[2], operands[4],
+ INTVAL (operands[3]), exec);
+
+ emit_insn (gen_gather<mode>_insn_1offset_exec (operands[0], addr,
+ const0_rtx, const0_rtx,
+ const0_rtx,
+ gcn_gen_undef (<MODE>mode),
+ exec));
+ DONE;
+ })
+
(define_expand "mask_scatter_store<mode><vnsi>"
[(match_operand:DI 0 "register_operand")
(match_operand:<VnSI> 1 "register_operand")
@@ -4077,6 +4310,27 @@
DONE;
})
+(define_expand "mask_scatter_store<mode><vndi>"
+ [(match_operand:DI 0 "register_operand")
+ (match_operand:<VnDI> 1 "register_operand")
+ (match_operand 2 "immediate_operand")
+ (match_operand:SI 3 "gcn_alu_operand")
+ (match_operand:V_MOV 4 "register_operand")
+ (match_operand:DI 5 "")]
+ ""
+ {
+ rtx exec = force_reg (DImode, operands[5]);
+
+ rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[0],
+ operands[1], operands[3],
+ INTVAL (operands[2]), exec);
+
+ emit_insn (gen_scatter<mode>_insn_1offset_exec (addr, const0_rtx,
+ operands[4], const0_rtx,
+ const0_rtx, exec));
+ DONE;
+ })
+
(define_code_iterator cond_op [plus minus mult])
(define_expand "cond_<expander><mode>"
@@ -4397,7 +4651,7 @@
rtx tmp = gen_reg_rtx (<MODE>mode);
rtx v1 = gen_rtx_REG (<MODE>mode, VGPR_REGNO (1));
- emit_insn (gen_mul<mode>3_dup (tmp, v1, operands[2]));
+ emit_insn (gen_mul<mode>3_dup (tmp, operands[2], v1));
emit_insn (gen_add<mode>3_dup (operands[0], tmp, operands[1]));
DONE;
})
diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc
index 0ce5a29..3b26d5c 100644
--- a/gcc/config/gcn/gcn.cc
+++ b/gcc/config/gcn/gcn.cc
@@ -1275,13 +1275,13 @@ gen_##PREFIX##vN##SUFFIX (PARAMS) \
}
#define GEN_VNM_NOEXEC(PREFIX, SUFFIX, PARAMS, ARGS) \
-GEN_VN_NOEXEC (PREFIX, qi##SUFFIX, A(PARAMS), A(ARGS)) \
-GEN_VN_NOEXEC (PREFIX, hi##SUFFIX, A(PARAMS), A(ARGS)) \
-GEN_VN_NOEXEC (PREFIX, hf##SUFFIX, A(PARAMS), A(ARGS)) \
+USE_QHF (GEN_VN_NOEXEC (PREFIX, qi##SUFFIX, A(PARAMS), A(ARGS))) \
+USE_QHF (GEN_VN_NOEXEC (PREFIX, hi##SUFFIX, A(PARAMS), A(ARGS))) \
+USE_QHF (GEN_VN_NOEXEC (PREFIX, hf##SUFFIX, A(PARAMS), A(ARGS))) \
GEN_VN_NOEXEC (PREFIX, si##SUFFIX, A(PARAMS), A(ARGS)) \
-GEN_VN_NOEXEC (PREFIX, sf##SUFFIX, A(PARAMS), A(ARGS)) \
+USE_QHF (GEN_VN_NOEXEC (PREFIX, sf##SUFFIX, A(PARAMS), A(ARGS))) \
GEN_VN_NOEXEC (PREFIX, di##SUFFIX, A(PARAMS), A(ARGS)) \
-GEN_VN_NOEXEC (PREFIX, df##SUFFIX, A(PARAMS), A(ARGS)) \
+USE_QHF (GEN_VN_NOEXEC (PREFIX, df##SUFFIX, A(PARAMS), A(ARGS))) \
static rtx \
gen_##PREFIX##vNm##SUFFIX (PARAMS) \
{ \
@@ -1289,13 +1289,13 @@ gen_##PREFIX##vNm##SUFFIX (PARAMS) \
\
switch (mode) \
{ \
- case E_QImode: return gen_##PREFIX##vNqi##SUFFIX (ARGS); \
- case E_HImode: return gen_##PREFIX##vNhi##SUFFIX (ARGS); \
- case E_HFmode: return gen_##PREFIX##vNhf##SUFFIX (ARGS); \
+ USE_QHF (case E_QImode: return gen_##PREFIX##vNqi##SUFFIX (ARGS);) \
+ USE_QHF (case E_HImode: return gen_##PREFIX##vNhi##SUFFIX (ARGS);) \
+ USE_QHF (case E_HFmode: return gen_##PREFIX##vNhf##SUFFIX (ARGS);) \
case E_SImode: return gen_##PREFIX##vNsi##SUFFIX (ARGS); \
- case E_SFmode: return gen_##PREFIX##vNsf##SUFFIX (ARGS); \
+ USE_QHF (case E_SFmode: return gen_##PREFIX##vNsf##SUFFIX (ARGS);) \
case E_DImode: return gen_##PREFIX##vNdi##SUFFIX (ARGS); \
- case E_DFmode: return gen_##PREFIX##vNdf##SUFFIX (ARGS); \
+ USE_QHF (case E_DFmode: return gen_##PREFIX##vNdf##SUFFIX (ARGS);) \
default: \
break; \
} \
@@ -1340,13 +1340,13 @@ gen_##PREFIX##vN##SUFFIX (PARAMS, rtx merge_src=NULL, rtx exec=NULL) \
}
#define GEN_VNM(PREFIX, SUFFIX, PARAMS, ARGS) \
-GEN_VN (PREFIX, qi##SUFFIX, A(PARAMS), A(ARGS)) \
-GEN_VN (PREFIX, hi##SUFFIX, A(PARAMS), A(ARGS)) \
-GEN_VN (PREFIX, hf##SUFFIX, A(PARAMS), A(ARGS)) \
+USE_QHF (GEN_VN (PREFIX, qi##SUFFIX, A(PARAMS), A(ARGS))) \
+USE_QHF (GEN_VN (PREFIX, hi##SUFFIX, A(PARAMS), A(ARGS))) \
+USE_QHF (GEN_VN (PREFIX, hf##SUFFIX, A(PARAMS), A(ARGS))) \
GEN_VN (PREFIX, si##SUFFIX, A(PARAMS), A(ARGS)) \
-GEN_VN (PREFIX, sf##SUFFIX, A(PARAMS), A(ARGS)) \
+USE_QHF (GEN_VN (PREFIX, sf##SUFFIX, A(PARAMS), A(ARGS))) \
GEN_VN (PREFIX, di##SUFFIX, A(PARAMS), A(ARGS)) \
-GEN_VN (PREFIX, df##SUFFIX, A(PARAMS), A(ARGS)) \
+USE_QHF (GEN_VN (PREFIX, df##SUFFIX, A(PARAMS), A(ARGS))) \
USE_TI (GEN_VN (PREFIX, ti##SUFFIX, A(PARAMS), A(ARGS))) \
static rtx \
gen_##PREFIX##vNm##SUFFIX (PARAMS, rtx merge_src=NULL, rtx exec=NULL) \
@@ -1355,15 +1355,22 @@ gen_##PREFIX##vNm##SUFFIX (PARAMS, rtx merge_src=NULL, rtx exec=NULL) \
\
switch (mode) \
{ \
- case E_QImode: return gen_##PREFIX##vNqi##SUFFIX (ARGS, merge_src, exec); \
- case E_HImode: return gen_##PREFIX##vNhi##SUFFIX (ARGS, merge_src, exec); \
- case E_HFmode: return gen_##PREFIX##vNhf##SUFFIX (ARGS, merge_src, exec); \
- case E_SImode: return gen_##PREFIX##vNsi##SUFFIX (ARGS, merge_src, exec); \
- case E_SFmode: return gen_##PREFIX##vNsf##SUFFIX (ARGS, merge_src, exec); \
- case E_DImode: return gen_##PREFIX##vNdi##SUFFIX (ARGS, merge_src, exec); \
- case E_DFmode: return gen_##PREFIX##vNdf##SUFFIX (ARGS, merge_src, exec); \
- case E_TImode: \
- USE_TI (return gen_##PREFIX##vNti##SUFFIX (ARGS, merge_src, exec);) \
+ USE_QHF (case E_QImode: \
+ return gen_##PREFIX##vNqi##SUFFIX (ARGS, merge_src, exec);) \
+ USE_QHF (case E_HImode: \
+ return gen_##PREFIX##vNhi##SUFFIX (ARGS, merge_src, exec);) \
+ USE_QHF (case E_HFmode: \
+ return gen_##PREFIX##vNhf##SUFFIX (ARGS, merge_src, exec);) \
+ case E_SImode: \
+ return gen_##PREFIX##vNsi##SUFFIX (ARGS, merge_src, exec); \
+ USE_QHF (case E_SFmode: \
+ return gen_##PREFIX##vNsf##SUFFIX (ARGS, merge_src, exec);) \
+ case E_DImode: \
+ return gen_##PREFIX##vNdi##SUFFIX (ARGS, merge_src, exec); \
+ USE_QHF (case E_DFmode: \
+ return gen_##PREFIX##vNdf##SUFFIX (ARGS, merge_src, exec);) \
+ USE_TI (case E_TImode: \
+ return gen_##PREFIX##vNti##SUFFIX (ARGS, merge_src, exec);) \
default: \
break; \
} \
@@ -1372,7 +1379,8 @@ gen_##PREFIX##vNm##SUFFIX (PARAMS, rtx merge_src=NULL, rtx exec=NULL) \
return NULL_RTX; \
}
-/* These have TImode support. */
+/* These support everything. */
+#define USE_QHF(ARGS) ARGS
#define USE_TI(ARGS) ARGS
GEN_VNM (mov,, A(rtx dest, rtx src), A(dest, src))
GEN_VNM (vec_duplicate,, A(rtx dest, rtx src), A(dest, src))
@@ -1382,6 +1390,7 @@ GEN_VNM (vec_duplicate,, A(rtx dest, rtx src), A(dest, src))
#define USE_TI(ARGS)
GEN_VNM (add,3, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2))
GEN_VN (add,si3_dup, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2))
+GEN_VN (add,di3_dup, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2))
GEN_VN (add,si3_vcc_dup, A(rtx dest, rtx src1, rtx src2, rtx vcc),
A(dest, src1, src2, vcc))
GEN_VN (add,di3_sext_dup2, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2))
@@ -1393,15 +1402,20 @@ GEN_VN (add,di3_vcc_zext_dup2, A(rtx dest, rtx src1, rtx src2, rtx vcc),
GEN_VN (addc,si3, A(rtx dest, rtx src1, rtx src2, rtx vccout, rtx vccin),
A(dest, src1, src2, vccout, vccin))
GEN_VN (and,si3, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2))
-GEN_VN (ashl,si3, A(rtx dest, rtx src, rtx shift), A(dest, src, shift))
GEN_VNM_NOEXEC (ds_bpermute,, A(rtx dest, rtx addr, rtx src, rtx exec),
A(dest, addr, src, exec))
GEN_VNM (gather,_expr, A(rtx dest, rtx addr, rtx as, rtx vol),
A(dest, addr, as, vol))
-GEN_VN (mul,si3_dup, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2))
GEN_VN (sub,si3, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2))
GEN_VN_NOEXEC (vec_series,si, A(rtx dest, rtx x, rtx c), A(dest, x, c))
+/* These do not have QI, HI, or any FP support. */
+#undef USE_QHF
+#define USE_QHF(ARGS)
+GEN_VNM (ashl,3, A(rtx dest, rtx src, rtx shift), A(dest, src, shift))
+GEN_VNM (mul,3_dup, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2))
+
+#undef USE_QHF
#undef USE_TI
#undef GEN_VNM
#undef GEN_VN
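Note: USE_QHF works like the existing USE_TI — it either expands to its argument or to nothing, so one GEN_VNM invocation can include or drop whole mode cases. A minimal standalone C++ sketch of the same trick (illustrative only, not GCC code):

#include <cstdio>

#define USE_QHF(ARGS) ARGS                    /* include the QI/HI cases */
#define GEN_CASE(MODE) case MODE: return #MODE;

enum mode { QI, HI, SI, DI };

static const char *
dispatch (mode m)
{
  switch (m)
    {
    USE_QHF (GEN_CASE (QI))
    USE_QHF (GEN_CASE (HI))
    GEN_CASE (SI)
    GEN_CASE (DI)
    }
  return "unsupported";                       /* reached if USE_QHF expands to nothing */
}

int
main ()
{
  std::printf ("%s\n", dispatch (SI));        /* prints "SI" */
  return 0;
}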
@@ -1995,8 +2009,8 @@ gcn_expand_vector_init (rtx op0, rtx vec)
rtx addr = gen_reg_rtx (addrmode);
int unit_size = GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op0)));
- emit_insn (gen_mulvNsi3_dup (ramp, gen_rtx_REG (offsetmode, VGPR_REGNO (1)),
- GEN_INT (unit_size)));
+ emit_insn (gen_mulvNsi3_dup (ramp, GEN_INT (unit_size),
+ gen_rtx_REG (offsetmode, VGPR_REGNO (1))));
bool simple_repeat = true;
@@ -2293,36 +2307,46 @@ gcn_expand_scalar_to_vector_address (machine_mode mode, rtx exec, rtx mem,
Return values.
ADDR_SPACE_FLAT - return VnDImode vector of absolute addresses.
- ADDR_SPACE_GLOBAL - return VnSImode vector of offsets. */
+ ADDR_SPACE_GLOBAL - return VnSImode vector of offsets.
+ 64-bit offsets - return VnDImode vector of absolute addresses. */
rtx
gcn_expand_scaled_offsets (addr_space_t as, rtx base, rtx offsets, rtx scale,
bool unsigned_p, rtx exec)
{
int vf = GET_MODE_NUNITS (GET_MODE (offsets));
- rtx tmpsi = gen_reg_rtx (VnMODE (vf, SImode));
- rtx tmpdi = gen_reg_rtx (VnMODE (vf, DImode));
+ rtx scaled_offsets = gen_reg_rtx (GET_MODE (offsets));
+ rtx abs_addr = gen_reg_rtx (VnMODE (vf, DImode));
+ bool use_di = GET_MODE_INNER (GET_MODE (scaled_offsets)) == DImode;
if (CONST_INT_P (scale)
&& INTVAL (scale) > 0
&& exact_log2 (INTVAL (scale)) >= 0)
- emit_insn (gen_ashlvNsi3 (tmpsi, offsets,
- GEN_INT (exact_log2 (INTVAL (scale))),
- NULL, exec));
+ emit_insn (gen_ashlvNm3 (scaled_offsets, offsets,
+ GEN_INT (exact_log2 (INTVAL (scale))),
+ NULL, exec));
else
- emit_insn (gen_mulvNsi3_dup (tmpsi, offsets, scale, NULL, exec));
+ emit_insn (gen_mulvNm3_dup (scaled_offsets, scale, offsets, NULL, exec));
+ /* No instructions support DImode offsets. */
+ if (use_di)
+ {
+ emit_insn (gen_addvNdi3_dup (abs_addr, base, scaled_offsets, NULL, exec));
+ return abs_addr;
+ }
/* "Global" instructions do not support negative register offsets. */
- if (as == ADDR_SPACE_FLAT || !unsigned_p)
+ else if (as == ADDR_SPACE_FLAT || !unsigned_p)
{
if (unsigned_p)
- emit_insn (gen_addvNdi3_zext_dup2 (tmpdi, tmpsi, base, NULL, exec));
+ emit_insn (gen_addvNdi3_zext_dup2 (abs_addr, scaled_offsets, base,
+ NULL, exec));
else
- emit_insn (gen_addvNdi3_sext_dup2 (tmpdi, tmpsi, base, NULL, exec));
- return tmpdi;
+ emit_insn (gen_addvNdi3_sext_dup2 (abs_addr, scaled_offsets, base,
+ NULL, exec));
+ return abs_addr;
}
else if (as == ADDR_SPACE_GLOBAL)
- return tmpsi;
+ return scaled_offsets;
gcc_unreachable ();
}
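Note: the rewritten gcn_expand_scaled_offsets now produces addresses in three shapes. A rough scalar model of the per-lane computation, with names chosen only for illustration (the real code emits vector insns; the ADDR_SPACE_GLOBAL case that returns raw offsets is omitted):

#include <cstdint>

static uint64_t
scaled_address (uint64_t base, int64_t offset, int64_t scale,
                bool offset_is_64bit, bool unsigned_p)
{
  /* Power-of-two scales become a shift, anything else a multiply.  */
  int64_t scaled = (scale > 0 && (scale & (scale - 1)) == 0)
                   ? offset << __builtin_ctzll (scale)
                   : offset * scale;

  /* 64-bit offsets: add the base directly (gen_addvNdi3_dup path).  */
  if (offset_is_64bit)
    return base + (uint64_t) scaled;

  /* 32-bit offsets: zero- or sign-extend while adding the base
     (gen_addvNdi3_zext_dup2 / gen_addvNdi3_sext_dup2 paths).  */
  return unsigned_p ? base + (uint64_t) (uint32_t) scaled
                    : base + (uint64_t) (int64_t) (int32_t) scaled;
}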
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 49bd393..4682db85 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -24788,6 +24788,12 @@ static void map_egpr_constraints (vec<const char *> &constraints)
buf.safe_push (cur[j + 1]);
j++;
break;
+ case '{':
+ do
+ {
+ buf.safe_push (cur[j]);
+ } while (cur[j++] != '}');
+ break;
default:
buf.safe_push (cur[j]);
break;
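Note: the new '{' case copies a brace-wrapped constraint alternative verbatim, including both braces. A minimal sketch of the same loop (hypothetical helper, not the i386 code):

#include <string>

static void
copy_braced (const std::string &cur, size_t &j, std::string &buf)
{
  /* cur[j] is '{' on entry; push everything up to and including '}'.  */
  do
    buf.push_back (cur[j]);
  while (cur[j++] != '}');
}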
diff --git a/gcc/config/pru/pru-pragma.cc b/gcc/config/pru/pru-pragma.cc
index c3f3d33..9338780 100644
--- a/gcc/config/pru/pru-pragma.cc
+++ b/gcc/config/pru/pru-pragma.cc
@@ -46,21 +46,24 @@ pru_pragma_ctable_entry (cpp_reader *)
enum cpp_ttype type;
type = pragma_lex (&ctable_index);
- if (type == CPP_NUMBER && tree_fits_uhwi_p (ctable_index))
+ if (type == CPP_NUMBER && tree_fits_shwi_p (ctable_index))
{
type = pragma_lex (&base_addr);
- if (type == CPP_NUMBER && tree_fits_uhwi_p (base_addr))
+ if (type == CPP_NUMBER && tree_fits_shwi_p (base_addr))
{
- unsigned HOST_WIDE_INT i = tree_to_uhwi (ctable_index);
- unsigned HOST_WIDE_INT base = tree_to_uhwi (base_addr);
+ HOST_WIDE_INT i = tree_to_shwi (ctable_index);
+ HOST_WIDE_INT base = sext_hwi (tree_to_shwi (base_addr),
+ POINTER_SIZE);
type = pragma_lex (&base_addr);
if (type != CPP_EOF)
error ("junk at end of %<#pragma CTABLE_ENTRY%>");
- else if (i >= ARRAY_SIZE (pru_ctable))
+ else if (!IN_RANGE (i, 0, ARRAY_SIZE (pru_ctable) - 1))
error ("%<CTABLE_ENTRY%> index %wd is not valid", i);
else if (pru_ctable[i].valid && pru_ctable[i].base != base)
error ("redefinition of %<CTABLE_ENTRY %wd%>", i);
+ else if (!IN_RANGE (base, INT32_MIN, INT32_MAX))
+ error ("%<CTABLE_ENTRY%> base address does not fit in 32-bits");
else
{
if (base & 0xff)
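Note: with signed HOST_WIDE_INT plus sext_hwi, a base written as 0xFFFF0000 and one written as -0x10000 now land on the same value, so later pru_ctable lookups compare equal. A tiny standalone check of that property (POINTER_SIZE assumed to be 32 here):

#include <cstdint>
#include <cassert>

static int64_t
sext32 (int64_t x)     /* models sext_hwi (x, 32) */
{
  return (int32_t) x;
}

int
main ()
{
  assert (sext32 (0xFFFF0000) == sext32 (-0x10000));   /* both are -65536 */
  return 0;
}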
diff --git a/gcc/config/pru/pru-protos.h b/gcc/config/pru/pru-protos.h
index c73fad8..4750f0e 100644
--- a/gcc/config/pru/pru-protos.h
+++ b/gcc/config/pru/pru-protos.h
@@ -23,7 +23,7 @@
struct pru_ctable_entry {
bool valid;
- unsigned HOST_WIDE_INT base;
+ HOST_WIDE_INT base;
};
extern struct pru_ctable_entry pru_ctable[32];
@@ -66,9 +66,9 @@ pru_regno_ok_for_index_p (int regno, bool strict_p)
return pru_regno_ok_for_base_p (regno, strict_p);
}
-extern int pru_get_ctable_exact_base_index (unsigned HOST_WIDE_INT caddr);
-extern int pru_get_ctable_base_index (unsigned HOST_WIDE_INT caddr);
-extern int pru_get_ctable_base_offset (unsigned HOST_WIDE_INT caddr);
+extern int pru_get_ctable_exact_base_index (HOST_WIDE_INT caddr);
+extern int pru_get_ctable_base_index (HOST_WIDE_INT caddr);
+extern int pru_get_ctable_base_offset (HOST_WIDE_INT caddr);
extern int pru_symref2ioregno (rtx op);
diff --git a/gcc/config/pru/pru.cc b/gcc/config/pru/pru.cc
index 47e5f24..322e319 100644
--- a/gcc/config/pru/pru.cc
+++ b/gcc/config/pru/pru.cc
@@ -1428,7 +1428,7 @@ pru_valid_const_ubyte_offset (machine_mode mode, HOST_WIDE_INT offset)
/* Recognize a CTABLE base address. Return CTABLE entry index, or -1 if
base was not found in the pragma-filled pru_ctable. */
int
-pru_get_ctable_exact_base_index (unsigned HOST_WIDE_INT caddr)
+pru_get_ctable_exact_base_index (HOST_WIDE_INT caddr)
{
unsigned int i;
@@ -1444,7 +1444,7 @@ pru_get_ctable_exact_base_index (unsigned HOST_WIDE_INT caddr)
/* Check if the given address can be addressed via CTABLE_BASE + UBYTE_OFFS,
and return the base CTABLE index if possible. */
int
-pru_get_ctable_base_index (unsigned HOST_WIDE_INT caddr)
+pru_get_ctable_base_index (HOST_WIDE_INT caddr)
{
unsigned int i;
@@ -1461,7 +1461,7 @@ pru_get_ctable_base_index (unsigned HOST_WIDE_INT caddr)
/* Return the offset from some CTABLE base for this address. */
int
-pru_get_ctable_base_offset (unsigned HOST_WIDE_INT caddr)
+pru_get_ctable_base_offset (HOST_WIDE_INT caddr)
{
int i;
@@ -2004,7 +2004,7 @@ pru_print_operand_address (FILE *file, machine_mode mode, rtx op)
case CONST_INT:
{
- unsigned HOST_WIDE_INT caddr = INTVAL (op);
+ HOST_WIDE_INT caddr = INTVAL (op);
int base = pru_get_ctable_base_index (caddr);
int offs = pru_get_ctable_base_offset (caddr);
if (base < 0)
diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md
index f372f0e..6531996 100644
--- a/gcc/config/riscv/autovec-opt.md
+++ b/gcc/config/riscv/autovec-opt.md
@@ -1714,6 +1714,74 @@
}
[(set_attr "type" "vialu")])
+(define_insn_and_split "*<sat_op_v_vdup>_vx_<mode>"
+ [(set (match_operand:V_VLSI 0 "register_operand")
+ (if_then_else:V_VLSI
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand")
+ (match_operand 5 "vector_length_operand")
+ (match_operand 6 "const_int_operand")
+ (match_operand 7 "const_int_operand")
+ (match_operand 8 "const_int_operand")
+ (match_operand 9 "const_int_operand")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)
+ (reg:SI VXRM_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec:V_VLSI
+ [(match_operand:V_VLSI 3 "register_operand")
+ (vec_duplicate:V_VLSI
+ (match_operand:<VEL> 4 "reg_or_int_operand"))] VSAT_VX_OP_V_VDUP)
+ (unspec:V_VLSI
+ [(match_operand:DI 2 "register_operand")] UNSPEC_VUNDEF)))]
+ "TARGET_VECTOR && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+ {
+ int vxrm_val = INTVAL (operands[9]);
+ riscv_vector::expand_vx_binary_vxrm_vec_vec_dup (operands[0], operands[3],
+ operands[4],
+ <VSAT_VX_OP_V_VDUP>,
+ vxrm_val, <MODE>mode);
+
+ DONE;
+ }
+ [(set_attr "type" "vaalu")])
+
+(define_insn_and_split "*<sat_op_vdup_v>_vx_<mode>"
+ [(set (match_operand:V_VLSI 0 "register_operand")
+ (if_then_else:V_VLSI
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand")
+ (match_operand 5 "vector_length_operand")
+ (match_operand 6 "const_int_operand")
+ (match_operand 7 "const_int_operand")
+ (match_operand 8 "const_int_operand")
+ (match_operand 9 "const_int_operand")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)
+ (reg:SI VXRM_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec:V_VLSI
+ [(vec_duplicate:V_VLSI
+ (match_operand:<VEL> 4 "reg_or_int_operand"))
+ (match_operand:V_VLSI 3 "register_operand")] VSAT_VX_OP_VDUP_V)
+ (unspec:V_VLSI
+ [(match_operand:DI 2 "register_operand")] UNSPEC_VUNDEF)))]
+ "TARGET_VECTOR && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+ {
+ int vxrm_val = INTVAL (operands[9]);
+ riscv_vector::expand_vx_binary_vxrm_vec_dup_vec (operands[0], operands[3],
+ operands[4],
+ <VSAT_VX_OP_VDUP_V>,
+ vxrm_val, <MODE>mode);
+
+ DONE;
+ }
+ [(set_attr "type" "vaalu")])
+
;; =============================================================================
;; Combine vec_duplicate + op.vv to op.vf
;; Include
@@ -1838,8 +1906,58 @@
emit_insn (gen_extend<vsubel><vel>2(tmp, operands[1]));
rtx ops[] = {operands[0], tmp};
- riscv_vector::emit_vlmax_insn (code_for_pred_broadcast (<MODE>mode),
- riscv_vector::UNARY_OP, ops);
+ riscv_vector::expand_broadcast (<MODE>mode, ops);
+ DONE;
+ }
+ [(set_attr "type" "vfwmuladd")]
+)
+
+;; vfwnmacc.vf
+(define_insn_and_split "*vfwnmacc_vf_<mode>"
+ [(set (match_operand:VWEXTF 0 "register_operand")
+ (minus:VWEXTF
+ (mult:VWEXTF
+ (neg:VWEXTF
+ (vec_duplicate:VWEXTF
+ (float_extend:<VEL>
+ (match_operand:<VSUBEL> 2 "register_operand"))))
+ (float_extend:VWEXTF
+ (match_operand:<V_DOUBLE_TRUNC> 3 "register_operand")))
+ (match_operand:VWEXTF 1 "register_operand")))]
+ "TARGET_VECTOR && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+ {
+ rtx ops[] = {operands[0], operands[1], operands[2], operands[3]};
+ riscv_vector::emit_vlmax_insn(
+ code_for_pred_widen_mul_neg_scalar(MINUS, <MODE>mode),
+ riscv_vector::WIDEN_TERNARY_OP_FRM_DYN, ops);
+ DONE;
+ }
+ [(set_attr "type" "vfwmuladd")]
+)
+
+;; vfwnmsac.vf
+(define_insn_and_split "*vfwnmsac_vf_<mode>"
+ [(set (match_operand:VWEXTF 0 "register_operand")
+ (minus:VWEXTF
+ (match_operand:VWEXTF 1 "register_operand")
+ (mult:VWEXTF
+ (float_extend:VWEXTF
+ (match_operand:<V_DOUBLE_TRUNC> 3 "register_operand"))
+ (vec_duplicate:VWEXTF
+ (float_extend:<VEL>
+ (match_operand:<VSUBEL> 2 "register_operand"))))))]
+ "TARGET_VECTOR && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+ {
+ rtx ops[] = {operands[0], operands[1], operands[2], operands[3]};
+ riscv_vector::emit_vlmax_insn(
+ code_for_pred_widen_mul_neg_scalar (PLUS, <MODE>mode),
+ riscv_vector::WIDEN_TERNARY_OP_FRM_DYN, ops);
DONE;
}
[(set_attr "type" "vfwmuladd")]
diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index da5d0e3..48de5ef 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -1359,9 +1359,7 @@
if (operands[2] == const0_rtx)
{
rtx ops[] = {operands[0], operands[0], operands[1]};
- riscv_vector::emit_nonvlmax_insn (code_for_pred_broadcast (<MODE>mode),
- riscv_vector::SCALAR_MOVE_MERGED_OP_TU,
- ops, CONST1_RTX (Pmode));
+ riscv_vector::expand_set_first_tu (<MODE>mode, ops);
}
else
{
@@ -1385,8 +1383,7 @@
VL we need for the slide. */
rtx tmp = gen_reg_rtx (<MODE>mode);
rtx ops1[] = {tmp, operands[1]};
- emit_nonvlmax_insn (code_for_pred_broadcast (<MODE>mode),
- riscv_vector::UNARY_OP, ops1, length);
+ riscv_vector::expand_broadcast (<MODE>mode, ops1, length);
/* Slide exactly one element up leaving the tail elements
unchanged. */
@@ -2489,7 +2486,8 @@
(sign_extend:VWEXTI
(match_operand:<V_DOUBLE_TRUNC> 1 "register_operand"))
(sign_extend:VWEXTI
- (match_operand:<V_DOUBLE_TRUNC> 2 "register_operand"))))))]
+ (match_operand:<V_DOUBLE_TRUNC> 2 "register_operand")))
+ (const_int 1))))]
"TARGET_VECTOR"
{
insn_code icode = code_for_pred (UNSPEC_VAADD, <V_DOUBLE_TRUNC>mode);
@@ -2522,7 +2520,8 @@
(match_operand:<V_DOUBLE_TRUNC> 1 "register_operand"))
(sign_extend:VWEXTI
(match_operand:<V_DOUBLE_TRUNC> 2 "register_operand")))
- (const_int 1)))))]
+ (const_int 1))
+ (const_int 1))))]
"TARGET_VECTOR"
{
insn_code icode = code_for_pred (UNSPEC_VAADD, <V_DOUBLE_TRUNC>mode);
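Note: both autovec.md fixes above touch the averaging-add expanders that map onto vaadd. For reference, a worked scalar model of floor vs. ceil averaging; the vxrm association mentioned in the comments is an assumption for illustration, not taken from this diff:

#include <cstdint>

/* Floor average: (a + b) >> 1, vaadd rounding down (vxrm RDN).  */
static int8_t
avg_floor (int8_t a, int8_t b)
{
  return ((int16_t) a + b) >> 1;
}

/* Ceil average: (a + b + 1) >> 1, vaadd rounding to nearest up (vxrm RNU).  */
static int8_t
avg_ceil (int8_t a, int8_t b)
{
  return ((int16_t) a + b + 1) >> 1;
}

/* avg_floor (3, 4) == 3, avg_ceil (3, 4) == 4.  */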
diff --git a/gcc/config/riscv/generic-vector-ooo.md b/gcc/config/riscv/generic-vector-ooo.md
index ab9e57f..773003b 100644
--- a/gcc/config/riscv/generic-vector-ooo.md
+++ b/gcc/config/riscv/generic-vector-ooo.md
@@ -17,6 +17,9 @@
;; <http://www.gnu.org/licenses/>.
;; Vector load/store
+;; The insn reservations include "generic" as we won't have an in-order
+;; generic definition for vector instructions.
+
(define_automaton "vector_ooo")
;; Separate issue queue for vector instructions.
@@ -29,119 +32,141 @@
(define_cpu_unit "vxu_ooo_multicycle" "vector_ooo")
(define_insn_reservation "vec_load" 6
- (eq_attr "type" "vlde,vldm,vlds,vldux,vldox,vldff,vldr")
+ (and (eq_attr "tune" "generic_ooo,generic")
+ (eq_attr "type" "vlde,vldm,vlds,vldux,vldox,vldff,vldr"))
"vxu_ooo_issue,vxu_ooo_alu")
(define_insn_reservation "vec_store" 6
- (eq_attr "type" "vste,vstm,vsts,vstux,vstox,vstr")
+ (and (eq_attr "tune" "generic_ooo,generic")
+ (eq_attr "type" "vste,vstm,vsts,vstux,vstox,vstr"))
"vxu_ooo_issue,vxu_ooo_alu")
;; Vector segment loads/stores.
(define_insn_reservation "vec_loadstore_seg" 10
- (eq_attr "type" "vlsegde,vlsegds,vlsegdux,vlsegdox,vlsegdff,\
- vssegte,vssegts,vssegtux,vssegtox")
+ (and (eq_attr "tune" "generic_ooo,generic")
+ (eq_attr "type" "vlsegde,vlsegds,vlsegdux,vlsegdox,vlsegdff,\
+ vssegte,vssegts,vssegtux,vssegtox"))
"vxu_ooo_issue,vxu_ooo_alu")
;; Regular vector operations and integer comparisons.
(define_insn_reservation "vec_alu" 3
- (eq_attr "type" "vialu,viwalu,vext,vicalu,vshift,vnshift,viminmax,vicmp,\
- vimov,vsalu,vaalu,vsshift,vnclip,vmov,vfmov,vector,\
- vandn,vbrev,vbrev8,vrev8,vclz,vctz,vrol,vror,vwsll")
+ (and (eq_attr "tune" "generic_ooo,generic")
+ (eq_attr "type" "vialu,viwalu,vext,vicalu,vshift,vnshift,viminmax,vicmp,\
+ vimov,vsalu,vaalu,vsshift,vnclip,vmov,vfmov,vector,\
+ vandn,vbrev,vbrev8,vrev8,vclz,vctz,vrol,vror,vwsll"))
"vxu_ooo_issue,vxu_ooo_alu")
;; Vector float comparison, conversion etc.
(define_insn_reservation "vec_fcmp" 3
- (eq_attr "type" "vfrecp,vfminmax,vfcmp,vfsgnj,vfclass,vfcvtitof,\
- vfcvtftoi,vfwcvtitof,vfwcvtftoi,vfwcvtftof,vfncvtitof,\
- vfncvtftoi,vfncvtftof,vfncvtbf16,vfwcvtbf16")
+ (and (eq_attr "tune" "generic_ooo,generic")
+ (eq_attr "type" "vfrecp,vfminmax,vfcmp,vfsgnj,vfclass,vfcvtitof,\
+ vfcvtftoi,vfwcvtitof,vfwcvtftoi,vfwcvtftof,vfncvtitof,\
+ vfncvtftoi,vfncvtftof,vfncvtbf16,vfwcvtbf16"))
"vxu_ooo_issue,vxu_ooo_alu")
;; Vector integer multiplication.
(define_insn_reservation "vec_imul" 4
- (eq_attr "type" "vimul,viwmul,vimuladd,viwmuladd,vsmul,vclmul,vclmulh,\
- vghsh,vgmul")
+ (and (eq_attr "tune" "generic_ooo,generic")
+ (eq_attr "type" "vimul,viwmul,vimuladd,viwmuladd,vsmul,vclmul,vclmulh,\
+ vghsh,vgmul"))
"vxu_ooo_issue,vxu_ooo_alu")
;; Vector float addition.
(define_insn_reservation "vec_fadd" 4
- (eq_attr "type" "vfalu,vfwalu")
+ (and (eq_attr "tune" "generic_ooo,generic")
+ (eq_attr "type" "vfalu,vfwalu"))
"vxu_ooo_issue,vxu_ooo_alu")
;; Vector float multiplication and FMA.
(define_insn_reservation "vec_fmul" 6
- (eq_attr "type" "vfmul,vfwmul,vfmuladd,vfwmuladd,vfwmaccbf16,sf_vqmacc,sf_vfnrclip")
+ (and (eq_attr "tune" "generic_ooo,generic")
+ (eq_attr "type" "vfmul,vfwmul,vfmuladd,vfwmuladd,vfwmaccbf16,sf_vqmacc,sf_vfnrclip"))
"vxu_ooo_issue,vxu_ooo_alu")
;; Vector crypto, assumed to be a generic operation for now.
(define_insn_reservation "vec_crypto" 4
- (eq_attr "type" "crypto,vclz,vctz,vcpop")
+ (and (eq_attr "tune" "generic_ooo,generic")
+ (eq_attr "type" "crypto,vclz,vctz,vcpop"))
"vxu_ooo_issue,vxu_ooo_alu")
;; Vector crypto, AES
(define_insn_reservation "vec_crypto_aes" 4
- (eq_attr "type" "vaesef,vaesem,vaesdf,vaesdm,vaeskf1,vaeskf2,vaesz")
+ (and (eq_attr "tune" "generic_ooo,generic")
+ (eq_attr "type" "vaesef,vaesem,vaesdf,vaesdm,vaeskf1,vaeskf2,vaesz"))
"vxu_ooo_issue,vxu_ooo_alu")
;; Vector crypto, sha
(define_insn_reservation "vec_crypto_sha" 4
- (eq_attr "type" "vsha2ms,vsha2ch,vsha2cl")
+ (and (eq_attr "tune" "generic_ooo,generic")
+ (eq_attr "type" "vsha2ms,vsha2ch,vsha2cl"))
"vxu_ooo_issue,vxu_ooo_alu")
;; Vector crypto, SM3/4
(define_insn_reservation "vec_crypto_sm" 4
- (eq_attr "type" "vsm4k,vsm4r,vsm3me,vsm3c")
+ (and (eq_attr "tune" "generic_ooo,generic")
+ (eq_attr "type" "vsm4k,vsm4r,vsm3me,vsm3c"))
"vxu_ooo_issue,vxu_ooo_alu")
;; Vector permute.
(define_insn_reservation "vec_perm" 3
- (eq_attr "type" "vimerge,vfmerge,vslideup,vslidedown,vislide1up,\
- vislide1down,vfslide1up,vfslide1down,vgather,vcompress")
+ (and (eq_attr "tune" "generic_ooo,generic")
+ (eq_attr "type" "vimerge,vfmerge,vslideup,vslidedown,vislide1up,\
+ vislide1down,vfslide1up,vfslide1down,vgather,vcompress"))
"vxu_ooo_issue,vxu_ooo_alu")
;; Vector reduction.
(define_insn_reservation "vec_reduction" 8
- (eq_attr "type" "vired,viwred,vfredu,vfwredu")
+ (and (eq_attr "tune" "generic_ooo,generic")
+ (eq_attr "type" "vired,viwred,vfredu,vfwredu"))
"vxu_ooo_issue,vxu_ooo_multicycle")
;; Vector ordered reduction, assume the latency number is for
;; a 128-bit vector. It is scaled in riscv_sched_adjust_cost
;; for larger vectors.
(define_insn_reservation "vec_ordered_reduction" 10
- (eq_attr "type" "vfredo,vfwredo")
+ (and (eq_attr "tune" "generic_ooo,generic")
+ (eq_attr "type" "vfredo,vfwredo"))
"vxu_ooo_issue,vxu_ooo_multicycle*3")
;; Vector integer division, assume not pipelined.
(define_insn_reservation "vec_idiv" 16
- (eq_attr "type" "vidiv")
+ (and (eq_attr "tune" "generic_ooo,generic")
+ (eq_attr "type" "vidiv"))
"vxu_ooo_issue,vxu_ooo_multicycle*3")
;; Vector float divisions and sqrt, assume not pipelined.
(define_insn_reservation "vec_float_divsqrt" 16
- (eq_attr "type" "vfdiv,vfsqrt")
+ (and (eq_attr "tune" "generic_ooo,generic")
+ (eq_attr "type" "vfdiv,vfsqrt"))
"vxu_ooo_issue,vxu_ooo_multicycle*3")
;; Vector mask operations.
(define_insn_reservation "vec_mask" 2
- (eq_attr "type" "vmalu,vmpop,vmffs,vmsfs,vmiota,vmidx,vimovvx,vimovxv,\
- vfmovvf,vfmovfv")
+ (and (eq_attr "tune" "generic_ooo,generic")
+ (eq_attr "type" "vmalu,vmpop,vmffs,vmsfs,vmiota,vmidx,vimovvx,vimovxv,\
+ vfmovvf,vfmovfv"))
"vxu_ooo_issue,vxu_ooo_alu")
;; Vector vsetvl.
(define_insn_reservation "vec_vesetvl" 1
- (eq_attr "type" "vsetvl,vsetvl_pre")
+ (and (eq_attr "tune" "generic_ooo,generic")
+ (eq_attr "type" "vsetvl,vsetvl_pre"))
"vxu_ooo_issue")
;; Vector rounding mode setters, assume pipeline barrier.
(define_insn_reservation "vec_setrm" 20
- (eq_attr "type" "wrvxrm,wrfrm")
+ (and (eq_attr "tune" "generic_ooo,generic")
+ (eq_attr "type" "wrvxrm,wrfrm"))
"vxu_ooo_issue,vxu_ooo_issue*3")
;; Vector read vlen/vlenb.
(define_insn_reservation "vec_readlen" 4
- (eq_attr "type" "rdvlenb,rdvl")
+ (and (eq_attr "tune" "generic_ooo,generic")
+ (eq_attr "type" "rdvlenb,rdvl"))
"vxu_ooo_issue,vxu_ooo_issue")
;; Vector sf_vcp.
(define_insn_reservation "vec_sf_vcp" 2
- (eq_attr "type" "sf_vc,sf_vc_se")
+ (and (eq_attr "tune" "generic_ooo,generic")
+ (eq_attr "type" "sf_vc,sf_vc_se"))
"vxu_ooo_issue")
diff --git a/gcc/config/riscv/mips-p8700.md b/gcc/config/riscv/mips-p8700.md
index ae0ea8d..fac9abb 100644
--- a/gcc/config/riscv/mips-p8700.md
+++ b/gcc/config/riscv/mips-p8700.md
@@ -163,5 +163,5 @@
vgather,vcompress,vmov,vector,vandn,vbrev,vbrev8,vrev8,vclz,vctz,vcpop,vrol,vror,vwsll,
vclmul,vclmulh,vghsh,vgmul,vaesef,vaesem,vaesdf,vaesdm,vaeskf1,vaeskf2,vaesz,
vsha2ms,vsha2ch,vsha2cl,vsm4k,vsm4r,vsm3me,vsm3c,vfncvtbf16,vfwcvtbf16,vfwmaccbf16,
- sf_vc,sf_vc_se"))
+ sf_vc,sf_vc_se,ghost"))
"mips_p8700_dummies")
diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md
index 1f9a6b5..381f96c 100644
--- a/gcc/config/riscv/predicates.md
+++ b/gcc/config/riscv/predicates.md
@@ -518,6 +518,10 @@
(define_predicate "vector_broadcast_mask_operand"
(ior (match_operand 0 "vector_least_significant_set_mask_operand")
+ (match_operand 0 "vector_all_trues_mask_operand")))
+
+(define_predicate "strided_broadcast_mask_operand"
+ (ior (match_operand 0 "vector_least_significant_set_mask_operand")
(ior (match_operand 0 "register_operand")
(match_operand 0 "vector_all_trues_mask_operand"))))
@@ -619,6 +623,15 @@
(define_predicate "direct_broadcast_operand"
(match_test "riscv_vector::can_be_broadcast_p (op)"))
+;; A strided broadcast is just a fallback pattern that loads from
+;; memory.
+(define_predicate "strided_broadcast_operand"
+ (match_test "riscv_vector::strided_broadcast_p (op)"))
+
+(define_predicate "any_broadcast_operand"
+ (ior (match_operand 0 "direct_broadcast_operand")
+ (match_operand 0 "strided_broadcast_operand")))
+
;; A CONST_INT operand that has exactly two bits cleared.
(define_predicate "const_nottwobits_operand"
(and (match_code "const_int")
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index a41c4c2..539321f 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -414,8 +414,14 @@ enum insn_flags : unsigned int
/* Means INSN has VXRM operand and the value is VXRM_RNU. */
VXRM_RNU_P = 1 << 20,
+ /* Means INSN has VXRM operand and the value is VXRM_RNE. */
+ VXRM_RNE_P = 1 << 21,
+
/* Means INSN has VXRM operand and the value is VXRM_RDN. */
- VXRM_RDN_P = 1 << 21,
+ VXRM_RDN_P = 1 << 22,
+
+ /* Means INSN has VXRM operand and the value is VXRM_ROD. */
+ VXRM_ROD_P = 1 << 23,
};
enum insn_type : unsigned int
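Note: the renumbering above makes room for the two new rounding modes. A standalone sketch of how one of these flag bits selects the vxrm operand value; the numeric encodings follow the RISC-V vxrm field (rnu=0, rne=1, rdn=2, rod=3), while the helper itself is illustrative:

enum vxrm_flag : unsigned int
{
  VXRM_RNU_P = 1u << 20,   /* round to nearest up */
  VXRM_RNE_P = 1u << 21,   /* round to nearest even (new) */
  VXRM_RDN_P = 1u << 22,   /* round down (renumbered) */
  VXRM_ROD_P = 1u << 23,   /* round to odd (new) */
};

static int
vxrm_operand (unsigned int flags)
{
  if (flags & VXRM_RNU_P) return 0;   /* VXRM_RNU */
  if (flags & VXRM_RNE_P) return 1;   /* VXRM_RNE */
  if (flags & VXRM_RDN_P) return 2;   /* VXRM_RDN */
  if (flags & VXRM_ROD_P) return 3;   /* VXRM_ROD */
  return -1;                          /* insn has no vxrm operand */
}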
@@ -477,7 +483,9 @@ enum insn_type : unsigned int
BINARY_OP_TUMA = __MASK_OP_TUMA | BINARY_OP_P,
BINARY_OP_FRM_DYN = BINARY_OP | FRM_DYN_P,
BINARY_OP_VXRM_RNU = BINARY_OP | VXRM_RNU_P,
+ BINARY_OP_VXRM_RNE = BINARY_OP | VXRM_RNE_P,
BINARY_OP_VXRM_RDN = BINARY_OP | VXRM_RDN_P,
+ BINARY_OP_VXRM_ROD = BINARY_OP | VXRM_ROD_P,
/* Ternary operator. Always have real merge operand. */
TERNARY_OP = HAS_DEST_P | HAS_MASK_P | USE_ALL_TRUES_MASK_P | HAS_MERGE_P
@@ -672,6 +680,8 @@ void expand_vec_oct_sstrunc (rtx, rtx, machine_mode, machine_mode,
machine_mode);
void expand_vx_binary_vec_dup_vec (rtx, rtx, rtx, rtx_code, machine_mode);
void expand_vx_binary_vec_vec_dup (rtx, rtx, rtx, rtx_code, machine_mode);
+void expand_vx_binary_vxrm_vec_vec_dup (rtx, rtx, rtx, int, int, machine_mode);
+void expand_vx_binary_vxrm_vec_dup_vec (rtx, rtx, rtx, int, int, machine_mode);
#endif
bool sew64_scalar_helper (rtx *, rtx *, rtx, machine_mode,
bool, void (*)(rtx *, rtx), enum avl_type);
@@ -695,6 +705,9 @@ bool expand_block_move (rtx, rtx, rtx, bool);
machine_mode preferred_simd_mode (scalar_mode);
machine_mode get_mask_mode (machine_mode);
void expand_vec_series (rtx, rtx, rtx, rtx = 0);
+void expand_broadcast (machine_mode, rtx *, rtx = 0);
+void expand_set_first (machine_mode, rtx *, rtx = 0);
+void expand_set_first_tu (machine_mode, rtx *, rtx = 0);
void expand_vec_init (rtx, rtx);
void expand_vec_perm (rtx, rtx, rtx, rtx);
void expand_select_vl (rtx *);
@@ -762,6 +775,7 @@ enum vlmul_type get_vlmul (rtx_insn *);
int count_regno_occurrences (rtx_insn *, unsigned int);
bool imm_avl_p (machine_mode);
bool can_be_broadcast_p (rtx);
+bool strided_broadcast_p (rtx);
bool gather_scatter_valid_offset_p (machine_mode);
HOST_WIDE_INT estimated_poly_value (poly_int64, unsigned int);
bool whole_reg_to_reg_move_p (rtx *, machine_mode, int);
diff --git a/gcc/config/riscv/riscv-string.cc b/gcc/config/riscv/riscv-string.cc
index 9080189..61c4a09 100644
--- a/gcc/config/riscv/riscv-string.cc
+++ b/gcc/config/riscv/riscv-string.cc
@@ -1625,16 +1625,14 @@ expand_vec_setmem (rtx dst_in, rtx length_in, rtx fill_value_in)
Otherwise, use a predicated store. */
if (known_eq (GET_MODE_SIZE (info.vmode), INTVAL (info.avl)))
{
- emit_vlmax_insn (code_for_pred_broadcast (info.vmode), UNARY_OP,
- broadcast_ops);
+ riscv_vector::expand_broadcast (info.vmode, broadcast_ops);
emit_move_insn (dst, fill_value);
}
else
{
if (!satisfies_constraint_vl (info.avl))
info.avl = force_reg (Pmode, info.avl);
- emit_nonvlmax_insn (code_for_pred_broadcast (info.vmode),
- riscv_vector::UNARY_OP, broadcast_ops, info.avl);
+ riscv_vector::expand_broadcast (info.vmode, broadcast_ops, info.avl);
machine_mode mask_mode
= riscv_vector::get_vector_mode (BImode, GET_MODE_NUNITS (info.vmode))
.require ();
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 242ac08..54eb8c6 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -351,9 +351,12 @@ public:
add_rounding_mode_operand (FRM_RNE);
else if (m_insn_flags & VXRM_RNU_P)
add_rounding_mode_operand (VXRM_RNU);
+ else if (m_insn_flags & VXRM_RNE_P)
+ add_rounding_mode_operand (VXRM_RNE);
else if (m_insn_flags & VXRM_RDN_P)
add_rounding_mode_operand (VXRM_RDN);
-
+ else if (m_insn_flags & VXRM_ROD_P)
+ add_rounding_mode_operand (VXRM_ROD);
if (insn_data[(int) icode].n_operands != m_opno)
internal_error ("invalid number of operands for insn %s, "
@@ -1190,6 +1193,59 @@ expand_vector_init_trailing_same_elem (rtx target,
return false;
}
+/* Helper function to emit a vmv.vx/vi or one of the float variants.
+ If VL is not given a VLMAX insn will be emitted, otherwise
+ a non-VLMAX insn with length VL.
+ If the value to be broadcast is not suitable for vmv.vx
+ fall back to a vlse with zero stride. This itself has a
+ fallback if the uarch prefers not to use a strided load
+ for broadcast. */
+
+void
+expand_broadcast (machine_mode mode, rtx *ops, rtx vl)
+{
+ rtx elt = ops[1];
+ avl_type type = vl ? NONVLMAX : VLMAX;
+ if (can_be_broadcast_p (elt))
+ emit_avltype_insn (code_for_pred_broadcast (mode), UNARY_OP, ops,
+ type, vl);
+ else
+ emit_avltype_insn (code_for_pred_strided_broadcast (mode),
+ UNARY_OP, ops, type, vl);
+}
+
+/* Similar to expand_broadcast but emits a vmv.s.x/vfmv.s.f instead. */
+
+void
+expand_set_first (machine_mode mode, rtx *ops, rtx vl)
+{
+ rtx elt = ops[1];
+ avl_type type = vl ? NONVLMAX : VLMAX;
+ if (can_be_broadcast_p (elt))
+ emit_avltype_insn (code_for_pred_broadcast (mode),
+ SCALAR_MOVE_OP, ops, type, vl);
+ else
+ emit_avltype_insn (code_for_pred_strided_broadcast (mode),
+ SCALAR_MOVE_OP, ops, type, vl);
+}
+
+/* Similar to expand_set_first but keeping the tail elements
+ unchanged (TU). */
+
+void
+expand_set_first_tu (machine_mode mode, rtx *ops, rtx vl)
+{
+ rtx elt = ops[2];
+ if (!vl)
+ vl = const1_rtx;
+ if (can_be_broadcast_p (elt))
+ emit_nonvlmax_insn (code_for_pred_broadcast (mode),
+ SCALAR_MOVE_MERGED_OP_TU, ops, vl);
+ else
+ emit_nonvlmax_insn (code_for_pred_strided_broadcast (mode),
+ SCALAR_MOVE_MERGED_OP_TU, ops, vl);
+}
+
static void
expand_const_vec_duplicate (rtx target, rtx src, rtx elt)
{
@@ -1226,7 +1282,7 @@ expand_const_vec_duplicate (rtx target, rtx src, rtx elt)
if (lra_in_progress)
{
rtx ops[] = {result, elt};
- emit_vlmax_insn (code_for_pred_broadcast (mode), UNARY_OP, ops);
+ expand_broadcast (mode, ops);
}
else
{
@@ -1278,8 +1334,7 @@ expand_const_vector_duplicate_repeating (rtx target, rvv_builder *builder)
{
dup = gen_reg_rtx (builder->new_mode ());
rtx ops[] = {dup, ele};
- emit_vlmax_insn (code_for_pred_broadcast (builder->new_mode ()),
- UNARY_OP, ops);
+ expand_broadcast (builder->new_mode (), ops);
}
else
dup = expand_vector_broadcast (builder->new_mode (), ele);
@@ -1322,8 +1377,7 @@ expand_const_vector_duplicate_default (rtx target, rvv_builder *builder)
rtx tmp1 = gen_reg_rtx (builder->mode ());
rtx dup_ops[] = {tmp1, builder->elt (0)};
- emit_vlmax_insn (code_for_pred_broadcast (builder->mode ()), UNARY_OP,
- dup_ops);
+ expand_broadcast (builder->mode (), dup_ops);
for (unsigned int i = 1; i < builder->npatterns (); i++)
{
@@ -2136,18 +2190,32 @@ has_vi_variant_p (rtx_code code, rtx x)
}
}
+/* This is a helper for binary ops with DImode scalar operands that are
+ broadcast (like vadd.vx v1, a1).
+ Instead of having similar code for all the expanders this function
+ unifies the handling. For 64-bit targets all we do is choose
+ between the vi variant (if available) and the register variant.
+ For 32-bit targets we either create the sign-extending variant
+ of vop.vx (when the immediate fits 32 bits) or emit a vector
+ broadcast of the 64-bit register/immediate and switch to a
+ vop.vv (replacing the scalar op with the broadcast vector). */
+
bool
sew64_scalar_helper (rtx *operands, rtx *scalar_op, rtx vl,
machine_mode vector_mode, bool has_vi_variant_p,
void (*emit_vector_func) (rtx *, rtx), enum avl_type type)
{
machine_mode scalar_mode = GET_MODE_INNER (vector_mode);
+
+ /* If the scalar broadcast op fits into an immediate, use the
+ vop.vi variant if there is one. */
if (has_vi_variant_p)
{
*scalar_op = force_reg (scalar_mode, *scalar_op);
return false;
}
+ /* On a 64-bit target we can always use the vop.vx variant. */
if (TARGET_64BIT)
{
if (!rtx_equal_p (*scalar_op, const0_rtx))
@@ -2155,6 +2223,8 @@ sew64_scalar_helper (rtx *operands, rtx *scalar_op, rtx vl,
return false;
}
+ /* On a 32-bit target, if there is no vop.vi variant but the immediate fits
+ in 32 bits, we need to use the sign-extending (SI -> DI) vop.vx variants. */
if (immediate_operand (*scalar_op, Pmode))
{
if (!rtx_equal_p (*scalar_op, const0_rtx))
@@ -2164,40 +2234,29 @@ sew64_scalar_helper (rtx *operands, rtx *scalar_op, rtx vl,
return false;
}
- bool avoid_strided_broadcast = false;
+ /* Now we're left with a 64-bit immediate or a register.
+ We cannot use a vop.vx variant but must broadcast the value first
+ and switch to a vop.vv variant.
+ Broadcast can either be done via vlse64.v v1, reg, zero
+ or by loading one 64-bit element (vle64.v) and using a
+ broadcast vrgather.vi. This is decided when splitting
+ the strided broadcast insn. */
+ gcc_assert (!TARGET_64BIT
+ && (CONST_INT_P (*scalar_op)
+ || register_operand (*scalar_op, scalar_mode)));
+
if (CONST_INT_P (*scalar_op))
{
if (maybe_gt (GET_MODE_SIZE (scalar_mode), GET_MODE_SIZE (Pmode)))
- {
- if (strided_load_broadcast_p ())
- *scalar_op = force_const_mem (scalar_mode, *scalar_op);
- else
- avoid_strided_broadcast = true;
- }
+ *scalar_op = force_const_mem (scalar_mode, *scalar_op);
else
*scalar_op = force_reg (scalar_mode, *scalar_op);
}
rtx tmp = gen_reg_rtx (vector_mode);
- if (!avoid_strided_broadcast)
- {
- rtx ops[] = {tmp, *scalar_op};
- emit_avltype_insn (code_for_pred_broadcast (vector_mode), UNARY_OP, ops,
- type, vl);
- }
- else
- {
- /* Load scalar as V1DI and broadcast via vrgather.vi. */
- rtx tmp1 = gen_reg_rtx (V1DImode);
- emit_move_insn (tmp1, lowpart_subreg (V1DImode, *scalar_op,
- scalar_mode));
- tmp1 = lowpart_subreg (vector_mode, tmp1, V1DImode);
-
- rtx ops[] = {tmp, tmp1, CONST0_RTX (Pmode)};
- emit_vlmax_insn (code_for_pred_gather_scalar (vector_mode),
- BINARY_OP, ops);
- }
-
+ rtx ops[] = {tmp, *scalar_op};
+ emit_avltype_insn (code_for_pred_strided_broadcast (vector_mode),
+ UNARY_OP, ops, type, vl);
emit_vector_func (operands, tmp);
return true;
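Note: the new comments in sew64_scalar_helper describe a four-way decision. A standalone decision-tree sketch of that flow (names are placeholders):

enum class vx_strategy
{
  VI,           /* vop.vi with a small immediate */
  VX,           /* vop.vx, always fine on rv64 */
  VX_SEXT,      /* sign-extending SI -> DI vop.vx on rv32 */
  BROADCAST_VV  /* broadcast the 64-bit value, then vop.vv */
};

static vx_strategy
pick_strategy (bool has_vi_variant, bool target_64bit, bool fits_32bit_imm)
{
  if (has_vi_variant)
    return vx_strategy::VI;
  if (target_64bit)
    return vx_strategy::VX;
  if (fits_32bit_imm)
    return vx_strategy::VX_SEXT;
  /* Strided broadcast (or vle64 + vrgather, decided at split time).  */
  return vx_strategy::BROADCAST_VV;
}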
@@ -2591,8 +2650,7 @@ expand_vector_init_merge_repeating_sequence (rtx target,
/* Step 1: Broadcast the first pattern. */
rtx ops[] = {target, force_reg (builder.inner_mode (), builder.elt (0))};
- emit_vlmax_insn (code_for_pred_broadcast (builder.mode ()),
- UNARY_OP, ops);
+ expand_broadcast (builder.mode (), ops);
/* Step 2: Merge the rest iteration of pattern. */
for (unsigned int i = 1; i < builder.npatterns (); i++)
{
@@ -2605,8 +2663,7 @@ expand_vector_init_merge_repeating_sequence (rtx target,
if (full_nelts <= builder.inner_bits_size ()) /* vmv.s.x. */
{
rtx ops[] = {dup, merge_mask};
- emit_nonvlmax_insn (code_for_pred_broadcast (GET_MODE (dup)),
- SCALAR_MOVE_OP, ops, CONST1_RTX (Pmode));
+ expand_set_first (GET_MODE (dup), ops);
}
else /* vmv.v.x. */
{
@@ -2614,8 +2671,7 @@ expand_vector_init_merge_repeating_sequence (rtx target,
force_reg (GET_MODE_INNER (mask_int_mode), merge_mask)};
rtx vl = gen_int_mode (CEIL (full_nelts, builder.inner_bits_size ()),
Pmode);
- emit_nonvlmax_insn (code_for_pred_broadcast (mask_int_mode), UNARY_OP,
- ops, vl);
+ expand_broadcast (mask_int_mode, ops, vl);
}
emit_move_insn (mask, gen_lowpart (mask_bit_mode, dup));
@@ -4706,20 +4762,20 @@ expand_reduction (unsigned unspec, unsigned unspec_for_vl0_safe,
rtx m1_tmp = gen_reg_rtx (m1_mode);
rtx scalar_move_ops[] = {m1_tmp, init};
- insn_code icode = code_for_pred_broadcast (m1_mode);
if (need_mask_operand_p (insn_flags))
{
if (need_vl0_safe)
- emit_nonvlmax_insn (icode, SCALAR_MOVE_OP, scalar_move_ops, const1_rtx);
+ expand_set_first (m1_mode, scalar_move_ops, const1_rtx);
else
- emit_nonvlmax_insn (icode, SCALAR_MOVE_OP, scalar_move_ops, vl_op);
+ expand_set_first (m1_mode, scalar_move_ops, vl_op);
}
else
- emit_vlmax_insn (icode, SCALAR_MOVE_OP, scalar_move_ops);
+ expand_set_first (m1_mode, scalar_move_ops);
rtx m1_tmp2 = gen_reg_rtx (m1_mode);
rtx reduc_ops[] = {m1_tmp2, vector_src, m1_tmp};
+ insn_code icode;
if (need_vl0_safe)
icode = code_for_pred (unspec_for_vl0_safe, vmode);
else
@@ -5597,6 +5653,80 @@ expand_vx_binary_vec_dup_vec (rtx op_0, rtx op_1, rtx op_2,
emit_vlmax_insn (icode, riscv_vector::BINARY_OP, ops);
}
+static enum insn_type
+get_insn_type_by_vxrm_val (int vxrm_val)
+{
+ enum insn_type itype;
+
+ switch (vxrm_val)
+ {
+ case VXRM_RNU:
+ itype = BINARY_OP_VXRM_RNU;
+ break;
+ case VXRM_RNE:
+ itype = BINARY_OP_VXRM_RNE;
+ break;
+ case VXRM_RDN:
+ itype = BINARY_OP_VXRM_RDN;
+ break;
+ case VXRM_ROD:
+ itype = BINARY_OP_VXRM_ROD;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ return itype;
+}
+
+/* Expand the binary vx combine with the format like v2 = vop(v1, vec_dup(x))
+ and its vxrm value. That is, the second op comes from the vec_duplicate,
+ and the first op is the vector reg. */
+
+void
+expand_vx_binary_vxrm_vec_vec_dup (rtx op_0, rtx op_1, rtx op_2, int unspec,
+ int vxrm_val, machine_mode mode)
+{
+ enum insn_code icode;
+ enum insn_type itype = get_insn_type_by_vxrm_val (vxrm_val);
+ rtx ops[] = {op_0, op_1, op_2};
+
+ switch (unspec)
+ {
+ case UNSPEC_VAADDU:
+ icode = code_for_pred_scalar (unspec, mode);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ emit_vlmax_insn (icode, itype, ops);
+}
+
+/* Expand the binary vx combine with the format like v2 = vop(vec_dup(x), v1)
+ and its vxrm value. That is, the first op comes from the vec_duplicate,
+ and the second op is the vector reg. */
+
+void
+expand_vx_binary_vxrm_vec_dup_vec (rtx op_0, rtx op_1, rtx op_2, int unspec,
+ int vxrm_val, machine_mode mode)
+{
+ enum insn_code icode;
+ enum insn_type itype = get_insn_type_by_vxrm_val (vxrm_val);
+ rtx ops[] = {op_0, op_1, op_2};
+
+ switch (unspec)
+ {
+ case UNSPEC_VAADDU:
+ icode = code_for_pred_scalar (unspec, mode);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ emit_vlmax_insn (icode, itype, ops);
+}
+
/* Expand the binary vx combine with the format like v2 = vop(v1, vec_dup(x)).
Aka the second op comes from the vec_duplicate, and the first op is
the vector reg. */
@@ -5808,25 +5938,84 @@ count_regno_occurrences (rtx_insn *rinsn, unsigned int regno)
return count;
}
-/* Return true if the OP can be directly broadcast. */
+/* Return true if the OP can be broadcast with a
+ v[f]mv.v.[xif] instruction. */
+
bool
can_be_broadcast_p (rtx op)
{
machine_mode mode = GET_MODE (op);
- /* We don't allow RA (register allocation) reload generate
- (vec_duplicate:DI reg) in RV32 system wheras we allow
- (vec_duplicate:DI mem) in RV32 system. */
- if (!can_create_pseudo_p () && !FLOAT_MODE_P (mode)
- && maybe_gt (GET_MODE_SIZE (mode), GET_MODE_SIZE (Pmode))
- && !satisfies_constraint_Wdm (op))
+
+ /* Zero always works and we can always put an immediate into a
+ register.
+ What's tricky is that for an immediate we don't know the mode of
+ the register it will end up in, i.e. what element size
+ we want to broadcast. So even if the immediate is small it might
+ still end up in a DImode register that we cannot broadcast.
+ vmv.s.x, i.e. a single-element set can handle this, though,
+ because it implicitly sign-extends to SEW. */
+ if (rtx_equal_p (op, CONST0_RTX (mode))
+ || const_int_operand (op, Xmode))
+ return true;
+
+ /* Do not accept DImode broadcasts on !TARGET_64BIT. Those
+ are handled by strided broadcast. */
+ if (INTEGRAL_MODE_P (mode)
+ && maybe_gt (GET_MODE_SIZE (mode), UNITS_PER_WORD))
+ return false;
+
+ /* Non-register operands that can be forced into a register we can
+ handle. These don't need to use strided broadcast. */
+ if (INTEGRAL_MODE_P (mode)
+ && (memory_operand (op, mode) || CONST_POLY_INT_P (op))
+ && can_create_pseudo_p ())
+ return true;
+
+ /* Likewise, do not accept HFmode broadcast if we don't have
+ vfmv.v.f for 16-bit registers available. */
+ if (mode == HFmode && !TARGET_ZVFH)
+ return false;
+
+ /* Same for float, just that we can always handle 64-bit doubles
+ even on !TARGET_64BIT. We have ruled out 16-bit HF already
+ above. */
+ if (FLOAT_MODE_P (mode)
+ && (memory_operand (op, mode) || CONSTANT_P (op))
+ && can_create_pseudo_p ())
+ return true;
+
+ /* After excluding all the cases we cannot handle, the register types
+ that remain can always be broadcast. */
+ if (register_operand (op, mode))
+ return true;
+
+ return false;
+}
+
+/* Returns true for all operands that cannot use vmv.vx, vfmv.vf,
+ vmv.s.x, or vfmv.s.f but rather need to go via memory. */
+
+bool
+strided_broadcast_p (rtx op)
+{
+ machine_mode mode = GET_MODE (op);
+ if (!memory_operand (op, mode)
+ && !register_operand (op, mode)
+ && !rtx_equal_p (op, CONST0_RTX (mode))
+ && !const_int_operand (op, mode))
return false;
- if (satisfies_constraint_K (op) || register_operand (op, mode)
- || (strided_load_broadcast_p () && satisfies_constraint_Wdm (op))
- || rtx_equal_p (op, CONST0_RTX (mode)))
+ /* !TARGET_64BIT does not have a vmv.v.x/vmv.s.x for 64-bit
+ DImode elements. */
+ if (INTEGRAL_MODE_P (mode)
+ && maybe_gt (GET_MODE_SIZE (mode), UNITS_PER_WORD))
+ return true;
+
+ /* Zvfhmin does not have a vfmv.v.f/vfmv.s.f for 16-bit elements. */
+ if (!TARGET_ZVFH && mode == HFmode)
return true;
- return can_create_pseudo_p () && nonmemory_operand (op, mode);
+ return false;
}
void
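Note: can_be_broadcast_p and strided_broadcast_p are meant to be complementary for valid operands. A simplified classification sketch covering only the two conditions the comments call out (other corner cases are omitted):

enum class bcast_kind
{
  DIRECT,   /* v[f]mv.v.x / v[f]mv.s.x works */
  STRIDED,  /* must go via memory (vlse with zero stride) */
  NONE      /* not a broadcastable operand at all */
};

static bcast_kind
classify (bool valid_operand, bool di_elt_on_rv32, bool hf_elt_without_zvfh)
{
  if (!valid_operand)
    return bcast_kind::NONE;
  if (di_elt_on_rv32 || hf_elt_without_zvfh)
    return bcast_kind::STRIDED;
  return bcast_kind::DIRECT;
}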
@@ -5941,7 +6130,10 @@ whole_reg_to_reg_move_p (rtx *ops, machine_mode mode, int avl_type_index)
return false;
}
-/* Return true if we can transform vmv.v.x/vfmv.v.f to vmv.s.x/vfmv.s.f. */
+/* Return true if we can transform vmv.v.x/vfmv.v.f to vmv.s.x/vfmv.s.f.
+ That's the case if we're dealing with a scalar broadcast that
+ has VL = 1. */
+
bool
splat_to_scalar_move_p (rtx *ops)
{
diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.cc b/gcc/config/riscv/riscv-vector-builtins-bases.cc
index bf5172c..7e4d396 100644
--- a/gcc/config/riscv/riscv-vector-builtins-bases.cc
+++ b/gcc/config/riscv/riscv-vector-builtins-bases.cc
@@ -643,7 +643,8 @@ public:
return e.use_exact_insn (code_for_pred_mov (e.vector_mode ()));
case OP_TYPE_x:
case OP_TYPE_f:
- return e.use_exact_insn (code_for_pred_broadcast (e.vector_mode ()));
+ return e.use_scalar_broadcast_insn
+ (code_for_pred_broadcast (e.vector_mode ()));
default:
gcc_unreachable ();
}
diff --git a/gcc/config/riscv/riscv-vector-builtins.cc b/gcc/config/riscv/riscv-vector-builtins.cc
index 8810af0..0db7549 100644
--- a/gcc/config/riscv/riscv-vector-builtins.cc
+++ b/gcc/config/riscv/riscv-vector-builtins.cc
@@ -4753,7 +4753,10 @@ function_expander::use_ternop_insn (bool vd_accum_p, insn_code icode)
}
/* Implement the call using instruction ICODE, with a 1:1 mapping between
- arguments and input operands. */
+ arguments and input operands.
+ There are operands that cannot be broadcast using v[f]mv. In that case
+ we switch to a strided broadcast. */
+
rtx
function_expander::use_widen_ternop_insn (insn_code icode)
{
@@ -4794,7 +4797,10 @@ function_expander::use_widen_ternop_insn (insn_code icode)
}
/* Implement the call using instruction ICODE, with a 1:1 mapping between
- arguments and input operands. */
+ arguments and input operands.
+ There are operands that cannot be broadcast using v[f]mv. In that case
+ we switch to a strided broadcast. */
+
rtx
function_expander::use_scalar_move_insn (insn_code icode)
{
@@ -4812,6 +4818,37 @@ function_expander::use_scalar_move_insn (insn_code icode)
for (int argno = arg_offset; argno < call_expr_nargs (exp); argno++)
add_input_operand (argno);
+ if (!can_be_broadcast_p (m_ops[3].value))
+ icode = code_for_pred_strided_broadcast (vector_mode ());
+
+ add_input_operand (Pmode, get_tail_policy_for_pred (pred));
+ add_input_operand (Pmode, get_mask_policy_for_pred (pred));
+ add_input_operand (Pmode, get_avl_type_rtx (avl_type::NONVLMAX));
+ return generate_insn (icode);
+}
+
+/* Implement the call using instruction ICODE, with a 1:1 mapping between
+ arguments and input operands. */
+rtx
+function_expander::use_scalar_broadcast_insn (insn_code icode)
+{
+ machine_mode mode = TYPE_MODE (TREE_TYPE (exp));
+
+ /* Record the offset to get the argument. */
+ int arg_offset = 0;
+ add_all_one_mask_operand (mask_mode ());
+
+ if (use_real_merge_p (pred))
+ add_input_operand (arg_offset++);
+ else
+ add_vundef_operand (mode);
+
+ for (int argno = arg_offset; argno < call_expr_nargs (exp); argno++)
+ add_input_operand (argno);
+
+ if (!can_be_broadcast_p (m_ops[3].value))
+ icode = code_for_pred_strided_broadcast (vector_mode ());
+
add_input_operand (Pmode, get_tail_policy_for_pred (pred));
add_input_operand (Pmode, get_mask_policy_for_pred (pred));
add_input_operand (Pmode, get_avl_type_rtx (avl_type::NONVLMAX));
diff --git a/gcc/config/riscv/riscv-vector-builtins.h b/gcc/config/riscv/riscv-vector-builtins.h
index 1f2587a..86d8115 100644
--- a/gcc/config/riscv/riscv-vector-builtins.h
+++ b/gcc/config/riscv/riscv-vector-builtins.h
@@ -497,6 +497,7 @@ public:
rtx use_ternop_insn (bool, insn_code);
rtx use_widen_ternop_insn (insn_code);
rtx use_scalar_move_insn (insn_code);
+ rtx use_scalar_broadcast_insn (insn_code);
rtx generate_insn (insn_code);
/* The function call expression. */
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 1275b03..3324819 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -3967,13 +3967,27 @@ get_vector_binary_rtx_cost (rtx x, int scalar2vr_cost)
{
gcc_assert (riscv_v_ext_mode_p (GET_MODE (x)));
- rtx op_0 = XEXP (x, 0);
- rtx op_1 = XEXP (x, 1);
+ rtx neg;
+ rtx op_0;
+ rtx op_1;
+
+ if (GET_CODE (x) == UNSPEC)
+ {
+ op_0 = XVECEXP (x, 0, 0);
+ op_1 = XVECEXP (x, 0, 1);
+ }
+ else
+ {
+ op_0 = XEXP (x, 0);
+ op_1 = XEXP (x, 1);
+ }
if (GET_CODE (op_0) == VEC_DUPLICATE
|| GET_CODE (op_1) == VEC_DUPLICATE)
return (scalar2vr_cost + 1) * COSTS_N_INSNS (1);
- else if (GET_CODE (op_0) == NEG && GET_CODE (op_1) == VEC_DUPLICATE)
+ else if (GET_CODE (neg = op_0) == NEG
+ && (GET_CODE (op_1) == VEC_DUPLICATE
+ || GET_CODE (XEXP (neg, 0)) == VEC_DUPLICATE))
return (scalar2vr_cost + 1) * COSTS_N_INSNS (1);
else
return COSTS_N_INSNS (1);
@@ -4021,6 +4035,20 @@ riscv_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno ATTRIBUTE_UN
case SS_MINUS:
*total = get_vector_binary_rtx_cost (op, scalar2vr_cost);
break;
+ case UNSPEC:
+ {
+ switch (XINT (op, 1))
+ {
+ case UNSPEC_VAADDU:
+ *total
+ = get_vector_binary_rtx_cost (op, scalar2vr_cost);
+ break;
+ default:
+ *total = COSTS_N_INSNS (1);
+ break;
+ }
+ }
+ break;
default:
*total = COSTS_N_INSNS (1);
break;
@@ -9049,7 +9077,7 @@ riscv_allocate_and_probe_stack_space (rtx temp1, HOST_WIDE_INT size)
/* We want the CFA independent of the stack pointer for the
duration of the loop. */
add_reg_note (insn, REG_CFA_DEF_CFA,
- plus_constant (Pmode, temp1,
+ plus_constant (Pmode, temp2,
initial_cfa_offset + rounded_size));
RTX_FRAME_RELATED_P (insn) = 1;
}
@@ -10359,10 +10387,10 @@ riscv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr);
bool sched1 = can_create_pseudo_p ();
- unsigned int prev_dest_regno = (REG_P (SET_DEST (prev_set))
+ unsigned int prev_dest_regno = (prev_set && REG_P (SET_DEST (prev_set))
? REGNO (SET_DEST (prev_set))
: FIRST_PSEUDO_REGISTER);
- unsigned int curr_dest_regno = (REG_P (SET_DEST (curr_set))
+ unsigned int curr_dest_regno = (curr_set && REG_P (SET_DEST (curr_set))
? REGNO (SET_DEST (curr_set))
: FIRST_PSEUDO_REGISTER);
diff --git a/gcc/config/riscv/vector-iterators.md b/gcc/config/riscv/vector-iterators.md
index 5f6cc42..dbb48a4 100644
--- a/gcc/config/riscv/vector-iterators.md
+++ b/gcc/config/riscv/vector-iterators.md
@@ -4013,6 +4013,14 @@
UNSPEC_VASUBU UNSPEC_VASUB UNSPEC_VSMUL
UNSPEC_VSSRL UNSPEC_VSSRA])
+(define_int_iterator VSAT_VX_OP_V_VDUP [
+ UNSPEC_VAADDU
+])
+
+(define_int_iterator VSAT_VX_OP_VDUP_V [
+ UNSPEC_VAADDU
+])
+
(define_int_iterator VSAT_ARITH_OP [UNSPEC_VAADDU UNSPEC_VAADD
UNSPEC_VASUBU UNSPEC_VASUB UNSPEC_VSMUL])
(define_int_iterator VSAT_SHIFT_OP [UNSPEC_VSSRL UNSPEC_VSSRA])
@@ -4047,6 +4055,14 @@
(UNSPEC_VSSRA "vsshift") (UNSPEC_VNCLIP "vnclip")
(UNSPEC_VNCLIPU "vnclip")])
+(define_int_attr sat_op_v_vdup [
+ (UNSPEC_VAADDU "aaddu")
+])
+
+(define_int_attr sat_op_vdup_v [
+ (UNSPEC_VAADDU "aaddu")
+])
+
(define_int_attr misc_op [(UNSPEC_VMSBF "sbf") (UNSPEC_VMSIF "sif") (UNSPEC_VMSOF "sof")
(UNSPEC_VFRSQRT7 "rsqrt7")])
diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index baf215b..66b7670 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -1551,20 +1551,44 @@
(define_expand "vec_duplicate<mode>"
[(set (match_operand:V_VLS 0 "register_operand")
(vec_duplicate:V_VLS
- (match_operand:<VEL> 1 "direct_broadcast_operand")))]
+ (match_operand:<VEL> 1 "any_broadcast_operand")))]
"TARGET_VECTOR"
{
- /* Early expand DImode broadcast in RV32 system to avoid RA reload
- generate (set (reg) (vec_duplicate:DI)). */
+ /* Don't keep a DImode broadcast for RV32 in the vec_duplicate form.
+ Otherwise combine or late combine could end up doing
+ "64-bit broadcast" (!= vmv.v.x)
+ + vadd.vv
+ = vadd.vx
+ which would be invalid. */
bool gt_p = maybe_gt (GET_MODE_SIZE (<VEL>mode), GET_MODE_SIZE (Pmode));
if (!FLOAT_MODE_P (<VEL>mode) && gt_p)
{
- riscv_vector::emit_vlmax_insn (code_for_pred_broadcast (<MODE>mode),
- riscv_vector::UNARY_OP, operands);
- DONE;
+ riscv_vector::emit_vlmax_insn
+ (code_for_pred_strided_broadcast
+ (<MODE>mode), riscv_vector::UNARY_OP, operands);
+ DONE;
}
- /* Otherwise, allow it fall into general vec_duplicate pattern
- which allow us to have vv->vx combine optimization in later pass. */
+
+ /* Even though we can eventually broadcast any permissible
+ constant by moving it into a register we need to force
+ any non-immediate one into a register here.
+ If we didn't do that we couldn't fwprop/late-combine
+ vec_duplicate 123.45f
+ + vfadd.vv
+ = vfadd.vf
+ because the constant is valid for vec_duplicate but not
+ for vfadd.vf. Therefore we need to do
+ fa0 = 123.45f
+ vec_duplicate fa0
+ + vfadd.vv
+ = vfadd.vf */
+ if (!satisfies_constraint_P (operands[1])
+ && !satisfies_constraint_J (operands[1])
+ && !rtx_equal_p (operands[1], CONST0_RTX (<VEL>mode))
+ && !memory_operand (operands[1], <VEL>mode))
+ operands[1] = force_reg (<VEL>mode, operands[1]);
+
+ /* Otherwise keep the vec_duplicate pattern until split. */
})
;; According to GCC internal:
@@ -1574,28 +1598,20 @@
(define_insn_and_split "*vec_duplicate<mode>"
[(set (match_operand:V_VLS 0 "register_operand")
(vec_duplicate:V_VLS
- (match_operand:<VEL> 1 "direct_broadcast_operand")))]
+ (match_operand:<VEL> 1 "any_broadcast_operand")))]
"TARGET_VECTOR && can_create_pseudo_p ()"
"#"
"&& 1"
[(const_int 0)]
{
- if (!strided_load_broadcast_p ()
- && TARGET_ZVFHMIN && !TARGET_ZVFH && <VEL>mode == HFmode)
- {
- /* For Float16, reinterpret as HImode, broadcast and reinterpret
- back. */
- poly_uint64 nunits = GET_MODE_NUNITS (<MODE>mode);
- machine_mode vmodehi
- = riscv_vector::get_vector_mode (HImode, nunits).require ();
- rtx ops[] = {lowpart_subreg (vmodehi, operands[0], <MODE>mode),
- lowpart_subreg (HImode, operands[1], HFmode)};
- riscv_vector::emit_vlmax_insn (code_for_pred_broadcast (vmodehi),
- riscv_vector::UNARY_OP, ops);
- }
- else
+ if (riscv_vector::can_be_broadcast_p (operands[1]))
riscv_vector::emit_vlmax_insn (code_for_pred_broadcast (<MODE>mode),
riscv_vector::UNARY_OP, operands);
+ else
+ riscv_vector::emit_vlmax_insn (code_for_pred_strided_broadcast
+ (<MODE>mode), riscv_vector::UNARY_OP,
+ operands);
+
DONE;
}
[(set_attr "type" "vector")]
@@ -2141,69 +2157,45 @@
(match_operand:V_VLS 2 "vector_merge_operand")))]
"TARGET_VECTOR"
{
- /* Transform vmv.v.x/vfmv.v.f (avl = 1) into vmv.s.x since vmv.s.x/vfmv.s.f
- has better chances to do vsetvl fusion in vsetvl pass. */
bool wrap_vec_dup = true;
rtx vec_cst = NULL_RTX;
- if (riscv_vector::splat_to_scalar_move_p (operands))
- {
- operands[1] = riscv_vector::gen_scalar_move_mask (<VM>mode);
- operands[3] = force_reg (<VEL>mode, operands[3]);
- }
- else if (immediate_operand (operands[3], <VEL>mode)
- && (vec_cst = gen_const_vec_duplicate (<MODE>mode, operands[3]))
- && (/* -> pred_broadcast<mode>_zero */
- (vector_least_significant_set_mask_operand (operands[1],
- <VM>mode)
- && vector_const_0_operand (vec_cst, <MODE>mode))
- || (/* pred_broadcast<mode>_imm */
- vector_all_trues_mask_operand (operands[1], <VM>mode)
- && vector_const_int_or_double_0_operand (vec_cst,
- <MODE>mode))))
+ if (immediate_operand (operands[3], <VEL>mode)
+ && (vec_cst = gen_const_vec_duplicate (<MODE>mode, operands[3]))
+ && (/* -> pred_broadcast<mode>_zero */
+ (vector_least_significant_set_mask_operand (operands[1],
+ <VM>mode)
+ && vector_const_0_operand (vec_cst, <MODE>mode))
+ || (/* pred_broadcast<mode>_imm */
+ vector_all_trues_mask_operand (operands[1], <VM>mode)
+ && vector_const_int_or_double_0_operand (vec_cst,
+ <MODE>mode))))
{
operands[3] = vec_cst;
wrap_vec_dup = false;
}
- /* Handle vmv.s.x instruction (Wb1 mask) which has memory scalar. */
- else if (satisfies_constraint_Wdm (operands[3]))
- {
- if (satisfies_constraint_Wb1 (operands[1]))
- {
- /* Case 1: vmv.s.x (TA, x == memory) ==> vlse.v (TA) */
- if (satisfies_constraint_vu (operands[2]))
- operands[1] = CONSTM1_RTX (<VM>mode);
- else if (GET_MODE_BITSIZE (<VEL>mode) > GET_MODE_BITSIZE (Pmode))
- {
- /* Case 2: vmv.s.x (TU, x == memory) ==>
- vl = 0 or 1; + vlse.v (TU) in RV32 system */
- operands[4] = riscv_vector::gen_avl_for_scalar_move (operands[4]);
- operands[1] = CONSTM1_RTX (<VM>mode);
- }
- else
- /* Case 3: load x (memory) to register. */
- operands[3] = force_reg (<VEL>mode, operands[3]);
- }
- }
- else if (GET_MODE_BITSIZE (<VEL>mode) > GET_MODE_BITSIZE (Pmode)
- && (immediate_operand (operands[3], Pmode)
+ else if (GET_MODE_SIZE (<VEL>mode) > UNITS_PER_WORD
+ && satisfies_constraint_Wb1 (operands[1])
+ && (immediate_operand (operands[3], Xmode)
|| (CONST_POLY_INT_P (operands[3])
&& known_ge (rtx_to_poly_int64 (operands[3]), 0U)
- && known_le (rtx_to_poly_int64 (operands[3]), GET_MODE_SIZE (<MODE>mode)))))
+ && known_le (rtx_to_poly_int64 (operands[3]),
+ GET_MODE_SIZE (<MODE>mode)))))
{
rtx tmp = gen_reg_rtx (Pmode);
poly_int64 value = rtx_to_poly_int64 (operands[3]);
- emit_move_insn (tmp, gen_int_mode (value, Pmode));
+ emit_move_insn (tmp, gen_int_mode (value, Xmode));
operands[3] = gen_rtx_SIGN_EXTEND (<VEL>mode, tmp);
}
- /* Never load (const_int 0) into a register, that's silly. */
- else if (operands[3] == CONST0_RTX (<VEL>mode))
+
+ /* For a vmv.v.x never load (const_int 0) or valid immediate operands
+ into a register, because we can use vmv.v.i. */
+ else if (satisfies_constraint_Wc1 (operands[1])
+ && (satisfies_constraint_P (operands[3])
+ || operands[3] == CONST0_RTX (<VEL>mode)))
;
- /* If we're broadcasting [-16..15] across more than just
- element 0, then we can use vmv.v.i directly, thus avoiding
- the load of the constant into a GPR. */
- else if (CONST_INT_P (operands[3])
- && IN_RANGE (INTVAL (operands[3]), -16, 15)
- && !satisfies_constraint_Wb1 (operands[1]))
+ /* For vmv.s.x we have vmv.s.x v1, zero. */
+ else if (satisfies_constraint_Wb1 (operands[1])
+ && operands[3] == CONST0_RTX (<VEL>mode))
;
else
operands[3] = force_reg (<VEL>mode, operands[3]);
@@ -2211,131 +2203,68 @@
operands[3] = gen_rtx_VEC_DUPLICATE (<MODE>mode, operands[3]);
})
-(define_insn_and_split "*pred_broadcast<mode>"
- [(set (match_operand:V_VLSI 0 "register_operand" "=vr, vr, vd, vd, vr, vr, vr, vr")
+(define_insn_and_rewrite "*pred_broadcast<mode>"
+ [(set (match_operand:V_VLSI 0 "register_operand" "=vr, vr, vr, vr")
(if_then_else:V_VLSI
(unspec:<VM>
- [(match_operand:<VM> 1 "vector_broadcast_mask_operand" "Wc1,Wc1, vm, vm,Wc1,Wc1,Wb1,Wb1")
- (match_operand 4 "vector_length_operand" "rvl,rvl,rvl,rvl,rvl,rvl,rvl,rvl")
- (match_operand 5 "const_int_operand" " i, i, i, i, i, i, i, i")
- (match_operand 6 "const_int_operand" " i, i, i, i, i, i, i, i")
- (match_operand 7 "const_int_operand" " i, i, i, i, i, i, i, i")
+ [(match_operand:<VM> 1 "vector_broadcast_mask_operand" "Wc1,Wc1,Wb1,Wb1")
+ (match_operand 4 "vector_length_operand" "rvl,rvl,rvl,rvl")
+ (match_operand 5 "const_int_operand" " i, i, i, i")
+ (match_operand 6 "const_int_operand" " i, i, i, i")
+ (match_operand 7 "const_int_operand" " i, i, i, i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
(vec_duplicate:V_VLSI
- (match_operand:<VEL> 3 "direct_broadcast_operand" "rP,rP,Wdm,Wdm,Wdm,Wdm, rJ, rJ"))
- (match_operand:V_VLSI 2 "vector_merge_operand" "vu, 0, vu, 0, vu, 0, vu, 0")))]
+ (match_operand:<VEL> 3 "direct_broadcast_operand" " rP, rP, rJ, rJ"))
+ (match_operand:V_VLSI 2 "vector_merge_operand" " vu, 0, vu, 0")))]
"TARGET_VECTOR"
"@
vmv.v.%o3\t%0,%3
vmv.v.%o3\t%0,%3
- vlse<sew>.v\t%0,%3,zero,%1.t
- vlse<sew>.v\t%0,%3,zero,%1.t
- vlse<sew>.v\t%0,%3,zero
- vlse<sew>.v\t%0,%3,zero
vmv.s.x\t%0,%z3
vmv.s.x\t%0,%z3"
- "(register_operand (operands[3], <VEL>mode)
- || CONST_POLY_INT_P (operands[3]))
- && GET_MODE_BITSIZE (<VEL>mode) > GET_MODE_BITSIZE (Pmode)"
- [(const_int 0)]
- {
- gcc_assert (can_create_pseudo_p ());
- if (CONST_POLY_INT_P (operands[3]))
- {
- rtx tmp = gen_reg_rtx (<VEL>mode);
- emit_move_insn (tmp, operands[3]);
- operands[3] = tmp;
- }
-
- /* For SEW = 64 in RV32 system, we expand vmv.s.x:
- andi a2,a2,1
- vsetvl zero,a2,e64
- vlse64.v */
- if (satisfies_constraint_Wb1 (operands[1]))
- {
- operands[4] = riscv_vector::gen_avl_for_scalar_move (operands[4]);
- operands[1] = CONSTM1_RTX (<VM>mode);
- }
-
- /* If the target doesn't want a strided-load broadcast we go with a regular
- V1DImode load and a broadcast gather. */
- if (strided_load_broadcast_p ())
- {
- rtx mem = assign_stack_local (<VEL>mode, GET_MODE_SIZE (<VEL>mode),
- GET_MODE_ALIGNMENT (<VEL>mode));
- mem = validize_mem (mem);
- emit_move_insn (mem, operands[3]);
- mem = gen_rtx_MEM (<VEL>mode, force_reg (Pmode, XEXP (mem, 0)));
-
- emit_insn
- (gen_pred_broadcast<mode>
- (operands[0], operands[1], operands[2], mem,
- operands[4], operands[5], operands[6], operands[7]));
- }
- else
- {
- rtx tmp = gen_reg_rtx (V1DImode);
- emit_move_insn (tmp, lowpart_subreg (V1DImode, operands[3],
- <VEL>mode));
- tmp = lowpart_subreg (<MODE>mode, tmp, V1DImode);
-
- emit_insn
- (gen_pred_gather<mode>_scalar
- (operands[0], operands[1], operands[2], tmp, CONST0_RTX (Pmode),
- operands[4], operands[5], operands[6], operands[7]));
- }
- DONE;
- }
- [(set_attr "type" "vimov,vimov,vlds,vlds,vlds,vlds,vimovxv,vimovxv")
+ "&& (operands[1] == CONSTM1_RTX (<VM>mode)
+ && operands[4] == CONST1_RTX (Pmode)
+ && (register_operand (operands[3], <VEL>mode)
+ || satisfies_constraint_J (operands[3])))"
+{
+ /* A broadcast of a single element is just a vmv.s.x. */
+ operands[1] = riscv_vector::gen_scalar_move_mask (<VM>mode);
+}
+ [(set_attr "type" "vimov,vimov,vimovxv,vimovxv")
(set_attr "mode" "<MODE>")])
-(define_insn "*pred_broadcast<mode>_zvfh"
- [(set (match_operand:V_VLSF 0 "register_operand" "=vr, vr, vr, vr")
+(define_insn_and_rewrite "pred_broadcast<mode>_zvfh"
+ [(set (match_operand:V_VLSF 0 "register_operand" "=vr, vr, vr, vr")
(if_then_else:V_VLSF
(unspec:<VM>
- [(match_operand:<VM> 1 "vector_broadcast_mask_operand" "Wc1, Wc1, Wb1, Wb1")
- (match_operand 4 "vector_length_operand" "rvl, rvl, rvl, rvl")
- (match_operand 5 "const_int_operand" " i, i, i, i")
- (match_operand 6 "const_int_operand" " i, i, i, i")
- (match_operand 7 "const_int_operand" " i, i, i, i")
+ [(match_operand:<VM> 1 "vector_broadcast_mask_operand" "Wc1,Wc1,Wb1,Wb1")
+ (match_operand 4 "vector_length_operand" "rvl,rvl,rvl,rvl")
+ (match_operand 5 "const_int_operand" " i, i, i, i")
+ (match_operand 6 "const_int_operand" " i, i, i, i")
+ (match_operand 7 "const_int_operand" " i, i, i, i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
(vec_duplicate:V_VLSF
- (match_operand:<VEL> 3 "direct_broadcast_operand" " f, f, f, f"))
- (match_operand:V_VLSF 2 "vector_merge_operand" " vu, 0, vu, 0")))]
+ (match_operand:<VEL> 3 "direct_broadcast_operand" " f, f, f, f"))
+ (match_operand:V_VLSF 2 "vector_merge_operand" " vu, 0, vu, 0")))]
"TARGET_VECTOR"
"@
vfmv.v.f\t%0,%3
vfmv.v.f\t%0,%3
vfmv.s.f\t%0,%3
vfmv.s.f\t%0,%3"
+ "&& (operands[1] == CONSTM1_RTX (<VM>mode)
+ && operands[4] == CONST1_RTX (Pmode)
+ && (register_operand (operands[3], <VEL>mode)
+ || satisfies_constraint_J (operands[3])))"
+{
+ /* A broadcast of a single element is just a vfmv.s.f. */
+ operands[1] = riscv_vector::gen_scalar_move_mask (<VM>mode);
+}
[(set_attr "type" "vfmov,vfmov,vfmovfv,vfmovfv")
(set_attr "mode" "<MODE>")])
-(define_insn "*pred_broadcast<mode>_zvfhmin"
- [(set (match_operand:V_VLSF_ZVFHMIN 0 "register_operand" "=vr, vr, vr, vr")
- (if_then_else:V_VLSF_ZVFHMIN
- (unspec:<VM>
- [(match_operand:<VM> 1 "vector_broadcast_mask_operand" " vm, vm, Wc1, Wc1")
- (match_operand 4 "vector_length_operand" "rvl, rvl, rvl, rvl")
- (match_operand 5 "const_int_operand" " i, i, i, i")
- (match_operand 6 "const_int_operand" " i, i, i, i")
- (match_operand 7 "const_int_operand" " i, i, i, i")
- (reg:SI VL_REGNUM)
- (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
- (vec_duplicate:V_VLSF_ZVFHMIN
- (match_operand:<VEL> 3 "direct_broadcast_operand" " A, A, A, A"))
- (match_operand:V_VLSF_ZVFHMIN 2 "vector_merge_operand" " vu, 0, vu, 0")))]
- "TARGET_VECTOR && strided_load_broadcast_p ()"
- "@
- vlse<sew>.v\t%0,%3,zero,%1.t
- vlse<sew>.v\t%0,%3,zero,%1.t
- vlse<sew>.v\t%0,%3,zero
- vlse<sew>.v\t%0,%3,zero"
- [(set_attr "type" "vlds,vlds,vlds,vlds")
- (set_attr "mode" "<MODE>")])
-
(define_insn "*pred_broadcast<mode>_extended_scalar"
[(set (match_operand:V_VLSI_D 0 "register_operand" "=vr, vr, vr, vr")
(if_then_else:V_VLSI_D
@@ -2398,6 +2327,117 @@
[(set_attr "type" "vimov,vimov")
(set_attr "mode" "<MODE>")])
+(define_expand "@pred_strided_broadcast<mode>"
+ [(set (match_operand:V_VLS 0 "register_operand")
+ (if_then_else:V_VLS
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "strided_broadcast_mask_operand")
+ (match_operand 4 "vector_length_operand")
+ (match_operand 5 "const_int_operand")
+ (match_operand 6 "const_int_operand")
+ (match_operand 7 "const_int_operand")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (vec_duplicate:V_VLS
+ (match_operand:<VEL> 3 "strided_broadcast_operand"))
+ (match_operand:V_VLS 2 "vector_merge_operand")))]
+ "TARGET_VECTOR"
+{
+ if (satisfies_constraint_Wb1 (operands[1]))
+ {
+      /* If we're asked to set a single element (like vmv.s.x, except that
+	 we need to go via memory here) and the tail policy is agnostic,
+	 we can overwrite all elements.
+	 Thus, set the mask to broadcast.  */
+ operands[1] = CONSTM1_RTX (<VM>mode);
+ if (!satisfies_constraint_vu (operands[2])
+ && GET_MODE_SIZE (<VEL>mode) > UNITS_PER_WORD)
+ {
+	  /* Case 2: vmv.s.x (TU, x == memory) ==>
+	     vl = 0 or 1, then vlse.v (TU), in an RV32 system.  */
+ /* In this case we must not overwrite the residual elements,
+ so set the vector length to 0/1. */
+ operands[4] = riscv_vector::gen_avl_for_scalar_move (operands[4]);
+ }
+ }
+})
+
+(define_insn_and_split "*pred_strided_broadcast<mode>"
+ [(set (match_operand:V_VLSI 0 "register_operand" "=vd, vd, vr, vr")
+ (if_then_else:V_VLSI
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "strided_broadcast_mask_operand" " vm, vm,Wc1,Wc1")
+ (match_operand 4 "vector_length_operand" "rvl,rvl,rvl,rvl")
+ (match_operand 5 "const_int_operand" " i, i, i, i")
+ (match_operand 6 "const_int_operand" " i, i, i, i")
+ (match_operand 7 "const_int_operand" " i, i, i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (vec_duplicate:V_VLSI
+ (match_operand:<VEL> 3 "strided_broadcast_operand" " A, A, A, A"))
+ (match_operand:V_VLSI 2 "vector_merge_operand" " vu, 0, vu, 0")))]
+ "TARGET_VECTOR"
+ "@
+ vlse<sew>.v\t%0,%3,zero,%1.t
+ vlse<sew>.v\t%0,%3,zero,%1.t
+ vlse<sew>.v\t%0,%3,zero
+ vlse<sew>.v\t%0,%3,zero"
+ "&& !strided_load_broadcast_p () && can_create_pseudo_p ()"
+ [(const_int 0)]
+ {
+ rtx tmp = gen_reg_rtx (V1DImode);
+ emit_move_insn (tmp, gen_lowpart (V1DImode, operands[3]));
+ tmp = lowpart_subreg (<MODE>mode, tmp, V1DImode);
+
+ emit_insn
+ (gen_pred_gather<mode>_scalar
+ (operands[0], operands[1], operands[2], tmp, CONST0_RTX (Pmode),
+ operands[4], operands[5], operands[6], operands[7]));
+ DONE;
+ }
+ [(set_attr "type" "vlds,vlds,vlds,vlds")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn_and_split "*pred_strided_broadcast<mode>_zvfhmin"
+ [(set (match_operand:V_VLSF_ZVFHMIN 0 "register_operand" "=vr, vr, vr, vr")
+ (if_then_else:V_VLSF_ZVFHMIN
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "strided_broadcast_mask_operand" " vm, vm, Wc1, Wc1")
+ (match_operand 4 "vector_length_operand" "rvl, rvl, rvl, rvl")
+ (match_operand 5 "const_int_operand" " i, i, i, i")
+ (match_operand 6 "const_int_operand" " i, i, i, i")
+ (match_operand 7 "const_int_operand" " i, i, i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (vec_duplicate:V_VLSF_ZVFHMIN
+ (match_operand:<VEL> 3 "strided_broadcast_operand" " A, A, A, A"))
+ (match_operand:V_VLSF_ZVFHMIN 2 "vector_merge_operand" " vu, 0, vu, 0")))]
+ "TARGET_VECTOR"
+ "@
+ vlse<sew>.v\t%0,%3,zero,%1.t
+ vlse<sew>.v\t%0,%3,zero,%1.t
+ vlse<sew>.v\t%0,%3,zero
+ vlse<sew>.v\t%0,%3,zero"
+ "&& !strided_load_broadcast_p ()
+ && <VEL>mode == HFmode
+ && can_create_pseudo_p ()"
+ [(const_int 0)]
+ {
+ poly_uint64 nunits = GET_MODE_NUNITS (<MODE>mode);
+ machine_mode vmodehi
+ = riscv_vector::get_vector_mode (HImode, nunits).require ();
+ rtx ops[] = {gen_lowpart (vmodehi, operands[0]),
+ gen_lowpart (HImode, operands[3])};
+ riscv_vector::emit_avltype_insn (code_for_pred_broadcast (vmodehi),
+ riscv_vector::UNARY_OP, ops,
+ (riscv_vector::avl_type) INTVAL (operands[7]),
+ operands[4]);
+ DONE;
+ }
+ [(set_attr "type" "vlds,vlds,vlds,vlds")
+ (set_attr "mode" "<MODE>")])
+
+
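+
As a usage illustration only: a caller in riscv-v.cc could drive the new
@pred_strided_broadcast expander added above through the existing
riscv_vector emit helpers, much like the HImode fallback in the zvfhmin
split does for pred_broadcast.  The helper choice (emit_vlmax_insn) and the
operand setup below are assumptions made for this sketch, not code taken
from the patch.

/* Hedged sketch: broadcast the scalar stored at MEM into DEST using the
   newly named @pred_strided_broadcast pattern.  emit_vlmax_insn fills in
   the mask, merge, VL and policy operands; only {dest, source} is passed,
   as in the UNARY_OP uses elsewhere in this patch.  */
static void
example_emit_strided_broadcast (machine_mode mode, rtx dest, rtx mem)
{
  rtx ops[] = {dest, mem};
  riscv_vector::emit_vlmax_insn (code_for_pred_strided_broadcast (mode),
				 riscv_vector::UNARY_OP, ops);
}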
;; -------------------------------------------------------------------------------
;; ---- Predicated Strided loads/stores
;; -------------------------------------------------------------------------------
@@ -4639,8 +4679,8 @@
;; Handle GET_MODE_INNER (mode) = DImode.  We need to split them since
;; we need to deal with SEW = 64 in an RV32 system.
(define_expand "@pred_<sat_op><mode>_scalar"
- [(set (match_operand:VI_D 0 "register_operand")
- (if_then_else:VI_D
+ [(set (match_operand:V_VLSI_D 0 "register_operand")
+ (if_then_else:V_VLSI_D
(unspec:<VM>
[(match_operand:<VM> 1 "vector_mask_operand")
(match_operand 5 "vector_length_operand")
@@ -4651,10 +4691,10 @@
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)
(reg:SI VXRM_REGNUM)] UNSPEC_VPREDICATE)
- (unspec:VI_D
- [(match_operand:VI_D 3 "register_operand")
+ (unspec:V_VLSI_D
+ [(match_operand:V_VLSI_D 3 "register_operand")
(match_operand:<VEL> 4 "reg_or_int_operand")] VSAT_ARITH_OP)
- (match_operand:VI_D 2 "vector_merge_operand")))]
+ (match_operand:V_VLSI_D 2 "vector_merge_operand")))]
"TARGET_VECTOR"
{
if (riscv_vector::sew64_scalar_helper (
@@ -4673,8 +4713,8 @@
})
(define_insn "*pred_<sat_op><mode>_scalar"
- [(set (match_operand:VI_D 0 "register_operand" "=vd, vr, vd, vr")
- (if_then_else:VI_D
+ [(set (match_operand:V_VLSI_D 0 "register_operand" "=vd, vr, vd, vr")
+ (if_then_else:V_VLSI_D
(unspec:<VM>
[(match_operand:<VM> 1 "vector_mask_operand" " vm, vm,Wc1,Wc1")
(match_operand 5 "vector_length_operand" "rvl,rvl,rvl,rvl")
@@ -4685,18 +4725,18 @@
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)
(reg:SI VXRM_REGNUM)] UNSPEC_VPREDICATE)
- (unspec:VI_D
- [(match_operand:VI_D 3 "register_operand" " vr, vr, vr, vr")
+ (unspec:V_VLSI_D
+ [(match_operand:V_VLSI_D 3 "register_operand" " vr, vr, vr, vr")
(match_operand:<VEL> 4 "reg_or_0_operand" " rJ, rJ, rJ, rJ")] VSAT_ARITH_OP)
- (match_operand:VI_D 2 "vector_merge_operand" " vu, 0, vu, 0")))]
+ (match_operand:V_VLSI_D 2 "vector_merge_operand" " vu, 0, vu, 0")))]
"TARGET_VECTOR"
"v<sat_op>.vx\t%0,%3,%z4%p1"
[(set_attr "type" "<sat_insn_type>")
(set_attr "mode" "<MODE>")])
(define_insn "*pred_<sat_op><mode>_extended_scalar"
- [(set (match_operand:VI_D 0 "register_operand" "=vd, vr, vd, vr")
- (if_then_else:VI_D
+ [(set (match_operand:V_VLSI_D 0 "register_operand" "=vd, vr, vd, vr")
+ (if_then_else:V_VLSI_D
(unspec:<VM>
[(match_operand:<VM> 1 "vector_mask_operand" " vm, vm,Wc1,Wc1")
(match_operand 5 "vector_length_operand" "rvl,rvl,rvl,rvl")
@@ -4707,11 +4747,11 @@
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)
(reg:SI VXRM_REGNUM)] UNSPEC_VPREDICATE)
- (unspec:VI_D
- [(match_operand:VI_D 3 "register_operand" " vr, vr, vr, vr")
+ (unspec:V_VLSI_D
+ [(match_operand:V_VLSI_D 3 "register_operand" " vr, vr, vr, vr")
(sign_extend:<VEL>
(match_operand:<VSUBEL> 4 "reg_or_0_operand" " rJ, rJ, rJ, rJ"))] VSAT_ARITH_OP)
- (match_operand:VI_D 2 "vector_merge_operand" " vu, 0, vu, 0")))]
+ (match_operand:V_VLSI_D 2 "vector_merge_operand" " vu, 0, vu, 0")))]
"TARGET_VECTOR && !TARGET_64BIT"
"v<sat_op>.vx\t%0,%3,%z4%p1"
[(set_attr "type" "<sat_insn_type>")
diff --git a/gcc/config/riscv/xiangshan.md b/gcc/config/riscv/xiangshan.md
index 5ed6bac..34b4a8f 100644
--- a/gcc/config/riscv/xiangshan.md
+++ b/gcc/config/riscv/xiangshan.md
@@ -107,7 +107,8 @@
;; they are just dummies like this one.
(define_insn_reservation "xiangshan_alu_unknown" 1
(and (eq_attr "tune" "xiangshan")
- (eq_attr "type" "zicond,min,max,minu,maxu,clz,ctz,cpop,ghost,rotate,clmul,condmove,crypto,mvpair,rdvlenb,rdvl,wrvxrm,wrfrm,rdfrm,vsetvl,vsetvl_pre,vlde,vste,vldm,vstm,vlds,vsts,vldux,vldox,vstux,vstox,vldff,vldr,vstr,vlsegde,vssegte,vlsegds,vssegts,vlsegdux,vlsegdox,vssegtux,vssegtox,vlsegdff,vialu,viwalu,vext,vicalu,vshift,vnshift,vicmp,viminmax,vimul,vidiv,viwmul,vimuladd,sf_vqmacc,viwmuladd,vimerge,vimov,vsalu,vaalu,vsmul,vsshift,vnclip,sf_vfnrclip,vfalu,vfwalu,vfmul,vfdiv,vfwmul,vfmuladd,vfwmuladd,vfsqrt,vfrecp,vfcmp,vfminmax,vfsgnj,vfclass,vfmerge,vfmov,vfcvtitof,vfcvtftoi,vfwcvtitof,vfwcvtftoi,vfwcvtftof,vfncvtitof,vfncvtftoi,vfncvtftof,vired,viwred,vfredu,vfredo,vfwredu,vfwredo,vmalu,vmpop,vmffs,vmsfs,vmiota,vmidx,vimovvx,vimovxv,vfmovvf,vfmovfv,vslideup,vslidedown,vislide1up,vislide1down,vfslide1up,vfslide1down,vgather,vcompress,vmov,vector,vandn,vbrev,vbrev8,vrev8,vclz,vctz,vcpop,vrol,vror,vwsll,vclmul,vclmulh,vghsh,vgmul,vaesef,vaesem,vaesdf,vaesdm,vaeskf1,vaeskf2,vaesz,vsha2ms,vsha2ch,vsha2cl,vsm4k,vsm4r,vsm3me,vsm3c,vfncvtbf16,vfwcvtbf16,vfwmaccbf16"))
+ (eq_attr "type" "zicond,min,max,minu,maxu,clz,ctz,cpop,ghost,rotate,clmul,condmove,crypto,mvpair,rdvlenb,rdvl,wrvxrm,wrfrm,rdfrm,vsetvl,vsetvl_pre,vlde,vste,vldm,vstm,vlds,vsts,vldux,vldox,vstux,vstox,vldff,vldr,vstr,vlsegde,vssegte,vlsegds,vssegts,vlsegdux,vlsegdox,vssegtux,vssegtox,vlsegdff,vialu,viwalu,vext,vicalu,vshift,vnshift,vicmp,viminmax,vimul,vidiv,viwmul,vimuladd,sf_vqmacc,viwmuladd,vimerge,vimov,vsalu,vaalu,vsmul,vsshift,vnclip,sf_vfnrclip,vfalu,vfwalu,vfmul,vfdiv,vfwmul,vfmuladd,vfwmuladd,vfsqrt,vfrecp,vfcmp,vfminmax,vfsgnj,vfclass,vfmerge,vfmov,vfcvtitof,vfcvtftoi,vfwcvtitof,vfwcvtftoi,vfwcvtftof,vfncvtitof,vfncvtftoi,vfncvtftof,vired,viwred,vfredu,vfredo,vfwredu,vfwredo,vmalu,vmpop,vmffs,vmsfs,vmiota,vmidx,vimovvx,vimovxv,vfmovvf,vfmovfv,vslideup,vslidedown,vislide1up,vislide1down,vfslide1up,vfslide1down,vgather,vcompress,vmov,vector,vandn,vbrev,vbrev8,vrev8,vclz,vctz,vcpop,vrol,vror,vwsll,vclmul,vclmulh,vghsh,vgmul,vaesef,vaesem,vaesdf,vaesdm,vaeskf1,vaeskf2,vaesz,vsha2ms,vsha2ch,vsha2cl,vsm4k,vsm4r,vsm3me,vsm3c,vfncvtbf16,vfwcvtbf16,vfwmaccbf16,sf_vc,sf_vc_se"))
+
"xs_alu_rs")
;; ----------------------------------------------------
diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
index b5e636c..a474e13 100644
--- a/gcc/config/s390/s390.cc
+++ b/gcc/config/s390/s390.cc
@@ -17843,9 +17843,11 @@ f_constraint_p (const char *constraint)
for (size_t i = 0, c_len = strlen (constraint); i < c_len;
i += CONSTRAINT_LEN (constraint[i], constraint + i))
{
- if (constraint[i] == 'f')
+ if (constraint[i] == 'f'
+ || (constraint[i] == '{' && constraint[i + 1] == 'f'))
seen_f_p = true;
- if (constraint[i] == 'v')
+ if (constraint[i] == 'v'
+ || (constraint[i] == '{' && constraint[i + 1] == 'v'))
seen_v_p = true;
}
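The extra '{' checks above presumably cover brace-delimited hard-register
constraint forms ({f...} and {v...}) in addition to the plain 'f'/'v'
letters.  A self-contained sketch of the same test, simplified to byte-wise
stepping instead of CONSTRAINT_LEN, purely for illustration:

#include <cstring>

/* Simplified stand-in for the FPR half of f_constraint_p: accept a bare
   'f' as well as a brace-named FPR such as "{f4}" (the exact brace syntax
   is an assumption here).  Reading constraint[i + 1] at the end of the
   string only touches the NUL terminator, so the check stays in bounds.  */
static bool
mentions_fpr (const char *constraint)
{
  for (size_t i = 0, len = std::strlen (constraint); i < len; i++)
    if (constraint[i] == 'f'
	|| (constraint[i] == '{' && constraint[i + 1] == 'f'))
      return true;
  return false;
}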
@@ -17935,7 +17937,8 @@ s390_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &inputs,
continue;
bool allows_mem, allows_reg, is_inout;
bool ok = parse_output_constraint (&constraint, i, ninputs, noutputs,
- &allows_mem, &allows_reg, &is_inout);
+ &allows_mem, &allows_reg, &is_inout,
+ nullptr);
gcc_assert (ok);
if (!f_constraint_p (constraint))
/* Long double with a constraint other than "=f" - nothing to do. */
@@ -17980,7 +17983,7 @@ s390_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &inputs,
bool allows_mem, allows_reg;
bool ok = parse_input_constraint (&constraint, i, ninputs, noutputs, 0,
constraints.address (), &allows_mem,
- &allows_reg);
+ &allows_reg, nullptr);
gcc_assert (ok);
if (!f_constraint_p (constraint))
/* Long double with a constraint other than "f" (or "=f" for inout
diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index b75cec1..02554c5 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -601,8 +601,8 @@ constantpool_address_p (const_rtx addr)
/* Make sure the address is word aligned. */
offset = XEXP (addr, 1);
- if ((!CONST_INT_P (offset))
- || ((INTVAL (offset) & 3) != 0))
+ if (! CONST_INT_P (offset)
+ || (INTVAL (offset) & 3) != 0)
return false;
sym = XEXP (addr, 0);
@@ -611,6 +611,7 @@ constantpool_address_p (const_rtx addr)
if (SYMBOL_REF_P (sym)
&& CONSTANT_POOL_ADDRESS_P (sym))
return true;
+
return false;
}
@@ -4694,29 +4695,32 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code,
}
}
+/* Return TRUE if the specified insn corresponds to one or more L32R machine
+ instructions. */
+
static bool
xtensa_is_insn_L32R_p (const rtx_insn *insn)
{
- rtx x = PATTERN (insn);
+ rtx pat, dest, src;
- if (GET_CODE (x) != SET)
+  /* "PATTERN (insn)" can be used without checking; see insn_cost()
+     in gcc/rtlanal.cc.  */
+ if (GET_CODE (pat = PATTERN (insn)) != SET
+ || ! register_operand (dest = SET_DEST (pat), VOIDmode))
return false;
- x = XEXP (x, 1);
- if (MEM_P (x))
- {
- x = XEXP (x, 0);
- return (SYMBOL_REF_P (x) || CONST_INT_P (x))
- && CONSTANT_POOL_ADDRESS_P (x);
- }
-
- /* relaxed MOVI instructions, that will be converted to L32R by the
- assembler. */
- if (CONST_INT_P (x)
- && ! xtensa_simm12b (INTVAL (x)))
+ if (constantpool_mem_p (src = SET_SRC (pat)))
return true;
- return false;
+  /* Return true if:
+       - the CONST16 instruction is not configured, and
+       - the source is some constant, and
+       - it is not the case that the destination is SImode or HImode and
+	 the source is an integer that fits into the signed 12-bit MOVI
+	 immediate field.  */
+ return (!TARGET_CONST16
+ && CONSTANT_P (src)
+ && ! ((GET_MODE (dest) == SImode || GET_MODE (dest) == HImode)
+ && CONST_INT_P (src) && xtensa_simm12b (INTVAL (src))));
}
/* Compute a relative costs of RTL insns. This is necessary in order to
@@ -4725,7 +4729,7 @@ xtensa_is_insn_L32R_p (const rtx_insn *insn)
static int
xtensa_insn_cost (rtx_insn *insn, bool speed)
{
- if (!(recog_memoized (insn) < 0))
+ if (! (recog_memoized (insn) < 0))
{
int len = get_attr_length (insn);
@@ -4738,7 +4742,7 @@ xtensa_insn_cost (rtx_insn *insn, bool speed)
/* "L32R" may be particular slow (implementation-dependent). */
if (xtensa_is_insn_L32R_p (insn))
- return COSTS_N_INSNS (1 + xtensa_extra_l32r_costs);
+ return COSTS_N_INSNS ((1 + xtensa_extra_l32r_costs) * n);
/* Cost based on the pipeline model. */
switch (get_attr_type (insn))
@@ -4783,7 +4787,7 @@ xtensa_insn_cost (rtx_insn *insn, bool speed)
{
/* "L32R" itself plus constant in litpool. */
if (xtensa_is_insn_L32R_p (insn))
- len = 3 + 4;
+ len += (len / 3) * 4;
	/* Consider fractional instruction length (for example, ".n"
	   short instructions or "L32R" litpool constants).  */
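
The size estimate above charges every 3-byte L32R an additional 4 bytes for
its literal-pool word, generalising the old fixed "3 + 4".  A tiny
standalone check of that arithmetic; the byte counts come straight from the
formula and the previous constant, nothing else is assumed:

#include <cassert>

int
main ()
{
  int len = 3;			/* a single L32R */
  len += (len / 3) * 4;		/* plus its 4-byte litpool entry */
  assert (len == 7);

  len = 6;			/* an insn expanding to two L32Rs */
  len += (len / 3) * 4;		/* two litpool entries */
  assert (len == 14);
  return 0;
}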
diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
index 029be99..629dfdd 100644
--- a/gcc/config/xtensa/xtensa.md
+++ b/gcc/config/xtensa/xtensa.md
@@ -1297,7 +1297,10 @@
std::swap (operands[0], operands[1]);
std::swap (operands[2], operands[3]);
}
-})
+}
+ [(set_attr "type" "move,move,load,load,store")
+ (set_attr "mode" "DI")
+ (set_attr "length" "6,12,6,6,6")])
(define_split
[(set (match_operand:DI 0 "register_operand")
@@ -1344,7 +1347,7 @@
%v0s32i\t%1, %0
rsr\t%0, ACCLO
wsr\t%1, ACCLO"
- [(set_attr "type" "move,move,move,load,store,store,move,move,move,move,move,load,load,store,rsr,wsr")
+ [(set_attr "type" "move,move,move,load,store,store,move,move,move,load,move,load,load,store,rsr,wsr")
(set_attr "mode" "SI")
(set_attr "length" "2,2,2,2,2,2,3,3,3,3,6,3,3,3,3,3")])
@@ -1410,7 +1413,7 @@
%v0s16i\t%1, %0
rsr\t%0, ACCLO
wsr\t%1, ACCLO"
- [(set_attr "type" "move,move,move,move,move,load,load,store,rsr,wsr")
+ [(set_attr "type" "move,move,move,move,load,load,load,store,rsr,wsr")
(set_attr "mode" "HI")
(set_attr "length" "2,2,3,3,3,3,3,3,3,3")])
@@ -1519,7 +1522,7 @@
const16\t%0, %t1\;const16\t%0, %b1
%v1l32i\t%0, %1
%v0s32i\t%1, %0"
- [(set_attr "type" "farith,fload,fstore,move,load,load,store,move,farith,farith,move,move,load,store")
+ [(set_attr "type" "farith,fload,fstore,move,load,load,store,move,farith,farith,load,move,load,store")
(set_attr "mode" "SF")
(set_attr "length" "3,3,3,2,3,2,2,3,3,3,3,6,3,3")])
@@ -1643,7 +1646,10 @@
std::swap (operands[0], operands[1]);
std::swap (operands[2], operands[3]);
}
-})
+}
+ [(set_attr "type" "move,load,move,load,load,store")
+ (set_attr "mode" "DF")
+ (set_attr "length" "6,6,12,6,6,6")])
;; Block moves