Diffstat (limited to 'gcc/config')
-rw-r--r--  gcc/config/aarch64/aarch64-simd.md              |  24
-rw-r--r--  gcc/config/gcn/gcn-valu.md                      |  14
-rw-r--r--  gcc/config/i386/i386-options.cc                 |   2
-rw-r--r--  gcc/config/i386/i386.md                         |  70
-rw-r--r--  gcc/config/i386/sse.md                          |  11
-rw-r--r--  gcc/config/loongarch/genopts/loongarch.opt.in   |   4
-rw-r--r--  gcc/config/loongarch/lasx.md                    | 139
-rw-r--r--  gcc/config/loongarch/loongarch-protos.h         |   6
-rw-r--r--  gcc/config/loongarch/loongarch.cc               | 218
-rw-r--r--  gcc/config/loongarch/loongarch.md               |  56
-rw-r--r--  gcc/config/loongarch/loongarch.opt              |   4
-rw-r--r--  gcc/config/loongarch/loongarch.opt.urls         |   3
-rw-r--r--  gcc/config/loongarch/lsx.md                     | 111
-rw-r--r--  gcc/config/loongarch/simd.md                    | 137
-rw-r--r--  gcc/config/riscv/bitmanip.md                    |  27
-rw-r--r--  gcc/config/riscv/riscv-avlprop.cc               |  41
-rw-r--r--  gcc/config/riscv/riscv-protos.h                 |   8
-rw-r--r--  gcc/config/riscv/riscv-vector-builtins-bases.cc |   5
-rw-r--r--  gcc/config/riscv/riscv-vsetvl.cc                |   8
-rw-r--r--  gcc/config/riscv/riscv.cc                       | 615
-rw-r--r--  gcc/config/riscv/riscv.h                        |  15
-rw-r--r--  gcc/config/riscv/vector.md                      |   2
-rw-r--r--  gcc/config/xtensa/xtensa.md                     |  59
23 files changed, 1174 insertions, 405 deletions
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index a121a18..e7c459d 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -3223,6 +3223,7 @@
DONE;
}
)
+
(define_insn "extend<mode><Vwide>2"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(float_extend:<VWIDE>
@@ -3232,6 +3233,29 @@
[(set_attr "type" "neon_fp_cvt_widen_s")]
)
+/* A BF->SF conversion is a left shift by 16 bits; however, shifts are
+   expensive, and the generic middle-end expansion would force a round trip
+   through a DI move.  Instead use EXT to do the shift, which gives better
+   throughput and avoids going through the GPRs.  */
+
+(define_expand "extendbfsf2"
+ [(set (match_operand:SF 0 "register_operand" "=w")
+ (float_extend:SF
+ (match_operand:BF 1 "register_operand" "w")))]
+ "TARGET_SIMD"
+{
+ rtx tmp0 = aarch64_gen_shareable_zero (V8BFmode);
+ rtx op0 = force_lowpart_subreg (V8BFmode, operands[1], BFmode);
+ rtx res = gen_reg_rtx (V8BFmode);
+ emit_insn (gen_aarch64_extv8bf (res, tmp0, op0, gen_int_mode (7, SImode)));
+ /* Subregs between floating point modes aren't allowed to change size, so go
+ through V4SFmode. */
+ res = force_lowpart_subreg (V4SFmode, res, V8BFmode);
+ res = force_lowpart_subreg (SFmode, res, V4SFmode);
+ emit_move_insn (operands[0], res);
+ DONE;
+})
+
+
;; Float narrowing operations.
(define_insn "aarch64_float_trunc_rodd_df"
diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md
index a34d2e3..96c183d 100644
--- a/gcc/config/gcn/gcn-valu.md
+++ b/gcc/config/gcn/gcn-valu.md
@@ -3128,6 +3128,20 @@
}
[(set_attr "type" "mult")])
+(define_expand "abs<mode>2"
+ [(set (match_operand:V_INT 0 "register_operand")
+ (abs:V_INT (match_operand:V_INT 1 "register_operand")))]
+ ""
+ {
+ rtx vcc = gen_reg_rtx (DImode);
+ rtx zero = gcn_vec_constant (<MODE>mode, 0);
+ emit_insn (gen_vec_cmp<mode>di (vcc, gen_rtx_LT (VOIDmode, 0, 0),
+ operands[1], zero));
+ emit_insn (gen_sub<mode>3_exec (operands[0], zero, operands[1],
+ operands[1], vcc));
+ DONE;
+ })
+
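A hedged scalar model of the expansion above (not from the patch): the
compare writes a per-lane mask, and the masked subtract overwrites only the
negative lanes, leaving the rest holding the original value.

    #include <cstdint>

    int32_t abs_lane (int32_t x)
    {
      bool vcc = x < 0;        /* vec_cmp<mode>di: per-lane mask      */
      return vcc ? 0 - x : x;  /* sub<mode>3_exec under the VCC mask  */
    }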
;; }}}
;; {{{ FP binops - special cases
diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
index dadcf76..ba598a8 100644
--- a/gcc/config/i386/i386-options.cc
+++ b/gcc/config/i386/i386-options.cc
@@ -2917,7 +2917,7 @@ ix86_option_override_internal (bool main_args_p,
else
{
opts->x_ix86_move_max = opts->x_prefer_vector_width_type;
- if (opts_set->x_ix86_move_max == PVW_NONE)
+ if (opts->x_ix86_move_max == PVW_NONE)
{
if (TARGET_AVX512F_P (opts->x_ix86_isa_flags))
opts->x_ix86_move_max = PVW_AVX512;
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 8a3e336..b812d8b 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -4982,7 +4982,7 @@
"TARGET_64BIT"
"@
{cltq|cdqe}
- movs{lq|x}\t{%1, %0|%0, %1}"
+ movs{lq|xd}\t{%1, %0|%0, %1}"
[(set_attr "type" "imovx")
(set_attr "mode" "DI")
(set_attr "prefix_0f" "0")
@@ -27353,6 +27353,72 @@
(match_dup 0))]
"peep2_reg_dead_p (2, operands[0])"
[(set (match_dup 2) (match_dup 1))])
+
+;; umax (a, add (a, b)) => [sum, ovf] = add (a, b); ovf ? a : sum
+;; umin (a, add (a, b)) => [sum, ovf] = add (a, b); ovf ? sum : a
+
+(define_code_attr ovf_add_cmp [(umax "geu") (umin "ltu")])
+
+(define_int_iterator ovf_comm [1 2])
+
+(define_insn_and_split "*plus_within_<code><mode>3_<ovf_comm>"
+ [(set (match_operand:SWI248 0 "register_operand")
+ (umaxmin:SWI248
+ (plus:SWI248 (match_operand:SWI248 1 "nonimmediate_operand")
+ (match_operand:SWI248 2 "<general_operand>"))
+ (match_dup ovf_comm)))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_CMOVE
+ && ix86_pre_reload_split ()"
+ "#"
+ "&& 1"
+ [(parallel
+ [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (plus:SWI248 (match_dup 1) (match_dup 2))
+ (match_dup ovf_comm)))
+ (set (match_dup 3)
+ (plus:SWI248 (match_dup 1) (match_dup 2)))])
+ (set (match_dup 0)
+ (if_then_else:SWI248
+ (<ovf_add_cmp> (reg:CCC FLAGS_REG) (const_int 0))
+ (match_dup 3)
+ (match_dup ovf_comm)))]
+{
+ operands[<ovf_comm>] = force_reg (<MODE>mode, operands[<ovf_comm>]);
+ operands[3] = gen_reg_rtx (<MODE>mode);
+})
+
+;; umax (a, sub (a, b)) => [diff, udf] = sub (a, b); udf ? diff : a
+;; umin (a, sub (a, b)) => [diff, udf] = sub (a, b); udf ? a : diff
+
+(define_code_attr udf_sub_cmp [(umax "ltu") (umin "geu")])
+
+(define_insn_and_split "*minus_within_<code><mode>3"
+ [(set (match_operand:SWI248 0 "register_operand")
+ (umaxmin:SWI248
+ (minus:SWI248 (match_operand:SWI248 1 "nonimmediate_operand")
+ (match_operand:SWI248 2 "<general_operand>"))
+ (match_dup 1)))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_CMOVE
+ && ix86_pre_reload_split ()"
+ "#"
+ "&& 1"
+ [(parallel
+ [(set (reg:CC FLAGS_REG)
+ (compare:CC (match_dup 1) (match_dup 2)))
+ (set (match_dup 3)
+ (minus:SWI248 (match_dup 1) (match_dup 2)))])
+ (set (match_dup 0)
+ (if_then_else:SWI248
+ (<udf_sub_cmp> (reg:CC FLAGS_REG) (const_int 0))
+ (match_dup 3)
+ (match_dup 1)))]
+{
+ operands[1] = force_reg (<MODE>mode, operands[1]);
+ operands[3] = gen_reg_rtx (<MODE>mode);
+})
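For illustration, a hedged C model of the first pattern (names hypothetical,
not from the patch): umax (a, a + b) selects the original value exactly when
the addition wraps, so the split lets the ADD set the carry flag and a CMOV
pick the result, with no separate compare.

    unsigned umax_plus (unsigned a, unsigned b)
    {
      unsigned sum = a + b;       /* add: sets CF exactly when it wraps */
      return sum >= a ? sum : a;  /* carry-based select: ovf ? a : sum  */
    }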
;; Misc patterns (?)
@@ -27859,7 +27925,7 @@
{
output_asm_insn ("mov{<imodesuffix>}\t{%3, %<k>1|%<k>1, %3}", operands);
output_asm_insn ("mov{<imodesuffix>}\t{%<k>1, %0|%0, %<k>1}", operands);
- return "movs{lq|x}\t{%2, %1|%1, %2}";
+ return "movs{lq|xd}\t{%2, %1|%1, %2}";
}
[(set_attr "type" "multi")
(set_attr "length" "24")])
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 5eba992..7d91585 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -643,6 +643,9 @@
(define_mode_iterator VI2_AVX512F
[(V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI])
+(define_mode_iterator VI2_AVX10_2
+ [(V32HI "TARGET_AVX10_2") (V16HI "TARGET_AVX2") V8HI])
+
(define_mode_iterator VI2_AVX512VNNIBW
[(V32HI "TARGET_AVX512BW || TARGET_AVX512VNNI")
(V16HI "TARGET_AVX2") V8HI])
@@ -32334,8 +32337,8 @@
(define_expand "usdot_prod<sseunpackmodelower><mode>"
[(match_operand:<sseunpackmode> 0 "register_operand")
- (match_operand:VI2_AVX512F 1 "register_operand")
- (match_operand:VI2_AVX512F 2 "register_operand")
+ (match_operand:VI2_AVX10_2 1 "register_operand")
+ (match_operand:VI2_AVX10_2 2 "register_operand")
(match_operand:<sseunpackmode> 3 "register_operand")]
"TARGET_AVXVNNIINT16 || TARGET_AVX10_2"
{
@@ -32352,8 +32355,8 @@
(define_expand "udot_prod<sseunpackmodelower><mode>"
[(match_operand:<sseunpackmode> 0 "register_operand")
- (match_operand:VI2_AVX512F 1 "register_operand")
- (match_operand:VI2_AVX512F 2 "register_operand")
+ (match_operand:VI2_AVX10_2 1 "register_operand")
+ (match_operand:VI2_AVX10_2 2 "register_operand")
(match_operand:<sseunpackmode> 3 "register_operand")]
"TARGET_AVXVNNIINT16 || TARGET_AVX10_2"
{
diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in
index 39c1545..f0c089a 100644
--- a/gcc/config/loongarch/genopts/loongarch.opt.in
+++ b/gcc/config/loongarch/genopts/loongarch.opt.in
@@ -205,6 +205,10 @@ mmax-inline-memcpy-size=
Target Joined RejectNegative UInteger Var(la_max_inline_memcpy_size) Init(1024) Save
-mmax-inline-memcpy-size=SIZE Set the max size of memcpy to inline, default is 1024.
+mbreak-code=
+Target Joined UInteger Var(la_break_code) Init(-1) Save
+-mbreak-code=CODE	Use 'break CODE' for traps that are supposed to be unrecoverable, or an 'amswap.w' instruction leading to an INE exception if CODE is out of range.
+
Enum
Name(explicit_relocs) Type(int)
The code model option names for -mexplicit-relocs:
diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
index eed4d2b..7a91473 100644
--- a/gcc/config/loongarch/lasx.md
+++ b/gcc/config/loongarch/lasx.md
@@ -146,9 +146,6 @@
;; Only integer modes equal or larger than a word.
(define_mode_iterator ILASX_DW [V4DI V8SI])
-;; Only integer modes smaller than a word.
-(define_mode_iterator ILASX_HB [V16HI V32QI])
-
;; Only used for immediate set shuffle elements instruction.
(define_mode_iterator LASX_WHB_W [V8SI V16HI V32QI V8SF])
@@ -834,59 +831,6 @@
[(set_attr "type" "simd_div")
(set_attr "mode" "<MODE>")])
-(define_insn "xor<mode>3"
- [(set (match_operand:LASX 0 "register_operand" "=f,f,f")
- (xor:LASX
- (match_operand:LASX 1 "register_operand" "f,f,f")
- (match_operand:LASX 2 "reg_or_vector_same_val_operand" "f,YC,Urv8")))]
- "ISA_HAS_LASX"
- "@
- xvxor.v\t%u0,%u1,%u2
- xvbitrevi.%v0\t%u0,%u1,%V2
- xvxori.b\t%u0,%u1,%B2"
- [(set_attr "type" "simd_logic,simd_bit,simd_logic")
- (set_attr "mode" "<MODE>")])
-
-(define_insn "ior<mode>3"
- [(set (match_operand:LASX 0 "register_operand" "=f,f,f")
- (ior:LASX
- (match_operand:LASX 1 "register_operand" "f,f,f")
- (match_operand:LASX 2 "reg_or_vector_same_val_operand" "f,YC,Urv8")))]
- "ISA_HAS_LASX"
- "@
- xvor.v\t%u0,%u1,%u2
- xvbitseti.%v0\t%u0,%u1,%V2
- xvori.b\t%u0,%u1,%B2"
- [(set_attr "type" "simd_logic,simd_bit,simd_logic")
- (set_attr "mode" "<MODE>")])
-
-(define_insn "and<mode>3"
- [(set (match_operand:LASX 0 "register_operand" "=f,f,f")
- (and:LASX
- (match_operand:LASX 1 "register_operand" "f,f,f")
- (match_operand:LASX 2 "reg_or_vector_same_val_operand" "f,YZ,Urv8")))]
- "ISA_HAS_LASX"
-{
- switch (which_alternative)
- {
- case 0:
- return "xvand.v\t%u0,%u1,%u2";
- case 1:
- {
- rtx elt0 = CONST_VECTOR_ELT (operands[2], 0);
- unsigned HOST_WIDE_INT val = ~UINTVAL (elt0);
- operands[2] = loongarch_gen_const_int_vector (<MODE>mode, val & (-val));
- return "xvbitclri.%v0\t%u0,%u1,%V2";
- }
- case 2:
- return "xvandi.b\t%u0,%u1,%B2";
- default:
- gcc_unreachable ();
- }
-}
- [(set_attr "type" "simd_logic,simd_bit,simd_logic")
- (set_attr "mode" "<MODE>")])
-
(define_insn "one_cmpl<mode>2"
[(set (match_operand:ILASX 0 "register_operand" "=f")
(not:ILASX (match_operand:ILASX 1 "register_operand" "f")))]
@@ -1035,16 +979,6 @@
[(set_attr "type" "simd_fmadd")
(set_attr "mode" "<MODE>")])
-(define_insn "fnma<mode>4"
- [(set (match_operand:FLASX 0 "register_operand" "=f")
- (fma:FLASX (neg:FLASX (match_operand:FLASX 1 "register_operand" "f"))
- (match_operand:FLASX 2 "register_operand" "f")
- (match_operand:FLASX 3 "register_operand" "0")))]
- "ISA_HAS_LASX"
- "xvfnmsub.<flasxfmt>\t%u0,%u1,%u2,%u0"
- [(set_attr "type" "simd_fmadd")
- (set_attr "mode" "<MODE>")])
-
(define_expand "sqrt<mode>2"
[(set (match_operand:FLASX 0 "register_operand")
(sqrt:FLASX (match_operand:FLASX 1 "register_operand")))]
@@ -3633,69 +3567,38 @@
[(set_attr "type" "simd_store")
(set_attr "mode" "DI")])
-(define_expand "vec_widen_<su>add_hi_<mode>"
+(define_expand "vec_widen_<su><optab>_<hi_lo>_<mode>"
[(match_operand:<VDMODE256> 0 "register_operand")
- (any_extend:<VDMODE256> (match_operand:ILASX_HB 1 "register_operand"))
- (any_extend:<VDMODE256> (match_operand:ILASX_HB 2 "register_operand"))]
- "ISA_HAS_LASX"
-{
- loongarch_expand_vec_widen_hilo (operands[0], operands[1], operands[2],
- <u_bool>, true, "add");
- DONE;
-})
-
-(define_expand "vec_widen_<su>add_lo_<mode>"
- [(match_operand:<VDMODE256> 0 "register_operand")
- (any_extend:<VDMODE256> (match_operand:ILASX_HB 1 "register_operand"))
- (any_extend:<VDMODE256> (match_operand:ILASX_HB 2 "register_operand"))]
- "ISA_HAS_LASX"
-{
- loongarch_expand_vec_widen_hilo (operands[0], operands[1], operands[2],
- <u_bool>, false, "add");
- DONE;
-})
-
-(define_expand "vec_widen_<su>sub_hi_<mode>"
- [(match_operand:<VDMODE256> 0 "register_operand")
- (any_extend:<VDMODE256> (match_operand:ILASX_HB 1 "register_operand"))
- (any_extend:<VDMODE256> (match_operand:ILASX_HB 2 "register_operand"))]
- "ISA_HAS_LASX"
-{
- loongarch_expand_vec_widen_hilo (operands[0], operands[1], operands[2],
- <u_bool>, true, "sub");
- DONE;
-})
-
-(define_expand "vec_widen_<su>sub_lo_<mode>"
- [(match_operand:<VDMODE256> 0 "register_operand")
- (any_extend:<VDMODE256> (match_operand:ILASX_HB 1 "register_operand"))
- (any_extend:<VDMODE256> (match_operand:ILASX_HB 2 "register_operand"))]
- "ISA_HAS_LASX"
-{
- loongarch_expand_vec_widen_hilo (operands[0], operands[1], operands[2],
- <u_bool>, false, "sub");
- DONE;
-})
-
-(define_expand "vec_widen_<su>mult_hi_<mode>"
- [(match_operand:<VDMODE256> 0 "register_operand")
- (any_extend:<VDMODE256> (match_operand:ILASX_HB 1 "register_operand"))
- (any_extend:<VDMODE256> (match_operand:ILASX_HB 2 "register_operand"))]
+ (match_operand:ILASX_WHB 1 "register_operand")
+ (match_operand:ILASX_WHB 2 "register_operand")
+ (any_extend (const_int 0))
+ (addsub (const_int 0) (const_int 0))
+ (const_int zero_one)]
"ISA_HAS_LASX"
{
+  rtx (*fn_even) (rtx, rtx, rtx)
+    = gen_lasx_xv<optab>wev_<dlasxfmt>_<lasxfmt><u>;
+  rtx (*fn_odd) (rtx, rtx, rtx)
+    = gen_lasx_xv<optab>wod_<dlasxfmt>_<lasxfmt><u>;
loongarch_expand_vec_widen_hilo (operands[0], operands[1], operands[2],
- <u_bool>, true, "mult");
+ <zero_one>, fn_even, fn_odd);
DONE;
})
-(define_expand "vec_widen_<su>mult_lo_<mode>"
+(define_expand "vec_widen_<su>mult_<hi_lo>_<mode>"
[(match_operand:<VDMODE256> 0 "register_operand")
- (any_extend:<VDMODE256> (match_operand:ILASX_HB 1 "register_operand"))
- (any_extend:<VDMODE256> (match_operand:ILASX_HB 2 "register_operand"))]
+ (match_operand:ILASX_WHB 1 "register_operand")
+ (match_operand:ILASX_WHB 2 "register_operand")
+ (any_extend (const_int 0))
+ (const_int zero_one)]
"ISA_HAS_LASX"
{
+  rtx (*fn_even) (rtx, rtx, rtx)
+    = gen_lasx_xvmulwev_<dlasxfmt>_<lasxfmt><u>;
+  rtx (*fn_odd) (rtx, rtx, rtx)
+    = gen_lasx_xvmulwod_<dlasxfmt>_<lasxfmt><u>;
loongarch_expand_vec_widen_hilo (operands[0], operands[1], operands[2],
- <u_bool>, false, "mult");
+ <zero_one>, fn_even, fn_odd);
DONE;
})
diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h
index 6ecbe27..bec4368 100644
--- a/gcc/config/loongarch/loongarch-protos.h
+++ b/gcc/config/loongarch/loongarch-protos.h
@@ -198,7 +198,8 @@ extern void loongarch_register_frame_header_opt (void);
extern void loongarch_expand_vec_cond_expr (machine_mode, machine_mode, rtx *);
extern void loongarch_expand_vec_cond_mask_expr (machine_mode, machine_mode,
rtx *);
-extern void loongarch_expand_vec_widen_hilo (rtx, rtx, rtx, bool, bool, const char *);
+extern void loongarch_expand_vec_widen_hilo (rtx, rtx, rtx, bool,
+ rtx (*)(rtx, rtx, rtx), rtx (*)(rtx, rtx, rtx));
/* Routines implemented in loongarch-c.c. */
void loongarch_cpu_cpp_builtins (cpp_reader *);
@@ -217,7 +218,8 @@ extern void loongarch_emit_swdivsf (rtx, rtx, rtx, machine_mode);
extern bool loongarch_explicit_relocs_p (enum loongarch_symbol_type);
extern bool loongarch_symbol_extreme_p (enum loongarch_symbol_type);
extern bool loongarch_option_valid_attribute_p (tree, tree, tree, int);
-extern void loongarch_option_override_internal (struct loongarch_target *, struct gcc_options *, struct gcc_options *);
+extern void loongarch_option_override_internal (struct loongarch_target *,
+ struct gcc_options *, struct gcc_options *);
extern void loongarch_reset_previous_fndecl (void);
extern void loongarch_save_restore_target_globals (tree new_tree);
extern void loongarch_register_pragmas (void);
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
index c782cac..f7ce3aa 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -1718,14 +1718,36 @@ loongarch_symbol_binds_local_p (const_rtx x)
bool
loongarch_const_vector_bitimm_set_p (rtx op, machine_mode mode)
{
- if (GET_CODE (op) == CONST_VECTOR && op != CONST0_RTX (mode))
+ if (GET_CODE (op) == CONST_VECTOR
+ && (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
+ || GET_MODE_CLASS (mode) == MODE_VECTOR_INT))
{
- unsigned HOST_WIDE_INT val = UINTVAL (CONST_VECTOR_ELT (op, 0));
+ unsigned HOST_WIDE_INT val;
+
+ if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
+ {
+ rtx val_s = CONST_VECTOR_ELT (op, 0);
+ const REAL_VALUE_TYPE *x = CONST_DOUBLE_REAL_VALUE (val_s);
+ if (GET_MODE (val_s) == DFmode)
+ {
+ long tmp[2];
+ REAL_VALUE_TO_TARGET_DOUBLE (*x, tmp);
+ val = (unsigned HOST_WIDE_INT) tmp[1] << 32 | tmp[0];
+ }
+ else
+ {
+ long tmp;
+ REAL_VALUE_TO_TARGET_SINGLE (*x, tmp);
+ val = (unsigned HOST_WIDE_INT) tmp;
+ }
+ }
+ else
+ val = UINTVAL (CONST_VECTOR_ELT (op, 0));
+
int vlog2 = exact_log2 (val & GET_MODE_MASK (GET_MODE_INNER (mode)));
if (vlog2 != -1)
{
- gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
gcc_assert (vlog2 >= 0 && vlog2 <= GET_MODE_UNIT_BITSIZE (mode) - 1);
return loongarch_const_vector_same_val_p (op, mode);
}
@@ -1740,14 +1762,35 @@ loongarch_const_vector_bitimm_set_p (rtx op, machine_mode mode)
bool
loongarch_const_vector_bitimm_clr_p (rtx op, machine_mode mode)
{
- if (GET_CODE (op) == CONST_VECTOR && op != CONSTM1_RTX (mode))
+ if (GET_CODE (op) == CONST_VECTOR
+ && (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
+ || GET_MODE_CLASS (mode) == MODE_VECTOR_INT))
{
- unsigned HOST_WIDE_INT val = ~UINTVAL (CONST_VECTOR_ELT (op, 0));
+ unsigned HOST_WIDE_INT val;
+ if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
+ {
+ rtx val_s = CONST_VECTOR_ELT (op, 0);
+ const REAL_VALUE_TYPE *x = CONST_DOUBLE_REAL_VALUE (val_s);
+ if (GET_MODE (val_s) == DFmode)
+ {
+ long tmp[2];
+ REAL_VALUE_TO_TARGET_DOUBLE (*x, tmp);
+ val = ~((unsigned HOST_WIDE_INT) tmp[1] << 32 | tmp[0]);
+ }
+ else
+ {
+ long tmp;
+ REAL_VALUE_TO_TARGET_SINGLE (*x, tmp);
+ val = ~((unsigned HOST_WIDE_INT) tmp);
+ }
+ }
+ else
+ val = ~UINTVAL (CONST_VECTOR_ELT (op, 0));
+
int vlog2 = exact_log2 (val & GET_MODE_MASK (GET_MODE_INNER (mode)));
if (vlog2 != -1)
{
- gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
gcc_assert (vlog2 >= 0 && vlog2 <= GET_MODE_UNIT_BITSIZE (mode) - 1);
return loongarch_const_vector_same_val_p (op, mode);
}
@@ -4056,6 +4099,17 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code,
*total = loongarch_cost->int_mult_di;
else
*total = loongarch_cost->int_mult_si;
+
+ /* Check for mul_widen. */
+ if ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
+ && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
+ || (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
+ && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND))
+ {
+ *total += (set_src_cost (XEXP (XEXP (x, 0), 0), mode, speed)
+ + set_src_cost (XEXP (XEXP (x, 1), 0), mode, speed));
+ return true;
+ }
return false;
case DIV:
@@ -5479,12 +5533,32 @@ loongarch_expand_conditional_move (rtx *operands)
}
}
+ auto is_binary_op_0_keep_orig = [](enum rtx_code code)
+ {
+ switch (code)
+ {
+ case PLUS:
+ case MINUS:
+ case IOR:
+ case XOR:
+ case ROTATE:
+ case ROTATERT:
+ case ASHIFT:
+ case ASHIFTRT:
+ case LSHIFTRT:
+ return true;
+ default:
+ return false;
+ }
+ };
+
/* Check if the optimization conditions are met. */
if (value_if_true_insn
&& value_if_false_insn
- /* Make sure that value_if_false and var are the same. */
- && BINARY_P (value_if_true_insn_src
- = SET_SRC (single_set (value_if_true_insn)))
+      /* Make sure the operation yields the original value when its second
+	 operand is 0, i.e. ORIG op 0 == ORIG.  */
+      && (value_if_true_insn_src
+	  = SET_SRC (single_set (value_if_true_insn)))
+      && is_binary_op_0_keep_orig (GET_CODE (value_if_true_insn_src))
/* Make sure that both value_if_true and value_if_false
has the same var. */
&& rtx_equal_p (XEXP (value_if_true_insn_src, 0),
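The opcodes accepted by is_binary_op_0_keep_orig are exactly those for which
0 is a right identity, which is what makes a masked-operand form legal.  A
hedged scalar sketch of the idea (hypothetical names, not from the patch):

    #include <cstdint>

    uint64_t cond_add (uint64_t x, uint64_t y, bool cond)
    {
      uint64_t tmp = cond ? y : 0;  /* maskeqz-style select          */
      return x + tmp;               /* x + 0 == x when cond is false */
    }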
@@ -6439,7 +6513,28 @@ loongarch_print_operand (FILE *file, rtx op, int letter)
if (CONST_VECTOR_P (op))
{
machine_mode mode = GET_MODE_INNER (GET_MODE (op));
- unsigned HOST_WIDE_INT val = UINTVAL (CONST_VECTOR_ELT (op, 0));
+ rtx val_s = CONST_VECTOR_ELT (op, 0);
+ unsigned HOST_WIDE_INT val;
+
+ if (GET_MODE_CLASS (mode) == MODE_FLOAT)
+ {
+ const REAL_VALUE_TYPE *x = CONST_DOUBLE_REAL_VALUE (val_s);
+ if (GET_MODE (val_s) == DFmode)
+ {
+ long tmp[2];
+ REAL_VALUE_TO_TARGET_DOUBLE (*x, tmp);
+	      val = (unsigned HOST_WIDE_INT) tmp[1] << 32 | tmp[0];
+ }
+ else
+ {
+ long tmp;
+ REAL_VALUE_TO_TARGET_SINGLE (*x, tmp);
+ val = (unsigned HOST_WIDE_INT) tmp;
+ }
+ }
+ else
+ val = UINTVAL (val_s);
+
int vlog2 = exact_log2 (val & GET_MODE_MASK (mode));
if (vlog2 != -1)
fprintf (file, "%d", vlog2);
@@ -8808,105 +8903,22 @@ loongarch_expand_vec_interleave (rtx target, rtx op0, rtx op1, bool high_p)
void
loongarch_expand_vec_widen_hilo (rtx dest, rtx op1, rtx op2,
- bool uns_p, bool high_p, const char *optab)
+ bool high_p, rtx (*fn_even) (rtx, rtx, rtx),
+ rtx (*fn_odd) (rtx, rtx, rtx))
{
machine_mode wmode = GET_MODE (dest);
machine_mode mode = GET_MODE (op1);
- rtx t1, t2, t3;
-
- t1 = gen_reg_rtx (wmode);
- t2 = gen_reg_rtx (wmode);
- t3 = gen_reg_rtx (wmode);
- switch (mode)
- {
- case V16HImode:
- if (!strcmp (optab, "add"))
- {
- if (!uns_p)
- {
- emit_insn (gen_lasx_xvaddwev_w_h (t1, op1, op2));
- emit_insn (gen_lasx_xvaddwod_w_h (t2, op1, op2));
- }
- else
- {
- emit_insn (gen_lasx_xvaddwev_w_hu (t1, op1, op2));
- emit_insn (gen_lasx_xvaddwod_w_hu (t2, op1, op2));
- }
- }
- else if (!strcmp (optab, "mult"))
- {
- if (!uns_p)
- {
- emit_insn (gen_lasx_xvmulwev_w_h (t1, op1, op2));
- emit_insn (gen_lasx_xvmulwod_w_h (t2, op1, op2));
- }
- else
- {
- emit_insn (gen_lasx_xvmulwev_w_hu (t1, op1, op2));
- emit_insn (gen_lasx_xvmulwod_w_hu (t2, op1, op2));
- }
- }
- else if (!strcmp (optab, "sub"))
- {
- if (!uns_p)
- {
- emit_insn (gen_lasx_xvsubwev_w_h (t1, op1, op2));
- emit_insn (gen_lasx_xvsubwod_w_h (t2, op1, op2));
- }
- else
- {
- emit_insn (gen_lasx_xvsubwev_w_hu (t1, op1, op2));
- emit_insn (gen_lasx_xvsubwod_w_hu (t2, op1, op2));
- }
- }
- break;
- case V32QImode:
- if (!strcmp (optab, "add"))
- {
- if (!uns_p)
- {
- emit_insn (gen_lasx_xvaddwev_h_b (t1, op1, op2));
- emit_insn (gen_lasx_xvaddwod_h_b (t2, op1, op2));
- }
- else
- {
- emit_insn (gen_lasx_xvaddwev_h_bu (t1, op1, op2));
- emit_insn (gen_lasx_xvaddwod_h_bu (t2, op1, op2));
- }
- }
- else if (!strcmp (optab, "mult"))
- {
- if (!uns_p)
- {
- emit_insn (gen_lasx_xvmulwev_h_b (t1, op1, op2));
- emit_insn (gen_lasx_xvmulwod_h_b (t2, op1, op2));
- }
- else
- {
- emit_insn (gen_lasx_xvmulwev_h_bu (t1, op1, op2));
- emit_insn (gen_lasx_xvmulwod_h_bu (t2, op1, op2));
- }
- }
- else if (!strcmp (optab, "sub"))
- {
- if (!uns_p)
- {
- emit_insn (gen_lasx_xvsubwev_h_b (t1, op1, op2));
- emit_insn (gen_lasx_xvsubwod_h_b (t2, op1, op2));
- }
- else
- {
- emit_insn (gen_lasx_xvsubwev_h_bu (t1, op1, op2));
- emit_insn (gen_lasx_xvsubwod_h_bu (t2, op1, op2));
- }
- }
- break;
+ gcc_assert (ISA_HAS_LASX
+ && GET_MODE_SIZE (mode) == 32
+ && mode != V4DImode);
- default:
- gcc_unreachable ();
- }
+ rtx t1 = gen_reg_rtx (wmode);
+ rtx t2 = gen_reg_rtx (wmode);
+ rtx t3 = gen_reg_rtx (wmode);
+ emit_insn (fn_even (t1, op1, op2));
+ emit_insn (fn_odd (t2, op1, op2));
loongarch_expand_vec_interleave (t3, t1, t2, high_p);
emit_move_insn (dest, gen_lowpart (wmode, t3));
}
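A hedged scalar model of the even/odd decomposition that replaces the old
string dispatch, simplified to a V8HI -> V4SI widening add and ignoring the
per-128-bit-lane behaviour of the LASX interleave (not part of the patch):

    #include <cstdint>

    void widen_add_lo (int32_t dst[4], const int16_t a[8], const int16_t b[8])
    {
      int32_t even[4], odd[4];
      for (int i = 0; i < 4; i++)
        {
          even[i] = (int32_t) a[2 * i] + b[2 * i];         /* vaddwev.w.h */
          odd[i] = (int32_t) a[2 * i + 1] + b[2 * i + 1];  /* vaddwod.w.h */
        }
      /* Interleaving the low halves restores the original element order
         (vilvl.w); the "hi" variant interleaves the high halves instead.  */
      for (int i = 0; i < 2; i++)
        {
          dst[2 * i] = even[i];
          dst[2 * i + 1] = odd[i];
        }
    }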
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
index a275a2d..625f30c 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -679,14 +679,22 @@
;; ....................
;;
-(define_insn "trap"
- [(trap_if (const_int 1) (const_int 0))]
+(define_insn "*trap"
+ [(trap_if (const_int 1) (match_operand 0 "const_int_operand"))]
""
{
- return "break\t0";
+ return (const_uimm15_operand (operands[0], VOIDmode)
+ ? "break\t%0"
+ : "amswap.w\t$r0,$r1,$r0");
}
[(set_attr "type" "trap")])
+(define_expand "trap"
+ [(trap_if (const_int 1) (match_dup 0))]
+ ""
+{
+ operands[0] = GEN_INT (la_break_code);
+})
;;
@@ -2523,6 +2531,38 @@
[(set_attr "type" "condmove")
(set_attr "mode" "<GPR:MODE>")])
+(define_insn_and_split "both_non_zero"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (and:DI (ne:DI (match_operand:DI 1 "register_operand" "r")
+ (const_int 0))
+ (ne:DI (match_operand:DI 2 "register_operand" "r")
+ (const_int 0))))]
+ "TARGET_64BIT"
+ "#"
+ "&& true"
+ [(set (match_dup 0)
+ (ne:DI (match_dup 1) (const_int 0)))
+ (set (match_dup 0)
+ (if_then_else:DI (ne:DI (match_dup 2) (const_int 0))
+ (match_dup 0)
+ (const_int 0)))])
+
+(define_insn_and_split "both_non_zero_subreg"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (and:DI (subreg:DI (ne:SI (match_operand:DI 1 "register_operand" "r")
+ (const_int 0)) 0)
+ (subreg:DI (ne:SI (match_operand:DI 2 "register_operand" "r")
+ (const_int 0)) 0)))]
+ "TARGET_64BIT"
+ "#"
+ "&& true"
+ [(set (match_dup 0)
+ (ne:DI (match_dup 1) (const_int 0)))
+ (set (match_dup 0)
+ (if_then_else:DI (ne:DI (match_dup 2) (const_int 0))
+ (match_dup 0)
+ (const_int 0)))])
+
;; fsel copies the 3rd argument when the 1st is non-zero and the 2nd
;; argument if the 1st is zero. This means operand 2 and 3 are
;; inverted in the instruction.
@@ -3041,6 +3081,16 @@
[(set_attr "type" "shift")
(set_attr "mode" "SI")])
+(define_insn "sign_extend_ashift<GPR:mode><SHORT:mode>"
+ [(set (match_operand:GPR 0 "register_operand" "=r")
+ (ashift:GPR
+ (sign_extend:GPR (match_operand:SHORT 1 "register_operand" "r"))
+ (match_operand:SI 2 "const_uimm5_operand")))]
+ "(GET_MODE_BITSIZE (<SHORT:MODE>mode) + INTVAL (operands[2])) == 32"
+ "slli.w\t%0,%1,%2"
+ [(set_attr "type" "shift")
+ (set_attr "mode" "<GPR:MODE>")])
+
(define_insn "*rotr<mode>3"
[(set (match_operand:GPR 0 "register_operand" "=r,r")
(rotatert:GPR (match_operand:GPR 1 "register_operand" "r,r")
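A hedged scalar model of the both_non_zero split above (not part of the
patch): the first set materializes a 0/1 value and the second zeroes it again
unless the other operand is also non-zero, so no branch is needed.

    #include <cstdint>

    uint64_t both_non_zero (uint64_t a, uint64_t b)
    {
      uint64_t t = (a != 0);   /* sltu  t, $zero, a */
      return b != 0 ? t : 0;   /* maskeqz t, t, b   */
    }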
diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt
index fbe61c0..628eabe 100644
--- a/gcc/config/loongarch/loongarch.opt
+++ b/gcc/config/loongarch/loongarch.opt
@@ -213,6 +213,10 @@ mmax-inline-memcpy-size=
Target Joined RejectNegative UInteger Var(la_max_inline_memcpy_size) Init(1024) Save
-mmax-inline-memcpy-size=SIZE Set the max size of memcpy to inline, default is 1024.
+mbreak-code=
+Target Joined UInteger Var(la_break_code) Init(-1) Save
+-mbreak-code=CODE	Use 'break CODE' for traps that are supposed to be unrecoverable, or an 'amswap.w' instruction leading to an INE exception if CODE is out of range.
+
Enum
Name(explicit_relocs) Type(int)
The code model option names for -mexplicit-relocs:
diff --git a/gcc/config/loongarch/loongarch.opt.urls b/gcc/config/loongarch/loongarch.opt.urls
index 606a211..c93f046 100644
--- a/gcc/config/loongarch/loongarch.opt.urls
+++ b/gcc/config/loongarch/loongarch.opt.urls
@@ -48,6 +48,9 @@ UrlSuffix(gcc/LoongArch-Options.html#index-mstrict-align-1)
mmax-inline-memcpy-size=
UrlSuffix(gcc/LoongArch-Options.html#index-mmax-inline-memcpy-size)
+mbreak-code=
+UrlSuffix(gcc/LoongArch-Options.html#index-mbreak-code)
+
mexplicit-relocs=
UrlSuffix(gcc/LoongArch-Options.html#index-mexplicit-relocs-1)
diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md
index fb0236b..cd87757 100644
--- a/gcc/config/loongarch/lsx.md
+++ b/gcc/config/loongarch/lsx.md
@@ -145,9 +145,6 @@
;; Only integer modes equal or larger than a word.
(define_mode_iterator ILSX_DW [V2DI V4SI])
-;; Only integer modes smaller than a word.
-(define_mode_iterator ILSX_HB [V8HI V16QI])
-
;;;; Only integer modes for fixed-point madd_q/maddr_q.
;;(define_mode_iterator ILSX_WH [V4SI V8HI])
@@ -654,59 +651,6 @@
[(set_attr "type" "simd_div")
(set_attr "mode" "<MODE>")])
-(define_insn "xor<mode>3"
- [(set (match_operand:LSX 0 "register_operand" "=f,f,f")
- (xor:LSX
- (match_operand:LSX 1 "register_operand" "f,f,f")
- (match_operand:LSX 2 "reg_or_vector_same_val_operand" "f,YC,Urv8")))]
- "ISA_HAS_LSX"
- "@
- vxor.v\t%w0,%w1,%w2
- vbitrevi.%v0\t%w0,%w1,%V2
- vxori.b\t%w0,%w1,%B2"
- [(set_attr "type" "simd_logic,simd_bit,simd_logic")
- (set_attr "mode" "<MODE>")])
-
-(define_insn "ior<mode>3"
- [(set (match_operand:LSX 0 "register_operand" "=f,f,f")
- (ior:LSX
- (match_operand:LSX 1 "register_operand" "f,f,f")
- (match_operand:LSX 2 "reg_or_vector_same_val_operand" "f,YC,Urv8")))]
- "ISA_HAS_LSX"
- "@
- vor.v\t%w0,%w1,%w2
- vbitseti.%v0\t%w0,%w1,%V2
- vori.b\t%w0,%w1,%B2"
- [(set_attr "type" "simd_logic,simd_bit,simd_logic")
- (set_attr "mode" "<MODE>")])
-
-(define_insn "and<mode>3"
- [(set (match_operand:LSX 0 "register_operand" "=f,f,f")
- (and:LSX
- (match_operand:LSX 1 "register_operand" "f,f,f")
- (match_operand:LSX 2 "reg_or_vector_same_val_operand" "f,YZ,Urv8")))]
- "ISA_HAS_LSX"
-{
- switch (which_alternative)
- {
- case 0:
- return "vand.v\t%w0,%w1,%w2";
- case 1:
- {
- rtx elt0 = CONST_VECTOR_ELT (operands[2], 0);
- unsigned HOST_WIDE_INT val = ~UINTVAL (elt0);
- operands[2] = loongarch_gen_const_int_vector (<MODE>mode, val & (-val));
- return "vbitclri.%v0\t%w0,%w1,%V2";
- }
- case 2:
- return "vandi.b\t%w0,%w1,%B2";
- default:
- gcc_unreachable ();
- }
-}
- [(set_attr "type" "simd_logic,simd_bit,simd_logic")
- (set_attr "mode" "<MODE>")])
-
(define_insn "one_cmpl<mode>2"
[(set (match_operand:ILSX 0 "register_operand" "=f")
(not:ILSX (match_operand:ILSX 1 "register_operand" "f")))]
@@ -852,16 +796,6 @@
[(set_attr "type" "simd_fmadd")
(set_attr "mode" "<MODE>")])
-(define_insn "fnma<mode>4"
- [(set (match_operand:FLSX 0 "register_operand" "=f")
- (fma:FLSX (neg:FLSX (match_operand:FLSX 1 "register_operand" "f"))
- (match_operand:FLSX 2 "register_operand" "f")
- (match_operand:FLSX 3 "register_operand" "0")))]
- "ISA_HAS_LSX"
- "vfnmsub.<flsxfmt>\t%w0,%w1,%w2,%w0"
- [(set_attr "type" "simd_fmadd")
- (set_attr "mode" "<MODE>")])
-
(define_expand "sqrt<mode>2"
[(set (match_operand:FLSX 0 "register_operand")
(sqrt:FLSX (match_operand:FLSX 1 "register_operand")))]
@@ -3220,3 +3154,48 @@
[(set (match_dup 0)
(vec_duplicate:V2DI (match_dup 1)))]
"")
+
+(define_expand "vec_widen_<su><optab>_<hi_lo>_<mode>"
+ [(match_operand:<VDMODE> 0 "register_operand")
+ (match_operand:ILSX_WHB 1 "register_operand")
+ (match_operand:ILSX_WHB 2 "register_operand")
+ (any_extend (const_int 0))
+ (addsub (const_int 0) (const_int 0))
+ (const_int zero_one)]
+ "ISA_HAS_LSX"
+{
+ rtx t_even = gen_reg_rtx (<VDMODE>mode);
+ rtx t_odd = gen_reg_rtx (<VDMODE>mode);
+ emit_insn (gen_lsx_v<optab>wev_<dlsxfmt>_<lsxfmt><u> (t_even, operands[1],
+ operands[2]));
+ emit_insn (gen_lsx_v<optab>wod_<dlsxfmt>_<lsxfmt><u> (t_odd, operands[1],
+ operands[2]));
+ if (<zero_one>)
+ emit_insn (gen_lsx_vilvh_<dlsxfmt> (operands[0], t_even, t_odd));
+ else
+ emit_insn (gen_lsx_vilvl_<dlsxfmt> (operands[0], t_even, t_odd));
+
+ DONE;
+})
+
+(define_expand "vec_widen_<su>mult_<hi_lo>_<mode>"
+ [(match_operand:<VDMODE> 0 "register_operand")
+ (match_operand:ILSX_WHB 1 "register_operand")
+ (match_operand:ILSX_WHB 2 "register_operand")
+ (any_extend (const_int 0))
+ (const_int zero_one)]
+ "ISA_HAS_LSX"
+{
+ rtx t_even = gen_reg_rtx (<VDMODE>mode);
+ rtx t_odd = gen_reg_rtx (<VDMODE>mode);
+ emit_insn (gen_lsx_vmulwev_<dlsxfmt>_<lsxfmt><u> (t_even, operands[1],
+ operands[2]));
+ emit_insn (gen_lsx_vmulwod_<dlsxfmt>_<lsxfmt><u> (t_odd, operands[1],
+ operands[2]));
+ if (<zero_one>)
+ emit_insn (gen_lsx_vilvh_<dlsxfmt> (operands[0], t_even, t_odd));
+ else
+ emit_insn (gen_lsx_vilvl_<dlsxfmt> (operands[0], t_even, t_odd));
+
+ DONE;
+})
diff --git a/gcc/config/loongarch/simd.md b/gcc/config/loongarch/simd.md
index 4156b26..b73f65a 100644
--- a/gcc/config/loongarch/simd.md
+++ b/gcc/config/loongarch/simd.md
@@ -23,6 +23,10 @@
;; Integer modes supported by LASX.
(define_mode_iterator ILASX [V4DI V8SI V16HI V32QI])
+;; Only integer modes smaller than a word.
+(define_mode_iterator ILSX_HB [V8HI V16QI])
+(define_mode_iterator ILASX_HB [V16HI V32QI])
+
;; FP modes supported by LSX
(define_mode_iterator FLSX [V2DF V4SF])
@@ -38,6 +42,10 @@
;; All integer modes available
(define_mode_iterator IVEC [(ILSX "ISA_HAS_LSX") (ILASX "ISA_HAS_LASX")])
+;; All integer modes smaller than a word.
+(define_mode_iterator IVEC_HB [(ILSX_HB "ISA_HAS_LSX")
+ (ILASX_HB "ISA_HAS_LASX")])
+
;; All FP modes available
(define_mode_iterator FVEC [(FLSX "ISA_HAS_LSX") (FLASX "ISA_HAS_LASX")])
@@ -90,12 +98,18 @@
(V8HI "V4SI") (V16HI "V8SI")
(V16QI "V8HI") (V32QI "V16HI")])
+(define_mode_attr WVEC_QUARTER [(V8HI "V2DI") (V16HI "V4DI")
+ (V16QI "V4SI") (V32QI "V8SI")])
+
;; Lower-case version.
(define_mode_attr wvec_half [(V2DI "v1ti") (V4DI "v2ti")
(V4SI "v2di") (V8SI "v4di")
(V8HI "v4si") (V16HI "v8si")
(V16QI "v8hi") (V32QI "v16hi")])
+(define_mode_attr wvec_quarter [(V8HI "v2di") (V16HI "v4di")
+ (V16QI "v4si") (V32QI "v8si")])
+
;; Integer vector modes with the same length and unit size as a mode.
(define_mode_attr VIMODE [(V2DI "V2DI") (V4SI "V4SI")
(V8HI "V8HI") (V16QI "V16QI")
@@ -124,12 +138,16 @@
(V8HI "h") (V16HI "h")
(V16QI "b") (V32QI "b")])
-;; Suffix for widening LSX or LASX instructions.
+;; Suffix for double widening LSX or LASX instructions.
(define_mode_attr simdfmt_w [(V2DI "q") (V4DI "q")
(V4SI "d") (V8SI "d")
(V8HI "w") (V16HI "w")
(V16QI "h") (V32QI "h")])
+;; Suffix for quadruple widening LSX or LASX instructions.
+(define_mode_attr simdfmt_qw [(V8HI "d") (V16HI "d")
+ (V16QI "w") (V32QI "w")])
+
;; Suffix for integer mode in LSX or LASX instructions with FP input but
;; integer output.
(define_mode_attr simdifmt_for_f [(V2DF "l") (V4DF "l")
@@ -169,6 +187,8 @@
(V4SI "uimm5") (V8SI "uimm5")
(V2DI "uimm6") (V4DI "uimm6")])
+(define_int_attr hi_lo [(0 "lo") (1 "hi")])
+
;; =======================================================================
;; For many LASX instructions, the only difference of it from the LSX
;; counterpart is the length of vector operands. Describe these LSX/LASX
@@ -431,6 +451,17 @@
[(set_attr "type" "simd_int_arith")
(set_attr "mode" "<MODE>")])
+;; <x>vfnmsub.{s/d}
+(define_insn "fnma<mode>4"
+ [(set (match_operand:FVEC 0 "register_operand" "=f")
+ (fma:FVEC (neg:FVEC (match_operand:FVEC 1 "register_operand" "f"))
+ (match_operand:FVEC 2 "register_operand" "f")
+ (match_operand:FVEC 3 "register_operand" "f")))]
+ "!HONOR_SIGNED_ZEROS (<MODE>mode)"
+ "<x>vfnmsub.<simdfmt>\t%<wu>0,%<wu>1,%<wu>2,%<wu>3"
+ [(set_attr "type" "simd_fmadd")
+ (set_attr "mode" "<MODE>")])
+
;; <x>vfcmp.*.{s/d} with defined RTX code
;; There are no fcmp.{sugt/suge/cgt/cge}.{s/d} menmonics in GAS, so we have
;; to reverse the operands ourselves :(.
@@ -826,6 +857,39 @@
DONE;
})
+(define_expand "<su>dot_prod<wvec_quarter><mode>"
+ [(match_operand:<WVEC_QUARTER> 0 "register_operand" "=f,f")
+ (match_operand:IVEC_HB 1 "register_operand" "f,f")
+ (match_operand:IVEC_HB 2 "register_operand" "f,f")
+ (match_operand:<WVEC_QUARTER> 3 "reg_or_0_operand" "f, YG")
+ (any_extend (const_int 0))]
+ ""
+{
+ rtx *op = operands;
+ rtx res_mulev = gen_reg_rtx (<WVEC_HALF>mode);
+ rtx res_mulod = gen_reg_rtx (<WVEC_HALF>mode);
+ rtx res_addev = gen_reg_rtx (<WVEC_QUARTER>mode);
+ rtx res_addod = gen_reg_rtx (<WVEC_QUARTER>mode);
+ emit_insn (gen_<simd_isa>_<x>vmulwev_<simdfmt_w>_<simdfmt><u>
+ (res_mulev, op[1], op[2]));
+ emit_insn (gen_<simd_isa>_<x>vmulwod_<simdfmt_w>_<simdfmt><u>
+ (res_mulod, op[1], op[2]));
+ emit_insn (gen_<simd_isa>_<x>vhaddw_<simdfmt_qw><u>_<simdfmt_w><u>
+ (res_addev, res_mulev, res_mulev));
+ emit_insn (gen_<simd_isa>_<x>vhaddw_<simdfmt_qw><u>_<simdfmt_w><u>
+ (res_addod, res_mulod, res_mulod));
+ if (op[3] == CONST0_RTX (<WVEC_QUARTER>mode))
+ emit_insn (gen_add<wvec_quarter>3 (op[0], res_addev,
+ res_addod));
+ else
+ {
+ emit_insn (gen_add<wvec_quarter>3 (res_addev, res_addev,
+ res_addod));
+ emit_insn (gen_add<wvec_quarter>3 (op[0], res_addev, op[3]));
+ }
+ DONE;
+})
+
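A hedged scalar model of the quadruple-widening dot product above, for the
signed V8HI -> V2DI case (not part of the patch): the even/odd widening
multiplies produce all products, and vhaddw.d.w sums adjacent pairs into the
4x-wide elements before the final vector adds.

    #include <cstdint>

    void sdot_v8hi (int64_t acc[2], const int16_t a[8], const int16_t b[8])
    {
      int32_t ev[4], od[4];
      for (int i = 0; i < 4; i++)
        {
          ev[i] = (int32_t) a[2 * i] * b[2 * i];          /* vmulwev.w.h */
          od[i] = (int32_t) a[2 * i + 1] * b[2 * i + 1];  /* vmulwod.w.h */
        }
      for (int i = 0; i < 2; i++)
        /* vhaddw.d.w on each product vector, then the final adds.  */
        acc[i] += ((int64_t) ev[2 * i] + ev[2 * i + 1])
                  + ((int64_t) od[2 * i] + od[2 * i + 1]);
    }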
(define_insn "simd_maddw_evod_<mode>_hetero"
[(set (match_operand:<WVEC_HALF> 0 "register_operand" "=f")
(plus:<WVEC_HALF>
@@ -972,6 +1036,77 @@
DONE;
})
+(define_insn "xor<mode>3"
+ [(set (match_operand:ALLVEC 0 "register_operand" "=f,f,f")
+ (xor:ALLVEC
+ (match_operand:ALLVEC 1 "register_operand" "f,f,f")
+ (match_operand:ALLVEC 2 "reg_or_vector_same_val_operand" "f,YC,Urv8")))]
+ ""
+ "@
+ <x>vxor.v\t%<wu>0,%<wu>1,%<wu>2
+ <x>vbitrevi.%v0\t%<wu>0,%<wu>1,%V2
+ <x>vxori.b\t%<wu>0,%<wu>1,%B2"
+ [(set_attr "type" "simd_logic,simd_bit,simd_logic")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "ior<mode>3"
+ [(set (match_operand:ALLVEC 0 "register_operand" "=f,f,f")
+ (ior:ALLVEC
+ (match_operand:ALLVEC 1 "register_operand" "f,f,f")
+ (match_operand:ALLVEC 2 "reg_or_vector_same_val_operand" "f,YC,Urv8")))]
+ ""
+ "@
+ <x>vor.v\t%<wu>0,%<wu>1,%<wu>2
+ <x>vbitseti.%v0\t%<wu>0,%<wu>1,%V2
+ <x>vori.b\t%<wu>0,%<wu>1,%B2"
+ [(set_attr "type" "simd_logic,simd_bit,simd_logic")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "and<mode>3"
+ [(set (match_operand:ALLVEC 0 "register_operand" "=f,f,f")
+ (and:ALLVEC
+ (match_operand:ALLVEC 1 "register_operand" "f,f,f")
+ (match_operand:ALLVEC 2 "reg_or_vector_same_val_operand" "f,YZ,Urv8")))]
+ ""
+{
+ switch (which_alternative)
+ {
+ case 0:
+ return "<x>vand.v\t%<wu>0,%<wu>1,%<wu>2";
+ case 1:
+ {
+ rtx elt0 = CONST_VECTOR_ELT (operands[2], 0);
+ unsigned HOST_WIDE_INT val;
+ if (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT)
+ {
+ const REAL_VALUE_TYPE *x = CONST_DOUBLE_REAL_VALUE (elt0);
+ if (GET_MODE (elt0) == DFmode)
+ {
+ long tmp[2];
+ REAL_VALUE_TO_TARGET_DOUBLE (*x, tmp);
+ val = ~((unsigned HOST_WIDE_INT) tmp[1] << 32 | tmp[0]);
+ }
+ else
+ {
+ long tmp;
+ REAL_VALUE_TO_TARGET_SINGLE (*x, tmp);
+ val = ~((unsigned HOST_WIDE_INT) tmp);
+ }
+ }
+ else
+ val = ~UINTVAL (elt0);
+ operands[2] = loongarch_gen_const_int_vector (<VIMODE>mode, val & (-val));
+ return "<x>vbitclri.%v0\t%<wu>0,%<wu>1,%V2";
+ }
+ case 2:
+ return "<x>vandi.b\t%<wu>0,%<wu>1,%B2";
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "simd_logic,simd_bit,simd_logic")
+ (set_attr "mode" "<MODE>")])
+
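The vbitclri alternative above relies on the complement of the AND mask
having exactly one bit set per element; a hedged standalone sketch of that
bit manipulation (hypothetical helper, not from the patch):

    #include <cstdint>

    /* E.g. mask 0xfffffffb clears only bit 2: ~mask == 4, and 4 & -4 == 4.  */
    uint32_t bitclr_imm (uint32_t mask)
    {
      uint32_t val = ~mask;
      return val & -val;   /* isolate the lowest set bit */
    }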
; The LoongArch SX Instructions.
(include "lsx.md")
diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md
index 59b71ed..697198f 100644
--- a/gcc/config/riscv/bitmanip.md
+++ b/gcc/config/riscv/bitmanip.md
@@ -1,4 +1,4 @@
-;); Machine description for RISC-V Bit Manipulation operations.
+;; Machine description for RISC-V Bit Manipulation operations.
;; Copyright (C) 2021-2025 Free Software Foundation, Inc.
;; This file is part of GCC.
@@ -237,19 +237,20 @@
[(set_attr "type" "bitmanip")
(set_attr "mode" "<X:MODE>")])
-(define_insn_and_split "*<optab>_not_const<mode>"
- [(set (match_operand:X 0 "register_operand" "=r")
- (bitmanip_bitwise:X (not:X (match_operand:X 1 "register_operand" "r"))
- (match_operand:X 2 "const_arith_operand" "I")))
- (clobber (match_scratch:X 3 "=&r"))]
+(define_peephole2
+ [(match_scratch:X 4 "r")
+ (set (match_operand:X 0 "register_operand")
+ (not:X (match_operand:X 1 "register_operand")))
+ (set (match_operand:X 2 "register_operand")
+ (bitmanip_bitwise:X (match_dup 0)
+ (match_operand 3 "const_int_operand")))
+ (match_dup 4)]
"(TARGET_ZBB || TARGET_ZBKB) && !TARGET_ZCB
- && !optimize_function_for_size_p (cfun)"
- "#"
- "&& reload_completed"
- [(set (match_dup 3) (match_dup 2))
- (set (match_dup 0) (bitmanip_bitwise:X (not:X (match_dup 1)) (match_dup 3)))]
- ""
- [(set_attr "type" "bitmanip")])
+ && !optimize_function_for_size_p (cfun)
+ && rtx_equal_p (operands[0], operands[2])
+ && riscv_const_insns (operands[3], false) == 1"
+ [(set (match_dup 4) (match_dup 3))
+ (set (match_dup 0) (bitmanip_bitwise:X (not:X (match_dup 1)) (match_dup 4)))])
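A hedged C++ model of what the peephole exploits (not from the patch): Zbb's
andn needs the constant in a register, so NOT-then-AND-immediate becomes
load-constant-then-ANDN, moving the constant load off the critical path and
folding the inversion into the AND.

    #include <cstdint>

    uint64_t not_and (uint64_t x, uint64_t c /* li scratch, C */)
    {
      return ~x & c;   /* andn dst, scratch, x */
    }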
;; '(a >= 0) ? b : 0' is emitted branchless (from if-conversion). Without a
;; bit of extra help for combine (i.e., the below split), we end up emitting
diff --git a/gcc/config/riscv/riscv-avlprop.cc b/gcc/config/riscv/riscv-avlprop.cc
index b8547a7..a42764e 100644
--- a/gcc/config/riscv/riscv-avlprop.cc
+++ b/gcc/config/riscv/riscv-avlprop.cc
@@ -77,6 +77,7 @@ along with GCC; see the file COPYING3. If not see
#include "tree-pass.h"
#include "df.h"
#include "rtl-ssa.h"
+#include "rtl-iter.h"
#include "cfgcleanup.h"
#include "insn-attr.h"
#include "tm-constrs.h"
@@ -412,6 +413,46 @@ pass_avlprop::get_vlmax_ta_preferred_avl (insn_info *insn) const
&& def1->insn ()->compare_with (insn) >= 0)
return NULL_RTX;
}
+ else
+ {
+ /* If the use is in a subreg e.g. in a store it is possible that
+ we punned the vector mode with a larger mode like
+ (subreg:V1SI (reg:V4QI 123)).
+ For an AVL of 1 that means we actually store one SImode
+ element and not 1 QImode elements. But the latter is what we
+ would propagate if we took the AVL operand literally.
+ Instead we scale it by the ratio of inner and outer mode
+ (4 in the example above). */
+ int factor = 1;
+ if (use->includes_subregs ())
+ {
+ subrtx_iterator::array_type array;
+ FOR_EACH_SUBRTX (iter, array, use_insn->rtl (), NONCONST)
+ {
+ const_rtx x = *iter;
+ if (x
+ && SUBREG_P (x)
+ && REG_P (SUBREG_REG (x))
+ && REGNO (SUBREG_REG (x)) == use->regno ()
+ && known_eq (GET_MODE_SIZE (use->mode ()),
+ GET_MODE_SIZE (GET_MODE (x))))
+ {
+ if (can_div_trunc_p (GET_MODE_NUNITS (use->mode ()),
+ GET_MODE_NUNITS (GET_MODE (x)),
+ &factor))
+ {
+ gcc_assert (factor > 0);
+ break;
+ }
+ else
+ return NULL_RTX;
+ }
+ }
+ }
+
+ if (factor > 1)
+ new_use_avl = GEN_INT (INTVAL (new_use_avl) * factor);
+ }
if (!use_avl)
use_avl = new_use_avl;
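A hedged standalone model of the scaling above (hypothetical helper, not from
the patch): for (subreg:V1SI (reg:V4QI)) the inner mode has 4x the elements
of the outer one, so an AVL of 1 SImode element becomes 4 QImode elements.

    #include <cassert>

    int scale_avl (int avl, int inner_nunits, int outer_nunits)
    {
      assert (inner_nunits % outer_nunits == 0);
      return avl * (inner_nunits / outer_nunits);
    }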
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 013b1dd..570acb1 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -209,6 +209,11 @@ rtl_opt_pass * make_pass_insert_landing_pad (gcc::context *ctxt);
rtl_opt_pass * make_pass_vector_permconst (gcc::context *ctxt);
rtl_opt_pass * make_pass_bclr_lowest_set_bit (gcc::context *ctxt);
+/* Routines implemented in riscv-vsetvl.cc. */
+extern bool has_vtype_op (rtx_insn *);
+extern bool mask_agnostic_p (rtx_insn *);
+extern rtx get_avl (rtx_insn *);
+extern bool vsetvl_insn_p (rtx_insn *);
/* Routines implemented in riscv-string.c. */
extern bool riscv_expand_block_compare (rtx, rtx, rtx, rtx);
@@ -834,7 +839,8 @@ extern bool th_print_operand_address (FILE *, machine_mode, rtx);
extern bool strided_load_broadcast_p (void);
extern bool riscv_prefer_agnostic_p (void);
extern bool riscv_use_divmod_expander (void);
-void riscv_init_cumulative_args (CUMULATIVE_ARGS *, const_tree, rtx, tree, int);
+void riscv_init_cumulative_args (CUMULATIVE_ARGS *, const_tree,
+ rtx, tree, int, bool);
extern bool
riscv_option_valid_attribute_p (tree, tree, tree, int);
extern bool
diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.cc b/gcc/config/riscv/riscv-vector-builtins-bases.cc
index 7e4d396..22b77cc 100644
--- a/gcc/config/riscv/riscv-vector-builtins-bases.cc
+++ b/gcc/config/riscv/riscv-vector-builtins-bases.cc
@@ -1793,12 +1793,13 @@ public:
The fold routines expect the replacement statement to have the
same lhs as the original call, so return the copy statement
rather than the field update. */
- gassign *copy = gimple_build_assign (unshare_expr (f.lhs), rhs_tuple);
+ gassign *copy = gimple_build_assign (f.lhs, rhs_tuple);
/* Get a reference to the individual vector. */
tree field = tuple_type_field (TREE_TYPE (f.lhs));
tree lhs_array
- = build3 (COMPONENT_REF, TREE_TYPE (field), f.lhs, field, NULL_TREE);
+ = build3 (COMPONENT_REF, TREE_TYPE (field), unshare_expr (f.lhs),
+ field, NULL_TREE);
tree lhs_vector = build4 (ARRAY_REF, TREE_TYPE (rhs_vector), lhs_array,
index, NULL_TREE, NULL_TREE);
gassign *update = gimple_build_assign (lhs_vector, rhs_vector);
diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc
index 3586d0c..580ac9c 100644
--- a/gcc/config/riscv/riscv-vsetvl.cc
+++ b/gcc/config/riscv/riscv-vsetvl.cc
@@ -258,7 +258,7 @@ policy_to_str (bool agnostic_p)
/* Return true if it is an RVV instruction depends on VTYPE global
status register. */
-static bool
+bool
has_vtype_op (rtx_insn *rinsn)
{
return recog_memoized (rinsn) >= 0 && get_attr_has_vtype_op (rinsn);
@@ -306,7 +306,7 @@ vector_config_insn_p (rtx_insn *rinsn)
}
/* Return true if it is vsetvldi or vsetvlsi. */
-static bool
+bool
vsetvl_insn_p (rtx_insn *rinsn)
{
if (!rinsn || !vector_config_insn_p (rinsn))
@@ -386,7 +386,7 @@ get_vl (rtx_insn *rinsn)
}
/* Helper function to get AVL operand. */
-static rtx
+rtx
get_avl (rtx_insn *rinsn)
{
if (vsetvl_insn_p (rinsn) || vsetvl_discard_result_insn_p (rinsn))
@@ -411,7 +411,7 @@ get_default_ma ()
}
/* Helper function to get MA operand. */
-static bool
+bool
mask_agnostic_p (rtx_insn *rinsn)
{
/* If it doesn't have MA, we return agnostic by default. */
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index d5de76c..e978f92 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -740,6 +740,7 @@ static tree riscv_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
static tree riscv_handle_type_attribute (tree *, tree, tree, int, bool *);
static tree riscv_handle_rvv_vector_bits_attribute (tree *, tree, tree, int,
bool *);
+static tree riscv_handle_rvv_vls_cc_attribute (tree *, tree, tree, int, bool *);
/* Defining target-specific uses of __attribute__. */
static const attribute_spec riscv_gnu_attributes[] =
@@ -763,6 +764,8 @@ static const attribute_spec riscv_gnu_attributes[] =
standard vector calling convention variant. Syntax:
__attribute__((riscv_vector_cc)). */
{"riscv_vector_cc", 0, 0, false, true, true, true, NULL, NULL},
+ {"riscv_vls_cc", 0, 1, false, true, true, true,
+ riscv_handle_rvv_vls_cc_attribute, NULL},
/* This attribute is used to declare a new type, to appoint the exactly
bits size of the type. For example:
@@ -790,6 +793,8 @@ static const attribute_spec riscv_attributes[] =
standard vector calling convention variant. Syntax:
[[riscv::vector_cc]]. */
{"vector_cc", 0, 0, false, true, true, true, NULL, NULL},
+ {"vls_cc", 0, 1, false, true, true, true, riscv_handle_rvv_vls_cc_attribute,
+ NULL},
/* This attribute is used to declare a new type, to appoint the exactly
bits size of the type. For example:
@@ -3723,6 +3728,12 @@ riscv_legitimize_move (machine_mode mode, rtx dest, rtx src)
riscv_vector::emit_vec_extract (result, v,
gen_int_mode (index + i, Pmode));
+	  /* The low part must be zero-extended when ELEN == 32 and the
+	     mode is 64 bits wide.  */
+ if (num == 2 && i == 0)
+ emit_insn (gen_extend_insn (int_reg, result, mode, smode,
+ true));
+
if (i == 1)
{
if (UNITS_PER_WORD < mode_size)
@@ -5872,11 +5883,12 @@ typedef struct {
floating-point registers. */
static int
-riscv_flatten_aggregate_field (const_tree type,
- riscv_aggregate_field fields[2],
+riscv_flatten_aggregate_field (const_tree type, riscv_aggregate_field *fields,
int n, HOST_WIDE_INT offset,
- bool ignore_zero_width_bit_field_p)
+ bool ignore_zero_width_bit_field_p,
+ bool vls_p = false, unsigned abi_vlen = 0)
{
+ int max_aggregate_field = vls_p ? 8 : 2;
switch (TREE_CODE (type))
{
case RECORD_TYPE:
@@ -5903,9 +5915,9 @@ riscv_flatten_aggregate_field (const_tree type,
else
{
HOST_WIDE_INT pos = offset + int_byte_position (f);
- n = riscv_flatten_aggregate_field (TREE_TYPE (f),
- fields, n, pos,
- ignore_zero_width_bit_field_p);
+ n = riscv_flatten_aggregate_field (
+ TREE_TYPE (f), fields, n, pos, ignore_zero_width_bit_field_p,
+ vls_p, abi_vlen);
}
if (n < 0)
return -1;
@@ -5915,13 +5927,14 @@ riscv_flatten_aggregate_field (const_tree type,
case ARRAY_TYPE:
{
HOST_WIDE_INT n_elts;
- riscv_aggregate_field subfields[2];
+ riscv_aggregate_field subfields[8];
tree index = TYPE_DOMAIN (type);
tree elt_size = TYPE_SIZE_UNIT (TREE_TYPE (type));
- int n_subfields = riscv_flatten_aggregate_field (TREE_TYPE (type),
- subfields, 0, offset,
- ignore_zero_width_bit_field_p);
-
+ int n_subfields
+ = riscv_flatten_aggregate_field (TREE_TYPE (type), subfields, 0,
+ offset,
+ ignore_zero_width_bit_field_p, vls_p,
+ abi_vlen);
/* Can't handle incomplete types nor sizes that are not fixed. */
if (n_subfields <= 0
|| !COMPLETE_TYPE_P (type)
@@ -5941,7 +5954,7 @@ riscv_flatten_aggregate_field (const_tree type,
for (HOST_WIDE_INT i = 0; i < n_elts; i++)
for (int j = 0; j < n_subfields; j++)
{
- if (n >= 2)
+ if (n >= max_aggregate_field)
return -1;
fields[n] = subfields[j];
@@ -5973,18 +5986,36 @@ riscv_flatten_aggregate_field (const_tree type,
}
default:
- if (n < 2
- && ((SCALAR_FLOAT_TYPE_P (type)
- && GET_MODE_SIZE (TYPE_MODE (type)).to_constant () <= UNITS_PER_FP_ARG)
- || (INTEGRAL_TYPE_P (type)
- && GET_MODE_SIZE (TYPE_MODE (type)).to_constant () <= UNITS_PER_WORD)))
+ poly_uint64 mode_size = GET_MODE_SIZE (TYPE_MODE (type));
+ if (vls_p)
{
- fields[n].type = type;
- fields[n].offset = offset;
- return n + 1;
+ gcc_assert (abi_vlen != 0);
+ if (n < max_aggregate_field
+ && (VECTOR_TYPE_P (type) && mode_size.is_constant ()
+ && (mode_size.to_constant () <= abi_vlen * 8)))
+ {
+ fields[n].type = type;
+ fields[n].offset = offset;
+ return n + 1;
+ }
+ else
+ return -1;
}
else
- return -1;
+ {
+ if (n < max_aggregate_field
+ && ((SCALAR_FLOAT_TYPE_P (type)
+ && mode_size.to_constant () <= UNITS_PER_FP_ARG)
+ || (INTEGRAL_TYPE_P (type)
+ && mode_size.to_constant () <= UNITS_PER_WORD)))
+ {
+ fields[n].type = type;
+ fields[n].offset = offset;
+ return n + 1;
+ }
+ else
+ return -1;
+ }
}
}
@@ -5993,14 +6024,16 @@ riscv_flatten_aggregate_field (const_tree type,
static int
riscv_flatten_aggregate_argument (const_tree type,
- riscv_aggregate_field fields[2],
- bool ignore_zero_width_bit_field_p)
+ riscv_aggregate_field *fields,
+ bool ignore_zero_width_bit_field_p,
+ bool vls_p = false, unsigned abi_vlen = 0)
{
if (!type || TREE_CODE (type) != RECORD_TYPE)
return -1;
return riscv_flatten_aggregate_field (type, fields, 0, 0,
- ignore_zero_width_bit_field_p);
+ ignore_zero_width_bit_field_p, vls_p,
+ abi_vlen);
}
/* See whether TYPE is a record whose fields should be returned in one or
@@ -6163,18 +6196,22 @@ riscv_pass_vls_aggregate_in_gpr (struct riscv_arg_info *info, machine_mode mode,
return gen_rtx_PARALLEL (mode, gen_rtvec (1, x));
}
+static const predefined_function_abi &
+riscv_fntype_abi_1 (const_tree fntype, bool check_only);
+
/* Initialize a variable CUM of type CUMULATIVE_ARGS
for a call to a function whose data type is FNTYPE.
For a library call, FNTYPE is 0. */
void
riscv_init_cumulative_args (CUMULATIVE_ARGS *cum, const_tree fntype,
- rtx, tree, int)
+ rtx, tree, int, bool check_only)
{
memset (cum, 0, sizeof (*cum));
if (fntype)
- cum->variant_cc = (riscv_cc) fntype_abi (fntype).id ();
+ cum->variant_cc =
+ (riscv_cc) riscv_fntype_abi_1 (fntype, check_only).id ();
else
cum->variant_cc = RISCV_CC_BASE;
}
@@ -6197,7 +6234,7 @@ riscv_hard_regno_nregs (unsigned int regno, machine_mode mode);
static rtx
riscv_get_vector_arg (struct riscv_arg_info *info, const CUMULATIVE_ARGS *cum,
- machine_mode mode, bool return_p)
+ machine_mode mode, bool return_p, bool vls_p = false)
{
gcc_assert (riscv_v_ext_mode_p (mode));
@@ -6233,8 +6270,9 @@ riscv_get_vector_arg (struct riscv_arg_info *info, const CUMULATIVE_ARGS *cum,
int arg_reg_end = V_ARG_LAST - V_REG_FIRST;
int aligned_reg_start = ROUND_UP (arg_reg_start, LMUL);
- /* For scalable data and scalable tuple return value. */
- if (return_p)
+  /* For scalable data and scalable tuple return values.
+     For the VLS CC we may pass a struct like a tuple, so we need to defer
+     the handling.  */
+ if (return_p && !vls_p)
return gen_rtx_REG (mode, aligned_reg_start + V_REG_FIRST);
/* Iterate through the USED_VRS array to find vector register groups that have
@@ -6271,6 +6309,224 @@ riscv_get_vector_arg (struct riscv_arg_info *info, const CUMULATIVE_ARGS *cum,
return NULL_RTX;
}
+
+#define RISCV_ALL_VALID_ABI_VLEN(F) \
+ F (32) \
+ F (64) \
+ F (128) \
+ F (256) \
+ F (512) \
+ F (1024) \
+ F (2048) \
+ F (4096) \
+ F (8192) \
+ F (16384)
+
+/* Return true if CC is a variant of VLS CC. */
+
+static bool
+riscv_vls_cc_p (riscv_cc cc)
+{
+ switch (cc)
+ {
+#define VLS_CC_ABI_VLEN_CASE(ABI_VLEN) \
+ case RISCV_CC_VLS_V_##ABI_VLEN:
+ RISCV_ALL_VALID_ABI_VLEN (VLS_CC_ABI_VLEN_CASE)
+
+#undef VLS_CC_ABI_VLEN_CASE
+ return true;
+ default:
+ return false;
+ }
+}
+
+/* Get ABI_VLEN from cc. */
+
+static unsigned int
+riscv_get_cc_abi_vlen (riscv_cc cc)
+{
+ switch (cc)
+ {
+#define VLS_CC_ABI_VLEN_CASE(ABI_VLEN) \
+ case RISCV_CC_VLS_V_##ABI_VLEN: \
+ return ABI_VLEN;
+ RISCV_ALL_VALID_ABI_VLEN (VLS_CC_ABI_VLEN_CASE)
+
+#undef VLS_CC_ABI_VLEN_CASE
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Return true if ABI_VLEN is valid for the VLS CC.  */
+
+static bool
+riscv_valid_abi_vlen_vls_cc_p (unsigned abi_vlen)
+{
+ switch (abi_vlen)
+ {
+#define VLS_CC_ABI_VLEN_CASE(ABI_VLEN) \
+ case ABI_VLEN:
+ RISCV_ALL_VALID_ABI_VLEN (VLS_CC_ABI_VLEN_CASE)
+
+#undef VLS_CC_ABI_VLEN_CASE
+ return true;
+ default:
+ return false;
+ }
+}
+
+/* Map ABI_VLEN to the corresponding VLS calling-convention variant.  */
+
+static riscv_cc
+riscv_get_riscv_cc_by_abi_vlen (unsigned abi_vlen)
+{
+ switch (abi_vlen)
+ {
+#define VLS_CC_ABI_VLEN_CASE(ABI_VLEN) \
+ case ABI_VLEN: \
+ return RISCV_CC_VLS_V_##ABI_VLEN;
+ RISCV_ALL_VALID_ABI_VLEN (VLS_CC_ABI_VLEN_CASE)
+
+#undef VLS_CC_ABI_VLEN_CASE
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Get a VLS mode that has the same size as MODE under ABI_VLEN, but whose
+   elements are always in an integer mode.  */
+
+static machine_mode
+riscv_get_vls_container_type (machine_mode mode, unsigned abi_vlen)
+{
+ machine_mode element_mode = GET_MODE_INNER (mode);
+ unsigned int mode_size = GET_MODE_SIZE (mode).to_constant ();
+ unsigned int lmul = ROUND_UP (mode_size * 8, abi_vlen) / abi_vlen;
+
+  /* Always use an integer mode for passing, to simplify the logic; we allow
+     passing unsupported vector types in vector registers, e.g. float16x4_t
+     even without vector fp16 support.  */
+ switch (GET_MODE_SIZE (element_mode).to_constant ())
+ {
+ case 1:
+ element_mode = QImode;
+ break;
+ case 2:
+ element_mode = HImode;
+ break;
+ case 4:
+ element_mode = SImode;
+ break;
+ case 8:
+ element_mode = DImode;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ scalar_mode smode = as_a<scalar_mode> (element_mode);
+ return get_lmul_mode (smode, lmul).require ();
+}
+
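A hedged standalone model of the register-group computation used above,
assuming ABI_VLEN in bits and mode sizes in bytes (hypothetical helper, not
from the patch):

    unsigned vls_lmul (unsigned mode_size_bytes, unsigned abi_vlen_bits)
    {
      unsigned bits = mode_size_bytes * 8;
      return (bits + abi_vlen_bits - 1) / abi_vlen_bits;  /* ROUND_UP / vlen */
    }
    /* vls_lmul (32, 128) == 2: a 256-bit VLS vector occupies an LMUL-2
       register group when ABI_VLEN is 128.  */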
+/* Pass VLS type argument in vector argument register. */
+
+static rtx
+riscv_pass_vls_in_vr (struct riscv_arg_info *info, const CUMULATIVE_ARGS *cum,
+ machine_mode mode, bool return_p)
+{
+ gcc_assert (riscv_v_ext_vls_mode_p (mode));
+
+ unsigned int abi_vlen = riscv_get_cc_abi_vlen (cum->variant_cc);
+ unsigned int mode_size = GET_MODE_SIZE (mode).to_constant ();
+ unsigned int lmul = ROUND_UP (mode_size * 8, abi_vlen) / abi_vlen;
+
+  /* Put it into memory if it needs more than 8 registers (> LMUL 8).  */
+ if (lmul > 8)
+ return NULL_RTX;
+
+ machine_mode vla_mode = riscv_get_vls_container_type (mode, abi_vlen);
+ rtx reg = riscv_get_vector_arg (info, cum, vla_mode,
+ return_p, /* vls_p */ true);
+
+  /* Couldn't get a vector register to pass it in; pass by memory.  */
+ if (!reg)
+ return NULL_RTX;
+
+ PUT_MODE (reg, mode);
+
+ return reg;
+}
+
+/* Pass aggregate with VLS type argument in vector argument registers. */
+
+static rtx
+riscv_pass_aggregate_in_vr (struct riscv_arg_info *info,
+ const CUMULATIVE_ARGS *cum, const_tree type,
+ bool return_p)
+{
+ riscv_aggregate_field fields[8];
+ unsigned int abi_vlen = riscv_get_cc_abi_vlen (cum->variant_cc);
+ int i;
+ int n = riscv_flatten_aggregate_argument (type, fields, true,
+ /* vls_p */ true, abi_vlen);
+
+ if (n == -1)
+ return NULL_RTX;
+
+  /* Check that all fields have the same size.  */
+  unsigned int mode_size
+    = GET_MODE_SIZE (TYPE_MODE (fields[0].type)).to_constant ();
+  for (int i = 1; i < n; i++)
+    if (GET_MODE_SIZE (TYPE_MODE (fields[i].type)).to_constant () != mode_size)
+      return NULL_RTX;
+
+  /* Check the total size is <= ABI_VLEN * 8; we use at most 8 vector
+     registers to pass the argument.  */
+  if (mode_size * 8 > abi_vlen)
+    return NULL_RTX;
+
+ /* Backup cum->used_vrs since we will defer the update until
+ riscv_function_arg_advance. */
+ CUMULATIVE_ARGS local_cum;
+ memcpy (&local_cum, cum, sizeof (local_cum));
+
+ unsigned num_vrs = 0;
+
+ /* Allocate vector registers for the arguments. */
+ rtx expr_list[8];
+ for (i = 0; i < n; i++)
+ {
+ machine_mode mode = TYPE_MODE (fields[i].type);
+ machine_mode vla_mode = riscv_get_vls_container_type (mode, abi_vlen);
+ /* Use riscv_get_vector_arg with VLA type to simplify the calling
+ convention implementation. */
+ rtx reg
+ = riscv_get_vector_arg (info, &local_cum, vla_mode,
+ return_p, /* vls_p */true);
+
+      /* Couldn't get a vector register to pass it in; pass by memory.  */
+ if (!reg)
+ return NULL_RTX;
+
+ PUT_MODE (reg, mode);
+
+ expr_list[i]
+ = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (fields[i].offset));
+
+ num_vrs += info->num_vrs;
+
+      /* Mark the allocated registers as used in USED_VRS.  */
+      for (unsigned int j = 0; j < info->num_vrs; j++)
+	{
+	  gcc_assert (!local_cum.used_vrs[info->vr_offset + j]);
+	  local_cum.used_vrs[info->vr_offset + j] = true;
+	}
+ }
+
+ info->num_vrs = num_vrs;
+
+ return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (n, expr_list));
+}
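A hypothetical user-level example of the aggregate path (the attribute spelling is from this patch; the struct and exact offsets are illustrative):

/* With ABI_VLEN = 128, both fields are 16 bytes, so the aggregate is
   flattened into two fields, each presumably allocated one vector
   register, yielding a two-entry PARALLEL with byte offsets 0 and 16.  */
typedef int v4si __attribute__ ((vector_size (16)));
struct pair { v4si a; v4si b; };
void f (struct pair p) __attribute__ ((riscv_vls_cc));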
+
/* Fill INFO with information about a single argument, and return an RTL
pattern to pass or return the argument. Return NULL_RTX if argument cannot
pass or return in registers, then the argument may be passed by reference or
@@ -6363,7 +6619,17 @@ riscv_get_arg_info (struct riscv_arg_info *info, const CUMULATIVE_ARGS *cum,
if (riscv_vector_type_p (type) && riscv_v_ext_mode_p (mode))
return riscv_get_vector_arg (info, cum, mode, return_p);
- /* For vls mode aggregated in gpr. */
+ if (riscv_vls_cc_p (cum->variant_cc))
+ {
+ if (riscv_v_ext_vls_mode_p (mode))
+ return riscv_pass_vls_in_vr (info, cum, mode, return_p);
+
+ rtx ret = riscv_pass_aggregate_in_vr (info, cum, type, return_p);
+ if (ret)
+ return ret;
+ }
+
+  /* For VLS modes passed in GPRs (non-VLS calling conventions).  */
if (riscv_v_ext_vls_mode_p (mode))
return riscv_pass_vls_aggregate_in_gpr (info, mode, gpr_base);
}
@@ -6420,7 +6686,8 @@ riscv_function_arg_advance (cumulative_args_t cum_v,
cum->used_vrs[info.vr_offset + i] = true;
}
- if ((info.num_vrs > 0 || info.num_mrs > 0) && cum->variant_cc != RISCV_CC_V)
+ if ((info.num_vrs > 0 || info.num_mrs > 0) && cum->variant_cc != RISCV_CC_V
+ && !riscv_vls_cc_p (cum->variant_cc))
{
error ("RVV type %qT cannot be passed to an unprototyped function",
arg.type);
@@ -6463,7 +6730,8 @@ riscv_function_value (const_tree ret_type, const_tree fn_decl_or_type,
{
const_tree fntype = TREE_CODE (fn_decl_or_type) == FUNCTION_DECL ?
TREE_TYPE (fn_decl_or_type) : fn_decl_or_type;
- riscv_init_cumulative_args (&args, fntype, NULL_RTX, NULL_TREE, 0);
+ riscv_init_cumulative_args (&args, fntype, NULL_RTX, NULL_TREE, 0,
+ /* check_only */true);
}
else
memset (&args, 0, sizeof args);
@@ -6532,14 +6800,20 @@ riscv_pass_by_reference (cumulative_args_t cum_v, const function_arg_info &arg)
/* Implement TARGET_RETURN_IN_MEMORY. */
static bool
-riscv_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
+riscv_return_in_memory (const_tree type, const_tree fntype)
{
CUMULATIVE_ARGS args;
+
+ if (fntype)
+ riscv_init_cumulative_args (&args, fntype, NULL_RTX, NULL_TREE, 0,
+ /* check_only */true);
+ else
+ /* The rules for returning in memory are the same as for passing the
+ first named argument by reference. */
+ memset (&args, 0, sizeof args);
+
cumulative_args_t cum = pack_cumulative_args (&args);
- /* The rules for returning in memory are the same as for passing the
- first named argument by reference. */
- memset (&args, 0, sizeof args);
function_arg_info arg (const_cast<tree> (type), /*named=*/true);
return riscv_pass_by_reference (cum, arg);
}
@@ -6583,9 +6857,9 @@ riscv_setup_incoming_varargs (cumulative_args_t cum,
/* Return the descriptor of the Standard Vector Calling Convention Variant. */
static const predefined_function_abi &
-riscv_v_abi ()
+riscv_v_abi (riscv_cc abi)
{
- predefined_function_abi &v_abi = function_abis[RISCV_CC_V];
+ predefined_function_abi &v_abi = function_abis[abi];
if (!v_abi.initialized_p ())
{
HARD_REG_SET full_reg_clobbers
@@ -6595,7 +6869,7 @@ riscv_v_abi ()
CLEAR_HARD_REG_BIT (full_reg_clobbers, regno);
for (int regno = V_REG_FIRST + 24; regno <= V_REG_FIRST + 31; regno += 1)
CLEAR_HARD_REG_BIT (full_reg_clobbers, regno);
- v_abi.initialize (RISCV_CC_V, full_reg_clobbers);
+ v_abi.initialize (abi, full_reg_clobbers);
}
return v_abi;
}
@@ -6756,13 +7030,14 @@ riscv_validate_vector_type (const_tree type, const char *hint)
RISC-V V registers. */
static bool
-riscv_return_value_is_vector_type_p (const_tree fntype)
+riscv_return_value_is_vector_type_p (const_tree fntype, bool check_only)
{
tree return_type = TREE_TYPE (fntype);
if (riscv_vector_type_p (return_type))
{
- riscv_validate_vector_type (return_type, "return type");
+ if (!check_only)
+ riscv_validate_vector_type (return_type, "return type");
return true;
}
else
@@ -6773,7 +7048,7 @@ riscv_return_value_is_vector_type_p (const_tree fntype)
RISC-V V registers. */
static bool
-riscv_arguments_is_vector_type_p (const_tree fntype)
+riscv_arguments_is_vector_type_p (const_tree fntype, bool check_only)
{
for (tree chain = TYPE_ARG_TYPES (fntype); chain && chain != void_list_node;
chain = TREE_CHAIN (chain))
@@ -6781,7 +7056,8 @@ riscv_arguments_is_vector_type_p (const_tree fntype)
tree arg_type = TREE_VALUE (chain);
if (riscv_vector_type_p (arg_type))
{
- riscv_validate_vector_type (arg_type, "argument type");
+ if (!check_only)
+ riscv_validate_vector_type (arg_type, "argument type");
return true;
}
}
@@ -6792,14 +7068,15 @@ riscv_arguments_is_vector_type_p (const_tree fntype)
/* Return true if FUNC is a riscv_vector_cc function.
For more details please reference the below link.
https://github.com/riscv-non-isa/riscv-c-api-doc/pull/67 */
+
static bool
-riscv_vector_cc_function_p (const_tree fntype)
+riscv_vector_cc_function_p (const_tree fntype, bool check_only)
{
tree attr = TYPE_ATTRIBUTES (fntype);
bool vector_cc_p = lookup_attribute ("vector_cc", attr) != NULL_TREE
|| lookup_attribute ("riscv_vector_cc", attr) != NULL_TREE;
- if (vector_cc_p && !TARGET_VECTOR)
+ if (vector_cc_p && !TARGET_VECTOR && !check_only)
error_at (input_location,
"function attribute %qs requires the V ISA extension",
"riscv_vector_cc");
@@ -6807,26 +7084,91 @@ riscv_vector_cc_function_p (const_tree fntype)
return vector_cc_p;
}
-/* Implement TARGET_FNTYPE_ABI. */
+/* Return the riscv_cc value according to the attribute arguments.  If the
+   attribute arguments are invalid, return RISCV_CC_UNKNOWN and, unless
+   CHECK_ONLY, emit an error message.  */
+
+static riscv_cc
+riscv_get_vls_cc_attr (const_tree args, bool check_only = false)
+{
+ /* Default ABI_VLEN is 128. */
+ int abi_vlen = 128;
+
+ if (args && TREE_CODE (args) == TREE_LIST)
+ {
+ tree vlen_arg = TREE_VALUE (args);
+ if (vlen_arg && TREE_CODE (vlen_arg) == INTEGER_CST)
+ abi_vlen = TREE_INT_CST_LOW (vlen_arg);
+ }
+
+  if (!riscv_valid_abi_vlen_vls_cc_p (abi_vlen))
+    {
+      if (!check_only)
+	error_at (input_location,
+		  "unsupported %<ABI_VLEN%> value %d for %qs attribute; "
+		  "%<ABI_VLEN%> must be in the range [32, 16384] and must "
+		  "be a power of 2",
+		  abi_vlen, "riscv_vls_cc");
+      return RISCV_CC_UNKNOWN;
+    }
+
+ return riscv_get_riscv_cc_by_abi_vlen (abi_vlen);
+}
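Usage sketch for the attribute argument handling above (hypothetical declarations; the diagnostics follow the error text in this function):

typedef int v4si __attribute__ ((vector_size (16)));
void f (v4si) __attribute__ ((riscv_vls_cc));        /* ABI_VLEN defaults to 128.  */
void g (v4si) __attribute__ ((riscv_vls_cc (256)));  /* Explicit ABI_VLEN.  */
void h (v4si) __attribute__ ((riscv_vls_cc (100)));  /* Rejected: not a power of 2
							in [32, 16384].  */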
+
+/* Return the calling convention for FNTYPE if it is a riscv_vls_cc
+   function, or RISCV_CC_UNKNOWN otherwise.  For more details please
+   reference the below link.
+   https://github.com/riscv-non-isa/riscv-c-api-doc/pull/67  */
+static riscv_cc
+riscv_vls_cc_function_abi (const_tree fntype, bool check_only)
+{
+ tree attr = TYPE_ATTRIBUTES (fntype);
+ bool vls_cc_p = lookup_attribute ("vls_cc", attr) != NULL_TREE
+ || lookup_attribute ("riscv_vls_cc", attr) != NULL_TREE;
+
+ if (!vls_cc_p)
+ return RISCV_CC_UNKNOWN;
+
+ if (!TARGET_VECTOR && !check_only)
+ error_at (input_location,
+ "function attribute %qs requires the vector ISA extension",
+ "riscv_vls_cc");
+
+ tree args = TREE_VALUE (attr);
+  return riscv_get_vls_cc_attr (args, check_only);
+}
+
+/* Implementation of TARGET_FNTYPE_ABI, but with one extra parameter
+   CHECK_ONLY to suppress error messages.  */
static const predefined_function_abi &
-riscv_fntype_abi (const_tree fntype)
+riscv_fntype_abi_1 (const_tree fntype, bool check_only)
{
/* Implement the vector calling convention. For more details please
reference the below link.
https://github.com/riscv-non-isa/riscv-elf-psabi-doc/pull/389 */
bool validate_v_abi_p = false;
- validate_v_abi_p |= riscv_return_value_is_vector_type_p (fntype);
- validate_v_abi_p |= riscv_arguments_is_vector_type_p (fntype);
- validate_v_abi_p |= riscv_vector_cc_function_p (fntype);
+ validate_v_abi_p |= riscv_return_value_is_vector_type_p (fntype, check_only);
+ validate_v_abi_p |= riscv_arguments_is_vector_type_p (fntype, check_only);
+ validate_v_abi_p |= riscv_vector_cc_function_p (fntype, check_only);
if (validate_v_abi_p)
- return riscv_v_abi ();
+ return riscv_v_abi (RISCV_CC_V);
+
+ riscv_cc abi = riscv_vls_cc_function_abi (fntype, check_only);
+ if (abi != RISCV_CC_UNKNOWN)
+ return riscv_v_abi (abi);
return default_function_abi;
}
+/* Implement TARGET_FNTYPE_ABI. */
+
+static const predefined_function_abi &
+riscv_fntype_abi (const_tree fntype)
+{
+  return riscv_fntype_abi_1 (fntype, /* check_only */false);
+}
+
/* Return riscv calling convention of call_insn. */
riscv_cc
get_riscv_cc (const rtx use)
@@ -6916,6 +7258,25 @@ riscv_handle_type_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
}
static tree
+riscv_handle_rvv_vls_cc_attribute (tree *, tree name, tree args,
+ ATTRIBUTE_UNUSED int flags,
+ bool *no_add_attrs)
+{
+ bool vls_cc_p = is_attribute_p ("vls_cc", name)
+ || is_attribute_p ("riscv_vls_cc", name);
+
+ if (!vls_cc_p)
+ return NULL_TREE;
+
+ riscv_cc cc = riscv_get_vls_cc_attr (args);
+
+ if (cc == RISCV_CC_UNKNOWN)
+ *no_add_attrs = true;
+
+ return NULL_TREE;
+}
+
+static tree
riscv_handle_rvv_vector_bits_attribute (tree *node, tree name, tree args,
ATTRIBUTE_UNUSED int flags,
bool *no_add_attrs)
@@ -10215,6 +10576,71 @@ riscv_issue_rate (void)
return tune_param->issue_rate;
}
+/* Structure for very basic vector configuration tracking in the scheduler. */
+struct last_vconfig
+{
+ bool valid;
+ bool ta;
+ bool ma;
+ uint8_t sew;
+ uint8_t vlmul;
+ rtx avl;
+} last_vconfig;
+
+/* Clear LAST_VCONFIG so we have no known state. */
+static void
+clear_vconfig (void)
+{
+ memset (&last_vconfig, 0, sizeof (last_vconfig));
+}
+
+/* Return TRUE if INSN is a vector insn needing a particular vector
+   configuration that is trivially equal to that of the last vector
+   insn issued.  Return FALSE otherwise.  */
+static bool
+compatible_with_last_vconfig (rtx_insn *insn)
+{
+  /* We might be able to extract the configuration from a preexisting
+     vsetvl, but for now just treat a vsetvl itself as incompatible.  */
+  if (vsetvl_insn_p (insn))
+    return false;
+
+ /* Nothing to do for these cases. */
+ if (!NONDEBUG_INSN_P (insn) || !has_vtype_op (insn))
+ return false;
+
+ extract_insn_cached (insn);
+
+  /* Normalize a VLMAX AVL to const0_rtx, matching how LAST_VCONFIG.AVL
+     is recorded in riscv_sched_variable_issue.  */
+  rtx avl = get_avl (insn);
+  if (avl == RVV_VLMAX)
+    avl = const0_rtx;
+  if (avl != last_vconfig.avl)
+    return false;
+
+ if (get_sew (insn) != last_vconfig.sew)
+ return false;
+
+ if (get_vlmul (insn) != last_vconfig.vlmul)
+ return false;
+
+ if (tail_agnostic_p (insn) != last_vconfig.ta)
+ return false;
+
+ if (mask_agnostic_p (insn) != last_vconfig.ma)
+ return false;
+
+ /* No differences found, they're trivially compatible. */
+ return true;
+}
+
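An illustrative scenario for the compatibility test above (instruction mix hypothetical):

/* Suppose the last issued vector insn used SEW=32, LMUL=1, ta, ma, AVL=4.
   A ready insn agreeing in all five fields can issue without a new vsetvli:
     vadd.vv v1, v2, v3    ; SEW=32, LMUL=1, ta, ma, AVL=4  -> compatible
     vmul.vv v4, v5, v6    ; SEW=64, LMUL=2, ta, ma, AVL=2  -> incompatible
   The reorder hook below prefers the former when priorities allow.  */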
+/* Implement TARGET_SCHED_INIT.  We use this to track the vector
+   configuration of the last issued vector instruction.  We can then use
+   that information to potentially adjust the ready queue to issue
+   instructions of a compatible vector configuration instead of a
+   conflicting configuration.  That will reduce the number of vsetvl
+   instructions we ultimately emit.  */
+static void
+riscv_sched_init (FILE *, int, int)
+{
+ clear_vconfig ();
+}
+
/* Implement TARGET_SCHED_VARIABLE_ISSUE. */
static int
riscv_sched_variable_issue (FILE *, int, rtx_insn *insn, int more)
@@ -10239,9 +10665,88 @@ riscv_sched_variable_issue (FILE *, int, rtx_insn *insn, int more)
an assert so we can find and fix this problem. */
gcc_assert (insn_has_dfa_reservation_p (insn));
+ /* If this is a vector insn with vl/vtype info, then record the last
+ vector configuration. */
+ if (vsetvl_insn_p (insn))
+ clear_vconfig ();
+ else if (NONDEBUG_INSN_P (insn) && has_vtype_op (insn))
+ {
+ extract_insn_cached (insn);
+
+ rtx avl = get_avl (insn);
+ if (avl == RVV_VLMAX)
+ avl = const0_rtx;
+
+ if (!avl || !CONST_INT_P (avl))
+ clear_vconfig ();
+ else
+ {
+ last_vconfig.valid = true;
+ last_vconfig.avl = avl;
+ last_vconfig.sew = get_sew (insn);
+ last_vconfig.vlmul = get_vlmul (insn);
+ last_vconfig.ta = tail_agnostic_p (insn);
+ last_vconfig.ma = mask_agnostic_p (insn);
+ }
+ }
+
return more - 1;
}
+/* Implement TARGET_SCHED_REORDER.  The goal here is to look at the ready
+   queue and reorder it ever so slightly to encourage issuing an insn with
+   the same vector configuration as the most recently issued vector
+   instruction.  That will reduce the number of vsetvl instructions.  */
+static int
+riscv_sched_reorder (FILE *, int, rtx_insn **ready, int *nreadyp, int)
+{
+  /* If we don't have a valid prior vector configuration, then there is
+     no point in reordering the ready queue; similarly if there is at
+     most one entry in the queue.  */
+  if (!last_vconfig.valid || *nreadyp <= 1)
+    return riscv_issue_rate ();
+
+  int nready = *nreadyp;
+ int priority = INSN_PRIORITY (ready[nready - 1]);
+ for (int i = nready - 1; i >= 0; i--)
+ {
+ rtx_insn *insn = ready[i];
+
+ /* On a high performance core, vsetvl instructions should be
+ inexpensive. Removing them is very much a secondary concern, so
+ be extremely conservative with reordering, essentially only
+ allowing reordering within the highest priority value.
+
+ Lower end cores may benefit from more flexibility here. That
+ tuning is left to those who understand their core's behavior
+ and can thoroughly benchmark the result. Assuming such
+ designs appear, we can probably put an entry in the tuning
+ structure to indicate how much difference in priority to allow. */
+ if (INSN_PRIORITY (insn) < priority)
+ break;
+
+ if (compatible_with_last_vconfig (insn))
+ {
+ /* This entry is compatible with the last vconfig and has
+ the same priority as the most important insn. So swap
+ it so that we keep the vector configuration as-is and
+ ultimately eliminate a vsetvl.
+
+ Note no need to swap if this is the first entry in the
+ queue. */
+ if (i == nready - 1)
+ break;
+
+ std::swap (ready[i], ready[nready - 1]);
+ break;
+ }
+ }
+
+ return riscv_issue_rate ();
+}
+
/* Implement TARGET_SCHED_MACRO_FUSION_P. Return true if target supports
instruction fusion of some sort. */
@@ -11082,7 +11587,7 @@ riscv_asm_output_variant_cc (FILE *stream, const tree decl, const char *name)
if (TREE_CODE (decl) == FUNCTION_DECL)
{
riscv_cc cc = (riscv_cc) fndecl_abi (decl).id ();
- if (cc == RISCV_CC_V)
+ if (cc == RISCV_CC_V || riscv_vls_cc_p (cc))
{
fprintf (stream, "\t.variant_cc\t");
assemble_name (stream, name);
@@ -15650,9 +16155,15 @@ riscv_prefetch_offset_address_p (rtx x, machine_mode mode)
#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
#define TARGET_SCHED_MACRO_FUSION_PAIR_P riscv_macro_fusion_pair_p
+#undef TARGET_SCHED_INIT
+#define TARGET_SCHED_INIT riscv_sched_init
+
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE riscv_sched_variable_issue
+#undef TARGET_SCHED_REORDER
+#define TARGET_SCHED_REORDER riscv_sched_reorder
+
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST riscv_sched_adjust_cost
diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h
index 9146571..a0ad75c 100644
--- a/gcc/config/riscv/riscv.h
+++ b/gcc/config/riscv/riscv.h
@@ -779,6 +779,17 @@ enum riscv_cc
{
RISCV_CC_BASE = 0, /* Base standard RISC-V ABI. */
RISCV_CC_V, /* For functions that pass or return values in V registers. */
+  /* Variants of the vector calling convention with a fixed ABI_VLEN, for
+     functions with the riscv_vls_cc attribute.  */
+ RISCV_CC_VLS_V_32,
+ RISCV_CC_VLS_V_64,
+ RISCV_CC_VLS_V_128,
+ RISCV_CC_VLS_V_256,
+ RISCV_CC_VLS_V_512,
+ RISCV_CC_VLS_V_1024,
+ RISCV_CC_VLS_V_2048,
+ RISCV_CC_VLS_V_4096,
+ RISCV_CC_VLS_V_8192,
+ RISCV_CC_VLS_V_16384,
RISCV_CC_UNKNOWN
};
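A plausible shape for the ABI_VLEN-to-enum mapping used elsewhere in the patch (riscv_get_riscv_cc_by_abi_vlen itself is not shown in this diff, so the helper below is an assumption):

/* The ten RISCV_CC_VLS_V_* values are consecutive and correspond to
   ABI_VLEN = 32, 64, ..., 16384 (powers of two), so the mapping can be
   a simple offset from log2 of the ABI_VLEN.  */
static inline enum riscv_cc
vls_cc_from_abi_vlen (unsigned abi_vlen)   /* Hypothetical helper.  */
{
  return (enum riscv_cc) (RISCV_CC_VLS_V_32 + exact_log2 (abi_vlen) - 5);
}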
@@ -786,6 +797,8 @@ typedef struct {
/* The calling convention that current function used. */
enum riscv_cc variant_cc;
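+  /* ABI_VLEN in bits for the VLS calling convention; only meaningful when
+     variant_cc is one of the RISCV_CC_VLS_V_* values.  */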
+ unsigned int abi_vlen;
+
/* Number of integer registers used so far, up to MAX_ARGS_IN_REGISTERS. */
unsigned int num_gprs;
@@ -809,7 +822,7 @@ extern enum riscv_cc get_riscv_cc (const rtx use);
#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, INDIRECT, N_NAMED_ARGS) \
riscv_init_cumulative_args (&(CUM), (FNTYPE), (LIBNAME), (INDIRECT), \
- (N_NAMED_ARGS) != -1)
+ (N_NAMED_ARGS) != -1, /* check_only */false)
#define EPILOGUE_USES(REGNO) riscv_epilogue_uses (REGNO)
diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index 3cb87bf..9d34725 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -1437,6 +1437,8 @@
[(set_attr "type" "vlde,vste,vmov")
(set_attr "mode" "<MODE>")
(set (attr "merge_op_idx") (const_int INVALID_ATTRIBUTE))
+ (set (attr "has_vl_op") (const_string "false"))
+ (set (attr "has_vtype_op") (const_string "false"))
(set (attr "avl_type_idx") (const_int INVALID_ATTRIBUTE))
(set (attr "mode_idx") (const_int INVALID_ATTRIBUTE))]
)
diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
index 374288d..c713451 100644
--- a/gcc/config/xtensa/xtensa.md
+++ b/gcc/config/xtensa/xtensa.md
@@ -43,6 +43,7 @@
UNSPEC_FRAME_BLOCKAGE
UNSPEC_CEIL
UNSPEC_FLOOR
+ UNSPEC_ROUND
])
(define_c_enum "unspecv" [
@@ -104,8 +105,11 @@
;; This iterator and attribute allow FP-to-integer rounding of two types
;; to be generated from one template.
-(define_int_iterator ANY_ROUND [UNSPEC_CEIL UNSPEC_FLOOR])
-(define_int_attr m_round [(UNSPEC_CEIL "ceil") (UNSPEC_FLOOR "floor")])
+(define_int_iterator ANY_ROUND [UNSPEC_CEIL UNSPEC_FLOOR UNSPEC_ROUND])
+(define_int_attr m_round [(UNSPEC_CEIL "ceil") (UNSPEC_FLOOR "floor")
+ (UNSPEC_ROUND "round")])
+(define_int_attr c_round [(UNSPEC_CEIL "1") (UNSPEC_FLOOR "1")
+ (UNSPEC_ROUND "flag_unsafe_math_optimizations")])
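;; Editorial note (assumed rationale, not stated in the patch): C's lround
;; rounds halfway cases away from zero, e.g. lroundf (0.5f) == 1, while
;; round-to-nearest-even yields 0, so mapping lround onto the FPU round
;; instruction is only valid under -funsafe-math-optimizations; ceil and
;; floor have no such halfway ambiguity and remain unconditional.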
;; Attributes.
@@ -680,35 +684,26 @@
(set_attr "mode" "SI")
(set_attr "length" "3")])
-(define_insn_and_split "one_cmplsi2"
- [(set (match_operand:SI 0 "register_operand" "=a")
- (not:SI (match_operand:SI 1 "register_operand" "r")))]
+(define_expand "one_cmplsi2"
+ [(set (match_operand:SI 0 "register_operand")
+ (not:SI (match_operand:SI 1 "register_operand")))]
""
- "#"
- "&& can_create_pseudo_p ()"
- [(set (match_dup 2)
- (const_int -1))
- (set (match_dup 0)
- (xor:SI (match_dup 1)
- (match_dup 2)))]
{
- operands[2] = gen_reg_rtx (SImode);
-}
- [(set_attr "type" "arith")
- (set_attr "mode" "SI")
- (set (attr "length")
- (if_then_else (match_test "TARGET_DENSITY")
- (const_int 5)
- (const_int 6)))])
+ emit_insn (gen_xorsi3 (operands[0], operands[1],
+ force_reg (SImode, constm1_rtx)));
+ DONE;
+})
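;; Editorial note: the expansion relies on the identity ~x == x ^ -1.  The
;; all-ones constant is forced into a register because Xtensa has no
;; XOR-immediate instruction; expanding this early (rather than via the old
;; insn-and-split) presumably lets the -1 be shared/CSEd across multiple
;; one's complements.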
(define_insn "negsf2"
- [(set (match_operand:SF 0 "register_operand" "=f")
- (neg:SF (match_operand:SF 1 "register_operand" "f")))]
+ [(set (match_operand:SF 0 "register_operand")
+ (neg:SF (match_operand:SF 1 "register_operand")))
+ (clobber (match_scratch:SI 2))]
"TARGET_HARD_FLOAT"
- "neg.s\t%0, %1"
- [(set_attr "type" "farith")
- (set_attr "mode" "SF")
- (set_attr "length" "3")])
+ {@ [cons: =0, 1, =2; attrs: type, length]
+ [D, D, &a; arith , 7] movi.n\t%2, 1\;slli\t%2, %2, 31\;add.n\t%0, %1, %2
+ [f, f, X; farith, 3] neg.s\t%0, %1
+ }
+ [(set_attr "mode" "SF")])
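;; Editorial note: IEEE single-precision negation just flips the sign bit.
;; Since the low 31 bits of 0x80000000 are zero, addition cannot carry into
;; bit 31, so x + 0x80000000 == x ^ 0x80000000; that is what the new AR
;; alternative computes with movi.n/slli/add.n, avoiding a round trip
;; through the FP registers.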
;; Logical instructions.
@@ -1150,7 +1145,7 @@
(define_insn "*fix<s_fix>_truncsfsi2_scaled"
[(set (match_operand:SI 0 "register_operand" "=a")
(any_fix:SI (mult:SF (match_operand:SF 1 "register_operand" "f")
- (match_operand:SF 2 "fix_scaling_operand" "F"))))]
+ (match_operand:SF 2 "fix_scaling_operand" ""))))]
"TARGET_HARD_FLOAT"
"<m_fix>.s\t%0, %1, %U2"
[(set_attr "type" "fconv")
@@ -1169,7 +1164,7 @@
(define_insn "*float<s_float>sisf2_scaled"
[(set (match_operand:SF 0 "register_operand" "=f")
(mult:SF (any_float:SF (match_operand:SI 1 "register_operand" "a"))
- (match_operand:SF 2 "float_scaling_operand" "F")))]
+ (match_operand:SF 2 "float_scaling_operand" "")))]
"TARGET_HARD_FLOAT"
"<m_float>.s\t%0, %1, %V2"
[(set_attr "type" "fconv")
@@ -1179,7 +1174,7 @@
(define_insn "l<m_round>sfsi2"
[(set (match_operand:SI 0 "register_operand" "=a")
(unspec:SI [(match_operand:SF 1 "register_operand" "f")] ANY_ROUND))]
- "TARGET_HARD_FLOAT"
+ "TARGET_HARD_FLOAT && <c_round>"
"<m_round>.s\t%0, %1, 0"
[(set_attr "type" "fconv")
(set_attr "mode" "SF")
@@ -1189,7 +1184,7 @@
[(set (match_operand:SI 0 "register_operand" "=a")
(unspec:SI [(plus:SF (match_operand:SF 1 "register_operand" "f")
(match_dup 1))] ANY_ROUND))]
- "TARGET_HARD_FLOAT"
+ "TARGET_HARD_FLOAT && <c_round>"
"<m_round>.s\t%0, %1, 1"
[(set_attr "type" "fconv")
(set_attr "mode" "SF")
@@ -1198,8 +1193,8 @@
(define_insn "*l<m_round>sfsi2_scaled"
[(set (match_operand:SI 0 "register_operand" "=a")
(unspec:SI [(mult:SF (match_operand:SF 1 "register_operand" "f")
- (match_operand:SF 2 "fix_scaling_operand" "F"))] ANY_ROUND))]
- "TARGET_HARD_FLOAT"
+ (match_operand:SF 2 "fix_scaling_operand" ""))] ANY_ROUND))]
+ "TARGET_HARD_FLOAT && <c_round>"
"<m_round>.s\t%0, %1, %U2"
[(set_attr "type" "fconv")
(set_attr "mode" "SF")