Diffstat (limited to 'gcc/config')
23 files changed, 1174 insertions, 405 deletions
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index a121a18..e7c459d 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -3223,6 +3223,7 @@ DONE; } ) + (define_insn "extend<mode><Vwide>2" [(set (match_operand:<VWIDE> 0 "register_operand" "=w") (float_extend:<VWIDE> @@ -3232,6 +3233,29 @@ [(set_attr "type" "neon_fp_cvt_widen_s")] ) +/* A BF->SF conversion is a left shift by 16; however, shifts are expensive + and the generic middle-end expansion would force the value through a DI + move. Instead use EXT to do the shift, which gives better throughput and + avoids going through the GPRs. */ + +(define_expand "extendbfsf2" + [(set (match_operand:SF 0 "register_operand" "=w") + (float_extend:SF + (match_operand:BF 1 "register_operand" "w")))] + "TARGET_SIMD" +{ + rtx tmp0 = aarch64_gen_shareable_zero (V8BFmode); + rtx op0 = force_lowpart_subreg (V8BFmode, operands[1], BFmode); + rtx res = gen_reg_rtx (V8BFmode); + emit_insn (gen_aarch64_extv8bf (res, tmp0, op0, gen_int_mode (7, SImode))); + /* Subregs between floating point modes aren't allowed to change size, so go + through V4SFmode. */ + res = force_lowpart_subreg (V4SFmode, res, V8BFmode); + res = force_lowpart_subreg (SFmode, res, V4SFmode); + emit_move_insn (operands[0], res); + DONE; +}) + + ;; Float narrowing operations. (define_insn "aarch64_float_trunc_rodd_df" diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md index a34d2e3..96c183d 100644 --- a/gcc/config/gcn/gcn-valu.md +++ b/gcc/config/gcn/gcn-valu.md @@ -3128,6 +3128,20 @@ } [(set_attr "type" "mult")]) +(define_expand "abs<mode>2" + [(set (match_operand:V_INT 0 "register_operand") + (abs:V_INT (match_operand:V_INT 1 "register_operand")))] + "" + { + rtx vcc = gen_reg_rtx (DImode); + rtx zero = gcn_vec_constant (<MODE>mode, 0); + emit_insn (gen_vec_cmp<mode>di (vcc, gen_rtx_LT (VOIDmode, 0, 0), + operands[1], zero)); + emit_insn (gen_sub<mode>3_exec (operands[0], zero, operands[1], + operands[1], vcc)); + DONE; + }) + ;; }}} ;; {{{ FP binops - special cases diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc index dadcf76..ba598a8 100644 --- a/gcc/config/i386/i386-options.cc +++ b/gcc/config/i386/i386-options.cc @@ -2917,7 +2917,7 @@ ix86_option_override_internal (bool main_args_p, else { opts->x_ix86_move_max = opts->x_prefer_vector_width_type; - if (opts_set->x_ix86_move_max == PVW_NONE) + if (opts->x_ix86_move_max == PVW_NONE) { if (TARGET_AVX512F_P (opts->x_ix86_isa_flags)) opts->x_ix86_move_max = PVW_AVX512; diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 8a3e336..b812d8b 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -4982,7 +4982,7 @@ "TARGET_64BIT" "@ {cltq|cdqe} - movs{lq|x}\t{%1, %0|%0, %1}" + movs{lq|xd}\t{%1, %0|%0, %1}" [(set_attr "type" "imovx") (set_attr "mode" "DI") (set_attr "prefix_0f" "0") @@ -27353,6 +27353,72 @@ (match_dup 0))] "peep2_reg_dead_p (2, operands[0])" [(set (match_dup 2) (match_dup 1))]) + +;; umax (a, add (a, b)) => [sum, ovf] = add (a, b); ovf ? a : sum +;; umin (a, add (a, b)) => [sum, ovf] = add (a, b); ovf ?
sum : a + +(define_code_attr ovf_add_cmp [(umax "geu") (umin "ltu")]) + +(define_int_iterator ovf_comm [1 2]) + +(define_insn_and_split "*plus_within_<code><mode>3_<ovf_comm>" + [(set (match_operand:SWI248 0 "register_operand") + (umaxmin:SWI248 + (plus:SWI248 (match_operand:SWI248 1 "nonimmediate_operand") + (match_operand:SWI248 2 "<general_operand>")) + (match_dup ovf_comm))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_CMOVE + && ix86_pre_reload_split ()" + "#" + "&& 1" + [(parallel + [(set (reg:CCC FLAGS_REG) + (compare:CCC + (plus:SWI248 (match_dup 1) (match_dup 2)) + (match_dup ovf_comm))) + (set (match_dup 3) + (plus:SWI248 (match_dup 1) (match_dup 2)))]) + (set (match_dup 0) + (if_then_else:SWI248 + (<ovf_add_cmp> (reg:CCC FLAGS_REG) (const_int 0)) + (match_dup 3) + (match_dup ovf_comm)))] +{ + operands[<ovf_comm>] = force_reg (<MODE>mode, operands[<ovf_comm>]); + operands[3] = gen_reg_rtx (<MODE>mode); +}) + +;; umax (a, sub (a, b)) => [diff, udf] = sub (a, b); udf ? diff : a +;; umin (a, sub (a, b)) => [diff, udf] = sub (a, b); udf ? a : diff + +(define_code_attr udf_sub_cmp [(umax "ltu") (umin "geu")]) + +(define_insn_and_split "*minus_within_<code><mode>3" + [(set (match_operand:SWI248 0 "register_operand") + (umaxmin:SWI248 + (minus:SWI248 (match_operand:SWI248 1 "nonimmediate_operand") + (match_operand:SWI248 2 "<general_operand>")) + (match_dup 1))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_CMOVE + && ix86_pre_reload_split ()" + "#" + "&& 1" + [(parallel + [(set (reg:CC FLAGS_REG) + (compare:CC (match_dup 1) (match_dup 2))) + (set (match_dup 3) + (minus:SWI248 (match_dup 1) (match_dup 2)))]) + (set (match_dup 0) + (if_then_else:SWI248 + (<udf_sub_cmp> (reg:CC FLAGS_REG) (const_int 0)) + (match_dup 3) + (match_dup 1)))] +{ + operands[1] = force_reg (<MODE>mode, operands[1]); + operands[3] = gen_reg_rtx (<MODE>mode); +}) ;; Misc patterns (?) 
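A note on the umax/umin splitters above: they recognize the branchless overflow-check idiom at the RTL level. The addition itself sets the carry flag (modeled via CCCmode), so no separate comparison is needed and a single cmov picks between the sum and the original operand. A minimal C sketch of source that should now benefit, assuming a TARGET_CMOVE target (illustrative, not part of the patch):

    /* max (a, a + b): keeps A when the addition wraps.  GCC canonicalizes
       the ternary below to umax (a, a + b), which the new splitter turns
       into an add that sets CF plus a cmov, instead of add + cmp + cmov.  */
    unsigned long
    add_keep_a_on_overflow (unsigned long a, unsigned long b)
    {
      unsigned long sum = a + b;
      return sum > a ? sum : a;
    }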
@@ -27859,7 +27925,7 @@ { output_asm_insn ("mov{<imodesuffix>}\t{%3, %<k>1|%<k>1, %3}", operands); output_asm_insn ("mov{<imodesuffix>}\t{%<k>1, %0|%0, %<k>1}", operands); - return "movs{lq|x}\t{%2, %1|%1, %2}"; + return "movs{lq|xd}\t{%2, %1|%1, %2}"; } [(set_attr "type" "multi") (set_attr "length" "24")]) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 5eba992..7d91585 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -643,6 +643,9 @@ (define_mode_iterator VI2_AVX512F [(V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI]) +(define_mode_iterator VI2_AVX10_2 + [(V32HI "TARGET_AVX10_2") (V16HI "TARGET_AVX2") V8HI]) + (define_mode_iterator VI2_AVX512VNNIBW [(V32HI "TARGET_AVX512BW || TARGET_AVX512VNNI") (V16HI "TARGET_AVX2") V8HI]) @@ -32334,8 +32337,8 @@ (define_expand "usdot_prod<sseunpackmodelower><mode>" [(match_operand:<sseunpackmode> 0 "register_operand") - (match_operand:VI2_AVX512F 1 "register_operand") - (match_operand:VI2_AVX512F 2 "register_operand") + (match_operand:VI2_AVX10_2 1 "register_operand") + (match_operand:VI2_AVX10_2 2 "register_operand") (match_operand:<sseunpackmode> 3 "register_operand")] "TARGET_AVXVNNIINT16 || TARGET_AVX10_2" { @@ -32352,8 +32355,8 @@ (define_expand "udot_prod<sseunpackmodelower><mode>" [(match_operand:<sseunpackmode> 0 "register_operand") - (match_operand:VI2_AVX512F 1 "register_operand") - (match_operand:VI2_AVX512F 2 "register_operand") + (match_operand:VI2_AVX10_2 1 "register_operand") + (match_operand:VI2_AVX10_2 2 "register_operand") (match_operand:<sseunpackmode> 3 "register_operand")] "TARGET_AVXVNNIINT16 || TARGET_AVX10_2" { diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in index 39c1545..f0c089a 100644 --- a/gcc/config/loongarch/genopts/loongarch.opt.in +++ b/gcc/config/loongarch/genopts/loongarch.opt.in @@ -205,6 +205,10 @@ mmax-inline-memcpy-size= Target Joined RejectNegative UInteger Var(la_max_inline_memcpy_size) Init(1024) Save -mmax-inline-memcpy-size=SIZE Set the max size of memcpy to inline, default is 1024. +mbreak-code= +Target Joined UInteger Var(la_break_code) Init(-1) Save +-mbreak-code=CODE Use 'break CODE' for traps supposed to be unrecoverable, or an 'amswap.w' instruction leading to INE if CODE is out of range. + Enum Name(explicit_relocs) Type(int) The code model option names for -mexplicit-relocs: diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md index eed4d2b..7a91473 100644 --- a/gcc/config/loongarch/lasx.md +++ b/gcc/config/loongarch/lasx.md @@ -146,9 +146,6 @@ ;; Only integer modes equal or larger than a word. (define_mode_iterator ILASX_DW [V4DI V8SI]) -;; Only integer modes smaller than a word. -(define_mode_iterator ILASX_HB [V16HI V32QI]) - ;; Only used for immediate set shuffle elements instruction. 
(define_mode_iterator LASX_WHB_W [V8SI V16HI V32QI V8SF]) @@ -834,59 +831,6 @@ [(set_attr "type" "simd_div") (set_attr "mode" "<MODE>")]) -(define_insn "xor<mode>3" - [(set (match_operand:LASX 0 "register_operand" "=f,f,f") - (xor:LASX - (match_operand:LASX 1 "register_operand" "f,f,f") - (match_operand:LASX 2 "reg_or_vector_same_val_operand" "f,YC,Urv8")))] - "ISA_HAS_LASX" - "@ - xvxor.v\t%u0,%u1,%u2 - xvbitrevi.%v0\t%u0,%u1,%V2 - xvxori.b\t%u0,%u1,%B2" - [(set_attr "type" "simd_logic,simd_bit,simd_logic") - (set_attr "mode" "<MODE>")]) - -(define_insn "ior<mode>3" - [(set (match_operand:LASX 0 "register_operand" "=f,f,f") - (ior:LASX - (match_operand:LASX 1 "register_operand" "f,f,f") - (match_operand:LASX 2 "reg_or_vector_same_val_operand" "f,YC,Urv8")))] - "ISA_HAS_LASX" - "@ - xvor.v\t%u0,%u1,%u2 - xvbitseti.%v0\t%u0,%u1,%V2 - xvori.b\t%u0,%u1,%B2" - [(set_attr "type" "simd_logic,simd_bit,simd_logic") - (set_attr "mode" "<MODE>")]) - -(define_insn "and<mode>3" - [(set (match_operand:LASX 0 "register_operand" "=f,f,f") - (and:LASX - (match_operand:LASX 1 "register_operand" "f,f,f") - (match_operand:LASX 2 "reg_or_vector_same_val_operand" "f,YZ,Urv8")))] - "ISA_HAS_LASX" -{ - switch (which_alternative) - { - case 0: - return "xvand.v\t%u0,%u1,%u2"; - case 1: - { - rtx elt0 = CONST_VECTOR_ELT (operands[2], 0); - unsigned HOST_WIDE_INT val = ~UINTVAL (elt0); - operands[2] = loongarch_gen_const_int_vector (<MODE>mode, val & (-val)); - return "xvbitclri.%v0\t%u0,%u1,%V2"; - } - case 2: - return "xvandi.b\t%u0,%u1,%B2"; - default: - gcc_unreachable (); - } -} - [(set_attr "type" "simd_logic,simd_bit,simd_logic") - (set_attr "mode" "<MODE>")]) - (define_insn "one_cmpl<mode>2" [(set (match_operand:ILASX 0 "register_operand" "=f") (not:ILASX (match_operand:ILASX 1 "register_operand" "f")))] @@ -1035,16 +979,6 @@ [(set_attr "type" "simd_fmadd") (set_attr "mode" "<MODE>")]) -(define_insn "fnma<mode>4" - [(set (match_operand:FLASX 0 "register_operand" "=f") - (fma:FLASX (neg:FLASX (match_operand:FLASX 1 "register_operand" "f")) - (match_operand:FLASX 2 "register_operand" "f") - (match_operand:FLASX 3 "register_operand" "0")))] - "ISA_HAS_LASX" - "xvfnmsub.<flasxfmt>\t%u0,%u1,%u2,%u0" - [(set_attr "type" "simd_fmadd") - (set_attr "mode" "<MODE>")]) - (define_expand "sqrt<mode>2" [(set (match_operand:FLASX 0 "register_operand") (sqrt:FLASX (match_operand:FLASX 1 "register_operand")))] @@ -3633,69 +3567,38 @@ [(set_attr "type" "simd_store") (set_attr "mode" "DI")]) -(define_expand "vec_widen_<su>add_hi_<mode>" +(define_expand "vec_widen_<su><optab>_<hi_lo>_<mode>" [(match_operand:<VDMODE256> 0 "register_operand") - (any_extend:<VDMODE256> (match_operand:ILASX_HB 1 "register_operand")) - (any_extend:<VDMODE256> (match_operand:ILASX_HB 2 "register_operand"))] - "ISA_HAS_LASX" -{ - loongarch_expand_vec_widen_hilo (operands[0], operands[1], operands[2], - <u_bool>, true, "add"); - DONE; -}) - -(define_expand "vec_widen_<su>add_lo_<mode>" - [(match_operand:<VDMODE256> 0 "register_operand") - (any_extend:<VDMODE256> (match_operand:ILASX_HB 1 "register_operand")) - (any_extend:<VDMODE256> (match_operand:ILASX_HB 2 "register_operand"))] - "ISA_HAS_LASX" -{ - loongarch_expand_vec_widen_hilo (operands[0], operands[1], operands[2], - <u_bool>, false, "add"); - DONE; -}) - -(define_expand "vec_widen_<su>sub_hi_<mode>" - [(match_operand:<VDMODE256> 0 "register_operand") - (any_extend:<VDMODE256> (match_operand:ILASX_HB 1 "register_operand")) - (any_extend:<VDMODE256> (match_operand:ILASX_HB 2 
"register_operand"))] - "ISA_HAS_LASX" -{ - loongarch_expand_vec_widen_hilo (operands[0], operands[1], operands[2], - <u_bool>, true, "sub"); - DONE; -}) - -(define_expand "vec_widen_<su>sub_lo_<mode>" - [(match_operand:<VDMODE256> 0 "register_operand") - (any_extend:<VDMODE256> (match_operand:ILASX_HB 1 "register_operand")) - (any_extend:<VDMODE256> (match_operand:ILASX_HB 2 "register_operand"))] - "ISA_HAS_LASX" -{ - loongarch_expand_vec_widen_hilo (operands[0], operands[1], operands[2], - <u_bool>, false, "sub"); - DONE; -}) - -(define_expand "vec_widen_<su>mult_hi_<mode>" - [(match_operand:<VDMODE256> 0 "register_operand") - (any_extend:<VDMODE256> (match_operand:ILASX_HB 1 "register_operand")) - (any_extend:<VDMODE256> (match_operand:ILASX_HB 2 "register_operand"))] + (match_operand:ILASX_WHB 1 "register_operand") + (match_operand:ILASX_WHB 2 "register_operand") + (any_extend (const_int 0)) + (addsub (const_int 0) (const_int 0)) + (const_int zero_one)] "ISA_HAS_LASX" { + rtx (*fn_even) (rtx, rtx, rtx) = +gen_lasx_xv<optab>wev_<dlasxfmt>_<lasxfmt><u>; + rtx (*fn_odd) (rtx, rtx, rtx) = +gen_lasx_xv<optab>wod_<dlasxfmt>_<lasxfmt><u>; loongarch_expand_vec_widen_hilo (operands[0], operands[1], operands[2], - <u_bool>, true, "mult"); + <zero_one>, fn_even, fn_odd); DONE; }) -(define_expand "vec_widen_<su>mult_lo_<mode>" +(define_expand "vec_widen_<su>mult_<hi_lo>_<mode>" [(match_operand:<VDMODE256> 0 "register_operand") - (any_extend:<VDMODE256> (match_operand:ILASX_HB 1 "register_operand")) - (any_extend:<VDMODE256> (match_operand:ILASX_HB 2 "register_operand"))] + (match_operand:ILASX_WHB 1 "register_operand") + (match_operand:ILASX_WHB 2 "register_operand") + (any_extend (const_int 0)) + (const_int zero_one)] "ISA_HAS_LASX" { + rtx (*fn_even) (rtx, rtx, rtx) = +gen_lasx_xvmulwev_<dlasxfmt>_<lasxfmt><u>; + rtx (*fn_odd) (rtx, rtx, rtx) = +gen_lasx_xvmulwod_<dlasxfmt>_<lasxfmt><u>; loongarch_expand_vec_widen_hilo (operands[0], operands[1], operands[2], - <u_bool>, false, "mult"); + <zero_one>, fn_even, fn_odd); DONE; }) diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h index 6ecbe27..bec4368 100644 --- a/gcc/config/loongarch/loongarch-protos.h +++ b/gcc/config/loongarch/loongarch-protos.h @@ -198,7 +198,8 @@ extern void loongarch_register_frame_header_opt (void); extern void loongarch_expand_vec_cond_expr (machine_mode, machine_mode, rtx *); extern void loongarch_expand_vec_cond_mask_expr (machine_mode, machine_mode, rtx *); -extern void loongarch_expand_vec_widen_hilo (rtx, rtx, rtx, bool, bool, const char *); +extern void loongarch_expand_vec_widen_hilo (rtx, rtx, rtx, bool, + rtx (*)(rtx, rtx, rtx), rtx (*)(rtx, rtx, rtx)); /* Routines implemented in loongarch-c.c. 
*/ void loongarch_cpu_cpp_builtins (cpp_reader *); @@ -217,7 +218,8 @@ extern void loongarch_emit_swdivsf (rtx, rtx, rtx, machine_mode); extern bool loongarch_explicit_relocs_p (enum loongarch_symbol_type); extern bool loongarch_symbol_extreme_p (enum loongarch_symbol_type); extern bool loongarch_option_valid_attribute_p (tree, tree, tree, int); -extern void loongarch_option_override_internal (struct loongarch_target *, struct gcc_options *, struct gcc_options *); +extern void loongarch_option_override_internal (struct loongarch_target *, + struct gcc_options *, struct gcc_options *); extern void loongarch_reset_previous_fndecl (void); extern void loongarch_save_restore_target_globals (tree new_tree); extern void loongarch_register_pragmas (void); diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc index c782cac..f7ce3aa 100644 --- a/gcc/config/loongarch/loongarch.cc +++ b/gcc/config/loongarch/loongarch.cc @@ -1718,14 +1718,36 @@ loongarch_symbol_binds_local_p (const_rtx x) bool loongarch_const_vector_bitimm_set_p (rtx op, machine_mode mode) { - if (GET_CODE (op) == CONST_VECTOR && op != CONST0_RTX (mode)) + if (GET_CODE (op) == CONST_VECTOR + && (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT + || GET_MODE_CLASS (mode) == MODE_VECTOR_INT)) { - unsigned HOST_WIDE_INT val = UINTVAL (CONST_VECTOR_ELT (op, 0)); + unsigned HOST_WIDE_INT val; + + if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT) + { + rtx val_s = CONST_VECTOR_ELT (op, 0); + const REAL_VALUE_TYPE *x = CONST_DOUBLE_REAL_VALUE (val_s); + if (GET_MODE (val_s) == DFmode) + { + long tmp[2]; + REAL_VALUE_TO_TARGET_DOUBLE (*x, tmp); + val = (unsigned HOST_WIDE_INT) tmp[1] << 32 | tmp[0]; + } + else + { + long tmp; + REAL_VALUE_TO_TARGET_SINGLE (*x, tmp); + val = (unsigned HOST_WIDE_INT) tmp; + } + } + else + val = UINTVAL (CONST_VECTOR_ELT (op, 0)); + int vlog2 = exact_log2 (val & GET_MODE_MASK (GET_MODE_INNER (mode))); if (vlog2 != -1) { - gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT); gcc_assert (vlog2 >= 0 && vlog2 <= GET_MODE_UNIT_BITSIZE (mode) - 1); return loongarch_const_vector_same_val_p (op, mode); } @@ -1740,14 +1762,35 @@ loongarch_const_vector_bitimm_set_p (rtx op, machine_mode mode) bool loongarch_const_vector_bitimm_clr_p (rtx op, machine_mode mode) { - if (GET_CODE (op) == CONST_VECTOR && op != CONSTM1_RTX (mode)) + if (GET_CODE (op) == CONST_VECTOR + && (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT + || GET_MODE_CLASS (mode) == MODE_VECTOR_INT)) { - unsigned HOST_WIDE_INT val = ~UINTVAL (CONST_VECTOR_ELT (op, 0)); + unsigned HOST_WIDE_INT val; + if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT) + { + rtx val_s = CONST_VECTOR_ELT (op, 0); + const REAL_VALUE_TYPE *x = CONST_DOUBLE_REAL_VALUE (val_s); + if (GET_MODE (val_s) == DFmode) + { + long tmp[2]; + REAL_VALUE_TO_TARGET_DOUBLE (*x, tmp); + val = ~((unsigned HOST_WIDE_INT) tmp[1] << 32 | tmp[0]); + } + else + { + long tmp; + REAL_VALUE_TO_TARGET_SINGLE (*x, tmp); + val = ~((unsigned HOST_WIDE_INT) tmp); + } + } + else + val = ~UINTVAL (CONST_VECTOR_ELT (op, 0)); + int vlog2 = exact_log2 (val & GET_MODE_MASK (GET_MODE_INNER (mode))); if (vlog2 != -1) { - gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT); gcc_assert (vlog2 >= 0 && vlog2 <= GET_MODE_UNIT_BITSIZE (mode) - 1); return loongarch_const_vector_same_val_p (op, mode); } @@ -4056,6 +4099,17 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code, *total = loongarch_cost->int_mult_di; else *total = loongarch_cost->int_mult_si; + + /* Check for mul_widen. 
*/ + if ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND + && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND) + || (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND + && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)) + { + *total += (set_src_cost (XEXP (XEXP (x, 0), 0), mode, speed) + + set_src_cost (XEXP (XEXP (x, 1), 0), mode, speed)); + return true; + } return false; case DIV: @@ -5479,12 +5533,32 @@ loongarch_expand_conditional_move (rtx *operands) } } + auto is_binary_op_0_keep_orig = [](enum rtx_code code) + { + switch (code) + { + case PLUS: + case MINUS: + case IOR: + case XOR: + case ROTATE: + case ROTATERT: + case ASHIFT: + case ASHIFTRT: + case LSHIFTRT: + return true; + default: + return false; + } + }; + /* Check if the optimization conditions are met. */ if (value_if_true_insn && value_if_false_insn - /* Make sure that value_if_false and var are the same. */ - && BINARY_P (value_if_true_insn_src = SET_SRC (single_set (value_if_true_insn))) + /* Make sure that operand 0 of the operation keeps the original value. */ + && (value_if_true_insn_src = SET_SRC (single_set (value_if_true_insn))) + && is_binary_op_0_keep_orig (GET_CODE (value_if_true_insn_src)) /* Make sure that both value_if_true and value_if_false has the same var. */ && rtx_equal_p (XEXP (value_if_true_insn_src, 0), @@ -6439,7 +6513,28 @@ loongarch_print_operand (FILE *file, rtx op, int letter) if (CONST_VECTOR_P (op)) { machine_mode mode = GET_MODE_INNER (GET_MODE (op)); - unsigned HOST_WIDE_INT val = UINTVAL (CONST_VECTOR_ELT (op, 0)); + rtx val_s = CONST_VECTOR_ELT (op, 0); + unsigned HOST_WIDE_INT val; + + if (GET_MODE_CLASS (mode) == MODE_FLOAT) + { + const REAL_VALUE_TYPE *x = CONST_DOUBLE_REAL_VALUE (val_s); + if (GET_MODE (val_s) == DFmode) + { + long tmp[2]; + REAL_VALUE_TO_TARGET_DOUBLE (*x, tmp); + val = (unsigned HOST_WIDE_INT) tmp[1] << 32 | tmp[0]; + } + else + { + long tmp; + REAL_VALUE_TO_TARGET_SINGLE (*x, tmp); + val = (unsigned HOST_WIDE_INT) tmp; + } + } + else + val = UINTVAL (val_s); + int vlog2 = exact_log2 (val & GET_MODE_MASK (mode)); if (vlog2 != -1) fprintf (file, "%d", vlog2); @@ -8808,105 +8903,22 @@ loongarch_expand_vec_interleave (rtx target, rtx op0, rtx op1, bool high_p) void loongarch_expand_vec_widen_hilo (rtx dest, rtx op1, rtx op2, - bool uns_p, bool high_p, const char *optab) + bool high_p, rtx (*fn_even) (rtx, rtx, rtx), + rtx (*fn_odd) (rtx, rtx, rtx)) { machine_mode wmode = GET_MODE (dest); machine_mode mode = GET_MODE (op1); - rtx t1, t2, t3; - - t1 = gen_reg_rtx (wmode); - t2 = gen_reg_rtx (wmode); - t3 = gen_reg_rtx (wmode); - switch (mode) - { - case V16HImode: - if (!strcmp (optab, "add")) - { - if (!uns_p) - { - emit_insn (gen_lasx_xvaddwev_w_h (t1, op1, op2)); - emit_insn (gen_lasx_xvaddwod_w_h (t2, op1, op2)); - } - else - { - emit_insn (gen_lasx_xvaddwev_w_hu (t1, op1, op2)); - emit_insn (gen_lasx_xvaddwod_w_hu (t2, op1, op2)); - } - } - else if (!strcmp (optab, "mult")) - { - if (!uns_p) - { - emit_insn (gen_lasx_xvmulwev_w_h (t1, op1, op2)); - emit_insn (gen_lasx_xvmulwod_w_h (t2, op1, op2)); - } - else - { - emit_insn (gen_lasx_xvmulwev_w_hu (t1, op1, op2)); - emit_insn (gen_lasx_xvmulwod_w_hu (t2, op1, op2)); - } - } - else if (!strcmp (optab, "sub")) - { - if (!uns_p) - { - emit_insn (gen_lasx_xvsubwev_w_h (t1, op1, op2)); - emit_insn (gen_lasx_xvsubwod_w_h (t2, op1, op2)); - } - else - { - emit_insn (gen_lasx_xvsubwev_w_hu (t1, op1, op2)); - emit_insn (gen_lasx_xvsubwod_w_hu (t2, op1, op2)); - } - } - break; - case V32QImode: - if (!strcmp (optab, "add")) - { - if (!uns_p) - { - emit_insn (gen_lasx_xvaddwev_h_b
(t1, op1, op2)); - emit_insn (gen_lasx_xvaddwod_h_b (t2, op1, op2)); - } - else - { - emit_insn (gen_lasx_xvaddwev_h_bu (t1, op1, op2)); - emit_insn (gen_lasx_xvaddwod_h_bu (t2, op1, op2)); - } - } - else if (!strcmp (optab, "mult")) - { - if (!uns_p) - { - emit_insn (gen_lasx_xvmulwev_h_b (t1, op1, op2)); - emit_insn (gen_lasx_xvmulwod_h_b (t2, op1, op2)); - } - else - { - emit_insn (gen_lasx_xvmulwev_h_bu (t1, op1, op2)); - emit_insn (gen_lasx_xvmulwod_h_bu (t2, op1, op2)); - } - } - else if (!strcmp (optab, "sub")) - { - if (!uns_p) - { - emit_insn (gen_lasx_xvsubwev_h_b (t1, op1, op2)); - emit_insn (gen_lasx_xvsubwod_h_b (t2, op1, op2)); - } - else - { - emit_insn (gen_lasx_xvsubwev_h_bu (t1, op1, op2)); - emit_insn (gen_lasx_xvsubwod_h_bu (t2, op1, op2)); - } - } - break; + gcc_assert (ISA_HAS_LASX + && GET_MODE_SIZE (mode) == 32 + && mode != V4DImode); - default: - gcc_unreachable (); - } + rtx t1 = gen_reg_rtx (wmode); + rtx t2 = gen_reg_rtx (wmode); + rtx t3 = gen_reg_rtx (wmode); + emit_insn (fn_even (t1, op1, op2)); + emit_insn (fn_odd (t2, op1, op2)); loongarch_expand_vec_interleave (t3, t1, t2, high_p); emit_move_insn (dest, gen_lowpart (wmode, t3)); } diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md index a275a2d..625f30c 100644 --- a/gcc/config/loongarch/loongarch.md +++ b/gcc/config/loongarch/loongarch.md @@ -679,14 +679,22 @@ ;; .................... ;; -(define_insn "trap" - [(trap_if (const_int 1) (const_int 0))] +(define_insn "*trap" + [(trap_if (const_int 1) (match_operand 0 "const_int_operand"))] "" { - return "break\t0"; + return (const_uimm15_operand (operands[0], VOIDmode) + ? "break\t%0" + : "amswap.w\t$r0,$r1,$r0"); } [(set_attr "type" "trap")]) +(define_expand "trap" + [(trap_if (const_int 1) (match_dup 0))] + "" +{ + operands[0] = GEN_INT (la_break_code); +}) ;; @@ -2523,6 +2531,38 @@ [(set_attr "type" "condmove") (set_attr "mode" "<GPR:MODE>")]) +(define_insn_and_split "both_non_zero" + [(set (match_operand:DI 0 "register_operand" "=r") + (and:DI (ne:DI (match_operand:DI 1 "register_operand" "r") + (const_int 0)) + (ne:DI (match_operand:DI 2 "register_operand" "r") + (const_int 0))))] + "TARGET_64BIT" + "#" + "&& true" + [(set (match_dup 0) + (ne:DI (match_dup 1) (const_int 0))) + (set (match_dup 0) + (if_then_else:DI (ne:DI (match_dup 2) (const_int 0)) + (match_dup 0) + (const_int 0)))]) + +(define_insn_and_split "both_non_zero_subreg" + [(set (match_operand:DI 0 "register_operand" "=r") + (and:DI (subreg:DI (ne:SI (match_operand:DI 1 "register_operand" "r") + (const_int 0)) 0) + (subreg:DI (ne:SI (match_operand:DI 2 "register_operand" "r") + (const_int 0)) 0)))] + "TARGET_64BIT" + "#" + "&& true" + [(set (match_dup 0) + (ne:DI (match_dup 1) (const_int 0))) + (set (match_dup 0) + (if_then_else:DI (ne:DI (match_dup 2) (const_int 0)) + (match_dup 0) + (const_int 0)))]) + ;; fsel copies the 3rd argument when the 1st is non-zero and the 2nd ;; argument if the 1st is zero. This means operand 2 and 3 are ;; inverted in the instruction. 
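The both_non_zero splitters just above convert the AND of two zero-tests into one set plus one conditional move. As a rough sketch (illustrative, not from the patch), on a 64-bit LoongArch target a function like the following can then be emitted as one sltu followed by a maskeqz-style conditional move, instead of two sltu instructions and an and:

    /* (a != 0) && (b != 0): one zero-test plus one conditional move,
       roughly sltu $t0,$zero,$a0 then maskeqz $a0,$t0,$a1 (exact
       instruction choice is up to the register allocator).  */
    int
    both_nonzero (long a, long b)
    {
      return a != 0 && b != 0;
    }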
@@ -3041,6 +3081,16 @@ [(set_attr "type" "shift") (set_attr "mode" "SI")]) +(define_insn "sign_extend_ashift<GPR:mode><SHORT:mode>" + [(set (match_operand:GPR 0 "register_operand" "=r") + (ashift:GPR + (sign_extend:GPR (match_operand:SHORT 1 "register_operand" "r")) + (match_operand:SI 2 "const_uimm5_operand")))] + "(GET_MODE_BITSIZE (<SHORT:MODE>mode) + INTVAL (operands[2])) == 32" + "slli.w\t%0,%1,%2" + [(set_attr "type" "shift") + (set_attr "mode" "<GPR:MODE>")]) + (define_insn "*rotr<mode>3" [(set (match_operand:GPR 0 "register_operand" "=r,r") (rotatert:GPR (match_operand:GPR 1 "register_operand" "r,r") diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt index fbe61c0..628eabe 100644 --- a/gcc/config/loongarch/loongarch.opt +++ b/gcc/config/loongarch/loongarch.opt @@ -213,6 +213,10 @@ mmax-inline-memcpy-size= Target Joined RejectNegative UInteger Var(la_max_inline_memcpy_size) Init(1024) Save -mmax-inline-memcpy-size=SIZE Set the max size of memcpy to inline, default is 1024. +mbreak-code= +Target Joined UInteger Var(la_break_code) Init(-1) Save +-mbreak-code=CODE Use 'break CODE' for traps supposed to be unrecoverable, or an 'amswap.w' instruction leading to INE if CODE is out of range. + Enum Name(explicit_relocs) Type(int) The code model option names for -mexplicit-relocs: diff --git a/gcc/config/loongarch/loongarch.opt.urls b/gcc/config/loongarch/loongarch.opt.urls index 606a211..c93f046 100644 --- a/gcc/config/loongarch/loongarch.opt.urls +++ b/gcc/config/loongarch/loongarch.opt.urls @@ -48,6 +48,9 @@ UrlSuffix(gcc/LoongArch-Options.html#index-mstrict-align-1) mmax-inline-memcpy-size= UrlSuffix(gcc/LoongArch-Options.html#index-mmax-inline-memcpy-size) +mbreak-code= +UrlSuffix(gcc/LoongArch-Options.html#index-mbreak-code) + mexplicit-relocs= UrlSuffix(gcc/LoongArch-Options.html#index-mexplicit-relocs-1) diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md index fb0236b..cd87757 100644 --- a/gcc/config/loongarch/lsx.md +++ b/gcc/config/loongarch/lsx.md @@ -145,9 +145,6 @@ ;; Only integer modes equal or larger than a word. (define_mode_iterator ILSX_DW [V2DI V4SI]) -;; Only integer modes smaller than a word. -(define_mode_iterator ILSX_HB [V8HI V16QI]) - ;;;; Only integer modes for fixed-point madd_q/maddr_q. 
;;(define_mode_iterator ILSX_WH [V4SI V8HI]) @@ -654,59 +651,6 @@ [(set_attr "type" "simd_div") (set_attr "mode" "<MODE>")]) -(define_insn "xor<mode>3" - [(set (match_operand:LSX 0 "register_operand" "=f,f,f") - (xor:LSX - (match_operand:LSX 1 "register_operand" "f,f,f") - (match_operand:LSX 2 "reg_or_vector_same_val_operand" "f,YC,Urv8")))] - "ISA_HAS_LSX" - "@ - vxor.v\t%w0,%w1,%w2 - vbitrevi.%v0\t%w0,%w1,%V2 - vxori.b\t%w0,%w1,%B2" - [(set_attr "type" "simd_logic,simd_bit,simd_logic") - (set_attr "mode" "<MODE>")]) - -(define_insn "ior<mode>3" - [(set (match_operand:LSX 0 "register_operand" "=f,f,f") - (ior:LSX - (match_operand:LSX 1 "register_operand" "f,f,f") - (match_operand:LSX 2 "reg_or_vector_same_val_operand" "f,YC,Urv8")))] - "ISA_HAS_LSX" - "@ - vor.v\t%w0,%w1,%w2 - vbitseti.%v0\t%w0,%w1,%V2 - vori.b\t%w0,%w1,%B2" - [(set_attr "type" "simd_logic,simd_bit,simd_logic") - (set_attr "mode" "<MODE>")]) - -(define_insn "and<mode>3" - [(set (match_operand:LSX 0 "register_operand" "=f,f,f") - (and:LSX - (match_operand:LSX 1 "register_operand" "f,f,f") - (match_operand:LSX 2 "reg_or_vector_same_val_operand" "f,YZ,Urv8")))] - "ISA_HAS_LSX" -{ - switch (which_alternative) - { - case 0: - return "vand.v\t%w0,%w1,%w2"; - case 1: - { - rtx elt0 = CONST_VECTOR_ELT (operands[2], 0); - unsigned HOST_WIDE_INT val = ~UINTVAL (elt0); - operands[2] = loongarch_gen_const_int_vector (<MODE>mode, val & (-val)); - return "vbitclri.%v0\t%w0,%w1,%V2"; - } - case 2: - return "vandi.b\t%w0,%w1,%B2"; - default: - gcc_unreachable (); - } -} - [(set_attr "type" "simd_logic,simd_bit,simd_logic") - (set_attr "mode" "<MODE>")]) - (define_insn "one_cmpl<mode>2" [(set (match_operand:ILSX 0 "register_operand" "=f") (not:ILSX (match_operand:ILSX 1 "register_operand" "f")))] @@ -852,16 +796,6 @@ [(set_attr "type" "simd_fmadd") (set_attr "mode" "<MODE>")]) -(define_insn "fnma<mode>4" - [(set (match_operand:FLSX 0 "register_operand" "=f") - (fma:FLSX (neg:FLSX (match_operand:FLSX 1 "register_operand" "f")) - (match_operand:FLSX 2 "register_operand" "f") - (match_operand:FLSX 3 "register_operand" "0")))] - "ISA_HAS_LSX" - "vfnmsub.<flsxfmt>\t%w0,%w1,%w2,%w0" - [(set_attr "type" "simd_fmadd") - (set_attr "mode" "<MODE>")]) - (define_expand "sqrt<mode>2" [(set (match_operand:FLSX 0 "register_operand") (sqrt:FLSX (match_operand:FLSX 1 "register_operand")))] @@ -3220,3 +3154,48 @@ [(set (match_dup 0) (vec_duplicate:V2DI (match_dup 1)))] "") + +(define_expand "vec_widen_<su><optab>_<hi_lo>_<mode>" + [(match_operand:<VDMODE> 0 "register_operand") + (match_operand:ILSX_WHB 1 "register_operand") + (match_operand:ILSX_WHB 2 "register_operand") + (any_extend (const_int 0)) + (addsub (const_int 0) (const_int 0)) + (const_int zero_one)] + "ISA_HAS_LSX" +{ + rtx t_even = gen_reg_rtx (<VDMODE>mode); + rtx t_odd = gen_reg_rtx (<VDMODE>mode); + emit_insn (gen_lsx_v<optab>wev_<dlsxfmt>_<lsxfmt><u> (t_even, operands[1], + operands[2])); + emit_insn (gen_lsx_v<optab>wod_<dlsxfmt>_<lsxfmt><u> (t_odd, operands[1], + operands[2])); + if (<zero_one>) + emit_insn (gen_lsx_vilvh_<dlsxfmt> (operands[0], t_even, t_odd)); + else + emit_insn (gen_lsx_vilvl_<dlsxfmt> (operands[0], t_even, t_odd)); + + DONE; +}) + +(define_expand "vec_widen_<su>mult_<hi_lo>_<mode>" + [(match_operand:<VDMODE> 0 "register_operand") + (match_operand:ILSX_WHB 1 "register_operand") + (match_operand:ILSX_WHB 2 "register_operand") + (any_extend (const_int 0)) + (const_int zero_one)] + "ISA_HAS_LSX" +{ + rtx t_even = gen_reg_rtx (<VDMODE>mode); + rtx t_odd = gen_reg_rtx 
(<VDMODE>mode); + emit_insn (gen_lsx_vmulwev_<dlsxfmt>_<lsxfmt><u> (t_even, operands[1], + operands[2])); + emit_insn (gen_lsx_vmulwod_<dlsxfmt>_<lsxfmt><u> (t_odd, operands[1], + operands[2])); + if (<zero_one>) + emit_insn (gen_lsx_vilvh_<dlsxfmt> (operands[0], t_even, t_odd)); + else + emit_insn (gen_lsx_vilvl_<dlsxfmt> (operands[0], t_even, t_odd)); + + DONE; +}) diff --git a/gcc/config/loongarch/simd.md b/gcc/config/loongarch/simd.md index 4156b26..b73f65a 100644 --- a/gcc/config/loongarch/simd.md +++ b/gcc/config/loongarch/simd.md @@ -23,6 +23,10 @@ ;; Integer modes supported by LASX. (define_mode_iterator ILASX [V4DI V8SI V16HI V32QI]) +;; Only integer modes smaller than a word. +(define_mode_iterator ILSX_HB [V8HI V16QI]) +(define_mode_iterator ILASX_HB [V16HI V32QI]) + ;; FP modes supported by LSX (define_mode_iterator FLSX [V2DF V4SF]) @@ -38,6 +42,10 @@ ;; All integer modes available (define_mode_iterator IVEC [(ILSX "ISA_HAS_LSX") (ILASX "ISA_HAS_LASX")]) +;; All integer modes smaller than a word. +(define_mode_iterator IVEC_HB [(ILSX_HB "ISA_HAS_LSX") + (ILASX_HB "ISA_HAS_LASX")]) + ;; All FP modes available (define_mode_iterator FVEC [(FLSX "ISA_HAS_LSX") (FLASX "ISA_HAS_LASX")]) @@ -90,12 +98,18 @@ (V8HI "V4SI") (V16HI "V8SI") (V16QI "V8HI") (V32QI "V16HI")]) +(define_mode_attr WVEC_QUARTER [(V8HI "V2DI") (V16HI "V4DI") + (V16QI "V4SI") (V32QI "V8SI")]) + ;; Lower-case version. (define_mode_attr wvec_half [(V2DI "v1ti") (V4DI "v2ti") (V4SI "v2di") (V8SI "v4di") (V8HI "v4si") (V16HI "v8si") (V16QI "v8hi") (V32QI "v16hi")]) +(define_mode_attr wvec_quarter [(V8HI "v2di") (V16HI "v4di") + (V16QI "v4si") (V32QI "v8si")]) + ;; Integer vector modes with the same length and unit size as a mode. (define_mode_attr VIMODE [(V2DI "V2DI") (V4SI "V4SI") (V8HI "V8HI") (V16QI "V16QI") @@ -124,12 +138,16 @@ (V8HI "h") (V16HI "h") (V16QI "b") (V32QI "b")]) -;; Suffix for widening LSX or LASX instructions. +;; Suffix for double widening LSX or LASX instructions. (define_mode_attr simdfmt_w [(V2DI "q") (V4DI "q") (V4SI "d") (V8SI "d") (V8HI "w") (V16HI "w") (V16QI "h") (V32QI "h")]) +;; Suffix for quadruple widening LSX or LASX instructions. +(define_mode_attr simdfmt_qw [(V8HI "d") (V16HI "d") + (V16QI "w") (V32QI "w")]) + ;; Suffix for integer mode in LSX or LASX instructions with FP input but ;; integer output. (define_mode_attr simdifmt_for_f [(V2DF "l") (V4DF "l") @@ -169,6 +187,8 @@ (V4SI "uimm5") (V8SI "uimm5") (V2DI "uimm6") (V4DI "uimm6")]) +(define_int_attr hi_lo [(0 "lo") (1 "hi")]) + ;; ======================================================================= ;; For many LASX instructions, the only difference of it from the LSX ;; counterpart is the length of vector operands. Describe these LSX/LASX @@ -431,6 +451,17 @@ [(set_attr "type" "simd_int_arith") (set_attr "mode" "<MODE>")]) +;; <x>vfnmsub.{s/d} +(define_insn "fnma<mode>4" + [(set (match_operand:FVEC 0 "register_operand" "=f") + (fma:FVEC (neg:FVEC (match_operand:FVEC 1 "register_operand" "f")) + (match_operand:FVEC 2 "register_operand" "f") + (match_operand:FVEC 3 "register_operand" "f")))] + "!HONOR_SIGNED_ZEROS (<MODE>mode)" + "<x>vfnmsub.<simdfmt>\t%<wu>0,%<wu>1,%<wu>2,%<wu>3" + [(set_attr "type" "simd_fmadd") + (set_attr "mode" "<MODE>")]) + ;; <x>vfcmp.*.{s/d} with defined RTX code ;; There are no fcmp.{sugt/suge/cgt/cge}.{s/d} menmonics in GAS, so we have ;; to reverse the operands ourselves :(. 
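A note on the fnma<mode>4 pattern moved into simd.md above: it is gated on !HONOR_SIGNED_ZEROS because <x>vfnmsub computes -(a*b - c), which matches fnma's c - a*b everywhere except in the sign of an exactly-zero result (when a*b == c, the two forms yield -0.0 and +0.0 respectively). A sketch of a loop that should map onto it once signed zeros may be ignored (e.g. under -ffast-math) and the loop is vectorized for LSX or LASX (illustrative, not part of the patch):

    /* r[i] = c[i] - a[i] * b[i] is fnma, i.e. fma (-a, b, c);
       with -fno-signed-zeros it can use <x>vfnmsub.  */
    void
    fnma_loop (float *restrict r, const float *restrict a,
               const float *restrict b, const float *restrict c, int n)
    {
      for (int i = 0; i < n; i++)
        r[i] = c[i] - a[i] * b[i];
    }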
@@ -826,6 +857,39 @@ DONE; }) +(define_expand "<su>dot_prod<wvec_quarter><mode>" + [(match_operand:<WVEC_QUARTER> 0 "register_operand" "=f,f") + (match_operand:IVEC_HB 1 "register_operand" "f,f") + (match_operand:IVEC_HB 2 "register_operand" "f,f") + (match_operand:<WVEC_QUARTER> 3 "reg_or_0_operand" "f, YG") + (any_extend (const_int 0))] + "" +{ + rtx *op = operands; + rtx res_mulev = gen_reg_rtx (<WVEC_HALF>mode); + rtx res_mulod = gen_reg_rtx (<WVEC_HALF>mode); + rtx res_addev = gen_reg_rtx (<WVEC_QUARTER>mode); + rtx res_addod = gen_reg_rtx (<WVEC_QUARTER>mode); + emit_insn (gen_<simd_isa>_<x>vmulwev_<simdfmt_w>_<simdfmt><u> + (res_mulev, op[1], op[2])); + emit_insn (gen_<simd_isa>_<x>vmulwod_<simdfmt_w>_<simdfmt><u> + (res_mulod, op[1], op[2])); + emit_insn (gen_<simd_isa>_<x>vhaddw_<simdfmt_qw><u>_<simdfmt_w><u> + (res_addev, res_mulev, res_mulev)); + emit_insn (gen_<simd_isa>_<x>vhaddw_<simdfmt_qw><u>_<simdfmt_w><u> + (res_addod, res_mulod, res_mulod)); + if (op[3] == CONST0_RTX (<WVEC_QUARTER>mode)) + emit_insn (gen_add<wvec_quarter>3 (op[0], res_addev, + res_addod)); + else + { + emit_insn (gen_add<wvec_quarter>3 (res_addev, res_addev, + res_addod)); + emit_insn (gen_add<wvec_quarter>3 (op[0], res_addev, op[3])); + } + DONE; +}) + (define_insn "simd_maddw_evod_<mode>_hetero" [(set (match_operand:<WVEC_HALF> 0 "register_operand" "=f") (plus:<WVEC_HALF> @@ -972,6 +1036,77 @@ DONE; }) +(define_insn "xor<mode>3" + [(set (match_operand:ALLVEC 0 "register_operand" "=f,f,f") + (xor:ALLVEC + (match_operand:ALLVEC 1 "register_operand" "f,f,f") + (match_operand:ALLVEC 2 "reg_or_vector_same_val_operand" "f,YC,Urv8")))] + "" + "@ + <x>vxor.v\t%<wu>0,%<wu>1,%<wu>2 + <x>vbitrevi.%v0\t%<wu>0,%<wu>1,%V2 + <x>vxori.b\t%<wu>0,%<wu>1,%B2" + [(set_attr "type" "simd_logic,simd_bit,simd_logic") + (set_attr "mode" "<MODE>")]) + +(define_insn "ior<mode>3" + [(set (match_operand:ALLVEC 0 "register_operand" "=f,f,f") + (ior:ALLVEC + (match_operand:ALLVEC 1 "register_operand" "f,f,f") + (match_operand:ALLVEC 2 "reg_or_vector_same_val_operand" "f,YC,Urv8")))] + "" + "@ + <x>vor.v\t%<wu>0,%<wu>1,%<wu>2 + <x>vbitseti.%v0\t%<wu>0,%<wu>1,%V2 + <x>vori.b\t%<wu>0,%<wu>1,%B2" + [(set_attr "type" "simd_logic,simd_bit,simd_logic") + (set_attr "mode" "<MODE>")]) + +(define_insn "and<mode>3" + [(set (match_operand:ALLVEC 0 "register_operand" "=f,f,f") + (and:ALLVEC + (match_operand:ALLVEC 1 "register_operand" "f,f,f") + (match_operand:ALLVEC 2 "reg_or_vector_same_val_operand" "f,YZ,Urv8")))] + "" +{ + switch (which_alternative) + { + case 0: + return "<x>vand.v\t%<wu>0,%<wu>1,%<wu>2"; + case 1: + { + rtx elt0 = CONST_VECTOR_ELT (operands[2], 0); + unsigned HOST_WIDE_INT val; + if (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT) + { + const REAL_VALUE_TYPE *x = CONST_DOUBLE_REAL_VALUE (elt0); + if (GET_MODE (elt0) == DFmode) + { + long tmp[2]; + REAL_VALUE_TO_TARGET_DOUBLE (*x, tmp); + val = ~((unsigned HOST_WIDE_INT) tmp[1] << 32 | tmp[0]); + } + else + { + long tmp; + REAL_VALUE_TO_TARGET_SINGLE (*x, tmp); + val = ~((unsigned HOST_WIDE_INT) tmp); + } + } + else + val = ~UINTVAL (elt0); + operands[2] = loongarch_gen_const_int_vector (<VIMODE>mode, val & (-val)); + return "<x>vbitclri.%v0\t%<wu>0,%<wu>1,%V2"; + } + case 2: + return "<x>vandi.b\t%<wu>0,%<wu>1,%B2"; + default: + gcc_unreachable (); + } +} + [(set_attr "type" "simd_logic,simd_bit,simd_logic") + (set_attr "mode" "<MODE>")]) + ; The LoongArch SX Instructions. 
(include "lsx.md") diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md index 59b71ed..697198f 100644 --- a/gcc/config/riscv/bitmanip.md +++ b/gcc/config/riscv/bitmanip.md @@ -1,4 +1,4 @@ -;); Machine description for RISC-V Bit Manipulation operations. +;; Machine description for RISC-V Bit Manipulation operations. ;; Copyright (C) 2021-2025 Free Software Foundation, Inc. ;; This file is part of GCC. @@ -237,19 +237,20 @@ [(set_attr "type" "bitmanip") (set_attr "mode" "<X:MODE>")]) -(define_insn_and_split "*<optab>_not_const<mode>" - [(set (match_operand:X 0 "register_operand" "=r") - (bitmanip_bitwise:X (not:X (match_operand:X 1 "register_operand" "r")) - (match_operand:X 2 "const_arith_operand" "I"))) - (clobber (match_scratch:X 3 "=&r"))] +(define_peephole2 + [(match_scratch:X 4 "r") + (set (match_operand:X 0 "register_operand") + (not:X (match_operand:X 1 "register_operand"))) + (set (match_operand:X 2 "register_operand") + (bitmanip_bitwise:X (match_dup 0) + (match_operand 3 "const_int_operand"))) + (match_dup 4)] "(TARGET_ZBB || TARGET_ZBKB) && !TARGET_ZCB - && !optimize_function_for_size_p (cfun)" - "#" - "&& reload_completed" - [(set (match_dup 3) (match_dup 2)) - (set (match_dup 0) (bitmanip_bitwise:X (not:X (match_dup 1)) (match_dup 3)))] - "" - [(set_attr "type" "bitmanip")]) + && !optimize_function_for_size_p (cfun) + && rtx_equal_p (operands[0], operands[2]) + && riscv_const_insns (operands[3], false) == 1" + [(set (match_dup 4) (match_dup 3)) + (set (match_dup 0) (bitmanip_bitwise:X (not:X (match_dup 1)) (match_dup 4)))]) ;; '(a >= 0) ? b : 0' is emitted branchless (from if-conversion). Without a ;; bit of extra help for combine (i.e., the below split), we end up emitting diff --git a/gcc/config/riscv/riscv-avlprop.cc b/gcc/config/riscv/riscv-avlprop.cc index b8547a7..a42764e 100644 --- a/gcc/config/riscv/riscv-avlprop.cc +++ b/gcc/config/riscv/riscv-avlprop.cc @@ -77,6 +77,7 @@ along with GCC; see the file COPYING3. If not see #include "tree-pass.h" #include "df.h" #include "rtl-ssa.h" +#include "rtl-iter.h" #include "cfgcleanup.h" #include "insn-attr.h" #include "tm-constrs.h" @@ -412,6 +413,46 @@ pass_avlprop::get_vlmax_ta_preferred_avl (insn_info *insn) const && def1->insn ()->compare_with (insn) >= 0) return NULL_RTX; } + else + { + /* If the use is in a subreg e.g. in a store it is possible that + we punned the vector mode with a larger mode like + (subreg:V1SI (reg:V4QI 123)). + For an AVL of 1 that means we actually store one SImode + element and not 1 QImode elements. But the latter is what we + would propagate if we took the AVL operand literally. + Instead we scale it by the ratio of inner and outer mode + (4 in the example above). 
*/ + int factor = 1; + if (use->includes_subregs ()) + { + subrtx_iterator::array_type array; + FOR_EACH_SUBRTX (iter, array, use_insn->rtl (), NONCONST) + { + const_rtx x = *iter; + if (x + && SUBREG_P (x) + && REG_P (SUBREG_REG (x)) + && REGNO (SUBREG_REG (x)) == use->regno () + && known_eq (GET_MODE_SIZE (use->mode ()), + GET_MODE_SIZE (GET_MODE (x)))) + { + if (can_div_trunc_p (GET_MODE_NUNITS (use->mode ()), + GET_MODE_NUNITS (GET_MODE (x)), + &factor)) + { + gcc_assert (factor > 0); + break; + } + else + return NULL_RTX; + } + } + } + + if (factor > 1) + new_use_avl = GEN_INT (INTVAL (new_use_avl) * factor); + } if (!use_avl) use_avl = new_use_avl; diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h index 013b1dd..570acb1 100644 --- a/gcc/config/riscv/riscv-protos.h +++ b/gcc/config/riscv/riscv-protos.h @@ -209,6 +209,11 @@ rtl_opt_pass * make_pass_insert_landing_pad (gcc::context *ctxt); rtl_opt_pass * make_pass_vector_permconst (gcc::context *ctxt); rtl_opt_pass * make_pass_bclr_lowest_set_bit (gcc::context *ctxt); +/* Routines implemented in riscv-vsetvl.cc. */ +extern bool has_vtype_op (rtx_insn *); +extern bool mask_agnostic_p (rtx_insn *); +extern rtx get_avl (rtx_insn *); +extern bool vsetvl_insn_p (rtx_insn *); /* Routines implemented in riscv-string.c. */ extern bool riscv_expand_block_compare (rtx, rtx, rtx, rtx); @@ -834,7 +839,8 @@ extern bool th_print_operand_address (FILE *, machine_mode, rtx); extern bool strided_load_broadcast_p (void); extern bool riscv_prefer_agnostic_p (void); extern bool riscv_use_divmod_expander (void); -void riscv_init_cumulative_args (CUMULATIVE_ARGS *, const_tree, rtx, tree, int); +void riscv_init_cumulative_args (CUMULATIVE_ARGS *, const_tree, + rtx, tree, int, bool); extern bool riscv_option_valid_attribute_p (tree, tree, tree, int); extern bool diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.cc b/gcc/config/riscv/riscv-vector-builtins-bases.cc index 7e4d396..22b77cc 100644 --- a/gcc/config/riscv/riscv-vector-builtins-bases.cc +++ b/gcc/config/riscv/riscv-vector-builtins-bases.cc @@ -1793,12 +1793,13 @@ public: The fold routines expect the replacement statement to have the same lhs as the original call, so return the copy statement rather than the field update. */ - gassign *copy = gimple_build_assign (unshare_expr (f.lhs), rhs_tuple); + gassign *copy = gimple_build_assign (f.lhs, rhs_tuple); /* Get a reference to the individual vector. */ tree field = tuple_type_field (TREE_TYPE (f.lhs)); tree lhs_array - = build3 (COMPONENT_REF, TREE_TYPE (field), f.lhs, field, NULL_TREE); + = build3 (COMPONENT_REF, TREE_TYPE (field), unshare_expr (f.lhs), + field, NULL_TREE); tree lhs_vector = build4 (ARRAY_REF, TREE_TYPE (rhs_vector), lhs_array, index, NULL_TREE, NULL_TREE); gassign *update = gimple_build_assign (lhs_vector, rhs_vector); diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc index 3586d0c..580ac9c 100644 --- a/gcc/config/riscv/riscv-vsetvl.cc +++ b/gcc/config/riscv/riscv-vsetvl.cc @@ -258,7 +258,7 @@ policy_to_str (bool agnostic_p) /* Return true if it is an RVV instruction depends on VTYPE global status register. */ -static bool +bool has_vtype_op (rtx_insn *rinsn) { return recog_memoized (rinsn) >= 0 && get_attr_has_vtype_op (rinsn); @@ -306,7 +306,7 @@ vector_config_insn_p (rtx_insn *rinsn) } /* Return true if it is vsetvldi or vsetvlsi. 
*/ -static bool +bool vsetvl_insn_p (rtx_insn *rinsn) { if (!rinsn || !vector_config_insn_p (rinsn)) @@ -386,7 +386,7 @@ get_vl (rtx_insn *rinsn) } /* Helper function to get AVL operand. */ -static rtx +rtx get_avl (rtx_insn *rinsn) { if (vsetvl_insn_p (rinsn) || vsetvl_discard_result_insn_p (rinsn)) @@ -411,7 +411,7 @@ get_default_ma () } /* Helper function to get MA operand. */ -static bool +bool mask_agnostic_p (rtx_insn *rinsn) { /* If it doesn't have MA, we return agnostic by default. */ diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index d5de76c..e978f92 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -740,6 +740,7 @@ static tree riscv_handle_fndecl_attribute (tree *, tree, tree, int, bool *); static tree riscv_handle_type_attribute (tree *, tree, tree, int, bool *); static tree riscv_handle_rvv_vector_bits_attribute (tree *, tree, tree, int, bool *); +static tree riscv_handle_rvv_vls_cc_attribute (tree *, tree, tree, int, bool *); /* Defining target-specific uses of __attribute__. */ static const attribute_spec riscv_gnu_attributes[] = @@ -763,6 +764,8 @@ static const attribute_spec riscv_gnu_attributes[] = standard vector calling convention variant. Syntax: __attribute__((riscv_vector_cc)). */ {"riscv_vector_cc", 0, 0, false, true, true, true, NULL, NULL}, + {"riscv_vls_cc", 0, 1, false, true, true, true, + riscv_handle_rvv_vls_cc_attribute, NULL}, /* This attribute is used to declare a new type, to appoint the exactly bits size of the type. For example: @@ -790,6 +793,8 @@ static const attribute_spec riscv_attributes[] = standard vector calling convention variant. Syntax: [[riscv::vector_cc]]. */ {"vector_cc", 0, 0, false, true, true, true, NULL, NULL}, + {"vls_cc", 0, 1, false, true, true, true, riscv_handle_rvv_vls_cc_attribute, + NULL}, /* This attribute is used to declare a new type, to appoint the exactly bits size of the type. For example: @@ -3723,6 +3728,12 @@ riscv_legitimize_move (machine_mode mode, rtx dest, rtx src) riscv_vector::emit_vec_extract (result, v, gen_int_mode (index + i, Pmode)); + /* The low-part must be zero-extended when ELEN == 32 and + mode == 64. */ + if (num == 2 && i == 0) + emit_insn (gen_extend_insn (int_reg, result, mode, smode, + true)); + if (i == 1) { if (UNITS_PER_WORD < mode_size) @@ -5872,11 +5883,12 @@ typedef struct { floating-point registers. */ static int -riscv_flatten_aggregate_field (const_tree type, - riscv_aggregate_field fields[2], +riscv_flatten_aggregate_field (const_tree type, riscv_aggregate_field *fields, int n, HOST_WIDE_INT offset, - bool ignore_zero_width_bit_field_p) + bool ignore_zero_width_bit_field_p, + bool vls_p = false, unsigned abi_vlen = 0) { + int max_aggregate_field = vls_p ? 
8 : 2; switch (TREE_CODE (type)) { case RECORD_TYPE: @@ -5903,9 +5915,9 @@ riscv_flatten_aggregate_field (const_tree type, else { HOST_WIDE_INT pos = offset + int_byte_position (f); - n = riscv_flatten_aggregate_field (TREE_TYPE (f), - fields, n, pos, - ignore_zero_width_bit_field_p); + n = riscv_flatten_aggregate_field ( + TREE_TYPE (f), fields, n, pos, ignore_zero_width_bit_field_p, + vls_p, abi_vlen); } if (n < 0) return -1; @@ -5915,13 +5927,14 @@ riscv_flatten_aggregate_field (const_tree type, case ARRAY_TYPE: { HOST_WIDE_INT n_elts; - riscv_aggregate_field subfields[2]; + riscv_aggregate_field subfields[8]; tree index = TYPE_DOMAIN (type); tree elt_size = TYPE_SIZE_UNIT (TREE_TYPE (type)); - int n_subfields = riscv_flatten_aggregate_field (TREE_TYPE (type), - subfields, 0, offset, - ignore_zero_width_bit_field_p); - + int n_subfields + = riscv_flatten_aggregate_field (TREE_TYPE (type), subfields, 0, + offset, + ignore_zero_width_bit_field_p, vls_p, + abi_vlen); /* Can't handle incomplete types nor sizes that are not fixed. */ if (n_subfields <= 0 || !COMPLETE_TYPE_P (type) @@ -5941,7 +5954,7 @@ riscv_flatten_aggregate_field (const_tree type, for (HOST_WIDE_INT i = 0; i < n_elts; i++) for (int j = 0; j < n_subfields; j++) { - if (n >= 2) + if (n >= max_aggregate_field) return -1; fields[n] = subfields[j]; @@ -5973,18 +5986,36 @@ riscv_flatten_aggregate_field (const_tree type, } default: - if (n < 2 - && ((SCALAR_FLOAT_TYPE_P (type) - && GET_MODE_SIZE (TYPE_MODE (type)).to_constant () <= UNITS_PER_FP_ARG) - || (INTEGRAL_TYPE_P (type) - && GET_MODE_SIZE (TYPE_MODE (type)).to_constant () <= UNITS_PER_WORD))) + poly_uint64 mode_size = GET_MODE_SIZE (TYPE_MODE (type)); + if (vls_p) { - fields[n].type = type; - fields[n].offset = offset; - return n + 1; + gcc_assert (abi_vlen != 0); + if (n < max_aggregate_field + && (VECTOR_TYPE_P (type) && mode_size.is_constant () + && (mode_size.to_constant () <= abi_vlen * 8))) + { + fields[n].type = type; + fields[n].offset = offset; + return n + 1; + } + else + return -1; } else - return -1; + { + if (n < max_aggregate_field + && ((SCALAR_FLOAT_TYPE_P (type) + && mode_size.to_constant () <= UNITS_PER_FP_ARG) + || (INTEGRAL_TYPE_P (type) + && mode_size.to_constant () <= UNITS_PER_WORD))) + { + fields[n].type = type; + fields[n].offset = offset; + return n + 1; + } + else + return -1; + } } } @@ -5993,14 +6024,16 @@ riscv_flatten_aggregate_field (const_tree type, static int riscv_flatten_aggregate_argument (const_tree type, - riscv_aggregate_field fields[2], - bool ignore_zero_width_bit_field_p) + riscv_aggregate_field *fields, + bool ignore_zero_width_bit_field_p, + bool vls_p = false, unsigned abi_vlen = 0) { if (!type || TREE_CODE (type) != RECORD_TYPE) return -1; return riscv_flatten_aggregate_field (type, fields, 0, 0, - ignore_zero_width_bit_field_p); + ignore_zero_width_bit_field_p, vls_p, + abi_vlen); } /* See whether TYPE is a record whose fields should be returned in one or @@ -6163,18 +6196,22 @@ riscv_pass_vls_aggregate_in_gpr (struct riscv_arg_info *info, machine_mode mode, return gen_rtx_PARALLEL (mode, gen_rtvec (1, x)); } +static const predefined_function_abi & +riscv_fntype_abi_1 (const_tree fntype, bool check_only); + /* Initialize a variable CUM of type CUMULATIVE_ARGS for a call to a function whose data type is FNTYPE. For a library call, FNTYPE is 0. 
*/ void riscv_init_cumulative_args (CUMULATIVE_ARGS *cum, const_tree fntype, - rtx, tree, int) + rtx, tree, int, bool check_only) { memset (cum, 0, sizeof (*cum)); if (fntype) - cum->variant_cc = (riscv_cc) fntype_abi (fntype).id (); + cum->variant_cc = + (riscv_cc) riscv_fntype_abi_1 (fntype, check_only).id (); else cum->variant_cc = RISCV_CC_BASE; } @@ -6197,7 +6234,7 @@ riscv_hard_regno_nregs (unsigned int regno, machine_mode mode); static rtx riscv_get_vector_arg (struct riscv_arg_info *info, const CUMULATIVE_ARGS *cum, - machine_mode mode, bool return_p) + machine_mode mode, bool return_p, bool vls_p = false) { gcc_assert (riscv_v_ext_mode_p (mode)); @@ -6233,8 +6270,9 @@ riscv_get_vector_arg (struct riscv_arg_info *info, const CUMULATIVE_ARGS *cum, int arg_reg_end = V_ARG_LAST - V_REG_FIRST; int aligned_reg_start = ROUND_UP (arg_reg_start, LMUL); - /* For scalable data and scalable tuple return value. */ - if (return_p) + /* For scalable data and scalable tuple return value. + For the VLS CC, we may pass a struct like a tuple, so we need to defer + the handling. */ + if (return_p && !vls_p) return gen_rtx_REG (mode, aligned_reg_start + V_REG_FIRST); /* Iterate through the USED_VRS array to find vector register groups that have @@ -6271,6 +6309,224 @@ riscv_get_vector_arg (struct riscv_arg_info *info, const CUMULATIVE_ARGS *cum, return NULL_RTX; } + +#define RISCV_ALL_VALID_ABI_VLEN(F) \ + F (32) \ + F (64) \ + F (128) \ + F (256) \ + F (512) \ + F (1024) \ + F (2048) \ + F (4096) \ + F (8192) \ + F (16384) + +/* Return true if CC is a variant of the VLS CC. */ + +static bool +riscv_vls_cc_p (riscv_cc cc) +{ + switch (cc) + { +#define VLS_CC_ABI_VLEN_CASE(ABI_VLEN) \ + case RISCV_CC_VLS_V_##ABI_VLEN: + RISCV_ALL_VALID_ABI_VLEN (VLS_CC_ABI_VLEN_CASE) + +#undef VLS_CC_ABI_VLEN_CASE + return true; + default: + return false; + } +} + +/* Get the ABI_VLEN from CC. */ + +static unsigned int +riscv_get_cc_abi_vlen (riscv_cc cc) +{ + switch (cc) + { +#define VLS_CC_ABI_VLEN_CASE(ABI_VLEN) \ + case RISCV_CC_VLS_V_##ABI_VLEN: \ + return ABI_VLEN; + RISCV_ALL_VALID_ABI_VLEN (VLS_CC_ABI_VLEN_CASE) + +#undef VLS_CC_ABI_VLEN_CASE + default: + gcc_unreachable (); + } +} + +/* Return true if ABI_VLEN is valid for the VLS CC. */ + +static bool +riscv_valid_abi_vlen_vls_cc_p (unsigned abi_vlen) +{ + switch (abi_vlen) + { +#define VLS_CC_ABI_VLEN_CASE(ABI_VLEN) \ + case ABI_VLEN: + RISCV_ALL_VALID_ABI_VLEN (VLS_CC_ABI_VLEN_CASE) + +#undef VLS_CC_ABI_VLEN_CASE + return true; + default: + return false; + } +} + +static riscv_cc +riscv_get_riscv_cc_by_abi_vlen (unsigned abi_vlen) +{ + switch (abi_vlen) + { +#define VLS_CC_ABI_VLEN_CASE(ABI_VLEN) \ + case ABI_VLEN: \ + return RISCV_CC_VLS_V_##ABI_VLEN; + RISCV_ALL_VALID_ABI_VLEN (VLS_CC_ABI_VLEN_CASE) + +#undef VLS_CC_ABI_VLEN_CASE + default: + gcc_unreachable (); + } +} + +/* Get a VLS mode that has the same size as MODE under ABI_VLEN, but whose + element is always an integer mode. */ + +static machine_mode +riscv_get_vls_container_type (machine_mode mode, unsigned abi_vlen) +{ + machine_mode element_mode = GET_MODE_INNER (mode); + unsigned int mode_size = GET_MODE_SIZE (mode).to_constant (); + unsigned int lmul = ROUND_UP (mode_size * 8, abi_vlen) / abi_vlen; + + /* Always use an integer mode for passing, to simplify the logic - we allow + passing unsupported vector types in vector registers, e.g. float16x4_t + even without vector fp16 support.
*/ switch (GET_MODE_SIZE (element_mode).to_constant ()) + { + case 1: + element_mode = QImode; + break; + case 2: + element_mode = HImode; + break; + case 4: + element_mode = SImode; + break; + case 8: + element_mode = DImode; + break; + default: + gcc_unreachable (); + } + + scalar_mode smode = as_a<scalar_mode> (element_mode); + return get_lmul_mode (smode, lmul).require (); +} + +/* Pass a VLS type argument in vector argument registers. */ + +static rtx +riscv_pass_vls_in_vr (struct riscv_arg_info *info, const CUMULATIVE_ARGS *cum, + machine_mode mode, bool return_p) +{ + gcc_assert (riscv_v_ext_vls_mode_p (mode)); + + unsigned int abi_vlen = riscv_get_cc_abi_vlen (cum->variant_cc); + unsigned int mode_size = GET_MODE_SIZE (mode).to_constant (); + unsigned int lmul = ROUND_UP (mode_size * 8, abi_vlen) / abi_vlen; + + /* Put into memory if it needs more than 8 registers (> LMUL 8). */ + if (lmul > 8) + return NULL_RTX; + + machine_mode vla_mode = riscv_get_vls_container_type (mode, abi_vlen); + rtx reg = riscv_get_vector_arg (info, cum, vla_mode, + return_p, /* vls_p */ true); + + /* Couldn't get a vector register to pass it in, so pass by memory. */ + if (!reg) + return NULL_RTX; + + PUT_MODE (reg, mode); + + return reg; } + +/* Pass an aggregate of VLS types in vector argument registers. */ + +static rtx +riscv_pass_aggregate_in_vr (struct riscv_arg_info *info, + const CUMULATIVE_ARGS *cum, const_tree type, + bool return_p) +{ + riscv_aggregate_field fields[8]; + unsigned int abi_vlen = riscv_get_cc_abi_vlen (cum->variant_cc); + int i; + int n = riscv_flatten_aggregate_argument (type, fields, true, + /* vls_p */ true, abi_vlen); + + if (n == -1) + return NULL_RTX; + + /* Check that all fields have the same size. */ + unsigned int mode_size + = GET_MODE_SIZE (TYPE_MODE (fields[0].type)).to_constant (); + for (int i = 1; i < n; i++) + if (GET_MODE_SIZE (TYPE_MODE (fields[i].type)).to_constant () != mode_size) + return NULL_RTX; /* Return NULL_RTX if we cannot find a suitable reg. */ + + /* Check that each field fits in one register of ABI_VLEN bits; with at + most 8 fields we use up to 8 vector registers to pass the argument. */ + if (mode_size * 8 > abi_vlen) + return NULL_RTX; /* Return NULL_RTX if we cannot find a suitable reg. */ + + /* Back up cum->used_vrs since we will defer the update until + riscv_function_arg_advance. */ + CUMULATIVE_ARGS local_cum; + memcpy (&local_cum, cum, sizeof (local_cum)); + + unsigned num_vrs = 0; + + /* Allocate vector registers for the arguments. */ + rtx expr_list[8]; + for (i = 0; i < n; i++) + { + machine_mode mode = TYPE_MODE (fields[i].type); + machine_mode vla_mode = riscv_get_vls_container_type (mode, abi_vlen); + /* Use riscv_get_vector_arg with the VLA type to simplify the calling + convention implementation. */ + rtx reg + = riscv_get_vector_arg (info, &local_cum, vla_mode, + return_p, /* vls_p */true); + + /* Couldn't get a vector register to pass it in, so pass by memory. */ + if (!reg) + return NULL_RTX; + + PUT_MODE (reg, mode); + + expr_list[i] + = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (fields[i].offset)); + + num_vrs += info->num_vrs; + + /* Mark the corresponding registers in USED_VRS as used. */ + for (unsigned int i = 0; i < info->num_vrs; i++) + { + gcc_assert (!local_cum.used_vrs[info->vr_offset + i]); + local_cum.used_vrs[info->vr_offset + i] = true; + } + } + + info->num_vrs = num_vrs; + + return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (n, expr_list)); +} + /* Fill INFO with information about a single argument, and return an RTL pattern to pass or return the argument.
Return NULL_RTX if argument cannot pass or return in registers, then the argument may be passed by reference or @@ -6363,7 +6619,17 @@ riscv_get_arg_info (struct riscv_arg_info *info, const CUMULATIVE_ARGS *cum, if (riscv_vector_type_p (type) && riscv_v_ext_mode_p (mode)) return riscv_get_vector_arg (info, cum, mode, return_p); - /* For vls mode aggregated in gpr. */ + if (riscv_vls_cc_p (cum->variant_cc)) + { + if (riscv_v_ext_vls_mode_p (mode)) + return riscv_pass_vls_in_vr (info, cum, mode, return_p); + + rtx ret = riscv_pass_aggregate_in_vr (info, cum, type, return_p); + if (ret) + return ret; + } + + /* For vls mode aggregated in gpr (for non-VLS-CC). */ if (riscv_v_ext_vls_mode_p (mode)) return riscv_pass_vls_aggregate_in_gpr (info, mode, gpr_base); } @@ -6420,7 +6686,8 @@ riscv_function_arg_advance (cumulative_args_t cum_v, cum->used_vrs[info.vr_offset + i] = true; } - if ((info.num_vrs > 0 || info.num_mrs > 0) && cum->variant_cc != RISCV_CC_V) + if ((info.num_vrs > 0 || info.num_mrs > 0) && cum->variant_cc != RISCV_CC_V + && !riscv_vls_cc_p (cum->variant_cc)) { error ("RVV type %qT cannot be passed to an unprototyped function", arg.type); @@ -6463,7 +6730,8 @@ riscv_function_value (const_tree ret_type, const_tree fn_decl_or_type, { const_tree fntype = TREE_CODE (fn_decl_or_type) == FUNCTION_DECL ? TREE_TYPE (fn_decl_or_type) : fn_decl_or_type; - riscv_init_cumulative_args (&args, fntype, NULL_RTX, NULL_TREE, 0); + riscv_init_cumulative_args (&args, fntype, NULL_RTX, NULL_TREE, 0, + /* check_only */true); } else memset (&args, 0, sizeof args); @@ -6532,14 +6800,20 @@ riscv_pass_by_reference (cumulative_args_t cum_v, const function_arg_info &arg) /* Implement TARGET_RETURN_IN_MEMORY. */ static bool -riscv_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED) +riscv_return_in_memory (const_tree type, const_tree fntype) { CUMULATIVE_ARGS args; + + if (fntype) + riscv_init_cumulative_args (&args, fntype, NULL_RTX, NULL_TREE, 0, + /* check_only */true); + else + /* The rules for returning in memory are the same as for passing the + first named argument by reference. */ + memset (&args, 0, sizeof args); + cumulative_args_t cum = pack_cumulative_args (&args); - /* The rules for returning in memory are the same as for passing the - first named argument by reference. */ - memset (&args, 0, sizeof args); function_arg_info arg (const_cast<tree> (type), /*named=*/true); return riscv_pass_by_reference (cum, arg); } @@ -6583,9 +6857,9 @@ riscv_setup_incoming_varargs (cumulative_args_t cum, /* Return the descriptor of the Standard Vector Calling Convention Variant. */ static const predefined_function_abi & -riscv_v_abi () +riscv_v_abi (riscv_cc abi) { - predefined_function_abi &v_abi = function_abis[RISCV_CC_V]; + predefined_function_abi &v_abi = function_abis[abi]; if (!v_abi.initialized_p ()) { HARD_REG_SET full_reg_clobbers @@ -6595,7 +6869,7 @@ riscv_v_abi () CLEAR_HARD_REG_BIT (full_reg_clobbers, regno); for (int regno = V_REG_FIRST + 24; regno <= V_REG_FIRST + 31; regno += 1) CLEAR_HARD_REG_BIT (full_reg_clobbers, regno); - v_abi.initialize (RISCV_CC_V, full_reg_clobbers); + v_abi.initialize (abi, full_reg_clobbers); } return v_abi; } @@ -6756,13 +7030,14 @@ riscv_validate_vector_type (const_tree type, const char *hint) RISC-V V registers. 
@@ -6756,13 +7030,14 @@ riscv_validate_vector_type (const_tree type, const char *hint)
    RISC-V V registers.  */
 
 static bool
-riscv_return_value_is_vector_type_p (const_tree fntype)
+riscv_return_value_is_vector_type_p (const_tree fntype, bool check_only)
 {
   tree return_type = TREE_TYPE (fntype);
 
   if (riscv_vector_type_p (return_type))
     {
-      riscv_validate_vector_type (return_type, "return type");
+      if (!check_only)
+	riscv_validate_vector_type (return_type, "return type");
       return true;
     }
   else
@@ -6773,7 +7048,7 @@ riscv_return_value_is_vector_type_p (const_tree fntype)
    RISC-V V registers.  */
 
 static bool
-riscv_arguments_is_vector_type_p (const_tree fntype)
+riscv_arguments_is_vector_type_p (const_tree fntype, bool check_only)
 {
   for (tree chain = TYPE_ARG_TYPES (fntype); chain && chain != void_list_node;
        chain = TREE_CHAIN (chain))
     {
       tree arg_type = TREE_VALUE (chain);
       if (riscv_vector_type_p (arg_type))
 	{
-	  riscv_validate_vector_type (arg_type, "argument type");
+	  if (!check_only)
+	    riscv_validate_vector_type (arg_type, "argument type");
 	  return true;
 	}
     }
@@ -6792,14 +7068,15 @@ riscv_arguments_is_vector_type_p (const_tree fntype)
 /* Return true if FUNC is a riscv_vector_cc function.
    For more details please reference the below link.
    https://github.com/riscv-non-isa/riscv-c-api-doc/pull/67 */
+
 static bool
-riscv_vector_cc_function_p (const_tree fntype)
+riscv_vector_cc_function_p (const_tree fntype, bool check_only)
 {
   tree attr = TYPE_ATTRIBUTES (fntype);
   bool vector_cc_p = lookup_attribute ("vector_cc", attr) != NULL_TREE
 		     || lookup_attribute ("riscv_vector_cc", attr) != NULL_TREE;
 
-  if (vector_cc_p && !TARGET_VECTOR)
+  if (vector_cc_p && !TARGET_VECTOR && !check_only)
     error_at (input_location,
 	      "function attribute %qs requires the V ISA extension",
 	      "riscv_vector_cc");
@@ -6807,26 +7084,91 @@ riscv_vector_cc_function_p (const_tree fntype)
   return vector_cc_p;
 }
 
-/* Implement TARGET_FNTYPE_ABI.  */
+/* Return the riscv_cc value according to the attribute arguments.
+   If the attribute arguments are invalid, return RISCV_CC_UNKNOWN,
+   and emit an error message unless CHECK_ONLY.  */
+
+static riscv_cc
+riscv_get_vls_cc_attr (const_tree args, bool check_only = false)
+{
+  /* The default ABI_VLEN is 128.  */
+  int abi_vlen = 128;
+
+  if (args && TREE_CODE (args) == TREE_LIST)
+    {
+      tree vlen_arg = TREE_VALUE (args);
+      if (vlen_arg && TREE_CODE (vlen_arg) == INTEGER_CST)
+	abi_vlen = TREE_INT_CST_LOW (vlen_arg);
+    }
+
+  if (!riscv_valid_abi_vlen_vls_cc_p (abi_vlen))
+    {
+      if (!check_only)
+	error_at (input_location,
+		  "unsupported %<ABI_VLEN%> value %d for %qs attribute; "
+		  "%<ABI_VLEN%> must be in the range [32, 16384] and be "
+		  "a power of 2",
+		  abi_vlen, "riscv_vls_cc");
+      return RISCV_CC_UNKNOWN;
+    }
+
+  return riscv_get_riscv_cc_by_abi_vlen (abi_vlen);
+}
+
+/* If FNTYPE carries the vls_cc/riscv_vls_cc attribute, return the
+   corresponding VLS calling-convention variant, otherwise return
+   RISCV_CC_UNKNOWN.  For more details please reference the below link.
+   https://github.com/riscv-non-isa/riscv-c-api-doc/pull/67 */
+
+static riscv_cc
+riscv_vls_cc_function_abi (const_tree fntype, bool check_only)
+{
+  tree attrs = TYPE_ATTRIBUTES (fntype);
+  tree attr = lookup_attribute ("vls_cc", attrs);
+  if (!attr)
+    attr = lookup_attribute ("riscv_vls_cc", attrs);
+
+  if (!attr)
+    return RISCV_CC_UNKNOWN;
+
+  if (!TARGET_VECTOR && !check_only)
+    error_at (input_location,
+	      "function attribute %qs requires the vector ISA extension",
+	      "riscv_vls_cc");
+
+  return riscv_get_vls_cc_attr (TREE_VALUE (attr), check_only);
+}
+
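The validity rule enforced here can be read off the error message: ABI_VLEN must be a power of two in [32, 16384], with 128 as the default. The body of riscv_valid_abi_vlen_vls_cc_p is not part of this diff, so the following standalone sketch is an assumption based on that message:

#include <cassert>

/* valid_abi_vlen_p: assumed semantics of riscv_valid_abi_vlen_vls_cc_p,
   inferred from the diagnostic above.  */
static bool
valid_abi_vlen_p (int vlen)
{
  return vlen >= 32 && vlen <= 16384 && (vlen & (vlen - 1)) == 0;
}

int
main ()
{
  assert (valid_abi_vlen_p (128));    /* The default.  */
  assert (!valid_abi_vlen_p (100));   /* Not a power of two.  */
  assert (!valid_abi_vlen_p (32768)); /* Above the upper bound.  */
  return 0;
}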
+/* Implementation of TARGET_FNTYPE_ABI, with one extra parameter CHECK_ONLY
+   to suppress the diagnostics.  */
 
 static const predefined_function_abi &
-riscv_fntype_abi (const_tree fntype)
+riscv_fntype_abi_1 (const_tree fntype, bool check_only)
 {
   /* Implement the vector calling convention.  For more details please
      reference the below link.
      https://github.com/riscv-non-isa/riscv-elf-psabi-doc/pull/389  */
   bool validate_v_abi_p = false;
 
-  validate_v_abi_p |= riscv_return_value_is_vector_type_p (fntype);
-  validate_v_abi_p |= riscv_arguments_is_vector_type_p (fntype);
-  validate_v_abi_p |= riscv_vector_cc_function_p (fntype);
+  validate_v_abi_p |= riscv_return_value_is_vector_type_p (fntype, check_only);
+  validate_v_abi_p |= riscv_arguments_is_vector_type_p (fntype, check_only);
+  validate_v_abi_p |= riscv_vector_cc_function_p (fntype, check_only);
 
   if (validate_v_abi_p)
-    return riscv_v_abi ();
+    return riscv_v_abi (RISCV_CC_V);
+
+  riscv_cc abi = riscv_vls_cc_function_abi (fntype, check_only);
+  if (abi != RISCV_CC_UNKNOWN)
+    return riscv_v_abi (abi);
 
   return default_function_abi;
 }
 
+/* Implement TARGET_FNTYPE_ABI.  */
+
+static const predefined_function_abi &
+riscv_fntype_abi (const_tree fntype)
+{
+  return riscv_fntype_abi_1 (fntype, /* check_only */ false);
+}
+
 /* Return riscv calling convention of call_insn.  */
 riscv_cc
 get_riscv_cc (const rtx use)
@@ -6916,6 +7258,25 @@ riscv_handle_type_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
 }
 
+/* Handle the vls_cc/riscv_vls_cc attribute; reject it when its ABI_VLEN
+   argument is invalid.  */
+
+static tree
+riscv_handle_rvv_vls_cc_attribute (tree *, tree name, tree args,
+				   ATTRIBUTE_UNUSED int flags,
+				   bool *no_add_attrs)
+{
+  bool vls_cc_p = is_attribute_p ("vls_cc", name)
+		  || is_attribute_p ("riscv_vls_cc", name);
+
+  if (!vls_cc_p)
+    return NULL_TREE;
+
+  riscv_cc cc = riscv_get_vls_cc_attr (args);
+
+  if (cc == RISCV_CC_UNKNOWN)
+    *no_add_attrs = true;
+
+  return NULL_TREE;
+}
+
 static tree
 riscv_handle_rvv_vector_bits_attribute (tree *node, tree name, tree args,
 					ATTRIBUTE_UNUSED int flags,
 					bool *no_add_attrs)
@@ -10215,6 +10576,71 @@ riscv_issue_rate (void)
   return tune_param->issue_rate;
 }
 
+/* Structure for very basic vector configuration tracking in the
+   scheduler.  */
+struct last_vconfig
+{
+  bool valid;
+  bool ta;
+  bool ma;
+  uint8_t sew;
+  uint8_t vlmul;
+  rtx avl;
+} last_vconfig;
+
+/* Clear LAST_VCONFIG so we have no known state.  */
+static void
+clear_vconfig (void)
+{
+  memset (&last_vconfig, 0, sizeof (last_vconfig));
+}
+
+/* Return TRUE if INSN is a vector insn needing a particular
+   vector configuration that is trivially equal to the last
+   vector insn issued.  Return FALSE otherwise.  */
+static bool
+compatible_with_last_vconfig (rtx_insn *insn)
+{
+  /* An explicit vsetvl establishes a new configuration itself, so never
+     treat it as trivially compatible with the previous one.  */
+  if (vsetvl_insn_p (insn))
+    return false;
+
+  /* Nothing to do for these cases.  */
+  if (!NONDEBUG_INSN_P (insn) || !has_vtype_op (insn))
+    return false;
+
+  extract_insn_cached (insn);
+
+  rtx avl = get_avl (insn);
+  if (avl != last_vconfig.avl)
+    return false;
+
+  if (get_sew (insn) != last_vconfig.sew)
+    return false;
+
+  if (get_vlmul (insn) != last_vconfig.vlmul)
+    return false;
+
+  if (tail_agnostic_p (insn) != last_vconfig.ta)
+    return false;
+
+  if (mask_agnostic_p (insn) != last_vconfig.ma)
+    return false;
+
+  /* No differences found, they're trivially compatible.  */
+  return true;
+}
+
+/* Implement TARGET_SCHED_INIT.  We use this to track the vector
+   configuration of the last issued vector instruction.  We can then use
+   that information to potentially adjust the ready queue to issue
+   instructions of a compatible vector configuration instead of a
+   conflicting configuration.
   That will reduce the number of vsetvl instructions we ultimately
   emit.  */
+static void
+riscv_sched_init (FILE *, int, int)
+{
+  clear_vconfig ();
+}
+
 /* Implement TARGET_SCHED_VARIABLE_ISSUE.  */
 static int
 riscv_sched_variable_issue (FILE *, int, rtx_insn *insn, int more)
@@ -10239,9 +10665,88 @@ riscv_sched_variable_issue (FILE *, int, rtx_insn *insn, int more)
      an assert so we can find and fix this problem.  */
   gcc_assert (insn_has_dfa_reservation_p (insn));
 
+  /* If this is a vector insn with vl/vtype info, then record the last
+     vector configuration.  */
+  if (vsetvl_insn_p (insn))
+    clear_vconfig ();
+  else if (NONDEBUG_INSN_P (insn) && has_vtype_op (insn))
+    {
+      extract_insn_cached (insn);
+
+      rtx avl = get_avl (insn);
+      if (avl == RVV_VLMAX)
+	avl = const0_rtx;
+
+      if (!avl || !CONST_INT_P (avl))
+	clear_vconfig ();
+      else
+	{
+	  last_vconfig.valid = true;
+	  last_vconfig.avl = avl;
+	  last_vconfig.sew = get_sew (insn);
+	  last_vconfig.vlmul = get_vlmul (insn);
+	  last_vconfig.ta = tail_agnostic_p (insn);
+	  last_vconfig.ma = mask_agnostic_p (insn);
+	}
+    }
+
   return more - 1;
 }
 
+/* Implement TARGET_SCHED_REORDER.  The goal here is to look at the ready
+   queue and reorder it ever so slightly to encourage issuing an insn with
+   the same vector configuration as the most recently issued vector
+   instruction.  That will reduce vsetvl instructions.  */
+static int
+riscv_sched_reorder (FILE *, int, rtx_insn **ready, int *nreadyp, int)
+{
+  /* If we don't have a valid prior vector configuration, then there is
+     no point in reordering the ready queue; similarly if there is
+     just one entry in the queue.  */
+  if (!last_vconfig.valid || *nreadyp == 1)
+    return riscv_issue_rate ();
+
+  int nready = *nreadyp;
+  int priority = INSN_PRIORITY (ready[nready - 1]);
+  for (int i = nready - 1; i >= 0; i--)
+    {
+      rtx_insn *insn = ready[i];
+
+      /* On a high performance core, vsetvl instructions should be
+	 inexpensive.  Removing them is very much a secondary concern, so
+	 be extremely conservative with reordering, essentially only
+	 allowing reordering within the run of entries sharing the
+	 highest priority value.
+
+	 Lower end cores may benefit from more flexibility here.  That
+	 tuning is left to those who understand their core's behavior
+	 and can thoroughly benchmark the result.  Assuming such designs
+	 appear, we can probably put an entry in the tuning structure to
+	 indicate how much difference in priority to allow.  */
+      if (INSN_PRIORITY (insn) < priority)
+	break;
+
+      if (compatible_with_last_vconfig (insn))
+	{
+	  /* This entry is compatible with the last vconfig and has
+	     the same priority as the most important insn.  So swap
+	     it into the issue slot so that we keep the vector
+	     configuration as-is and ultimately eliminate a vsetvl.
+
+	     Note there is no need to swap if this entry is already
+	     the next one to issue.  */
+	  if (i == nready - 1)
+	    break;
+
+	  std::swap (ready[i], ready[nready - 1]);
+	  break;
+	}
+    }
+
+  return riscv_issue_rate ();
+}
+
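The reordering policy is easier to see in isolation: the back of the ready list is the next insn to issue, and the loop only considers the run of entries that share the top priority. A standalone sketch with illustrative types (not GCC's scheduler structures):

#include <algorithm>
#include <cassert>
#include <vector>

struct entry
{
  int priority;
  bool compatible; /* Same vector configuration as the last insn.  */
};

/* Mirror of the loop above: walk backwards through the run of entries
   sharing the top priority and swap the first compatible one into the
   issue slot (the back of the list).  */
static void
reorder_ready (std::vector<entry> &ready)
{
  if (ready.size () < 2)
    return;
  int top = ready.back ().priority;
  for (int i = ready.size () - 1; i >= 0; i--)
    {
      if (ready[i].priority < top)
	break;
      if (ready[i].compatible)
	{
	  std::swap (ready[i], ready.back ());
	  break;
	}
    }
}

int
main ()
{
  /* A lower-priority compatible entry must not be promoted.  */
  std::vector<entry> ready = { { 1, true }, { 5, false }, { 5, false } };
  reorder_ready (ready);
  assert (!ready.back ().compatible);

  /* A same-priority compatible entry is swapped into the issue slot.  */
  ready = { { 5, true }, { 5, false } };
  reorder_ready (ready);
  assert (ready.back ().compatible);
  return 0;
}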
 /* Implement TARGET_SCHED_MACRO_FUSION_P.  Return true if target supports
    instruction fusion of some sort.  */
@@ -11082,7 +11587,7 @@ riscv_asm_output_variant_cc (FILE *stream, const tree decl, const char *name)
   if (TREE_CODE (decl) == FUNCTION_DECL)
     {
       riscv_cc cc = (riscv_cc) fndecl_abi (decl).id ();
-      if (cc == RISCV_CC_V)
+      if (cc == RISCV_CC_V || riscv_vls_cc_p (cc))
 	{
 	  fprintf (stream, "\t.variant_cc\t");
 	  assemble_name (stream, name);
@@ -15650,9 +16155,15 @@ riscv_prefetch_offset_address_p (rtx x, machine_mode mode)
 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
 #define TARGET_SCHED_MACRO_FUSION_PAIR_P riscv_macro_fusion_pair_p
 
+#undef TARGET_SCHED_INIT
+#define TARGET_SCHED_INIT riscv_sched_init
+
 #undef TARGET_SCHED_VARIABLE_ISSUE
 #define TARGET_SCHED_VARIABLE_ISSUE riscv_sched_variable_issue
 
+#undef TARGET_SCHED_REORDER
+#define TARGET_SCHED_REORDER riscv_sched_reorder
+
 #undef TARGET_SCHED_ADJUST_COST
 #define TARGET_SCHED_ADJUST_COST riscv_sched_adjust_cost
 
diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h
index 9146571..a0ad75c 100644
--- a/gcc/config/riscv/riscv.h
+++ b/gcc/config/riscv/riscv.h
@@ -779,6 +779,17 @@ enum riscv_cc
 {
   RISCV_CC_BASE = 0, /* Base standard RISC-V ABI.  */
   RISCV_CC_V, /* For functions that pass or return values in V registers.  */
+  /* Variants of the VLS calling convention, one per supported ABI_VLEN.  */
+  RISCV_CC_VLS_V_32,
+  RISCV_CC_VLS_V_64,
+  RISCV_CC_VLS_V_128,
+  RISCV_CC_VLS_V_256,
+  RISCV_CC_VLS_V_512,
+  RISCV_CC_VLS_V_1024,
+  RISCV_CC_VLS_V_2048,
+  RISCV_CC_VLS_V_4096,
+  RISCV_CC_VLS_V_8192,
+  RISCV_CC_VLS_V_16384,
   RISCV_CC_UNKNOWN
 };
 
@@ -786,6 +797,8 @@ typedef struct
 {
   /* The calling convention that current function used.  */
   enum riscv_cc variant_cc;
 
+  /* The ABI_VLEN, in bits, of the selected VLS calling-convention
+     variant.  */
+  unsigned int abi_vlen;
+
   /* Number of integer registers used so far, up to MAX_ARGS_IN_REGISTERS.  */
   unsigned int num_gprs;
 
@@ -809,7 +822,7 @@ extern enum riscv_cc get_riscv_cc (const rtx use);
 
 #define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, INDIRECT, N_NAMED_ARGS) \
   riscv_init_cumulative_args (&(CUM), (FNTYPE), (LIBNAME), (INDIRECT),	\
-			      (N_NAMED_ARGS) != -1)
+			      (N_NAMED_ARGS) != -1, /* check_only */ false)
 
 #define EPILOGUE_USES(REGNO) riscv_epilogue_uses (REGNO)
 
diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index 3cb87bf..9d34725 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -1437,6 +1437,8 @@
   [(set_attr "type" "vlde,vste,vmov")
    (set_attr "mode" "<MODE>")
    (set (attr "merge_op_idx") (const_int INVALID_ATTRIBUTE))
+   (set (attr "has_vl_op") (const_string "false"))
+   (set (attr "has_vtype_op") (const_string "false"))
    (set (attr "avl_type_idx") (const_int INVALID_ATTRIBUTE))
    (set (attr "mode_idx") (const_int INVALID_ATTRIBUTE))]
 )
diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
index 374288d..c713451 100644
--- a/gcc/config/xtensa/xtensa.md
+++ b/gcc/config/xtensa/xtensa.md
@@ -43,6 +43,7 @@
   UNSPEC_FRAME_BLOCKAGE
   UNSPEC_CEIL
   UNSPEC_FLOOR
+  UNSPEC_ROUND
 ])
 
 (define_c_enum "unspecv" [
@@ -104,8 +105,11 @@
 ;; This iterator and attribute allow FP-to-integer rounding of two types
 ;; to be generated from one template.
-(define_int_iterator ANY_ROUND [UNSPEC_CEIL UNSPEC_FLOOR])
-(define_int_attr m_round [(UNSPEC_CEIL "ceil") (UNSPEC_FLOOR "floor")])
+(define_int_iterator ANY_ROUND [UNSPEC_CEIL UNSPEC_FLOOR UNSPEC_ROUND])
+(define_int_attr m_round [(UNSPEC_CEIL "ceil") (UNSPEC_FLOOR "floor")
+			  (UNSPEC_ROUND "round")])
+(define_int_attr c_round [(UNSPEC_CEIL "1") (UNSPEC_FLOOR "1")
+			  (UNSPEC_ROUND "flag_unsafe_math_optimizations")])
 
 ;; Attributes.
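The RISCV_CC_VLS_V_* enumerators are laid out as consecutive powers of two from 32 to 16384, which suggests a simple mapping for riscv_get_riscv_cc_by_abi_vlen. Its body is not in this diff, so the sketch below is an assumption:

#include <cassert>

/* Mirror of the enumerators added to riscv.h above.  */
enum riscv_cc
{
  RISCV_CC_BASE = 0,
  RISCV_CC_V,
  RISCV_CC_VLS_V_32,
  RISCV_CC_VLS_V_64,
  RISCV_CC_VLS_V_128,
  RISCV_CC_VLS_V_256,
  RISCV_CC_VLS_V_512,
  RISCV_CC_VLS_V_1024,
  RISCV_CC_VLS_V_2048,
  RISCV_CC_VLS_V_4096,
  RISCV_CC_VLS_V_8192,
  RISCV_CC_VLS_V_16384,
  RISCV_CC_UNKNOWN
};

/* Assumed mapping: the VLS_V_* enumerators are consecutive powers of
   two starting at 32, so the index is log2 (abi_vlen / 32).  ABI_VLEN
   is expected to have been validated already.  */
static riscv_cc
cc_by_abi_vlen (unsigned int abi_vlen)
{
  unsigned int idx = 0;
  for (unsigned int v = 32; v < abi_vlen; v *= 2)
    idx++;
  return (riscv_cc) (RISCV_CC_VLS_V_32 + idx);
}

int
main ()
{
  assert (cc_by_abi_vlen (32) == RISCV_CC_VLS_V_32);
  assert (cc_by_abi_vlen (128) == RISCV_CC_VLS_V_128);
  assert (cc_by_abi_vlen (16384) == RISCV_CC_VLS_V_16384);
  return 0;
}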
@@ -680,35 +684,26 @@
 	(set_attr "mode"	"SI")
 	(set_attr "length"	"3")])
 
-(define_insn_and_split "one_cmplsi2"
-  [(set (match_operand:SI 0 "register_operand" "=a")
-	(not:SI (match_operand:SI 1 "register_operand" "r")))]
+(define_expand "one_cmplsi2"
+  [(set (match_operand:SI 0 "register_operand")
+	(not:SI (match_operand:SI 1 "register_operand")))]
   ""
-  "#"
-  "&& can_create_pseudo_p ()"
-  [(set (match_dup 2)
-	(const_int -1))
-   (set (match_dup 0)
-	(xor:SI (match_dup 1)
-		(match_dup 2)))]
 {
-  operands[2] = gen_reg_rtx (SImode);
-}
-  [(set_attr "type"	"arith")
-   (set_attr "mode"	"SI")
-   (set (attr "length")
-	(if_then_else (match_test "TARGET_DENSITY")
-		      (const_int 5)
-		      (const_int 6)))])
+  emit_insn (gen_xorsi3 (operands[0], operands[1],
+			 force_reg (SImode, constm1_rtx)));
+  DONE;
+})
 
 (define_insn "negsf2"
-  [(set (match_operand:SF 0 "register_operand" "=f")
-	(neg:SF (match_operand:SF 1 "register_operand" "f")))]
+  [(set (match_operand:SF 0 "register_operand")
+	(neg:SF (match_operand:SF 1 "register_operand")))
+   (clobber (match_scratch:SI 2))]
   "TARGET_HARD_FLOAT"
-  "neg.s\t%0, %1"
-  [(set_attr "type"	"farith")
-   (set_attr "mode"	"SF")
-   (set_attr "length"	"3")])
+  {@ [cons: =0, 1, =2; attrs: type, length]
+   [D, D, &a; arith, 7] movi.n\t%2, 1\;slli\t%2, %2, 31\;add.n\t%0, %1, %2
+   [f, f, X; farith, 3] neg.s\t%0, %1
+  }
+  [(set_attr "mode" "SF")])
 
 ;; Logical instructions.
 
@@ -1150,7 +1145,7 @@
 (define_insn "*fix<s_fix>_truncsfsi2_scaled"
   [(set (match_operand:SI 0 "register_operand" "=a")
 	(any_fix:SI (mult:SF (match_operand:SF 1 "register_operand" "f")
-			     (match_operand:SF 2 "fix_scaling_operand" "F"))))]
+			     (match_operand:SF 2 "fix_scaling_operand" ""))))]
   "TARGET_HARD_FLOAT"
   "<m_fix>.s\t%0, %1, %U2"
   [(set_attr "type"	"fconv")
@@ -1169,7 +1164,7 @@
 (define_insn "*float<s_float>sisf2_scaled"
   [(set (match_operand:SF 0 "register_operand" "=f")
 	(mult:SF (any_float:SF (match_operand:SI 1 "register_operand" "a"))
-		 (match_operand:SF 2 "float_scaling_operand" "F")))]
+		 (match_operand:SF 2 "float_scaling_operand" "")))]
   "TARGET_HARD_FLOAT"
   "<m_float>.s\t%0, %1, %V2"
   [(set_attr "type"	"fconv")
@@ -1179,7 +1174,7 @@
 (define_insn "l<m_round>sfsi2"
   [(set (match_operand:SI 0 "register_operand" "=a")
 	(unspec:SI [(match_operand:SF 1 "register_operand" "f")] ANY_ROUND))]
-  "TARGET_HARD_FLOAT"
+  "TARGET_HARD_FLOAT && <c_round>"
   "<m_round>.s\t%0, %1, 0"
   [(set_attr "type"	"fconv")
    (set_attr "mode"	"SF")
@@ -1189,7 +1184,7 @@
   [(set (match_operand:SI 0 "register_operand" "=a")
 	(unspec:SI [(plus:SF (match_operand:SF 1 "register_operand" "f")
 			     (match_dup 1))] ANY_ROUND))]
-  "TARGET_HARD_FLOAT"
+  "TARGET_HARD_FLOAT && <c_round>"
   "<m_round>.s\t%0, %1, 1"
   [(set_attr "type"	"fconv")
    (set_attr "mode"	"SF")
@@ -1198,8 +1193,8 @@
 (define_insn "*l<m_round>sfsi2_scaled"
   [(set (match_operand:SI 0 "register_operand" "=a")
 	(unspec:SI [(mult:SF (match_operand:SF 1 "register_operand" "f")
-			     (match_operand:SF 2 "fix_scaling_operand" "F"))] ANY_ROUND))]
-  "TARGET_HARD_FLOAT"
+			     (match_operand:SF 2 "fix_scaling_operand" ""))] ANY_ROUND))]
+  "TARGET_HARD_FLOAT && <c_round>"
   "<m_round>.s\t%0, %1, %U2"
   [(set_attr "type"	"fconv")
   (set_attr "mode"	"SF")
   (set_attr "length"	"3")])
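The new first alternative of negsf2 works without touching the FPU: adding 0x80000000 modulo 2^32 leaves bits 0..30 alone and flips bit 31, the IEEE single-precision sign bit, which is exactly what neg.s computes; the movi.n/slli pair merely materializes the constant. A standalone check of that identity:

#include <cassert>
#include <cstdint>
#include <cstring>

/* negsf_via_add: host-side model of the movi.n/slli/add.n sequence in
   the negsf2 alternative above.  */
static float
negsf_via_add (float x)
{
  uint32_t bits;
  std::memcpy (&bits, &x, sizeof bits);
  bits += UINT32_C (0x80000000); /* add.n with the 1 << 31 constant.  */
  std::memcpy (&x, &bits, sizeof x);
  return x;
}

int
main ()
{
  assert (negsf_via_add (1.5f) == -1.5f);
  assert (negsf_via_add (-2.0f) == 2.0f);
  return 0;
}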
