diff options
Diffstat (limited to 'gcc/config')
-rw-r--r-- | gcc/config/aarch64/aarch64-cost-tables.h | 54 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64-simd.md | 5 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64.cc | 34 | ||||
-rw-r--r-- | gcc/config/arm/aarch-cost-tables.h | 36 | ||||
-rw-r--r-- | gcc/config/avr/avr-passes.cc | 6 | ||||
-rw-r--r-- | gcc/config/cris/cris.cc | 6 | ||||
-rw-r--r-- | gcc/config/gcn/gcn-valu.md | 262 | ||||
-rw-r--r-- | gcc/config/gcn/gcn.cc | 106 | ||||
-rw-r--r-- | gcc/config/i386/i386.cc | 6 | ||||
-rw-r--r-- | gcc/config/pru/pru-pragma.cc | 13 | ||||
-rw-r--r-- | gcc/config/pru/pru-protos.h | 8 | ||||
-rw-r--r-- | gcc/config/pru/pru.cc | 8 | ||||
-rw-r--r-- | gcc/config/riscv/autovec-opt.md | 113 | ||||
-rw-r--r-- | gcc/config/riscv/autovec.md | 6 | ||||
-rw-r--r-- | gcc/config/riscv/generic-vector-ooo.md | 85 | ||||
-rw-r--r-- | gcc/config/riscv/mips-p8700.md | 2 | ||||
-rw-r--r-- | gcc/config/riscv/riscv.cc | 40 | ||||
-rw-r--r-- | gcc/config/riscv/vector.md | 30 | ||||
-rw-r--r-- | gcc/config/riscv/xiangshan.md | 3 | ||||
-rw-r--r-- | gcc/config/s390/s390.cc | 11 | ||||
-rw-r--r-- | gcc/config/xtensa/xtensa.cc | 44 | ||||
-rw-r--r-- | gcc/config/xtensa/xtensa.md | 16 |
22 files changed, 687 insertions, 207 deletions
diff --git a/gcc/config/aarch64/aarch64-cost-tables.h b/gcc/config/aarch64/aarch64-cost-tables.h index c49ff7f..e7926eb 100644 --- a/gcc/config/aarch64/aarch64-cost-tables.h +++ b/gcc/config/aarch64/aarch64-cost-tables.h @@ -125,9 +125,9 @@ const struct cpu_cost_table qdf24xx_extra_costs = { COSTS_N_INSNS (1), /* alu. */ COSTS_N_INSNS (4), /* mult. */ - COSTS_N_INSNS (1), /* movi. */ - COSTS_N_INSNS (2), /* dup. */ - COSTS_N_INSNS (2) /* extract. */ + COSTS_N_INSNS (0), /* movi. */ + COSTS_N_INSNS (1), /* dup. */ + COSTS_N_INSNS (1) /* extract. */ } }; @@ -233,9 +233,9 @@ const struct cpu_cost_table thunderx_extra_costs = { COSTS_N_INSNS (1), /* Alu. */ COSTS_N_INSNS (4), /* mult. */ - COSTS_N_INSNS (1), /* movi. */ - COSTS_N_INSNS (2), /* dup. */ - COSTS_N_INSNS (2) /* extract. */ + COSTS_N_INSNS (0), /* movi. */ + COSTS_N_INSNS (1), /* dup. */ + COSTS_N_INSNS (1) /* extract. */ } }; @@ -340,9 +340,9 @@ const struct cpu_cost_table thunderx2t99_extra_costs = { COSTS_N_INSNS (1), /* Alu. */ COSTS_N_INSNS (4), /* Mult. */ - COSTS_N_INSNS (1), /* movi. */ - COSTS_N_INSNS (2), /* dup. */ - COSTS_N_INSNS (2) /* extract. */ + COSTS_N_INSNS (0), /* movi. */ + COSTS_N_INSNS (1), /* dup. */ + COSTS_N_INSNS (1) /* extract. */ } }; @@ -447,9 +447,9 @@ const struct cpu_cost_table thunderx3t110_extra_costs = { COSTS_N_INSNS (1), /* Alu. */ COSTS_N_INSNS (4), /* Mult. */ - COSTS_N_INSNS (1), /* movi. */ - COSTS_N_INSNS (2), /* dup. */ - COSTS_N_INSNS (2) /* extract. */ + COSTS_N_INSNS (0), /* movi. */ + COSTS_N_INSNS (1), /* dup. */ + COSTS_N_INSNS (1) /* extract. */ } }; @@ -555,9 +555,9 @@ const struct cpu_cost_table tsv110_extra_costs = { COSTS_N_INSNS (1), /* alu. */ COSTS_N_INSNS (4), /* mult. */ - COSTS_N_INSNS (1), /* movi. */ - COSTS_N_INSNS (2), /* dup. */ - COSTS_N_INSNS (2) /* extract. */ + COSTS_N_INSNS (0), /* movi. */ + COSTS_N_INSNS (1), /* dup. */ + COSTS_N_INSNS (1) /* extract. */ } }; @@ -662,9 +662,9 @@ const struct cpu_cost_table a64fx_extra_costs = { COSTS_N_INSNS (1), /* alu. */ COSTS_N_INSNS (4), /* mult. */ - COSTS_N_INSNS (1), /* movi. */ - COSTS_N_INSNS (2), /* dup. */ - COSTS_N_INSNS (2) /* extract. */ + COSTS_N_INSNS (0), /* movi. */ + COSTS_N_INSNS (1), /* dup. */ + COSTS_N_INSNS (1) /* extract. */ } }; @@ -769,9 +769,9 @@ const struct cpu_cost_table ampere1_extra_costs = { COSTS_N_INSNS (3), /* alu. */ COSTS_N_INSNS (3), /* mult. */ - COSTS_N_INSNS (2), /* movi. */ - COSTS_N_INSNS (2), /* dup. */ - COSTS_N_INSNS (2) /* extract. */ + COSTS_N_INSNS (1), /* movi. */ + COSTS_N_INSNS (1), /* dup. */ + COSTS_N_INSNS (1) /* extract. */ } }; @@ -876,9 +876,9 @@ const struct cpu_cost_table ampere1a_extra_costs = { COSTS_N_INSNS (3), /* alu. */ COSTS_N_INSNS (3), /* mult. */ - COSTS_N_INSNS (2), /* movi. */ - COSTS_N_INSNS (2), /* dup. */ - COSTS_N_INSNS (2) /* extract. */ + COSTS_N_INSNS (1), /* movi. */ + COSTS_N_INSNS (1), /* dup. */ + COSTS_N_INSNS (1) /* extract. */ } }; @@ -983,9 +983,9 @@ const struct cpu_cost_table ampere1b_extra_costs = { COSTS_N_INSNS (1), /* alu. */ COSTS_N_INSNS (2), /* mult. */ - COSTS_N_INSNS (1), /* movi. */ - COSTS_N_INSNS (1), /* dup. */ - COSTS_N_INSNS (1) /* extract. */ + COSTS_N_INSNS (0), /* movi. */ + COSTS_N_INSNS (0), /* dup. */ + COSTS_N_INSNS (0) /* extract. */ } }; diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 270cb2f..8b75c3d 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -1190,13 +1190,16 @@ [(set_attr "type" "neon_ins<q>, neon_from_gp<q>, neon_load1_one_lane<q>")] ) +;; Inserting from the zero register into a vector lane is treated as an +;; expensive GP->FP move on all CPUs. Avoid it when optimizing for speed. (define_insn "aarch64_simd_vec_set_zero<mode>" [(set (match_operand:VALL_F16 0 "register_operand" "=w") (vec_merge:VALL_F16 (match_operand:VALL_F16 1 "register_operand" "0") (match_operand:VALL_F16 3 "aarch64_simd_imm_zero" "") (match_operand:SI 2 "immediate_operand" "i")))] - "TARGET_SIMD && aarch64_exact_log2_inverse (<nunits>, operands[2]) >= 0" + "TARGET_SIMD && aarch64_exact_log2_inverse (<nunits>, operands[2]) >= 0 + && optimize_function_for_size_p (cfun)" { int elt = ENDIAN_LANE_N (<nunits>, aarch64_exact_log2_inverse (<nunits>, diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index 0485f69..9e4a37b 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -15854,11 +15854,14 @@ cost_plus: break; case CONST_VECTOR: { - /* Load using MOVI/MVNI. */ - if (aarch64_simd_valid_mov_imm (x)) - *cost = extra_cost->vect.movi; - else /* Load using constant pool. */ - *cost = extra_cost->ldst.load; + if (speed) + { + /* Load using MOVI/MVNI. */ + if (aarch64_simd_valid_mov_imm (x)) + *cost += extra_cost->vect.movi; + else /* Load using constant pool. */ + *cost += extra_cost->ldst.load; + } break; } case VEC_CONCAT: @@ -15867,7 +15870,8 @@ cost_plus: break; case VEC_DUPLICATE: /* Load using a DUP. */ - *cost = extra_cost->vect.dup; + if (speed) + *cost += extra_cost->vect.dup; return false; case VEC_SELECT: { @@ -15875,13 +15879,16 @@ cost_plus: *cost = rtx_cost (op0, GET_MODE (op0), VEC_SELECT, 0, speed); /* cost subreg of 0 as free, otherwise as DUP */ - rtx op1 = XEXP (x, 1); - if (vec_series_lowpart_p (mode, GET_MODE (op1), op1)) - ; - else if (vec_series_highpart_p (mode, GET_MODE (op1), op1)) - *cost = extra_cost->vect.dup; - else - *cost = extra_cost->vect.extract; + if (speed) + { + rtx op1 = XEXP (x, 1); + if (vec_series_lowpart_p (mode, GET_MODE (op1), op1)) + ; + else if (vec_series_highpart_p (mode, GET_MODE (op1), op1)) + *cost += extra_cost->vect.dup; + else + *cost += extra_cost->vect.extract; + } return true; } default: @@ -17969,6 +17976,7 @@ aarch64_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind, /* Check if we've seen an SVE gather/scatter operation and which size. */ if (kind == scalar_load + && vectype && aarch64_sve_mode_p (TYPE_MODE (vectype)) && vect_mem_access_type (stmt_info, node) == VMAT_GATHER_SCATTER) { diff --git a/gcc/config/arm/aarch-cost-tables.h b/gcc/config/arm/aarch-cost-tables.h index c7a14b3..0600e59 100644 --- a/gcc/config/arm/aarch-cost-tables.h +++ b/gcc/config/arm/aarch-cost-tables.h @@ -123,9 +123,9 @@ const struct cpu_cost_table generic_extra_costs = { COSTS_N_INSNS (1), /* alu. */ COSTS_N_INSNS (4), /* mult. */ - COSTS_N_INSNS (1), /* movi. */ - COSTS_N_INSNS (2), /* dup. */ - COSTS_N_INSNS (2) /* extract. */ + COSTS_N_INSNS (0), /* movi. */ + COSTS_N_INSNS (1), /* dup. */ + COSTS_N_INSNS (1) /* extract. */ } }; @@ -230,9 +230,9 @@ const struct cpu_cost_table cortexa53_extra_costs = { COSTS_N_INSNS (1), /* alu. */ COSTS_N_INSNS (4), /* mult. */ - COSTS_N_INSNS (1), /* movi. */ - COSTS_N_INSNS (2), /* dup. */ - COSTS_N_INSNS (2) /* extract. */ + COSTS_N_INSNS (0), /* movi. */ + COSTS_N_INSNS (1), /* dup. */ + COSTS_N_INSNS (1) /* extract. */ } }; @@ -337,9 +337,9 @@ const struct cpu_cost_table cortexa57_extra_costs = { COSTS_N_INSNS (1), /* alu. */ COSTS_N_INSNS (4), /* mult. */ - COSTS_N_INSNS (1), /* movi. */ - COSTS_N_INSNS (2), /* dup. */ - COSTS_N_INSNS (2) /* extract. */ + COSTS_N_INSNS (0), /* movi. */ + COSTS_N_INSNS (1), /* dup. */ + COSTS_N_INSNS (1) /* extract. */ } }; @@ -444,9 +444,9 @@ const struct cpu_cost_table cortexa76_extra_costs = { COSTS_N_INSNS (1), /* alu. */ COSTS_N_INSNS (4), /* mult. */ - COSTS_N_INSNS (1), /* movi. */ - COSTS_N_INSNS (2), /* dup. */ - COSTS_N_INSNS (2) /* extract. */ + COSTS_N_INSNS (0), /* movi. */ + COSTS_N_INSNS (1), /* dup. */ + COSTS_N_INSNS (1) /* extract. */ } }; @@ -551,9 +551,9 @@ const struct cpu_cost_table exynosm1_extra_costs = { COSTS_N_INSNS (0), /* alu. */ COSTS_N_INSNS (4), /* mult. */ - COSTS_N_INSNS (1), /* movi. */ - COSTS_N_INSNS (2), /* dup. */ - COSTS_N_INSNS (2) /* extract. */ + COSTS_N_INSNS (0), /* movi. */ + COSTS_N_INSNS (1), /* dup. */ + COSTS_N_INSNS (1) /* extract. */ } }; @@ -658,9 +658,9 @@ const struct cpu_cost_table xgene1_extra_costs = { COSTS_N_INSNS (2), /* alu. */ COSTS_N_INSNS (8), /* mult. */ - COSTS_N_INSNS (1), /* movi. */ - COSTS_N_INSNS (2), /* dup. */ - COSTS_N_INSNS (2) /* extract. */ + COSTS_N_INSNS (0), /* movi. */ + COSTS_N_INSNS (1), /* dup. */ + COSTS_N_INSNS (1) /* extract. */ } }; diff --git a/gcc/config/avr/avr-passes.cc b/gcc/config/avr/avr-passes.cc index 284f49d..6a88a27 100644 --- a/gcc/config/avr/avr-passes.cc +++ b/gcc/config/avr/avr-passes.cc @@ -4120,9 +4120,8 @@ avr_optimize_casesi (rtx_insn *insns[5], rtx *xop) JUMP_LABEL (cbranch) = xop[4]; ++LABEL_NUSES (xop[4]); - rtx_insn *seq1 = get_insns (); rtx_insn *last1 = get_last_insn (); - end_sequence (); + rtx_insn *seq1 = end_sequence (); emit_insn_after (seq1, insns[2]); @@ -4141,9 +4140,8 @@ avr_optimize_casesi (rtx_insn *insns[5], rtx *xop) emit_insn (pat_4); - rtx_insn *seq2 = get_insns (); rtx_insn *last2 = get_last_insn (); - end_sequence (); + rtx_insn *seq2 = end_sequence (); emit_insn_after (seq2, insns[3]); diff --git a/gcc/config/cris/cris.cc b/gcc/config/cris/cris.cc index a34c9e9..4acdd1d 100644 --- a/gcc/config/cris/cris.cc +++ b/gcc/config/cris/cris.cc @@ -3711,9 +3711,11 @@ cris_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &inputs, /* Determine if the source using MOF. If it is, automatically clobbering MOF would cause it to have impossible constraints. */ - /* Look for a use of the MOF constraint letter: h. */ + /* Look for a use of the MOF constraint letter h or a hard register + constraint. */ for (unsigned i = 0, n = constraints.length(); i < n; ++i) - if (strchr (constraints[i], 'h') != NULL) + if (strchr (constraints[i], 'h') != NULL + || strstr (constraints[i], "{mof}") != NULL) return NULL; /* Look for an output or an input that touches MOF. */ diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md index 7c4dde1..3899117 100644 --- a/gcc/config/gcn/gcn-valu.md +++ b/gcc/config/gcn/gcn-valu.md @@ -1133,6 +1133,23 @@ DONE; }) +(define_expand "gather_load<mode><vndi>" + [(match_operand:V_MOV 0 "register_operand") + (match_operand:DI 1 "register_operand") + (match_operand:<VnDI> 2 "register_operand") + (match_operand 3 "immediate_operand") + (match_operand:SI 4 "gcn_alu_operand")] + "" + { + rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[1], + operands[2], operands[4], + INTVAL (operands[3]), NULL); + + emit_insn (gen_gather<mode>_insn_1offset (operands[0], addr, const0_rtx, + const0_rtx, const0_rtx)); + DONE; + }) + ; Allow any address expression (define_expand "gather<mode>_expr<exec>" [(set (match_operand:V_MOV 0 "register_operand") @@ -1259,6 +1276,23 @@ DONE; }) +(define_expand "scatter_store<mode><vndi>" + [(match_operand:DI 0 "register_operand") + (match_operand:<VnDI> 1 "register_operand") + (match_operand 2 "immediate_operand") + (match_operand:SI 3 "gcn_alu_operand") + (match_operand:V_MOV 4 "register_operand")] + "" + { + rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[0], + operands[1], operands[3], + INTVAL (operands[2]), NULL); + + emit_insn (gen_scatter<mode>_insn_1offset (addr, const0_rtx, operands[4], + const0_rtx, const0_rtx)); + DONE; + }) + ; Allow any address expression (define_expand "scatter<mode>_expr<exec_scatter>" [(set (mem:BLK (scratch)) @@ -1645,6 +1679,39 @@ [(set_attr "type" "vmult") (set_attr "length" "8")]) +(define_insn_and_split "add<mode>3_dup" + [(set (match_operand:V_DI 0 "register_operand" "= v") + (plus:V_DI + (vec_duplicate:V_DI + (match_operand:DI 1 "register_operand" "SvB")) + (match_operand:V_DI 2 "gcn_alu_operand" "vDb"))) + (clobber (reg:DI VCC_REG)) + (clobber (match_scratch:<VnSI> 3 "=&v"))] + "" + "#" + "gcn_can_split_p (<MODE>mode, operands[0]) + && gcn_can_split_p (<MODE>mode, operands[1]) + && gcn_can_split_p (<MODE>mode, operands[2])" + [(const_int 0)] + { + rtx vcc = gen_rtx_REG (DImode, VCC_REG); + emit_insn (gen_add<vnsi>3_vcc_dup + (gcn_operand_part (<MODE>mode, operands[0], 0), + gcn_operand_part (DImode, operands[1], 0), + gcn_operand_part (<MODE>mode, operands[2], 0), + vcc)); + emit_insn (gen_vec_duplicate<vnsi> (operands[3], + gcn_operand_part (DImode, operands[1], 1))); + emit_insn (gen_addc<vnsi>3 + (gcn_operand_part (<MODE>mode, operands[0], 1), + operands[3], + gcn_operand_part (<MODE>mode, operands[2], 1), + vcc, vcc)); + DONE; + } + [(set_attr "type" "vmult") + (set_attr "length" "8")]) + (define_insn_and_split "add<mode>3_exec" [(set (match_operand:V_DI 0 "register_operand" "= v") (vec_merge:V_DI @@ -1682,6 +1749,49 @@ [(set_attr "type" "vmult") (set_attr "length" "8")]) +(define_insn_and_split "add<mode>3_dup_exec" + [(set (match_operand:V_DI 0 "register_operand" "= v") + (vec_merge:V_DI + (plus:V_DI + (vec_duplicate:V_DI + (match_operand:DI 1 "register_operand" "SvB")) + (match_operand:V_DI 2 "gcn_alu_operand" "vDb")) + (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0") + (match_operand:DI 4 "gcn_exec_reg_operand" " e"))) + (clobber (reg:DI VCC_REG)) + (clobber (match_scratch:<VnSI> 5 "=&v"))] + "" + "#" + "gcn_can_split_p (<MODE>mode, operands[0]) + && gcn_can_split_p (<MODE>mode, operands[1]) + && gcn_can_split_p (<MODE>mode, operands[2]) + && gcn_can_split_p (<MODE>mode, operands[4])" + [(const_int 0)] + { + rtx vcc = gen_rtx_REG (DImode, VCC_REG); + emit_insn (gen_add<vnsi>3_vcc_dup_exec + (gcn_operand_part (<MODE>mode, operands[0], 0), + gcn_operand_part (DImode, operands[1], 0), + gcn_operand_part (<MODE>mode, operands[2], 0), + vcc, + gcn_operand_part (<MODE>mode, operands[3], 0), + operands[4])); + emit_insn (gen_vec_duplicate<vnsi>_exec (operands[5], + gcn_operand_part (DImode, operands[1], 1), + gcn_gen_undef (<VnSI>mode), + operands[4])); + emit_insn (gen_addc<vnsi>3_exec + (gcn_operand_part (<MODE>mode, operands[0], 1), + operands[5], + gcn_operand_part (<MODE>mode, operands[2], 1), + vcc, vcc, + gcn_operand_part (<MODE>mode, operands[3], 1), + operands[4])); + DONE; + } + [(set_attr "type" "vmult") + (set_attr "length" "8")]) + (define_insn_and_split "sub<mode>3" [(set (match_operand:V_DI 0 "register_operand" "= v, v") (minus:V_DI @@ -2187,6 +2297,22 @@ [(set_attr "type" "vop3a") (set_attr "length" "8")]) +(define_insn "<su>mul<mode>3_highpart_dup<exec>" + [(set (match_operand:V_SI 0 "register_operand" "= v") + (truncate:V_SI + (lshiftrt:<VnDI> + (mult:<VnDI> + (any_extend:<VnDI> + (vec_duplicate:V_SI + (match_operand:SI 1 "gcn_alu_operand" "SvA"))) + (any_extend:<VnDI> + (match_operand:V_SI 2 "gcn_alu_operand" " vA"))) + (const_int 32))))] + "" + "v_mul_hi<sgnsuffix>0\t%0, %2, %1" + [(set_attr "type" "vop3a") + (set_attr "length" "8")]) + (define_insn "mul<mode>3<exec>" [(set (match_operand:V_INT_1REG 0 "register_operand" "= v") (mult:V_INT_1REG @@ -2198,11 +2324,11 @@ (set_attr "length" "8")]) (define_insn "mul<mode>3_dup<exec>" - [(set (match_operand:V_INT_1REG 0 "register_operand" "= v") + [(set (match_operand:V_INT_1REG 0 "register_operand" "= v") (mult:V_INT_1REG - (match_operand:V_INT_1REG 1 "gcn_alu_operand" "%vSvA") (vec_duplicate:V_INT_1REG - (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand" " SvA"))))] + (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand" "SvA")) + (match_operand:V_INT_1REG 2 "gcn_alu_operand" " vA")))] "" "v_mul_lo_u32\t%0, %1, %2" [(set_attr "type" "vop3a") @@ -2238,6 +2364,37 @@ DONE; }) +(define_insn_and_split "mul<mode>3_dup" + [(set (match_operand:V_DI 0 "register_operand" "=&v") + (mult:V_DI + (vec_duplicate:V_DI + (match_operand:DI 1 "gcn_alu_operand" " Sv")) + (match_operand:V_DI 2 "gcn_alu_operand" "vDA"))) + (clobber (match_scratch:<VnSI> 3 "=&v"))] + "" + "#" + "reload_completed" + [(const_int 0)] + { + rtx out_lo = gcn_operand_part (<MODE>mode, operands[0], 0); + rtx out_hi = gcn_operand_part (<MODE>mode, operands[0], 1); + rtx left_lo = gcn_operand_part (DImode, operands[1], 0); + rtx left_hi = gcn_operand_part (DImode, operands[1], 1); + rtx right_lo = gcn_operand_part (<MODE>mode, operands[2], 0); + rtx right_hi = gcn_operand_part (<MODE>mode, operands[2], 1); + rtx tmp = operands[3]; + + emit_insn (gen_mul<vnsi>3_dup (out_lo, left_lo, right_lo)); + emit_insn (gen_umul<vnsi>3_highpart_dup (out_hi, left_lo, right_lo)); + emit_insn (gen_mul<vnsi>3_dup (tmp, left_hi, right_lo)); + emit_insn (gen_add<vnsi>3 (out_hi, out_hi, tmp)); + emit_insn (gen_mul<vnsi>3_dup (tmp, left_lo, right_hi)); + emit_insn (gen_add<vnsi>3 (out_hi, out_hi, tmp)); + emit_insn (gen_mul<vnsi>3_dup (tmp, left_hi, right_hi)); + emit_insn (gen_add<vnsi>3 (out_hi, out_hi, tmp)); + DONE; + }) + (define_insn_and_split "mul<mode>3_exec" [(set (match_operand:V_DI 0 "register_operand" "=&v") (vec_merge:V_DI @@ -2286,6 +2443,56 @@ DONE; }) +(define_insn_and_split "mul<mode>3_dup_exec" + [(set (match_operand:V_DI 0 "register_operand" "=&v") + (vec_merge:V_DI + (mult:V_DI + (vec_duplicate:V_DI + (match_operand:DI 1 "gcn_alu_operand" " Sv")) + (match_operand:V_DI 2 "gcn_alu_operand" "vDA")) + (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0") + (match_operand:DI 4 "gcn_exec_reg_operand" " e"))) + (clobber (match_scratch:<VnSI> 5 "=&v"))] + "" + "#" + "reload_completed" + [(const_int 0)] + { + rtx out_lo = gcn_operand_part (<MODE>mode, operands[0], 0); + rtx out_hi = gcn_operand_part (<MODE>mode, operands[0], 1); + rtx left_lo = gcn_operand_part (DImode, operands[1], 0); + rtx left_hi = gcn_operand_part (DImode, operands[1], 1); + rtx right_lo = gcn_operand_part (<MODE>mode, operands[2], 0); + rtx right_hi = gcn_operand_part (<MODE>mode, operands[2], 1); + rtx exec = operands[4]; + rtx tmp = operands[5]; + + rtx old_lo, old_hi; + if (GET_CODE (operands[3]) == UNSPEC) + { + old_lo = old_hi = gcn_gen_undef (<VnSI>mode); + } + else + { + old_lo = gcn_operand_part (<MODE>mode, operands[3], 0); + old_hi = gcn_operand_part (<MODE>mode, operands[3], 1); + } + + rtx undef = gcn_gen_undef (<VnSI>mode); + + emit_insn (gen_mul<vnsi>3_dup_exec (out_lo, left_lo, right_lo, old_lo, + exec)); + emit_insn (gen_umul<vnsi>3_highpart_dup_exec (out_hi, left_lo, right_lo, + old_hi, exec)); + emit_insn (gen_mul<vnsi>3_dup_exec (tmp, left_hi, right_lo, undef, exec)); + emit_insn (gen_add<vnsi>3_exec (out_hi, out_hi, tmp, out_hi, exec)); + emit_insn (gen_mul<vnsi>3_dup_exec (tmp, left_lo, right_hi, undef, exec)); + emit_insn (gen_add<vnsi>3_exec (out_hi, out_hi, tmp, out_hi, exec)); + emit_insn (gen_mul<vnsi>3_dup_exec (tmp, left_hi, right_hi, undef, exec)); + emit_insn (gen_add<vnsi>3_exec (out_hi, out_hi, tmp, out_hi, exec)); + DONE; + }) + (define_insn_and_split "mul<mode>3_zext" [(set (match_operand:V_DI 0 "register_operand" "=&v") (mult:V_DI @@ -4049,6 +4256,32 @@ DONE; }) +(define_expand "mask_gather_load<mode><vndi>" + [(set:V_MOV (match_operand:V_MOV 0 "register_operand") + (unspec:V_MOV + [(match_operand:DI 1 "register_operand") + (match_operand:<VnDI> 2 "register_operand") + (match_operand 3 "immediate_operand") + (match_operand:SI 4 "gcn_alu_operand") + (match_operand:DI 5 "") + (match_operand:V_MOV 6 "maskload_else_operand")] + UNSPEC_GATHER))] + "" + { + rtx exec = force_reg (DImode, operands[5]); + + rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[1], + operands[2], operands[4], + INTVAL (operands[3]), exec); + + emit_insn (gen_gather<mode>_insn_1offset_exec (operands[0], addr, + const0_rtx, const0_rtx, + const0_rtx, + gcn_gen_undef (<MODE>mode), + exec)); + DONE; + }) + (define_expand "mask_scatter_store<mode><vnsi>" [(match_operand:DI 0 "register_operand") (match_operand:<VnSI> 1 "register_operand") @@ -4077,6 +4310,27 @@ DONE; }) +(define_expand "mask_scatter_store<mode><vndi>" + [(match_operand:DI 0 "register_operand") + (match_operand:<VnDI> 1 "register_operand") + (match_operand 2 "immediate_operand") + (match_operand:SI 3 "gcn_alu_operand") + (match_operand:V_MOV 4 "register_operand") + (match_operand:DI 5 "")] + "" + { + rtx exec = force_reg (DImode, operands[5]); + + rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[0], + operands[1], operands[3], + INTVAL (operands[2]), exec); + + emit_insn (gen_scatter<mode>_insn_1offset_exec (addr, const0_rtx, + operands[4], const0_rtx, + const0_rtx, exec)); + DONE; + }) + (define_code_iterator cond_op [plus minus mult]) (define_expand "cond_<expander><mode>" @@ -4397,7 +4651,7 @@ rtx tmp = gen_reg_rtx (<MODE>mode); rtx v1 = gen_rtx_REG (<MODE>mode, VGPR_REGNO (1)); - emit_insn (gen_mul<mode>3_dup (tmp, v1, operands[2])); + emit_insn (gen_mul<mode>3_dup (tmp, operands[2], v1)); emit_insn (gen_add<mode>3_dup (operands[0], tmp, operands[1])); DONE; }) diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc index 0ce5a29..3b26d5c 100644 --- a/gcc/config/gcn/gcn.cc +++ b/gcc/config/gcn/gcn.cc @@ -1275,13 +1275,13 @@ gen_##PREFIX##vN##SUFFIX (PARAMS) \ } #define GEN_VNM_NOEXEC(PREFIX, SUFFIX, PARAMS, ARGS) \ -GEN_VN_NOEXEC (PREFIX, qi##SUFFIX, A(PARAMS), A(ARGS)) \ -GEN_VN_NOEXEC (PREFIX, hi##SUFFIX, A(PARAMS), A(ARGS)) \ -GEN_VN_NOEXEC (PREFIX, hf##SUFFIX, A(PARAMS), A(ARGS)) \ +USE_QHF (GEN_VN_NOEXEC (PREFIX, qi##SUFFIX, A(PARAMS), A(ARGS))) \ +USE_QHF (GEN_VN_NOEXEC (PREFIX, hi##SUFFIX, A(PARAMS), A(ARGS))) \ +USE_QHF (GEN_VN_NOEXEC (PREFIX, hf##SUFFIX, A(PARAMS), A(ARGS))) \ GEN_VN_NOEXEC (PREFIX, si##SUFFIX, A(PARAMS), A(ARGS)) \ -GEN_VN_NOEXEC (PREFIX, sf##SUFFIX, A(PARAMS), A(ARGS)) \ +USE_QHF (GEN_VN_NOEXEC (PREFIX, sf##SUFFIX, A(PARAMS), A(ARGS))) \ GEN_VN_NOEXEC (PREFIX, di##SUFFIX, A(PARAMS), A(ARGS)) \ -GEN_VN_NOEXEC (PREFIX, df##SUFFIX, A(PARAMS), A(ARGS)) \ +USE_QHF (GEN_VN_NOEXEC (PREFIX, df##SUFFIX, A(PARAMS), A(ARGS))) \ static rtx \ gen_##PREFIX##vNm##SUFFIX (PARAMS) \ { \ @@ -1289,13 +1289,13 @@ gen_##PREFIX##vNm##SUFFIX (PARAMS) \ \ switch (mode) \ { \ - case E_QImode: return gen_##PREFIX##vNqi##SUFFIX (ARGS); \ - case E_HImode: return gen_##PREFIX##vNhi##SUFFIX (ARGS); \ - case E_HFmode: return gen_##PREFIX##vNhf##SUFFIX (ARGS); \ + USE_QHF (case E_QImode: return gen_##PREFIX##vNqi##SUFFIX (ARGS);) \ + USE_QHF (case E_HImode: return gen_##PREFIX##vNhi##SUFFIX (ARGS);) \ + USE_QHF (case E_HFmode: return gen_##PREFIX##vNhf##SUFFIX (ARGS);) \ case E_SImode: return gen_##PREFIX##vNsi##SUFFIX (ARGS); \ - case E_SFmode: return gen_##PREFIX##vNsf##SUFFIX (ARGS); \ + USE_QHF (case E_SFmode: return gen_##PREFIX##vNsf##SUFFIX (ARGS);) \ case E_DImode: return gen_##PREFIX##vNdi##SUFFIX (ARGS); \ - case E_DFmode: return gen_##PREFIX##vNdf##SUFFIX (ARGS); \ + USE_QHF (case E_DFmode: return gen_##PREFIX##vNdf##SUFFIX (ARGS);) \ default: \ break; \ } \ @@ -1340,13 +1340,13 @@ gen_##PREFIX##vN##SUFFIX (PARAMS, rtx merge_src=NULL, rtx exec=NULL) \ } #define GEN_VNM(PREFIX, SUFFIX, PARAMS, ARGS) \ -GEN_VN (PREFIX, qi##SUFFIX, A(PARAMS), A(ARGS)) \ -GEN_VN (PREFIX, hi##SUFFIX, A(PARAMS), A(ARGS)) \ -GEN_VN (PREFIX, hf##SUFFIX, A(PARAMS), A(ARGS)) \ +USE_QHF (GEN_VN (PREFIX, qi##SUFFIX, A(PARAMS), A(ARGS))) \ +USE_QHF (GEN_VN (PREFIX, hi##SUFFIX, A(PARAMS), A(ARGS))) \ +USE_QHF (GEN_VN (PREFIX, hf##SUFFIX, A(PARAMS), A(ARGS))) \ GEN_VN (PREFIX, si##SUFFIX, A(PARAMS), A(ARGS)) \ -GEN_VN (PREFIX, sf##SUFFIX, A(PARAMS), A(ARGS)) \ +USE_QHF (GEN_VN (PREFIX, sf##SUFFIX, A(PARAMS), A(ARGS))) \ GEN_VN (PREFIX, di##SUFFIX, A(PARAMS), A(ARGS)) \ -GEN_VN (PREFIX, df##SUFFIX, A(PARAMS), A(ARGS)) \ +USE_QHF (GEN_VN (PREFIX, df##SUFFIX, A(PARAMS), A(ARGS))) \ USE_TI (GEN_VN (PREFIX, ti##SUFFIX, A(PARAMS), A(ARGS))) \ static rtx \ gen_##PREFIX##vNm##SUFFIX (PARAMS, rtx merge_src=NULL, rtx exec=NULL) \ @@ -1355,15 +1355,22 @@ gen_##PREFIX##vNm##SUFFIX (PARAMS, rtx merge_src=NULL, rtx exec=NULL) \ \ switch (mode) \ { \ - case E_QImode: return gen_##PREFIX##vNqi##SUFFIX (ARGS, merge_src, exec); \ - case E_HImode: return gen_##PREFIX##vNhi##SUFFIX (ARGS, merge_src, exec); \ - case E_HFmode: return gen_##PREFIX##vNhf##SUFFIX (ARGS, merge_src, exec); \ - case E_SImode: return gen_##PREFIX##vNsi##SUFFIX (ARGS, merge_src, exec); \ - case E_SFmode: return gen_##PREFIX##vNsf##SUFFIX (ARGS, merge_src, exec); \ - case E_DImode: return gen_##PREFIX##vNdi##SUFFIX (ARGS, merge_src, exec); \ - case E_DFmode: return gen_##PREFIX##vNdf##SUFFIX (ARGS, merge_src, exec); \ - case E_TImode: \ - USE_TI (return gen_##PREFIX##vNti##SUFFIX (ARGS, merge_src, exec);) \ + USE_QHF (case E_QImode: \ + return gen_##PREFIX##vNqi##SUFFIX (ARGS, merge_src, exec);) \ + USE_QHF (case E_HImode: \ + return gen_##PREFIX##vNhi##SUFFIX (ARGS, merge_src, exec);) \ + USE_QHF (case E_HFmode: \ + return gen_##PREFIX##vNhf##SUFFIX (ARGS, merge_src, exec);) \ + case E_SImode: \ + return gen_##PREFIX##vNsi##SUFFIX (ARGS, merge_src, exec); \ + USE_QHF (case E_SFmode: \ + return gen_##PREFIX##vNsf##SUFFIX (ARGS, merge_src, exec);) \ + case E_DImode: \ + return gen_##PREFIX##vNdi##SUFFIX (ARGS, merge_src, exec); \ + USE_QHF (case E_DFmode: \ + return gen_##PREFIX##vNdf##SUFFIX (ARGS, merge_src, exec);) \ + USE_TI (case E_TImode: \ + return gen_##PREFIX##vNti##SUFFIX (ARGS, merge_src, exec);) \ default: \ break; \ } \ @@ -1372,7 +1379,8 @@ gen_##PREFIX##vNm##SUFFIX (PARAMS, rtx merge_src=NULL, rtx exec=NULL) \ return NULL_RTX; \ } -/* These have TImode support. */ +/* These support everything. */ +#define USE_QHF(ARGS) ARGS #define USE_TI(ARGS) ARGS GEN_VNM (mov,, A(rtx dest, rtx src), A(dest, src)) GEN_VNM (vec_duplicate,, A(rtx dest, rtx src), A(dest, src)) @@ -1382,6 +1390,7 @@ GEN_VNM (vec_duplicate,, A(rtx dest, rtx src), A(dest, src)) #define USE_TI(ARGS) GEN_VNM (add,3, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2)) GEN_VN (add,si3_dup, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2)) +GEN_VN (add,di3_dup, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2)) GEN_VN (add,si3_vcc_dup, A(rtx dest, rtx src1, rtx src2, rtx vcc), A(dest, src1, src2, vcc)) GEN_VN (add,di3_sext_dup2, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2)) @@ -1393,15 +1402,20 @@ GEN_VN (add,di3_vcc_zext_dup2, A(rtx dest, rtx src1, rtx src2, rtx vcc), GEN_VN (addc,si3, A(rtx dest, rtx src1, rtx src2, rtx vccout, rtx vccin), A(dest, src1, src2, vccout, vccin)) GEN_VN (and,si3, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2)) -GEN_VN (ashl,si3, A(rtx dest, rtx src, rtx shift), A(dest, src, shift)) GEN_VNM_NOEXEC (ds_bpermute,, A(rtx dest, rtx addr, rtx src, rtx exec), A(dest, addr, src, exec)) GEN_VNM (gather,_expr, A(rtx dest, rtx addr, rtx as, rtx vol), A(dest, addr, as, vol)) -GEN_VN (mul,si3_dup, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2)) GEN_VN (sub,si3, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2)) GEN_VN_NOEXEC (vec_series,si, A(rtx dest, rtx x, rtx c), A(dest, x, c)) +/* These do not have QI, HI, or any FP support. */ +#undef USE_QHF +#define USE_QHF(ARGS) +GEN_VNM (ashl,3, A(rtx dest, rtx src, rtx shift), A(dest, src, shift)) +GEN_VNM (mul,3_dup, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2)) + +#undef USE_QHF #undef USE_TI #undef GEN_VNM #undef GEN_VN @@ -1995,8 +2009,8 @@ gcn_expand_vector_init (rtx op0, rtx vec) rtx addr = gen_reg_rtx (addrmode); int unit_size = GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op0))); - emit_insn (gen_mulvNsi3_dup (ramp, gen_rtx_REG (offsetmode, VGPR_REGNO (1)), - GEN_INT (unit_size))); + emit_insn (gen_mulvNsi3_dup (ramp, GEN_INT (unit_size), + gen_rtx_REG (offsetmode, VGPR_REGNO (1)))); bool simple_repeat = true; @@ -2293,36 +2307,46 @@ gcn_expand_scalar_to_vector_address (machine_mode mode, rtx exec, rtx mem, Return values. ADDR_SPACE_FLAT - return VnDImode vector of absolute addresses. - ADDR_SPACE_GLOBAL - return VnSImode vector of offsets. */ + ADDR_SPACE_GLOBAL - return VnSImode vector of offsets. + 64-bit offsets - return VnDImode vector of absolute addresses. */ rtx gcn_expand_scaled_offsets (addr_space_t as, rtx base, rtx offsets, rtx scale, bool unsigned_p, rtx exec) { int vf = GET_MODE_NUNITS (GET_MODE (offsets)); - rtx tmpsi = gen_reg_rtx (VnMODE (vf, SImode)); - rtx tmpdi = gen_reg_rtx (VnMODE (vf, DImode)); + rtx scaled_offsets = gen_reg_rtx (GET_MODE (offsets)); + rtx abs_addr = gen_reg_rtx (VnMODE (vf, DImode)); + bool use_di = GET_MODE_INNER (GET_MODE (scaled_offsets)) == DImode; if (CONST_INT_P (scale) && INTVAL (scale) > 0 && exact_log2 (INTVAL (scale)) >= 0) - emit_insn (gen_ashlvNsi3 (tmpsi, offsets, - GEN_INT (exact_log2 (INTVAL (scale))), - NULL, exec)); + emit_insn (gen_ashlvNm3 (scaled_offsets, offsets, + GEN_INT (exact_log2 (INTVAL (scale))), + NULL, exec)); else - emit_insn (gen_mulvNsi3_dup (tmpsi, offsets, scale, NULL, exec)); + emit_insn (gen_mulvNm3_dup (scaled_offsets, scale, offsets, NULL, exec)); + /* No instructions support DImode offsets. */ + if (use_di) + { + emit_insn (gen_addvNdi3_dup (abs_addr, base, scaled_offsets, NULL, exec)); + return abs_addr; + } /* "Global" instructions do not support negative register offsets. */ - if (as == ADDR_SPACE_FLAT || !unsigned_p) + else if (as == ADDR_SPACE_FLAT || !unsigned_p) { if (unsigned_p) - emit_insn (gen_addvNdi3_zext_dup2 (tmpdi, tmpsi, base, NULL, exec)); + emit_insn (gen_addvNdi3_zext_dup2 (abs_addr, scaled_offsets, base, + NULL, exec)); else - emit_insn (gen_addvNdi3_sext_dup2 (tmpdi, tmpsi, base, NULL, exec)); - return tmpdi; + emit_insn (gen_addvNdi3_sext_dup2 (abs_addr, scaled_offsets, base, + NULL, exec)); + return abs_addr; } else if (as == ADDR_SPACE_GLOBAL) - return tmpsi; + return scaled_offsets; gcc_unreachable (); } diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index 49bd393..4682db85 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -24788,6 +24788,12 @@ static void map_egpr_constraints (vec<const char *> &constraints) buf.safe_push (cur[j + 1]); j++; break; + case '{': + do + { + buf.safe_push (cur[j]); + } while (cur[j++] != '}'); + break; default: buf.safe_push (cur[j]); break; diff --git a/gcc/config/pru/pru-pragma.cc b/gcc/config/pru/pru-pragma.cc index c3f3d33..9338780 100644 --- a/gcc/config/pru/pru-pragma.cc +++ b/gcc/config/pru/pru-pragma.cc @@ -46,21 +46,24 @@ pru_pragma_ctable_entry (cpp_reader *) enum cpp_ttype type; type = pragma_lex (&ctable_index); - if (type == CPP_NUMBER && tree_fits_uhwi_p (ctable_index)) + if (type == CPP_NUMBER && tree_fits_shwi_p (ctable_index)) { type = pragma_lex (&base_addr); - if (type == CPP_NUMBER && tree_fits_uhwi_p (base_addr)) + if (type == CPP_NUMBER && tree_fits_shwi_p (base_addr)) { - unsigned HOST_WIDE_INT i = tree_to_uhwi (ctable_index); - unsigned HOST_WIDE_INT base = tree_to_uhwi (base_addr); + HOST_WIDE_INT i = tree_to_shwi (ctable_index); + HOST_WIDE_INT base = sext_hwi (tree_to_shwi (base_addr), + POINTER_SIZE); type = pragma_lex (&base_addr); if (type != CPP_EOF) error ("junk at end of %<#pragma CTABLE_ENTRY%>"); - else if (i >= ARRAY_SIZE (pru_ctable)) + else if (!IN_RANGE (i, 0, ARRAY_SIZE (pru_ctable) - 1)) error ("%<CTABLE_ENTRY%> index %wd is not valid", i); else if (pru_ctable[i].valid && pru_ctable[i].base != base) error ("redefinition of %<CTABLE_ENTRY %wd%>", i); + else if (!IN_RANGE (base, INT32_MIN, INT32_MAX)) + error ("%<CTABLE_ENTRY%> base address does not fit in 32-bits"); else { if (base & 0xff) diff --git a/gcc/config/pru/pru-protos.h b/gcc/config/pru/pru-protos.h index c73fad8..4750f0e 100644 --- a/gcc/config/pru/pru-protos.h +++ b/gcc/config/pru/pru-protos.h @@ -23,7 +23,7 @@ struct pru_ctable_entry { bool valid; - unsigned HOST_WIDE_INT base; + HOST_WIDE_INT base; }; extern struct pru_ctable_entry pru_ctable[32]; @@ -66,9 +66,9 @@ pru_regno_ok_for_index_p (int regno, bool strict_p) return pru_regno_ok_for_base_p (regno, strict_p); } -extern int pru_get_ctable_exact_base_index (unsigned HOST_WIDE_INT caddr); -extern int pru_get_ctable_base_index (unsigned HOST_WIDE_INT caddr); -extern int pru_get_ctable_base_offset (unsigned HOST_WIDE_INT caddr); +extern int pru_get_ctable_exact_base_index (HOST_WIDE_INT caddr); +extern int pru_get_ctable_base_index (HOST_WIDE_INT caddr); +extern int pru_get_ctable_base_offset (HOST_WIDE_INT caddr); extern int pru_symref2ioregno (rtx op); diff --git a/gcc/config/pru/pru.cc b/gcc/config/pru/pru.cc index 47e5f24..322e319 100644 --- a/gcc/config/pru/pru.cc +++ b/gcc/config/pru/pru.cc @@ -1428,7 +1428,7 @@ pru_valid_const_ubyte_offset (machine_mode mode, HOST_WIDE_INT offset) /* Recognize a CTABLE base address. Return CTABLE entry index, or -1 if base was not found in the pragma-filled pru_ctable. */ int -pru_get_ctable_exact_base_index (unsigned HOST_WIDE_INT caddr) +pru_get_ctable_exact_base_index (HOST_WIDE_INT caddr) { unsigned int i; @@ -1444,7 +1444,7 @@ pru_get_ctable_exact_base_index (unsigned HOST_WIDE_INT caddr) /* Check if the given address can be addressed via CTABLE_BASE + UBYTE_OFFS, and return the base CTABLE index if possible. */ int -pru_get_ctable_base_index (unsigned HOST_WIDE_INT caddr) +pru_get_ctable_base_index (HOST_WIDE_INT caddr) { unsigned int i; @@ -1461,7 +1461,7 @@ pru_get_ctable_base_index (unsigned HOST_WIDE_INT caddr) /* Return the offset from some CTABLE base for this address. */ int -pru_get_ctable_base_offset (unsigned HOST_WIDE_INT caddr) +pru_get_ctable_base_offset (HOST_WIDE_INT caddr) { int i; @@ -2004,7 +2004,7 @@ pru_print_operand_address (FILE *file, machine_mode mode, rtx op) case CONST_INT: { - unsigned HOST_WIDE_INT caddr = INTVAL (op); + HOST_WIDE_INT caddr = INTVAL (op); int base = pru_get_ctable_base_index (caddr); int offs = pru_get_ctable_base_offset (caddr); if (base < 0) diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md index f372f0e..d884942 100644 --- a/gcc/config/riscv/autovec-opt.md +++ b/gcc/config/riscv/autovec-opt.md @@ -1714,6 +1714,68 @@ } [(set_attr "type" "vialu")]) +(define_insn_and_split "*uavg_floor_vx_<mode>" + [(set (match_operand:V_VLSI 0 "register_operand") + (if_then_else:V_VLSI + (unspec:<VM> + [(match_operand:<VM> 1 "vector_mask_operand") + (match_operand 5 "vector_length_operand") + (match_operand 6 "const_int_operand") + (match_operand 7 "const_int_operand") + (match_operand 8 "const_int_operand") + (match_operand 9 "const_int_operand") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM) + (reg:SI VXRM_REGNUM)] UNSPEC_VPREDICATE) + (unspec:V_VLSI + [(match_operand:V_VLSI 3 "register_operand") + (vec_duplicate:V_VLSI + (match_operand:<VEL> 4 "register_operand"))] UNSPEC_VAADDU) + (unspec:V_VLSI + [(match_operand:DI 2 "register_operand")] UNSPEC_VUNDEF)))] + "TARGET_VECTOR && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] + { + insn_code code = code_for_pred_scalar (UNSPEC_VAADDU, <MODE>mode); + rtx ops[] = {operands[0], operands[3], operands[4]}; + riscv_vector::emit_vlmax_insn (code, riscv_vector::BINARY_OP_VXRM_RDN, ops); + DONE; + } + [(set_attr "type" "vaalu")]) + +(define_insn_and_split "*uavg_floor_vx_<mode>" + [(set (match_operand:V_VLSI 0 "register_operand") + (if_then_else:V_VLSI + (unspec:<VM> + [(match_operand:<VM> 1 "vector_mask_operand") + (match_operand 5 "vector_length_operand") + (match_operand 6 "const_int_operand") + (match_operand 7 "const_int_operand") + (match_operand 8 "const_int_operand") + (match_operand 9 "const_int_operand") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM) + (reg:SI VXRM_REGNUM)] UNSPEC_VPREDICATE) + (unspec:V_VLSI + [(vec_duplicate:V_VLSI + (match_operand:<VEL> 4 "register_operand")) + (match_operand:V_VLSI 3 "register_operand")] UNSPEC_VAADDU) + (unspec:V_VLSI + [(match_operand:DI 2 "register_operand")] UNSPEC_VUNDEF)))] + "TARGET_VECTOR && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] + { + insn_code code = code_for_pred_scalar (UNSPEC_VAADDU, <MODE>mode); + rtx ops[] = {operands[0], operands[3], operands[4]}; + riscv_vector::emit_vlmax_insn (code, riscv_vector::BINARY_OP_VXRM_RDN, ops); + DONE; + } + [(set_attr "type" "vaalu")]) + ;; ============================================================================= ;; Combine vec_duplicate + op.vv to op.vf ;; Include @@ -1844,3 +1906,54 @@ } [(set_attr "type" "vfwmuladd")] ) + +;; vfwnmacc.vf +(define_insn_and_split "*vfwnmacc_vf_<mode>" + [(set (match_operand:VWEXTF 0 "register_operand") + (minus:VWEXTF + (mult:VWEXTF + (neg:VWEXTF + (vec_duplicate:VWEXTF + (float_extend:<VEL> + (match_operand:<VSUBEL> 2 "register_operand")))) + (float_extend:VWEXTF + (match_operand:<V_DOUBLE_TRUNC> 3 "register_operand"))) + (match_operand:VWEXTF 1 "register_operand")))] + "TARGET_VECTOR && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] + { + rtx ops[] = {operands[0], operands[1], operands[2], operands[3]}; + riscv_vector::emit_vlmax_insn( + code_for_pred_widen_mul_neg_scalar(MINUS, <MODE>mode), + riscv_vector::WIDEN_TERNARY_OP_FRM_DYN, ops); + DONE; + } + [(set_attr "type" "vfwmuladd")] +) + +;; vfwnmsac.vf +(define_insn_and_split "*vfwnmsac_vf_<mode>" + [(set (match_operand:VWEXTF 0 "register_operand") + (minus:VWEXTF + (match_operand:VWEXTF 1 "register_operand") + (mult:VWEXTF + (float_extend:VWEXTF + (match_operand:<V_DOUBLE_TRUNC> 3 "register_operand")) + (vec_duplicate:VWEXTF + (float_extend:<VEL> + (match_operand:<VSUBEL> 2 "register_operand"))))))] + "TARGET_VECTOR && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] + { + rtx ops[] = {operands[0], operands[1], operands[2], operands[3]}; + riscv_vector::emit_vlmax_insn( + code_for_pred_widen_mul_neg_scalar (PLUS, <MODE>mode), + riscv_vector::WIDEN_TERNARY_OP_FRM_DYN, ops); + DONE; + } + [(set_attr "type" "vfwmuladd")] +) diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md index da5d0e3..1fff8ac 100644 --- a/gcc/config/riscv/autovec.md +++ b/gcc/config/riscv/autovec.md @@ -2489,7 +2489,8 @@ (sign_extend:VWEXTI (match_operand:<V_DOUBLE_TRUNC> 1 "register_operand")) (sign_extend:VWEXTI - (match_operand:<V_DOUBLE_TRUNC> 2 "register_operand"))))))] + (match_operand:<V_DOUBLE_TRUNC> 2 "register_operand"))) + (const_int 1))))] "TARGET_VECTOR" { insn_code icode = code_for_pred (UNSPEC_VAADD, <V_DOUBLE_TRUNC>mode); @@ -2522,7 +2523,8 @@ (match_operand:<V_DOUBLE_TRUNC> 1 "register_operand")) (sign_extend:VWEXTI (match_operand:<V_DOUBLE_TRUNC> 2 "register_operand"))) - (const_int 1)))))] + (const_int 1)) + (const_int 1))))] "TARGET_VECTOR" { insn_code icode = code_for_pred (UNSPEC_VAADD, <V_DOUBLE_TRUNC>mode); diff --git a/gcc/config/riscv/generic-vector-ooo.md b/gcc/config/riscv/generic-vector-ooo.md index ab9e57f..773003b 100644 --- a/gcc/config/riscv/generic-vector-ooo.md +++ b/gcc/config/riscv/generic-vector-ooo.md @@ -17,6 +17,9 @@ ;; <http://www.gnu.org/licenses/>. ;; Vector load/store +;; The insn reservations include "generic" as we won't have a in-order +;; generic definition for vector instructions. + (define_automaton "vector_ooo") ;; Separate issue queue for vector instructions. @@ -29,119 +32,141 @@ (define_cpu_unit "vxu_ooo_multicycle" "vector_ooo") (define_insn_reservation "vec_load" 6 - (eq_attr "type" "vlde,vldm,vlds,vldux,vldox,vldff,vldr") + (and (eq_attr "tune" "generic_ooo,generic") + (eq_attr "type" "vlde,vldm,vlds,vldux,vldox,vldff,vldr")) "vxu_ooo_issue,vxu_ooo_alu") (define_insn_reservation "vec_store" 6 - (eq_attr "type" "vste,vstm,vsts,vstux,vstox,vstr") + (and (eq_attr "tune" "generic_ooo,generic") + (eq_attr "type" "vste,vstm,vsts,vstux,vstox,vstr")) "vxu_ooo_issue,vxu_ooo_alu") ;; Vector segment loads/stores. (define_insn_reservation "vec_loadstore_seg" 10 - (eq_attr "type" "vlsegde,vlsegds,vlsegdux,vlsegdox,vlsegdff,\ - vssegte,vssegts,vssegtux,vssegtox") + (and (eq_attr "tune" "generic_ooo,generic") + (eq_attr "type" "vlsegde,vlsegds,vlsegdux,vlsegdox,vlsegdff,\ + vssegte,vssegts,vssegtux,vssegtox")) "vxu_ooo_issue,vxu_ooo_alu") ;; Regular vector operations and integer comparisons. (define_insn_reservation "vec_alu" 3 - (eq_attr "type" "vialu,viwalu,vext,vicalu,vshift,vnshift,viminmax,vicmp,\ - vimov,vsalu,vaalu,vsshift,vnclip,vmov,vfmov,vector,\ - vandn,vbrev,vbrev8,vrev8,vclz,vctz,vrol,vror,vwsll") + (and (eq_attr "tune" "generic_ooo,generic") + (eq_attr "type" "vialu,viwalu,vext,vicalu,vshift,vnshift,viminmax,vicmp,\ + vimov,vsalu,vaalu,vsshift,vnclip,vmov,vfmov,vector,\ + vandn,vbrev,vbrev8,vrev8,vclz,vctz,vrol,vror,vwsll")) "vxu_ooo_issue,vxu_ooo_alu") ;; Vector float comparison, conversion etc. (define_insn_reservation "vec_fcmp" 3 - (eq_attr "type" "vfrecp,vfminmax,vfcmp,vfsgnj,vfclass,vfcvtitof,\ - vfcvtftoi,vfwcvtitof,vfwcvtftoi,vfwcvtftof,vfncvtitof,\ - vfncvtftoi,vfncvtftof,vfncvtbf16,vfwcvtbf16") + (and (eq_attr "tune" "generic_ooo,generic") + (eq_attr "type" "vfrecp,vfminmax,vfcmp,vfsgnj,vfclass,vfcvtitof,\ + vfcvtftoi,vfwcvtitof,vfwcvtftoi,vfwcvtftof,vfncvtitof,\ + vfncvtftoi,vfncvtftof,vfncvtbf16,vfwcvtbf16")) "vxu_ooo_issue,vxu_ooo_alu") ;; Vector integer multiplication. (define_insn_reservation "vec_imul" 4 - (eq_attr "type" "vimul,viwmul,vimuladd,viwmuladd,vsmul,vclmul,vclmulh,\ - vghsh,vgmul") + (and (eq_attr "tune" "generic_ooo,generic") + (eq_attr "type" "vimul,viwmul,vimuladd,viwmuladd,vsmul,vclmul,vclmulh,\ + vghsh,vgmul")) "vxu_ooo_issue,vxu_ooo_alu") ;; Vector float addition. (define_insn_reservation "vec_fadd" 4 - (eq_attr "type" "vfalu,vfwalu") + (and (eq_attr "tune" "generic_ooo,generic") + (eq_attr "type" "vfalu,vfwalu")) "vxu_ooo_issue,vxu_ooo_alu") ;; Vector float multiplication and FMA. (define_insn_reservation "vec_fmul" 6 - (eq_attr "type" "vfmul,vfwmul,vfmuladd,vfwmuladd,vfwmaccbf16,sf_vqmacc,sf_vfnrclip") + (and (eq_attr "tune" "generic_ooo,generic") + (eq_attr "type" "vfmul,vfwmul,vfmuladd,vfwmuladd,vfwmaccbf16,sf_vqmacc,sf_vfnrclip")) "vxu_ooo_issue,vxu_ooo_alu") ;; Vector crypto, assumed to be a generic operation for now. (define_insn_reservation "vec_crypto" 4 - (eq_attr "type" "crypto,vclz,vctz,vcpop") + (and (eq_attr "tune" "generic_ooo,generic") + (eq_attr "type" "crypto,vclz,vctz,vcpop")) "vxu_ooo_issue,vxu_ooo_alu") ;; Vector crypto, AES (define_insn_reservation "vec_crypto_aes" 4 - (eq_attr "type" "vaesef,vaesem,vaesdf,vaesdm,vaeskf1,vaeskf2,vaesz") + (and (eq_attr "tune" "generic_ooo,generic") + (eq_attr "type" "vaesef,vaesem,vaesdf,vaesdm,vaeskf1,vaeskf2,vaesz")) "vxu_ooo_issue,vxu_ooo_alu") ;; Vector crypto, sha (define_insn_reservation "vec_crypto_sha" 4 - (eq_attr "type" "vsha2ms,vsha2ch,vsha2cl") + (and (eq_attr "tune" "generic_ooo,generic") + (eq_attr "type" "vsha2ms,vsha2ch,vsha2cl")) "vxu_ooo_issue,vxu_ooo_alu") ;; Vector crypto, SM3/4 (define_insn_reservation "vec_crypto_sm" 4 - (eq_attr "type" "vsm4k,vsm4r,vsm3me,vsm3c") + (and (eq_attr "tune" "generic_ooo,generic") + (eq_attr "type" "vsm4k,vsm4r,vsm3me,vsm3c")) "vxu_ooo_issue,vxu_ooo_alu") ;; Vector permute. (define_insn_reservation "vec_perm" 3 - (eq_attr "type" "vimerge,vfmerge,vslideup,vslidedown,vislide1up,\ - vislide1down,vfslide1up,vfslide1down,vgather,vcompress") + (and (eq_attr "tune" "generic_ooo,generic") + (eq_attr "type" "vimerge,vfmerge,vslideup,vslidedown,vislide1up,\ + vislide1down,vfslide1up,vfslide1down,vgather,vcompress")) "vxu_ooo_issue,vxu_ooo_alu") ;; Vector reduction. (define_insn_reservation "vec_reduction" 8 - (eq_attr "type" "vired,viwred,vfredu,vfwredu") + (and (eq_attr "tune" "generic_ooo,generic") + (eq_attr "type" "vired,viwred,vfredu,vfwredu")) "vxu_ooo_issue,vxu_ooo_multicycle") ;; Vector ordered reduction, assume the latency number is for ;; a 128-bit vector. It is scaled in riscv_sched_adjust_cost ;; for larger vectors. (define_insn_reservation "vec_ordered_reduction" 10 - (eq_attr "type" "vfredo,vfwredo") + (and (eq_attr "tune" "generic_ooo,generic") + (eq_attr "type" "vfredo,vfwredo")) "vxu_ooo_issue,vxu_ooo_multicycle*3") ;; Vector integer division, assume not pipelined. (define_insn_reservation "vec_idiv" 16 - (eq_attr "type" "vidiv") + (and (eq_attr "tune" "generic_ooo,generic") + (eq_attr "type" "vidiv")) "vxu_ooo_issue,vxu_ooo_multicycle*3") ;; Vector float divisions and sqrt, assume not pipelined. (define_insn_reservation "vec_float_divsqrt" 16 - (eq_attr "type" "vfdiv,vfsqrt") + (and (eq_attr "tune" "generic_ooo,generic") + (eq_attr "type" "vfdiv,vfsqrt")) "vxu_ooo_issue,vxu_ooo_multicycle*3") ;; Vector mask operations. (define_insn_reservation "vec_mask" 2 - (eq_attr "type" "vmalu,vmpop,vmffs,vmsfs,vmiota,vmidx,vimovvx,vimovxv,\ - vfmovvf,vfmovfv") + (and (eq_attr "tune" "generic_ooo,generic") + (eq_attr "type" "vmalu,vmpop,vmffs,vmsfs,vmiota,vmidx,vimovvx,vimovxv,\ + vfmovvf,vfmovfv")) "vxu_ooo_issue,vxu_ooo_alu") ;; Vector vsetvl. (define_insn_reservation "vec_vesetvl" 1 - (eq_attr "type" "vsetvl,vsetvl_pre") + (and (eq_attr "tune" "generic_ooo,generic") + (eq_attr "type" "vsetvl,vsetvl_pre")) "vxu_ooo_issue") ;; Vector rounding mode setters, assume pipeline barrier. (define_insn_reservation "vec_setrm" 20 - (eq_attr "type" "wrvxrm,wrfrm") + (and (eq_attr "tune" "generic_ooo,generic") + (eq_attr "type" "wrvxrm,wrfrm")) "vxu_ooo_issue,vxu_ooo_issue*3") ;; Vector read vlen/vlenb. (define_insn_reservation "vec_readlen" 4 - (eq_attr "type" "rdvlenb,rdvl") + (and (eq_attr "tune" "generic_ooo,generic") + (eq_attr "type" "rdvlenb,rdvl")) "vxu_ooo_issue,vxu_ooo_issue") ;; Vector sf_vcp. (define_insn_reservation "vec_sf_vcp" 2 - (eq_attr "type" "sf_vc,sf_vc_se") + (and (eq_attr "tune" "generic_ooo,generic") + (eq_attr "type" "sf_vc,sf_vc_se")) "vxu_ooo_issue") diff --git a/gcc/config/riscv/mips-p8700.md b/gcc/config/riscv/mips-p8700.md index ae0ea8d..fac9abb 100644 --- a/gcc/config/riscv/mips-p8700.md +++ b/gcc/config/riscv/mips-p8700.md @@ -163,5 +163,5 @@ vgather,vcompress,vmov,vector,vandn,vbrev,vbrev8,vrev8,vclz,vctz,vcpop,vrol,vror,vwsll, vclmul,vclmulh,vghsh,vgmul,vaesef,vaesem,vaesdf,vaesdm,vaeskf1,vaeskf2,vaesz, vsha2ms,vsha2ch,vsha2cl,vsm4k,vsm4r,vsm3me,vsm3c,vfncvtbf16,vfwcvtbf16,vfwmaccbf16, - sf_vc,sf_vc_se")) + sf_vc,sf_vc_se,ghost")) "mips_p8700_dummies") diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 1275b03..3324819 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -3967,13 +3967,27 @@ get_vector_binary_rtx_cost (rtx x, int scalar2vr_cost) { gcc_assert (riscv_v_ext_mode_p (GET_MODE (x))); - rtx op_0 = XEXP (x, 0); - rtx op_1 = XEXP (x, 1); + rtx neg; + rtx op_0; + rtx op_1; + + if (GET_CODE (x) == UNSPEC) + { + op_0 = XVECEXP (x, 0, 0); + op_1 = XVECEXP (x, 0, 1); + } + else + { + op_0 = XEXP (x, 0); + op_1 = XEXP (x, 1); + } if (GET_CODE (op_0) == VEC_DUPLICATE || GET_CODE (op_1) == VEC_DUPLICATE) return (scalar2vr_cost + 1) * COSTS_N_INSNS (1); - else if (GET_CODE (op_0) == NEG && GET_CODE (op_1) == VEC_DUPLICATE) + else if (GET_CODE (neg = op_0) == NEG + && (GET_CODE (op_1) == VEC_DUPLICATE + || GET_CODE (XEXP (neg, 0)) == VEC_DUPLICATE)) return (scalar2vr_cost + 1) * COSTS_N_INSNS (1); else return COSTS_N_INSNS (1); @@ -4021,6 +4035,20 @@ riscv_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno ATTRIBUTE_UN case SS_MINUS: *total = get_vector_binary_rtx_cost (op, scalar2vr_cost); break; + case UNSPEC: + { + switch (XINT (op, 1)) + { + case UNSPEC_VAADDU: + *total + = get_vector_binary_rtx_cost (op, scalar2vr_cost); + break; + default: + *total = COSTS_N_INSNS (1); + break; + } + } + break; default: *total = COSTS_N_INSNS (1); break; @@ -9049,7 +9077,7 @@ riscv_allocate_and_probe_stack_space (rtx temp1, HOST_WIDE_INT size) /* We want the CFA independent of the stack pointer for the duration of the loop. */ add_reg_note (insn, REG_CFA_DEF_CFA, - plus_constant (Pmode, temp1, + plus_constant (Pmode, temp2, initial_cfa_offset + rounded_size)); RTX_FRAME_RELATED_P (insn) = 1; } @@ -10359,10 +10387,10 @@ riscv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr) bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr); bool sched1 = can_create_pseudo_p (); - unsigned int prev_dest_regno = (REG_P (SET_DEST (prev_set)) + unsigned int prev_dest_regno = (prev_set && REG_P (SET_DEST (prev_set)) ? REGNO (SET_DEST (prev_set)) : FIRST_PSEUDO_REGISTER); - unsigned int curr_dest_regno = (REG_P (SET_DEST (curr_set)) + unsigned int curr_dest_regno = (curr_set && REG_P (SET_DEST (curr_set)) ? REGNO (SET_DEST (curr_set)) : FIRST_PSEUDO_REGISTER); diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md index baf215b..c498166 100644 --- a/gcc/config/riscv/vector.md +++ b/gcc/config/riscv/vector.md @@ -4639,8 +4639,8 @@ ;; Handle GET_MODE_INNER (mode) = DImode. We need to split them since ;; we need to deal with SEW = 64 in RV32 system. (define_expand "@pred_<sat_op><mode>_scalar" - [(set (match_operand:VI_D 0 "register_operand") - (if_then_else:VI_D + [(set (match_operand:V_VLSI_D 0 "register_operand") + (if_then_else:V_VLSI_D (unspec:<VM> [(match_operand:<VM> 1 "vector_mask_operand") (match_operand 5 "vector_length_operand") @@ -4651,10 +4651,10 @@ (reg:SI VL_REGNUM) (reg:SI VTYPE_REGNUM) (reg:SI VXRM_REGNUM)] UNSPEC_VPREDICATE) - (unspec:VI_D - [(match_operand:VI_D 3 "register_operand") + (unspec:V_VLSI_D + [(match_operand:V_VLSI_D 3 "register_operand") (match_operand:<VEL> 4 "reg_or_int_operand")] VSAT_ARITH_OP) - (match_operand:VI_D 2 "vector_merge_operand")))] + (match_operand:V_VLSI_D 2 "vector_merge_operand")))] "TARGET_VECTOR" { if (riscv_vector::sew64_scalar_helper ( @@ -4673,8 +4673,8 @@ }) (define_insn "*pred_<sat_op><mode>_scalar" - [(set (match_operand:VI_D 0 "register_operand" "=vd, vr, vd, vr") - (if_then_else:VI_D + [(set (match_operand:V_VLSI_D 0 "register_operand" "=vd, vr, vd, vr") + (if_then_else:V_VLSI_D (unspec:<VM> [(match_operand:<VM> 1 "vector_mask_operand" " vm, vm,Wc1,Wc1") (match_operand 5 "vector_length_operand" "rvl,rvl,rvl,rvl") @@ -4685,18 +4685,18 @@ (reg:SI VL_REGNUM) (reg:SI VTYPE_REGNUM) (reg:SI VXRM_REGNUM)] UNSPEC_VPREDICATE) - (unspec:VI_D - [(match_operand:VI_D 3 "register_operand" " vr, vr, vr, vr") + (unspec:V_VLSI_D + [(match_operand:V_VLSI_D 3 "register_operand" " vr, vr, vr, vr") (match_operand:<VEL> 4 "reg_or_0_operand" " rJ, rJ, rJ, rJ")] VSAT_ARITH_OP) - (match_operand:VI_D 2 "vector_merge_operand" " vu, 0, vu, 0")))] + (match_operand:V_VLSI_D 2 "vector_merge_operand" " vu, 0, vu, 0")))] "TARGET_VECTOR" "v<sat_op>.vx\t%0,%3,%z4%p1" [(set_attr "type" "<sat_insn_type>") (set_attr "mode" "<MODE>")]) (define_insn "*pred_<sat_op><mode>_extended_scalar" - [(set (match_operand:VI_D 0 "register_operand" "=vd, vr, vd, vr") - (if_then_else:VI_D + [(set (match_operand:V_VLSI_D 0 "register_operand" "=vd, vr, vd, vr") + (if_then_else:V_VLSI_D (unspec:<VM> [(match_operand:<VM> 1 "vector_mask_operand" " vm, vm,Wc1,Wc1") (match_operand 5 "vector_length_operand" "rvl,rvl,rvl,rvl") @@ -4707,11 +4707,11 @@ (reg:SI VL_REGNUM) (reg:SI VTYPE_REGNUM) (reg:SI VXRM_REGNUM)] UNSPEC_VPREDICATE) - (unspec:VI_D - [(match_operand:VI_D 3 "register_operand" " vr, vr, vr, vr") + (unspec:V_VLSI_D + [(match_operand:V_VLSI_D 3 "register_operand" " vr, vr, vr, vr") (sign_extend:<VEL> (match_operand:<VSUBEL> 4 "reg_or_0_operand" " rJ, rJ, rJ, rJ"))] VSAT_ARITH_OP) - (match_operand:VI_D 2 "vector_merge_operand" " vu, 0, vu, 0")))] + (match_operand:V_VLSI_D 2 "vector_merge_operand" " vu, 0, vu, 0")))] "TARGET_VECTOR && !TARGET_64BIT" "v<sat_op>.vx\t%0,%3,%z4%p1" [(set_attr "type" "<sat_insn_type>") diff --git a/gcc/config/riscv/xiangshan.md b/gcc/config/riscv/xiangshan.md index 5ed6bac..34b4a8f 100644 --- a/gcc/config/riscv/xiangshan.md +++ b/gcc/config/riscv/xiangshan.md @@ -107,7 +107,8 @@ ;; they are just dummies like this one. (define_insn_reservation "xiangshan_alu_unknown" 1 (and (eq_attr "tune" "xiangshan") - (eq_attr "type" "zicond,min,max,minu,maxu,clz,ctz,cpop,ghost,rotate,clmul,condmove,crypto,mvpair,rdvlenb,rdvl,wrvxrm,wrfrm,rdfrm,vsetvl,vsetvl_pre,vlde,vste,vldm,vstm,vlds,vsts,vldux,vldox,vstux,vstox,vldff,vldr,vstr,vlsegde,vssegte,vlsegds,vssegts,vlsegdux,vlsegdox,vssegtux,vssegtox,vlsegdff,vialu,viwalu,vext,vicalu,vshift,vnshift,vicmp,viminmax,vimul,vidiv,viwmul,vimuladd,sf_vqmacc,viwmuladd,vimerge,vimov,vsalu,vaalu,vsmul,vsshift,vnclip,sf_vfnrclip,vfalu,vfwalu,vfmul,vfdiv,vfwmul,vfmuladd,vfwmuladd,vfsqrt,vfrecp,vfcmp,vfminmax,vfsgnj,vfclass,vfmerge,vfmov,vfcvtitof,vfcvtftoi,vfwcvtitof,vfwcvtftoi,vfwcvtftof,vfncvtitof,vfncvtftoi,vfncvtftof,vired,viwred,vfredu,vfredo,vfwredu,vfwredo,vmalu,vmpop,vmffs,vmsfs,vmiota,vmidx,vimovvx,vimovxv,vfmovvf,vfmovfv,vslideup,vslidedown,vislide1up,vislide1down,vfslide1up,vfslide1down,vgather,vcompress,vmov,vector,vandn,vbrev,vbrev8,vrev8,vclz,vctz,vcpop,vrol,vror,vwsll,vclmul,vclmulh,vghsh,vgmul,vaesef,vaesem,vaesdf,vaesdm,vaeskf1,vaeskf2,vaesz,vsha2ms,vsha2ch,vsha2cl,vsm4k,vsm4r,vsm3me,vsm3c,vfncvtbf16,vfwcvtbf16,vfwmaccbf16")) + (eq_attr "type" "zicond,min,max,minu,maxu,clz,ctz,cpop,ghost,rotate,clmul,condmove,crypto,mvpair,rdvlenb,rdvl,wrvxrm,wrfrm,rdfrm,vsetvl,vsetvl_pre,vlde,vste,vldm,vstm,vlds,vsts,vldux,vldox,vstux,vstox,vldff,vldr,vstr,vlsegde,vssegte,vlsegds,vssegts,vlsegdux,vlsegdox,vssegtux,vssegtox,vlsegdff,vialu,viwalu,vext,vicalu,vshift,vnshift,vicmp,viminmax,vimul,vidiv,viwmul,vimuladd,sf_vqmacc,viwmuladd,vimerge,vimov,vsalu,vaalu,vsmul,vsshift,vnclip,sf_vfnrclip,vfalu,vfwalu,vfmul,vfdiv,vfwmul,vfmuladd,vfwmuladd,vfsqrt,vfrecp,vfcmp,vfminmax,vfsgnj,vfclass,vfmerge,vfmov,vfcvtitof,vfcvtftoi,vfwcvtitof,vfwcvtftoi,vfwcvtftof,vfncvtitof,vfncvtftoi,vfncvtftof,vired,viwred,vfredu,vfredo,vfwredu,vfwredo,vmalu,vmpop,vmffs,vmsfs,vmiota,vmidx,vimovvx,vimovxv,vfmovvf,vfmovfv,vslideup,vslidedown,vislide1up,vislide1down,vfslide1up,vfslide1down,vgather,vcompress,vmov,vector,vandn,vbrev,vbrev8,vrev8,vclz,vctz,vcpop,vrol,vror,vwsll,vclmul,vclmulh,vghsh,vgmul,vaesef,vaesem,vaesdf,vaesdm,vaeskf1,vaeskf2,vaesz,vsha2ms,vsha2ch,vsha2cl,vsm4k,vsm4r,vsm3me,vsm3c,vfncvtbf16,vfwcvtbf16,vfwmaccbf16,sf_vc,sf_vc_se")) + "xs_alu_rs") ;; ---------------------------------------------------- diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc index b5e636c..a474e13 100644 --- a/gcc/config/s390/s390.cc +++ b/gcc/config/s390/s390.cc @@ -17843,9 +17843,11 @@ f_constraint_p (const char *constraint) for (size_t i = 0, c_len = strlen (constraint); i < c_len; i += CONSTRAINT_LEN (constraint[i], constraint + i)) { - if (constraint[i] == 'f') + if (constraint[i] == 'f' + || (constraint[i] == '{' && constraint[i + 1] == 'f')) seen_f_p = true; - if (constraint[i] == 'v') + if (constraint[i] == 'v' + || (constraint[i] == '{' && constraint[i + 1] == 'v')) seen_v_p = true; } @@ -17935,7 +17937,8 @@ s390_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &inputs, continue; bool allows_mem, allows_reg, is_inout; bool ok = parse_output_constraint (&constraint, i, ninputs, noutputs, - &allows_mem, &allows_reg, &is_inout); + &allows_mem, &allows_reg, &is_inout, + nullptr); gcc_assert (ok); if (!f_constraint_p (constraint)) /* Long double with a constraint other than "=f" - nothing to do. */ @@ -17980,7 +17983,7 @@ s390_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &inputs, bool allows_mem, allows_reg; bool ok = parse_input_constraint (&constraint, i, ninputs, noutputs, 0, constraints.address (), &allows_mem, - &allows_reg); + &allows_reg, nullptr); gcc_assert (ok); if (!f_constraint_p (constraint)) /* Long double with a constraint other than "f" (or "=f" for inout diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc index b75cec1..02554c5 100644 --- a/gcc/config/xtensa/xtensa.cc +++ b/gcc/config/xtensa/xtensa.cc @@ -601,8 +601,8 @@ constantpool_address_p (const_rtx addr) /* Make sure the address is word aligned. */ offset = XEXP (addr, 1); - if ((!CONST_INT_P (offset)) - || ((INTVAL (offset) & 3) != 0)) + if (! CONST_INT_P (offset) + || (INTVAL (offset) & 3) != 0) return false; sym = XEXP (addr, 0); @@ -611,6 +611,7 @@ constantpool_address_p (const_rtx addr) if (SYMBOL_REF_P (sym) && CONSTANT_POOL_ADDRESS_P (sym)) return true; + return false; } @@ -4694,29 +4695,32 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, } } +/* Return TRUE if the specified insn corresponds to one or more L32R machine + instructions. */ + static bool xtensa_is_insn_L32R_p (const rtx_insn *insn) { - rtx x = PATTERN (insn); + rtx pat, dest, src; - if (GET_CODE (x) != SET) + /* "PATTERN (insn)" can be used without checking, see insn_cost() + in gcc/rtlanal.cc. */ + if (GET_CODE (pat = PATTERN (insn)) != SET + || ! register_operand (dest = SET_DEST (pat), VOIDmode)) return false; - x = XEXP (x, 1); - if (MEM_P (x)) - { - x = XEXP (x, 0); - return (SYMBOL_REF_P (x) || CONST_INT_P (x)) - && CONSTANT_POOL_ADDRESS_P (x); - } - - /* relaxed MOVI instructions, that will be converted to L32R by the - assembler. */ - if (CONST_INT_P (x) - && ! xtensa_simm12b (INTVAL (x))) + if (constantpool_mem_p (src = SET_SRC (pat))) return true; - return false; + /* Return true if: + - CONST16 instruction is not configured, and + - the source is some constant, and also + - negation of "the source is integer and fits into the immediate + field". */ + return (!TARGET_CONST16 + && CONSTANT_P (src) + && ! ((GET_MODE (dest) == SImode || GET_MODE (dest) == HImode) + && CONST_INT_P (src) && xtensa_simm12b (INTVAL (src)))); } /* Compute a relative costs of RTL insns. This is necessary in order to @@ -4725,7 +4729,7 @@ xtensa_is_insn_L32R_p (const rtx_insn *insn) static int xtensa_insn_cost (rtx_insn *insn, bool speed) { - if (!(recog_memoized (insn) < 0)) + if (! (recog_memoized (insn) < 0)) { int len = get_attr_length (insn); @@ -4738,7 +4742,7 @@ xtensa_insn_cost (rtx_insn *insn, bool speed) /* "L32R" may be particular slow (implementation-dependent). */ if (xtensa_is_insn_L32R_p (insn)) - return COSTS_N_INSNS (1 + xtensa_extra_l32r_costs); + return COSTS_N_INSNS ((1 + xtensa_extra_l32r_costs) * n); /* Cost based on the pipeline model. */ switch (get_attr_type (insn)) @@ -4783,7 +4787,7 @@ xtensa_insn_cost (rtx_insn *insn, bool speed) { /* "L32R" itself plus constant in litpool. */ if (xtensa_is_insn_L32R_p (insn)) - len = 3 + 4; + len += (len / 3) * 4; /* Consider fractional instruction length (for example, ".n" short instructions or "L32R" litpool constants. */ diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md index 029be99..629dfdd 100644 --- a/gcc/config/xtensa/xtensa.md +++ b/gcc/config/xtensa/xtensa.md @@ -1297,7 +1297,10 @@ std::swap (operands[0], operands[1]); std::swap (operands[2], operands[3]); } -}) +} + [(set_attr "type" "move,move,load,load,store") + (set_attr "mode" "DI") + (set_attr "length" "6,12,6,6,6")]) (define_split [(set (match_operand:DI 0 "register_operand") @@ -1344,7 +1347,7 @@ %v0s32i\t%1, %0 rsr\t%0, ACCLO wsr\t%1, ACCLO" - [(set_attr "type" "move,move,move,load,store,store,move,move,move,move,move,load,load,store,rsr,wsr") + [(set_attr "type" "move,move,move,load,store,store,move,move,move,load,move,load,load,store,rsr,wsr") (set_attr "mode" "SI") (set_attr "length" "2,2,2,2,2,2,3,3,3,3,6,3,3,3,3,3")]) @@ -1410,7 +1413,7 @@ %v0s16i\t%1, %0 rsr\t%0, ACCLO wsr\t%1, ACCLO" - [(set_attr "type" "move,move,move,move,move,load,load,store,rsr,wsr") + [(set_attr "type" "move,move,move,move,load,load,load,store,rsr,wsr") (set_attr "mode" "HI") (set_attr "length" "2,2,3,3,3,3,3,3,3,3")]) @@ -1519,7 +1522,7 @@ const16\t%0, %t1\;const16\t%0, %b1 %v1l32i\t%0, %1 %v0s32i\t%1, %0" - [(set_attr "type" "farith,fload,fstore,move,load,load,store,move,farith,farith,move,move,load,store") + [(set_attr "type" "farith,fload,fstore,move,load,load,store,move,farith,farith,load,move,load,store") (set_attr "mode" "SF") (set_attr "length" "3,3,3,2,3,2,2,3,3,3,3,6,3,3")]) @@ -1643,7 +1646,10 @@ std::swap (operands[0], operands[1]); std::swap (operands[2], operands[3]); } -}) +} + [(set_attr "type" "move,load,move,load,load,store") + (set_attr "mode" "DF") + (set_attr "length" "6,6,12,6,6,6")]) ;; Block moves |