-rw-r--r--  gcc/ada/sem_eval.adb                               |   9
-rw-r--r--  gcc/analyzer/checker-event.cc                      |   3
-rw-r--r--  gcc/config/bpf/bpf.cc                              |  24
-rw-r--r--  gcc/config/riscv/autovec.md                        |  56
-rw-r--r--  gcc/config/riscv/riscv-v.cc                        |  62
-rw-r--r--  gcc/gdbhooks.py                                    |  33
-rw-r--r--  gcc/testsuite/gcc.c-torture/compile/pr122588-1.c   |  25
-rw-r--r--  gcc/testsuite/gcc.dg/tree-ssa/pr122588-1.c         |  30
-rw-r--r--  gcc/testsuite/gcc.dg/vect/complex/vect-complex-operations-run.c (renamed from gcc/testsuite/gcc.dg/vect/complex/complex-operations-run.c) | 0
-rw-r--r--  gcc/testsuite/gcc.dg/vect/vect-bool-3.c            |  13
-rw-r--r--  gcc/testsuite/gnat.dg/class_wide6.adb              |   9
-rw-r--r--  gcc/testsuite/gnat.dg/class_wide6.ads              |  19
-rw-r--r--  gcc/testsuite/gnat.dg/class_wide6_pkg.ads          |   9
-rw-r--r--  gcc/tree-ssa-forwprop.cc                           |   7
-rw-r--r--  gcc/tree-vect-data-refs.cc                         | 370
-rw-r--r--  gcc/tree-vect-stmts.cc                             | 150
-rw-r--r--  gcc/tree-vectorizer.h                              |  13
-rw-r--r--  gcc/tree.cc                                        |  26
-rw-r--r--  gcc/tree.h                                         |  15
19 files changed, 663 insertions(+), 210 deletions(-)
diff --git a/gcc/ada/sem_eval.adb b/gcc/ada/sem_eval.adb index f970932..7640149 100644 --- a/gcc/ada/sem_eval.adb +++ b/gcc/ada/sem_eval.adb @@ -6837,6 +6837,15 @@ package body Sem_Eval is then return True; + -- Handle class-wide subtypes, which never have discriminants, while + -- class-wide types may have them (but they are always unknown). + + elsif Ekind (T2) = E_Class_Wide_Subtype and then Etype (T2) = T1 then + return True; + + elsif Ekind (T1) = E_Class_Wide_Subtype and then Etype (T1) = T2 then + return True; + -- Because of view exchanges in multiple instantiations, conformance -- checking might try to match a partial view of a type with no -- discriminants with a full view that has defaulted discriminants. diff --git a/gcc/analyzer/checker-event.cc b/gcc/analyzer/checker-event.cc index 3e54c2a..57e36d4 100644 --- a/gcc/analyzer/checker-event.cc +++ b/gcc/analyzer/checker-event.cc @@ -361,7 +361,8 @@ region_creation_event_allocation_size::print_desc (pretty_printer &pp) const "allocated %qE bytes here", m_capacity); } - pp_printf (&pp, "allocated here"); + else + pp_printf (&pp, "allocated here"); } void diff --git a/gcc/config/bpf/bpf.cc b/gcc/config/bpf/bpf.cc index a28018b..39168f6 100644 --- a/gcc/config/bpf/bpf.cc +++ b/gcc/config/bpf/bpf.cc @@ -1252,13 +1252,11 @@ static void emit_move_loop (rtx src, rtx dst, machine_mode mode, int offset, int inc, unsigned iters, unsigned remainder) { - rtx reg = gen_reg_rtx (mode); - /* First copy in chunks as large as alignment permits. */ for (unsigned int i = 0; i < iters; i++) { - emit_move_insn (reg, adjust_address (src, mode, offset)); - emit_move_insn (adjust_address (dst, mode, offset), reg); + emit_insn (gen_rtx_SET (adjust_address (dst, mode, offset), + adjust_address (src, mode, offset))); offset += inc; } @@ -1266,22 +1264,22 @@ emit_move_loop (rtx src, rtx dst, machine_mode mode, int offset, int inc, used above. */ if (remainder & 4) { - emit_move_insn (reg, adjust_address (src, SImode, offset)); - emit_move_insn (adjust_address (dst, SImode, offset), reg); + emit_insn (gen_rtx_SET (adjust_address (dst, SImode, offset), + adjust_address (src, SImode, offset))); offset += (inc < 0 ? -4 : 4); remainder -= 4; } if (remainder & 2) { - emit_move_insn (reg, adjust_address (src, HImode, offset)); - emit_move_insn (adjust_address (dst, HImode, offset), reg); + emit_insn (gen_rtx_SET (adjust_address (dst, HImode, offset), + adjust_address (src, HImode, offset))); offset += (inc < 0 ? -2 : 2); remainder -= 2; } if (remainder & 1) { - emit_move_insn (reg, adjust_address (src, QImode, offset)); - emit_move_insn (adjust_address (dst, QImode, offset), reg); + emit_insn (gen_rtx_SET (adjust_address (dst, QImode, offset), + adjust_address (src, QImode, offset))); } } @@ -1351,13 +1349,13 @@ bpf_expand_cpymem (rtx *operands, bool is_move) fwd_label = gen_label_rtx (); done_label = gen_label_rtx (); - rtx dst_addr = copy_to_mode_reg (Pmode, XEXP (dst, 0)); - rtx src_addr = copy_to_mode_reg (Pmode, XEXP (src, 0)); + rtx src_addr = force_operand (XEXP (src, 0), NULL_RTX); + rtx dst_addr = force_operand (XEXP (dst, 0), NULL_RTX); emit_cmp_and_jump_insns (src_addr, dst_addr, GEU, NULL_RTX, Pmode, true, fwd_label, profile_probability::even ()); /* Emit the "backwards" unrolled loop. 
*/ - emit_move_loop (src, dst, mode, size_bytes, -inc, iters, remainder); + emit_move_loop (src, dst, mode, (size_bytes - 1), -inc, iters, remainder); emit_jump_insn (gen_jump (done_label)); emit_barrier (); diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md index 48de5ef..4f0a1ce 100644 --- a/gcc/config/riscv/autovec.md +++ b/gcc/config/riscv/autovec.md @@ -55,8 +55,8 @@ [(match_operand:RATIO64 0 "register_operand") (match_operand 1 "pmode_reg_or_0_operand") (match_operand:RATIO64I 2 "register_operand") - (match_operand 3 "<RATIO64:gs_extension>") - (match_operand 4 "<RATIO64:gs_scale>") + (match_operand 3 "const_1_operand") + (match_operand 4 "const_1_operand") (match_operand:<RATIO64:VM> 5 "vector_mask_operand") (match_operand 6 "maskload_else_operand") (match_operand 7 "autovec_length_operand") @@ -71,8 +71,8 @@ [(match_operand:RATIO32 0 "register_operand") (match_operand 1 "pmode_reg_or_0_operand") (match_operand:RATIO32I 2 "register_operand") - (match_operand 3 "<RATIO32:gs_extension>") - (match_operand 4 "<RATIO32:gs_scale>") + (match_operand 3 "const_1_operand") + (match_operand 4 "const_1_operand") (match_operand:<RATIO32:VM> 5 "vector_mask_operand") (match_operand 6 "maskload_else_operand") (match_operand 7 "autovec_length_operand") @@ -87,8 +87,8 @@ [(match_operand:RATIO16 0 "register_operand") (match_operand 1 "pmode_reg_or_0_operand") (match_operand:RATIO16I 2 "register_operand") - (match_operand 3 "<RATIO16:gs_extension>") - (match_operand 4 "<RATIO16:gs_scale>") + (match_operand 3 "const_1_operand") + (match_operand 4 "const_1_operand") (match_operand:<RATIO16:VM> 5 "vector_mask_operand") (match_operand 6 "maskload_else_operand") (match_operand 7 "autovec_length_operand") @@ -103,8 +103,8 @@ [(match_operand:RATIO8 0 "register_operand") (match_operand 1 "pmode_reg_or_0_operand") (match_operand:RATIO8I 2 "register_operand") - (match_operand 3 "<RATIO8:gs_extension>") - (match_operand 4 "<RATIO8:gs_scale>") + (match_operand 3 "const_1_operand") + (match_operand 4 "const_1_operand") (match_operand:<RATIO8:VM> 5 "vector_mask_operand") (match_operand 6 "maskload_else_operand") (match_operand 7 "autovec_length_operand") @@ -119,8 +119,8 @@ [(match_operand:RATIO4 0 "register_operand") (match_operand 1 "pmode_reg_or_0_operand") (match_operand:RATIO4I 2 "register_operand") - (match_operand 3 "<RATIO4:gs_extension>") - (match_operand 4 "<RATIO4:gs_scale>") + (match_operand 3 "const_1_operand") + (match_operand 4 "const_1_operand") (match_operand:<RATIO4:VM> 5 "vector_mask_operand") (match_operand 6 "maskload_else_operand") (match_operand 7 "autovec_length_operand") @@ -135,8 +135,8 @@ [(match_operand:RATIO2 0 "register_operand") (match_operand 1 "pmode_reg_or_0_operand") (match_operand:RATIO2I 2 "register_operand") - (match_operand 3 "<RATIO2:gs_extension>") - (match_operand 4 "<RATIO2:gs_scale>") + (match_operand 3 "const_1_operand") + (match_operand 4 "const_1_operand") (match_operand:<RATIO2:VM> 5 "vector_mask_operand") (match_operand 6 "maskload_else_operand") (match_operand 7 "autovec_length_operand") @@ -155,8 +155,8 @@ [(match_operand:RATIO1 0 "register_operand") (match_operand 1 "pmode_reg_or_0_operand") (match_operand:RATIO1 2 "register_operand") - (match_operand 3 "<gs_extension>") - (match_operand 4 "<gs_scale>") + (match_operand 3 "const_1_operand") + (match_operand 4 "const_1_operand") (match_operand:<VM> 5 "vector_mask_operand") (match_operand 6 "maskload_else_operand") (match_operand 7 "autovec_length_operand") @@ -174,8 +174,8 @@ (define_expand 
"mask_len_scatter_store<RATIO64:mode><RATIO64I:mode>" [(match_operand 0 "pmode_reg_or_0_operand") (match_operand:RATIO64I 1 "register_operand") - (match_operand 2 "<RATIO64:gs_extension>") - (match_operand 3 "<RATIO64:gs_scale>") + (match_operand 2 "const_1_operand") + (match_operand 3 "const_1_operand") (match_operand:RATIO64 4 "register_operand") (match_operand:<RATIO64:VM> 5 "vector_mask_operand") (match_operand 6 "autovec_length_operand") @@ -189,8 +189,8 @@ (define_expand "mask_len_scatter_store<RATIO32:mode><RATIO32I:mode>" [(match_operand 0 "pmode_reg_or_0_operand") (match_operand:RATIO32I 1 "register_operand") - (match_operand 2 "<RATIO32:gs_extension>") - (match_operand 3 "<RATIO32:gs_scale>") + (match_operand 2 "const_1_operand") + (match_operand 3 "const_1_operand") (match_operand:RATIO32 4 "register_operand") (match_operand:<RATIO32:VM> 5 "vector_mask_operand") (match_operand 6 "autovec_length_operand") @@ -204,8 +204,8 @@ (define_expand "mask_len_scatter_store<RATIO16:mode><RATIO16I:mode>" [(match_operand 0 "pmode_reg_or_0_operand") (match_operand:RATIO16I 1 "register_operand") - (match_operand 2 "<RATIO16:gs_extension>") - (match_operand 3 "<RATIO16:gs_scale>") + (match_operand 2 "const_1_operand") + (match_operand 3 "const_1_operand") (match_operand:RATIO16 4 "register_operand") (match_operand:<RATIO16:VM> 5 "vector_mask_operand") (match_operand 6 "autovec_length_operand") @@ -219,8 +219,8 @@ (define_expand "mask_len_scatter_store<RATIO8:mode><RATIO8I:mode>" [(match_operand 0 "pmode_reg_or_0_operand") (match_operand:RATIO8I 1 "register_operand") - (match_operand 2 "<RATIO8:gs_extension>") - (match_operand 3 "<RATIO8:gs_scale>") + (match_operand 2 "const_1_operand") + (match_operand 3 "const_1_operand") (match_operand:RATIO8 4 "register_operand") (match_operand:<RATIO8:VM> 5 "vector_mask_operand") (match_operand 6 "autovec_length_operand") @@ -234,8 +234,8 @@ (define_expand "mask_len_scatter_store<RATIO4:mode><RATIO4I:mode>" [(match_operand 0 "pmode_reg_or_0_operand") (match_operand:RATIO4I 1 "register_operand") - (match_operand 2 "<RATIO4:gs_extension>") - (match_operand 3 "<RATIO4:gs_scale>") + (match_operand 2 "const_1_operand") + (match_operand 3 "const_1_operand") (match_operand:RATIO4 4 "register_operand") (match_operand:<RATIO4:VM> 5 "vector_mask_operand") (match_operand 6 "autovec_length_operand") @@ -249,8 +249,8 @@ (define_expand "mask_len_scatter_store<RATIO2:mode><RATIO2I:mode>" [(match_operand 0 "pmode_reg_or_0_operand") (match_operand:RATIO2I 1 "register_operand") - (match_operand 2 "<RATIO2:gs_extension>") - (match_operand 3 "<RATIO2:gs_scale>") + (match_operand 2 "const_1_operand") + (match_operand 3 "const_1_operand") (match_operand:RATIO2 4 "register_operand") (match_operand:<RATIO2:VM> 5 "vector_mask_operand") (match_operand 6 "autovec_length_operand") @@ -268,8 +268,8 @@ (define_expand "mask_len_scatter_store<mode><mode>" [(match_operand 0 "pmode_reg_or_0_operand") (match_operand:RATIO1 1 "register_operand") - (match_operand 2 "<gs_extension>") - (match_operand 3 "<gs_scale>") + (match_operand 2 "const_1_operand") + (match_operand 3 "const_1_operand") (match_operand:RATIO1 4 "register_operand") (match_operand:<VM> 5 "vector_mask_operand") (match_operand 6 "autovec_length_operand") diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index 707924d..658c632 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -4747,8 +4747,6 @@ void expand_gather_scatter (rtx *ops, bool is_load) { rtx ptr, vec_offset, vec_reg; - 
bool zero_extend_p; - int shift; rtx mask = ops[5]; rtx len = ops[6]; if (is_load) @@ -4758,78 +4756,18 @@ expand_gather_scatter (rtx *ops, bool is_load) vec_reg = ops[0]; ptr = ops[1]; vec_offset = ops[2]; - zero_extend_p = INTVAL (ops[3]); - shift = exact_log2 (INTVAL (ops[4])); } else { vec_reg = ops[4]; ptr = ops[0]; vec_offset = ops[1]; - zero_extend_p = INTVAL (ops[2]); - shift = exact_log2 (INTVAL (ops[3])); } machine_mode vec_mode = GET_MODE (vec_reg); machine_mode idx_mode = GET_MODE (vec_offset); - scalar_mode inner_idx_mode = GET_MODE_INNER (idx_mode); - unsigned inner_offsize = GET_MODE_BITSIZE (inner_idx_mode); - poly_int64 nunits = GET_MODE_NUNITS (vec_mode); bool is_vlmax = is_vlmax_len_p (vec_mode, len); - bool use_widening_shift = false; - - /* Extend the offset element to address width. */ - if (inner_offsize < BITS_PER_WORD) - { - use_widening_shift = TARGET_ZVBB && zero_extend_p && shift == 1; - /* 7.2. Vector Load/Store Addressing Modes. - If the vector offset elements are narrower than XLEN, they are - zero-extended to XLEN before adding to the ptr effective address. If - the vector offset elements are wider than XLEN, the least-significant - XLEN bits are used in the address calculation. An implementation must - raise an illegal instruction exception if the EEW is not supported for - offset elements. - - RVV spec only refers to the shift == 0 case. */ - if (!zero_extend_p || shift) - { - if (zero_extend_p) - inner_idx_mode - = int_mode_for_size (inner_offsize * 2, 0).require (); - else - inner_idx_mode = int_mode_for_size (BITS_PER_WORD, 0).require (); - machine_mode new_idx_mode - = get_vector_mode (inner_idx_mode, nunits).require (); - if (!use_widening_shift) - { - rtx tmp = gen_reg_rtx (new_idx_mode); - emit_insn (gen_extend_insn (tmp, vec_offset, new_idx_mode, idx_mode, - zero_extend_p ? 
true : false)); - vec_offset = tmp; - } - idx_mode = new_idx_mode; - } - } - - if (shift) - { - rtx tmp; - if (!use_widening_shift) - tmp = expand_binop (idx_mode, ashl_optab, vec_offset, - gen_int_mode (shift, Pmode), NULL_RTX, 0, - OPTAB_DIRECT); - else - { - tmp = gen_reg_rtx (idx_mode); - insn_code icode = code_for_pred_vwsll_scalar (idx_mode); - rtx ops[] = {tmp, vec_offset, const1_rtx}; - emit_vlmax_insn (icode, BINARY_OP, ops); - } - - vec_offset = tmp; - } - insn_code icode = prepare_gather_scatter (vec_mode, idx_mode, is_load); if (is_vlmax) { diff --git a/gcc/gdbhooks.py b/gcc/gdbhooks.py index 4735d59..a581810 100644 --- a/gcc/gdbhooks.py +++ b/gcc/gdbhooks.py @@ -389,6 +389,32 @@ class CfgEdgePrinter: return result ###################################################################### +# Pretty-printers for -fanalyzer (namespace ana) +###################################################################### + +class AnaSupernodePrinter: + def __init__(self, gdbval): + self.gdbval = gdbval + + def to_string (self): + result = '<ana::supernode 0x%x' % intptr(self.gdbval) + if intptr(self.gdbval): + result += ' (SN %i)' % intptr(self.gdbval['m_index']) + result += '>' + return result + +class AnaExplodedNodePrinter: + def __init__(self, gdbval): + self.gdbval = gdbval + + def to_string (self): + result = '<ana::exploded_node 0x%x' % intptr(self.gdbval) + if intptr(self.gdbval): + result += ' (EN %i)' % intptr(self.gdbval['m_index']) + result += '>' + return result + +###################################################################### class Rtx: def __init__(self, gdbval): @@ -625,6 +651,13 @@ def build_pretty_printer(): pp.add_printer_for_types(['basic_block', 'basic_block_def *'], 'basic_block', BasicBlockPrinter) + pp.add_printer_for_types(['ana::supernode *', 'const ana::supernode *'], + 'ana::supernode', + AnaSupernodePrinter) + pp.add_printer_for_types(['ana::exploded_node *', + 'dedge<ana::eg_traits>::node_t *'], + 'ana::exploded_node', + AnaExplodedNodePrinter) pp.add_printer_for_types(['edge', 'edge_def *'], 'edge', CfgEdgePrinter) diff --git a/gcc/testsuite/gcc.c-torture/compile/pr122588-1.c b/gcc/testsuite/gcc.c-torture/compile/pr122588-1.c new file mode 100644 index 0000000..43ec621 --- /dev/null +++ b/gcc/testsuite/gcc.c-torture/compile/pr122588-1.c @@ -0,0 +1,25 @@ +/* Disable warnings about __sync_nand_and_fetch. */ +/* { dg-options "-w" } */ +/* PR tree-optimization/122588 */ + +int i; +char c; + +static inline __attribute__((__always_inline__)) +void foo0 (int a) +{ +l5: + __sync_nand_and_fetch (&i, 0); + int x = __builtin_memcmp_eq (&a, 0, 4); + if (__builtin_iseqsig (x, 0.)) + goto l5; + if (a) + __builtin_unreachable (); + c = a; +} + +int +main () +{ + foo0 (1); +} diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr122588-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr122588-1.c new file mode 100644 index 0000000..2c214c9 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr122588-1.c @@ -0,0 +1,30 @@ +/* { dg-do compile } */ +/* { dg-options "-O1 -fgimple" } */ +/* PR tree-optimization/122588 */ + +/* The removal of unreachable blocks should not + change blocks which have already become true/false. + The function below was is an example of that. And + forwprop does not go into non-executable blocks + so the statement `t = _1;` was still holding the + old reference. 
*/ + +int t; + +__GIMPLE(ssa,startwith("forwprop4")) void g(void) +{ + int _1; + __BB(2): + _1 = 1; + if (_1 != 0) + goto __BB3; + else + goto __BB4; + + __BB(3): + __builtin_unreachable (); + + __BB(4): + t = _1; + return; +} diff --git a/gcc/testsuite/gcc.dg/vect/complex/complex-operations-run.c b/gcc/testsuite/gcc.dg/vect/complex/vect-complex-operations-run.c index 2f916ab..2f916ab 100644 --- a/gcc/testsuite/gcc.dg/vect/complex/complex-operations-run.c +++ b/gcc/testsuite/gcc.dg/vect/complex/vect-complex-operations-run.c diff --git a/gcc/testsuite/gcc.dg/vect/vect-bool-3.c b/gcc/testsuite/gcc.dg/vect/vect-bool-3.c new file mode 100644 index 0000000..671f602 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-bool-3.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target vect_int } */ +/* { dg-require-effective-target vect_unpack } */ + +int count_true(const bool *values, int len) +{ + int count = 0; + for (int i = 0; i < len; i++) + count += values[i]; + return count; +} + +/* { dg-final { scan-tree-dump "optimized: loop vectorized" "vect" } } */ diff --git a/gcc/testsuite/gnat.dg/class_wide6.adb b/gcc/testsuite/gnat.dg/class_wide6.adb new file mode 100644 index 0000000..1a9b56a --- /dev/null +++ b/gcc/testsuite/gnat.dg/class_wide6.adb @@ -0,0 +1,9 @@ +package body Class_Wide6 is + + function Parse (Parser: Script_Info_Parser) return Script_Info'Class is + begin + pragma Warnings(Off); + return Parse (Parser); + end; + +end Class_Wide6; diff --git a/gcc/testsuite/gnat.dg/class_wide6.ads b/gcc/testsuite/gnat.dg/class_wide6.ads new file mode 100644 index 0000000..38c3194 --- /dev/null +++ b/gcc/testsuite/gnat.dg/class_wide6.ads @@ -0,0 +1,19 @@ +-- { dg-do compile } + +with Class_Wide6_Pkg; + +package Class_Wide6 is + + type Script_Kind_Enum is (Transformer, Validator); + + type Script_Info (Script_Kind : Script_Kind_Enum) is tagged null record; + + package Base_Script_Info_Node is new Class_Wide6_Pkg (Script_Info'Class); + + type Script_Info_Parser is new Base_Script_Info_Node.Base_Node_Parser with + null record; + + overriding function Parse (Parser: Script_Info_Parser) + return Script_Info'Class; + +end Class_Wide6; diff --git a/gcc/testsuite/gnat.dg/class_wide6_pkg.ads b/gcc/testsuite/gnat.dg/class_wide6_pkg.ads new file mode 100644 index 0000000..e3bf7e9 --- /dev/null +++ b/gcc/testsuite/gnat.dg/class_wide6_pkg.ads @@ -0,0 +1,9 @@ +generic + type Data_Type (<>) is private; +package Class_Wide6_Pkg is + + type Base_Node_Parser is abstract tagged limited null record; + + function Parse (Parser: Base_Node_Parser) return Data_Type is abstract; + +end Class_Wide6_Pkg; diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc index ae7f0e7..9f8d4ad 100644 --- a/gcc/tree-ssa-forwprop.cc +++ b/gcc/tree-ssa-forwprop.cc @@ -5080,7 +5080,12 @@ optimize_unreachable (basic_block bb) stmt = gsi_stmt (gsi); if (gcond *cond_stmt = dyn_cast <gcond *> (stmt)) { - if (e->flags & EDGE_TRUE_VALUE) + /* If the condition is already true/false + ignore it. This can happen during copy prop of forwprop. 
*/ + if (gimple_cond_true_p (cond_stmt) + || gimple_cond_false_p (cond_stmt)) + continue; + else if (e->flags & EDGE_TRUE_VALUE) gimple_cond_make_false (cond_stmt); else if (e->flags & EDGE_FALSE_VALUE) gimple_cond_make_true (cond_stmt); diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc index c794110..e8cfb88 100644 --- a/gcc/tree-vect-data-refs.cc +++ b/gcc/tree-vect-data-refs.cc @@ -4425,6 +4425,168 @@ vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo) return opt_result::success (); } +/* Structure to hold information about a supported gather/scatter + configuration. */ +struct gather_scatter_config +{ + internal_fn ifn; + tree offset_vectype; + int scale; + vec<int> elsvals; +}; + +/* Determine which gather/scatter IFN is supported for the given parameters. + IFN_MASK_GATHER_LOAD, IFN_GATHER_LOAD, and IFN_MASK_LEN_GATHER_LOAD + are mutually exclusive, so we only need to find one. Return the + supported IFN or IFN_LAST if none are supported. */ + +static internal_fn +vect_gather_scatter_which_ifn (bool read_p, bool masked_p, + tree vectype, tree memory_type, + tree offset_vectype, int scale, + vec<int> *elsvals) +{ + /* Work out which functions to try. */ + internal_fn ifn, alt_ifn, alt_ifn2; + if (read_p) + { + ifn = masked_p ? IFN_MASK_GATHER_LOAD : IFN_GATHER_LOAD; + alt_ifn = IFN_MASK_GATHER_LOAD; + alt_ifn2 = IFN_MASK_LEN_GATHER_LOAD; + } + else + { + ifn = masked_p ? IFN_MASK_SCATTER_STORE : IFN_SCATTER_STORE; + alt_ifn = IFN_MASK_SCATTER_STORE; + alt_ifn2 = IFN_MASK_LEN_SCATTER_STORE; + } + + if (!offset_vectype) + return IFN_LAST; + + if (internal_gather_scatter_fn_supported_p (ifn, vectype, memory_type, + offset_vectype, scale, elsvals)) + return ifn; + if (internal_gather_scatter_fn_supported_p (alt_ifn, vectype, memory_type, + offset_vectype, scale, elsvals)) + return alt_ifn; + if (internal_gather_scatter_fn_supported_p (alt_ifn2, vectype, memory_type, + offset_vectype, scale, elsvals)) + return alt_ifn2; + + return IFN_LAST; +} + +/* Collect all supported offset vector types for a gather load or scatter + store. READ_P is true for loads and false for stores. MASKED_P is true + if the load or store is conditional. VECTYPE is the data vector type. + MEMORY_TYPE is the type of the memory elements being loaded or stored, + and OFFSET_TYPE is the type of the offset. + SCALE is the amount by which the offset should be multiplied. + + Return a vector of all configurations the target supports (which can + be none). */ + +static auto_vec<gather_scatter_config> +vect_gather_scatter_get_configs (vec_info *vinfo, bool read_p, bool masked_p, + tree vectype, tree memory_type, + tree offset_type, int scale) +{ + auto_vec<gather_scatter_config> configs; + + auto_vec<tree, 8> offset_types_to_try; + + /* Try all sizes from the offset type's precision up to POINTER_SIZE. */ + for (unsigned int bits = TYPE_PRECISION (offset_type); + bits <= POINTER_SIZE; + bits *= 2) + { + /* Signed variant. */ + offset_types_to_try.safe_push + (build_nonstandard_integer_type (bits, 0)); + /* Unsigned variant. */ + offset_types_to_try.safe_push + (build_nonstandard_integer_type (bits, 1)); + } + + /* Once we find which IFN works for one offset type, we know that it + will work for other offset types as well. Then we can perform + the checks for the remaining offset types with only that IFN. + However, we might need to try different offset types to find which + IFN is supported, since the check is offset-type-specific. */ + internal_fn ifn = IFN_LAST; + + /* Try each offset type. 
*/ + for (unsigned int i = 0; i < offset_types_to_try.length (); i++) + { + tree offset_type = offset_types_to_try[i]; + tree offset_vectype = get_vectype_for_scalar_type (vinfo, offset_type); + if (!offset_vectype) + continue; + + /* Try multiple scale values. Start with exact match, then try + smaller common scales that a target might support . */ + int scales_to_try[] = {scale, 1, 2, 4, 8}; + + for (unsigned int j = 0; + j < sizeof (scales_to_try) / sizeof (*scales_to_try); + j++) + { + int try_scale = scales_to_try[j]; + + /* Skip scales >= requested scale (except for exact match). */ + if (j > 0 && try_scale >= scale) + continue; + + /* Skip if requested scale is not a multiple of this scale. */ + if (j > 0 && scale % try_scale != 0) + continue; + + vec<int> elsvals = vNULL; + + /* If we haven't determined which IFN is supported yet, try all three + to find which one the target supports. */ + if (ifn == IFN_LAST) + { + ifn = vect_gather_scatter_which_ifn (read_p, masked_p, + vectype, memory_type, + offset_vectype, try_scale, + &elsvals); + if (ifn != IFN_LAST) + { + /* Found which IFN is supported. Save this configuration. */ + gather_scatter_config config; + config.ifn = ifn; + config.offset_vectype = offset_vectype; + config.scale = try_scale; + config.elsvals = elsvals; + configs.safe_push (config); + } + } + else + { + /* We already know which IFN is supported, just check if this + offset type and scale work with it. */ + if (internal_gather_scatter_fn_supported_p (ifn, vectype, + memory_type, + offset_vectype, + try_scale, + &elsvals)) + { + gather_scatter_config config; + config.ifn = ifn; + config.offset_vectype = offset_vectype; + config.scale = try_scale; + config.elsvals = elsvals; + configs.safe_push (config); + } + } + } + } + + return configs; +} + /* Check whether we can use an internal function for a gather load or scatter store. READ_P is true for loads and false for stores. MASKED_P is true if the load or store is conditional. MEMORY_TYPE is @@ -4433,18 +4595,30 @@ vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo) base address. If OFFSET_TYPE is scalar the function chooses an appropriate vector type for it. SCALE is the amount by which the offset should be multiplied *after* it has been converted to address width. + If the target does not support the requested SCALE, SUPPORTED_SCALE + will contain the scale that is actually supported + (which may be smaller, requiring additional multiplication). + Otherwise SUPPORTED_SCALE is 0. Return true if the function is supported, storing the function id in *IFN_OUT and the vector type for the offset in *OFFSET_VECTYPE_OUT. + If we support an offset vector type with different signedness than + OFFSET_TYPE store it in SUPPORTED_OFFSET_VECTYPE. - If we can use gather and store the possible else values in ELSVALS. */ + If we can use gather/scatter and ELSVALS is nonzero, store the possible + else values in ELSVALS. 
*/ bool vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p, tree vectype, tree memory_type, tree offset_type, - int scale, internal_fn *ifn_out, - tree *offset_vectype_out, vec<int> *elsvals) + int scale, int *supported_scale, + internal_fn *ifn_out, + tree *offset_vectype_out, + tree *supported_offset_vectype, + vec<int> *elsvals) { + *supported_offset_vectype = NULL_TREE; + *supported_scale = 0; unsigned int memory_bits = tree_to_uhwi (TYPE_SIZE (memory_type)); unsigned int element_bits = vector_element_bits (vectype); if (element_bits != memory_bits) @@ -4452,80 +4626,131 @@ vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p, memory elements. */ return false; - /* Work out which function we need. */ - internal_fn ifn, alt_ifn, alt_ifn2; - if (read_p) - { - ifn = masked_p ? IFN_MASK_GATHER_LOAD : IFN_GATHER_LOAD; - alt_ifn = IFN_MASK_GATHER_LOAD; - /* When target supports MASK_LEN_GATHER_LOAD, we always - use MASK_LEN_GATHER_LOAD regardless whether len and - mask are valid or not. */ - alt_ifn2 = IFN_MASK_LEN_GATHER_LOAD; - } - else - { - ifn = masked_p ? IFN_MASK_SCATTER_STORE : IFN_SCATTER_STORE; - alt_ifn = IFN_MASK_SCATTER_STORE; - /* When target supports MASK_LEN_SCATTER_STORE, we always - use MASK_LEN_SCATTER_STORE regardless whether len and - mask are valid or not. */ - alt_ifn2 = IFN_MASK_LEN_SCATTER_STORE; - } + /* Get the original offset vector type for comparison. */ + tree offset_vectype = VECTOR_TYPE_P (offset_type) + ? offset_type : get_vectype_for_scalar_type (vinfo, offset_type); - for (;;) + offset_type = TREE_TYPE (offset_vectype); + + /* Get all supported configurations for this data vector type. */ + auto_vec<gather_scatter_config> configs + = vect_gather_scatter_get_configs (vinfo, read_p, masked_p, vectype, + memory_type, offset_type, scale); + + if (configs.is_empty ()) + return false; + + /* Selection priority: + 1 - Exact scale match + offset type match + 2 - Exact scale match + sign-swapped offset + 3 - Smaller scale + offset type match + 4 - Smaller scale + sign-swapped offset + Within each category, prefer smaller offset types. */ + + /* First pass: exact scale match with no conversion. */ + for (unsigned int i = 0; i < configs.length (); i++) { - tree offset_vectype; - if (VECTOR_TYPE_P (offset_type)) - offset_vectype = offset_type; - else + if (configs[i].scale == scale + && TYPE_SIGN (configs[i].offset_vectype) + == TYPE_SIGN (offset_vectype)) { - offset_vectype = get_vectype_for_scalar_type (vinfo, offset_type); - if (!offset_vectype) - return false; + *ifn_out = configs[i].ifn; + *offset_vectype_out = configs[i].offset_vectype; + if (elsvals) + *elsvals = configs[i].elsvals; + return true; } + } - /* Test whether the target supports this combination. */ - if (internal_gather_scatter_fn_supported_p (ifn, vectype, memory_type, - offset_vectype, scale, - elsvals)) + /* No direct match. This means we try to find either + - a sign-swapped offset vectype or + - a different scale and 2x larger offset type + - a different scale and larger sign-swapped offset vectype. */ + unsigned int offset_precision = TYPE_PRECISION (TREE_TYPE (offset_vectype)); + unsigned int needed_precision + = TYPE_UNSIGNED (offset_vectype) ? offset_precision * 2 : POINTER_SIZE; + needed_precision = std::min (needed_precision, (unsigned) POINTER_SIZE); + + /* Second pass: No direct match. This means we try to find a sign-swapped + offset vectype. 
*/ + enum tree_code tmp; + for (unsigned int i = 0; i < configs.length (); i++) + { + unsigned int precision + = TYPE_PRECISION (TREE_TYPE (configs[i].offset_vectype)); + if (configs[i].scale == scale + && precision >= needed_precision + && (supportable_convert_operation (CONVERT_EXPR, + configs[i].offset_vectype, + offset_vectype, &tmp) + || (needed_precision == offset_precision + && tree_nop_conversion_p (configs[i].offset_vectype, + offset_vectype)))) { - *ifn_out = ifn; + *ifn_out = configs[i].ifn; *offset_vectype_out = offset_vectype; + *supported_offset_vectype = configs[i].offset_vectype; + if (elsvals) + *elsvals = configs[i].elsvals; return true; } - else if (!masked_p - && internal_gather_scatter_fn_supported_p (alt_ifn, vectype, - memory_type, - offset_vectype, - scale, elsvals)) + } + + /* Third pass: Try a smaller scale with the same signedness. */ + needed_precision = offset_precision * 2; + needed_precision = std::min (needed_precision, (unsigned) POINTER_SIZE); + + for (unsigned int i = 0; i < configs.length (); i++) + { + unsigned int precision + = TYPE_PRECISION (TREE_TYPE (configs[i].offset_vectype)); + if (configs[i].scale < scale + && precision >= needed_precision + && (supportable_convert_operation (CONVERT_EXPR, + configs[i].offset_vectype, + offset_vectype, &tmp) + || (needed_precision == offset_precision + && tree_nop_conversion_p (configs[i].offset_vectype, + offset_vectype)))) { - *ifn_out = alt_ifn; - *offset_vectype_out = offset_vectype; + *ifn_out = configs[i].ifn; + *offset_vectype_out = configs[i].offset_vectype; + *supported_scale = configs[i].scale; + if (elsvals) + *elsvals = configs[i].elsvals; return true; } - else if (internal_gather_scatter_fn_supported_p (alt_ifn2, vectype, - memory_type, - offset_vectype, scale, - elsvals)) + } + + /* Fourth pass: Try a smaller scale and sign-swapped offset vectype. */ + needed_precision + = TYPE_UNSIGNED (offset_vectype) ? offset_precision * 2 : POINTER_SIZE; + needed_precision = std::min (needed_precision, (unsigned) POINTER_SIZE); + + for (unsigned int i = 0; i < configs.length (); i++) + { + unsigned int precision + = TYPE_PRECISION (TREE_TYPE (configs[i].offset_vectype)); + if (configs[i].scale < scale + && precision >= needed_precision + && (supportable_convert_operation (CONVERT_EXPR, + configs[i].offset_vectype, + offset_vectype, &tmp) + || (needed_precision == offset_precision + && tree_nop_conversion_p (configs[i].offset_vectype, + offset_vectype)))) { - *ifn_out = alt_ifn2; + *ifn_out = configs[i].ifn; *offset_vectype_out = offset_vectype; + *supported_offset_vectype = configs[i].offset_vectype; + *supported_scale = configs[i].scale; + if (elsvals) + *elsvals = configs[i].elsvals; return true; } - - /* For fixed offset vector type we're done. */ - if (VECTOR_TYPE_P (offset_type)) - return false; - - if (TYPE_PRECISION (offset_type) >= POINTER_SIZE - && TYPE_PRECISION (offset_type) >= element_bits) - return false; - - /* Try a larger offset vector type. */ - offset_type = build_nonstandard_integer_type - (TYPE_PRECISION (offset_type) * 2, TYPE_UNSIGNED (offset_type)); } + + return false; } /* STMT_INFO is a call to an internal gather load or scatter store function. @@ -4678,6 +4903,8 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, tree vectype, base = fold_convert (sizetype, base); base = size_binop (PLUS_EXPR, base, size_int (pbytepos)); + int tmp_scale; + tree tmp_offset_vectype; /* OFF at this point may be either a SSA_NAME or some tree expression from get_inner_reference. 
Try to peel off loop invariants from it @@ -4750,14 +4977,18 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, tree vectype, && !vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p, vectype, memory_type, signed_char_type_node, - new_scale, &ifn, + new_scale, &tmp_scale, + &ifn, &offset_vectype, + &tmp_offset_vectype, elsvals) && !vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p, vectype, memory_type, unsigned_char_type_node, - new_scale, &ifn, + new_scale, &tmp_scale, + &ifn, &offset_vectype, + &tmp_offset_vectype, elsvals)) break; scale = new_scale; @@ -4780,8 +5011,12 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, tree vectype, && !POINTER_TYPE_P (TREE_TYPE (off)) && vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p, vectype, memory_type, - TREE_TYPE (off), scale, &ifn, - &offset_vectype, elsvals)) + TREE_TYPE (off), + scale, &tmp_scale, + &ifn, + &offset_vectype, + &tmp_offset_vectype, + elsvals)) break; if (TYPE_PRECISION (TREE_TYPE (op0)) @@ -4834,8 +5069,11 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, tree vectype, if (use_ifn_p) { if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p, - vectype, memory_type, offtype, scale, - &ifn, &offset_vectype, elsvals)) + vectype, memory_type, offtype, + scale, &tmp_scale, + &ifn, &offset_vectype, + &tmp_offset_vectype, + elsvals)) ifn = IFN_LAST; decl = NULL_TREE; } diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index 83acbb3..1620fc6 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -1505,6 +1505,17 @@ check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype, : ls->strided_offset_vectype); tree memory_type = TREE_TYPE (DR_REF (STMT_VINFO_DR_INFO (repr)->dr)); int scale = SLP_TREE_GS_SCALE (slp_node); + + /* The following "supported" checks just verify what we established in + get_load_store_type and don't try different offset types. + Therefore, off_vectype must be a supported offset type. In case + we chose a different one use this instead. */ + if (ls->supported_offset_vectype) + off_vectype = ls->supported_offset_vectype; + /* Same for scale. */ + if (ls->supported_scale) + scale = ls->supported_scale; + if (internal_gather_scatter_fn_supported_p (len_ifn, vectype, memory_type, off_vectype, scale, @@ -1697,10 +1708,13 @@ vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info, tree vectype, /* See whether the target supports the operation with an offset no narrower than OFFSET_TYPE. */ tree memory_type = TREE_TYPE (DR_REF (dr)); + tree tmp_offset_vectype; + int tmp_scale; if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p, - vectype, memory_type, offset_type, scale, + vectype, memory_type, offset_type, + scale, &tmp_scale, &gs_info->ifn, &gs_info->offset_vectype, - elsvals) + &tmp_offset_vectype, elsvals) || gs_info->ifn == IFN_LAST) continue; @@ -1779,10 +1793,12 @@ vect_use_grouped_gather (dr_vec_info *dr_info, tree vectype, type must exist) so it is possible that even though a gather/scatter is not available we still have a strided load/store. 
*/ bool ok = false; + tree tmp_vectype; + int tmp_scale; if (vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p, *pun_vectype, - TREE_TYPE (*pun_vectype), *pun_vectype, 1, &ifn, - &offset_vectype, elsvals)) + TREE_TYPE (*pun_vectype), *pun_vectype, 1, &tmp_scale, &ifn, + &offset_vectype, &tmp_vectype, elsvals)) ok = true; else if (internal_strided_fn_supported_p (strided_ifn, *pun_vectype, elsvals)) @@ -1840,15 +1856,6 @@ vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info, tree vectype, masked_p, gs_info, elsvals)) return false; } - else - { - tree old_offset_type = TREE_TYPE (gs_info->offset); - tree new_offset_type = TREE_TYPE (gs_info->offset_vectype); - - gcc_assert (TYPE_PRECISION (new_offset_type) - >= TYPE_PRECISION (old_offset_type)); - gs_info->offset = fold_convert (new_offset_type, gs_info->offset); - } if (!single_element_p && !targetm.vectorize.prefer_gather_scatter (TYPE_MODE (vectype), @@ -2080,6 +2087,8 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info, tree *ls_type = &ls->ls_type; bool *slp_perm = &ls->slp_perm; unsigned *n_perms = &ls->n_perms; + tree *supported_offset_vectype = &ls->supported_offset_vectype; + int *supported_scale = &ls->supported_scale; loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo); poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype); class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL; @@ -2152,12 +2161,29 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info, tree memory_type = TREE_TYPE (DR_REF (first_dr_info->dr)); tree tem; if (vect_gather_scatter_fn_p (loop_vinfo, vls_type == VLS_LOAD, - masked_p, vectype, - memory_type, - offset_vectype, scale, + masked_p, vectype, memory_type, + offset_vectype, scale, supported_scale, &ls->gs.ifn, &tem, - elsvals)) - *memory_access_type = VMAT_GATHER_SCATTER_IFN; + supported_offset_vectype, elsvals)) + { + if (dump_enabled_p ()) + { + dump_printf_loc (MSG_NOTE, vect_location, + "gather/scatter with required " + "offset type " + "%T and offset scale %d.\n", + offset_vectype, scale); + if (*supported_offset_vectype) + dump_printf_loc (MSG_NOTE, vect_location, + " target supports offset type %T.\n", + *supported_offset_vectype); + if (*supported_scale) + dump_printf_loc (MSG_NOTE, vect_location, + " target supports offset scale %d.\n", + *supported_scale); + } + *memory_access_type = VMAT_GATHER_SCATTER_IFN; + } else if (vls_type == VLS_LOAD ? (targetm.vectorize.builtin_gather && (ls->gs.decl @@ -2421,6 +2447,19 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info, masked_p, &gs_info, elsvals, group_size, single_element_p)) { + /* vect_use_strided_gather_scatters_p does not save the actually + supported scale and offset type so do that here. + We need it later in check_load_store_for_partial_vectors + where we only check if the given internal function is supported + (to choose whether to use the IFN, LEGACY, or EMULATED flavor + of gather/scatter) and don't re-do the full analysis. 
*/ + tree tmp; + gcc_assert (vect_gather_scatter_fn_p + (loop_vinfo, vls_type == VLS_LOAD, masked_p, vectype, + gs_info.memory_type, TREE_TYPE (gs_info.offset), + gs_info.scale, supported_scale, &gs_info.ifn, + &tmp, supported_offset_vectype, elsvals)); + SLP_TREE_GS_SCALE (slp_node) = gs_info.scale; SLP_TREE_GS_BASE (slp_node) = error_mark_node; ls->gs.ifn = gs_info.ifn; @@ -5282,15 +5321,12 @@ vectorizable_conversion (vec_info *vinfo, return false; if (!VECTOR_BOOLEAN_TYPE_P (vectype_out) - && ((INTEGRAL_TYPE_P (lhs_type) - && !type_has_mode_precision_p (lhs_type)) - || (INTEGRAL_TYPE_P (rhs_type) - && !type_has_mode_precision_p (rhs_type)))) + && INTEGRAL_TYPE_P (lhs_type) + && !type_has_mode_precision_p (lhs_type)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "type conversion to/from bit-precision unsupported." - "\n"); + "type conversion to bit-precision unsupported\n"); return false; } @@ -8812,6 +8848,15 @@ vectorizable_store (vec_info *vinfo, { if (costing_p) { + if (ls.supported_offset_vectype) + inside_cost + += record_stmt_cost (cost_vec, 1, vector_stmt, + slp_node, 0, vect_body); + if (ls.supported_scale) + inside_cost + += record_stmt_cost (cost_vec, 1, vector_stmt, + slp_node, 0, vect_body); + unsigned int cnunits = vect_nunits_for_cost (vectype); inside_cost += record_stmt_cost (cost_vec, cnunits, scalar_store, @@ -8823,6 +8868,30 @@ vectorizable_store (vec_info *vinfo, vec_offset = vec_offsets[j]; tree scale = size_int (SLP_TREE_GS_SCALE (slp_node)); + bool strided = !VECTOR_TYPE_P (TREE_TYPE (vec_offset)); + + /* Perform the offset conversion and scaling if necessary. */ + if (!strided + && (ls.supported_offset_vectype || ls.supported_scale)) + { + gimple_seq stmts = NULL; + if (ls.supported_offset_vectype) + vec_offset = gimple_convert + (&stmts, ls.supported_offset_vectype, vec_offset); + if (ls.supported_scale) + { + tree mult_cst = build_int_cst + (TREE_TYPE (TREE_TYPE (vec_offset)), + SLP_TREE_GS_SCALE (slp_node) / ls.supported_scale); + tree mult = build_vector_from_val + (TREE_TYPE (vec_offset), mult_cst); + vec_offset = gimple_build + (&stmts, MULT_EXPR, TREE_TYPE (vec_offset), + vec_offset, mult); + scale = size_int (ls.supported_scale); + } + gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); + } if (ls.gs.ifn == IFN_MASK_LEN_SCATTER_STORE) { @@ -10638,6 +10707,15 @@ vectorizable_load (vec_info *vinfo, { if (costing_p) { + if (ls.supported_offset_vectype) + inside_cost + += record_stmt_cost (cost_vec, 1, vector_stmt, + slp_node, 0, vect_body); + if (ls.supported_scale) + inside_cost + += record_stmt_cost (cost_vec, 1, vector_stmt, + slp_node, 0, vect_body); + unsigned int cnunits = vect_nunits_for_cost (vectype); inside_cost = record_stmt_cost (cost_vec, cnunits, scalar_load, @@ -10648,6 +10726,30 @@ vectorizable_load (vec_info *vinfo, vec_offset = vec_offsets[i]; tree zero = build_zero_cst (vectype); tree scale = size_int (SLP_TREE_GS_SCALE (slp_node)); + bool strided = !VECTOR_TYPE_P (TREE_TYPE (vec_offset)); + + /* Perform the offset conversion and scaling if necessary. 
*/ + if (!strided + && (ls.supported_offset_vectype || ls.supported_scale)) + { + gimple_seq stmts = NULL; + if (ls.supported_offset_vectype) + vec_offset = gimple_convert + (&stmts, ls.supported_offset_vectype, vec_offset); + if (ls.supported_scale) + { + tree mult_cst = build_int_cst + (TREE_TYPE (TREE_TYPE (vec_offset)), + SLP_TREE_GS_SCALE (slp_node) / ls.supported_scale); + tree mult = build_vector_from_val + (TREE_TYPE (vec_offset), mult_cst); + vec_offset = gimple_build + (&stmts, MULT_EXPR, TREE_TYPE (vec_offset), + vec_offset, mult); + scale = size_int (ls.supported_scale); + } + gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); + } if (ls.gs.ifn == IFN_MASK_LEN_GATHER_LOAD) { diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index 359c994..b7f3297 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -289,6 +289,15 @@ struct vect_load_store_data : vect_data { } gs; tree strided_offset_vectype; // VMAT_GATHER_SCATTER_IFN, originally strided tree ls_type; // VMAT_GATHER_SCATTER_IFN + /* This is set to a supported offset vector type if we don't support the + originally requested offset type, otherwise NULL. + If nonzero there will be an additional offset conversion before + the gather/scatter. */ + tree supported_offset_vectype; // VMAT_GATHER_SCATTER_IFN + /* Similar for scale. Only nonzero if we don't support the requested + scale. Then we need to multiply the offset vector before the + gather/scatter. */ + int supported_scale; // VMAT_GATHER_SCATTER_IFN auto_vec<int> elsvals; /* True if the load requires a load permutation. */ bool slp_perm; // SLP_TREE_LOAD_PERMUTATION @@ -2588,8 +2597,8 @@ extern bool vect_slp_analyze_instance_alignment (vec_info *, slp_instance); extern opt_result vect_analyze_data_ref_accesses (vec_info *, vec<int> *); extern opt_result vect_prune_runtime_alias_test_list (loop_vec_info); extern bool vect_gather_scatter_fn_p (vec_info *, bool, bool, tree, tree, - tree, int, internal_fn *, tree *, - vec<int> * = nullptr); + tree, int, int *, internal_fn *, tree *, + tree *, vec<int> * = nullptr); extern bool vect_check_gather_scatter (stmt_vec_info, tree, loop_vec_info, gather_scatter_info *, vec<int> * = nullptr); diff --git a/gcc/tree.cc b/gcc/tree.cc index 298784e..4c8e31c 100644 --- a/gcc/tree.cc +++ b/gcc/tree.cc @@ -11031,32 +11031,34 @@ build_call_1 (tree return_type, tree fn, int nargs) /* Build a CALL_EXPR of class tcc_vl_exp with the indicated RETURN_TYPE and FN and a null static chain slot. NARGS is the number of call arguments - which are specified as "..." arguments. */ + which are specified as a va_list ARGS. */ tree -build_call_nary (tree return_type, tree fn, int nargs, ...) +build_call_valist (tree return_type, tree fn, int nargs, va_list args) { - tree ret; - va_list args; - va_start (args, nargs); - ret = build_call_valist (return_type, fn, nargs, args); - va_end (args); - return ret; + tree t; + int i; + + t = build_call_1 (return_type, fn, nargs); + for (i = 0; i < nargs; i++) + CALL_EXPR_ARG (t, i) = va_arg (args, tree); + process_call_operands (t); + return t; } /* Build a CALL_EXPR of class tcc_vl_exp with the indicated RETURN_TYPE and - FN and a null static chain slot. NARGS is the number of call arguments - which are specified as a va_list ARGS. */ + FN and a null static chain slot. ARGS specifies the call arguments. 
*/ tree -build_call_valist (tree return_type, tree fn, int nargs, va_list args) +build_call (tree return_type, tree fn, std::initializer_list<tree> args) { tree t; int i; + int nargs = args.size(); t = build_call_1 (return_type, fn, nargs); for (i = 0; i < nargs; i++) - CALL_EXPR_ARG (t, i) = va_arg (args, tree); + CALL_EXPR_ARG (t, i) = args.begin()[i]; process_call_operands (t); return t; } @@ -4974,8 +4974,21 @@ extern tree build_omp_clause (location_t, enum omp_clause_code); extern tree build_vl_exp (enum tree_code, int CXX_MEM_STAT_INFO); -extern tree build_call_nary (tree, tree, int, ...); extern tree build_call_valist (tree, tree, int, va_list); +extern tree build_call (tree, tree, std::initializer_list<tree>); + + +/* Build a CALL_EXPR of class tcc_vl_exp with the indicated RETURN_TYPE and + FN and a null static chain slot. NARGS is the number of call arguments + which are specified as "..." arguments. */ + +template <typename ...T> +inline tree build_call_nary (tree return_type, tree fn, int nargs, T... args) +{ + std::initializer_list<tree> args_ = {args...}; + gcc_checking_assert (sizeof...(args) == nargs); + return build_call (return_type, fn, args_); +} #define build_call_array(T1,T2,N,T3)\ build_call_array_loc (UNKNOWN_LOCATION, T1, T2, N, T3) extern tree build_call_array_loc (location_t, tree, tree, int, const tree *); |
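Note on the new ls.supported_scale handling in vectorizable_store/vectorizable_load above: when the target's gather/scatter only accepts a smaller scale than requested, the vectorizer now pre-multiplies the offset vector by (requested scale / supported scale) and hands the smaller scale to the instruction. The following is a minimal standalone sketch of the address equivalence this relies on; it is plain C++ with illustrative names, not GCC internals, and models a single gather element rather than the emitted vector code.

#include <cassert>
#include <cstdint>
#include <cstring>
#include <vector>

/* Gather model: element i is loaded from *(base + offset * scale).  */
static int64_t
gather_elem (const char *base, int64_t offset, int scale)
{
  int64_t v;
  std::memcpy (&v, base + offset * scale, sizeof (v));
  return v;
}

int
main ()
{
  std::vector<int64_t> data (64);
  for (size_t i = 0; i < data.size (); ++i)
    data[i] = int64_t (i) * 3;
  const char *base = reinterpret_cast<const char *> (data.data ());

  const int requested_scale = 8;  /* element size in bytes */
  const int supported_scale = 1;  /* what the target's gather can encode */

  for (int64_t off : {0, 5, 9, 17})
    {
      /* Requested form: the gather applies the full scale itself.  */
      int64_t a = gather_elem (base, off, requested_scale);
      /* Emitted form: pre-multiply the offset by requested/supported
	 and pass only the supported scale to the gather.  */
      int64_t b = gather_elem (base, off * (requested_scale / supported_scale),
			       supported_scale);
      assert (a == b);
    }
  return 0;
}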

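Note on the tree.h hunk: the varargs build_call_nary is replaced by a variadic template that checks the explicit argument count against the parameter pack and forwards to an initializer_list-based build_call. Below is a self-contained sketch of that forwarding pattern; the types and function names (call_expr, make_call, make_call_nary) are illustrative stand-ins, not GCC's tree API.

#include <cassert>
#include <initializer_list>
#include <string>

/* Stand-in for a call node; purely illustrative.  */
struct call_expr
{
  std::string fn;
  int nargs;
};

/* Counterpart of the new build_call: takes the arguments as an
   initializer_list.  */
static call_expr
make_call (const std::string &fn, std::initializer_list<int> args)
{
  call_expr c;
  c.fn = fn;
  c.nargs = static_cast<int> (args.size ());
  return c;
}

/* Counterpart of the template build_call_nary: the explicit NARGS is
   checked against the pack size, then everything is forwarded.  */
template <typename... T>
static call_expr
make_call_nary (const std::string &fn, int nargs, T... args)
{
  std::initializer_list<int> args_ = {args...};
  assert (static_cast<int> (sizeof... (args)) == nargs);
  return make_call (fn, args_);
}

int
main ()
{
  call_expr c = make_call_nary ("foo", 3, 1, 2, 3);
  assert (c.fn == "foo" && c.nargs == 3);
  return 0;
}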