diff options
author | Martin Liska <mliska@suse.cz> | 2021-08-10 09:12:21 +0200 |
---|---|---|
committer | Martin Liska <mliska@suse.cz> | 2021-08-10 09:12:21 +0200 |
commit | 7d5bfcbfd3f9074e48e78b82c60d9b5aca5011a0 (patch) | |
tree | 9a537b51e8dcb012012902396c043b80f4848fb4 /gcc | |
parent | 9207fa3effc9970d9e2a48993adbfc34c08a8f65 (diff) | |
parent | 0631faf87a197145acd833249bf8f20a1c4aaabf (diff) | |
download | gcc-7d5bfcbfd3f9074e48e78b82c60d9b5aca5011a0.zip gcc-7d5bfcbfd3f9074e48e78b82c60d9b5aca5011a0.tar.gz gcc-7d5bfcbfd3f9074e48e78b82c60d9b5aca5011a0.tar.bz2 |
Merge branch 'master' into devel/sphinx
Diffstat (limited to 'gcc')
76 files changed, 2400 insertions, 129 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 9d39f0f..e2ffd84 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,126 @@ +2021-08-09 Andrew MacLeod <amacleod@redhat.com> + + PR tree-optimization/101741 + * gimple-range-fold.cc (fold_using_range::range_of_builtin_call): Check + type of parameter for toupper/tolower. + +2021-08-09 Martin Jambor <mjambor@suse.cz> + + PR testsuite/101654 + * ipa-prop.c (propagate_controlled_uses): Removed a spurious space. + +2021-08-09 Pat Haugen <pthaugen@linux.ibm.com> + + * config/rs6000/rs6000.c (is_load_insn1): Verify destination is a + register. + (is_store_insn1): Verify source is a register. + +2021-08-09 Uroš Bizjak <ubizjak@gmail.com> + + PR target/101812 + * config/i386/mmx.md (<any_logic:code>v2sf3): + Rename from *mmx_<any_logic:code>v2sf3 + +2021-08-09 Thomas Schwinge <thomas@codesourcery.com> + + * config/nvptx/nvptx.c: Cross-reference parts adapted in + 'gcc/omp-oacc-neuter-broadcast.cc'. + * omp-low.c: Likewise. + * omp-oacc-neuter-broadcast.cc: Cross-reference parts adapted from + the above files. + +2021-08-09 Julian Brown <julian@codesourcery.com> + Kwok Cheung Yeung <kcy@codesourcery.com> + Thomas Schwinge <thomas@codesourcery.com> + + * config/gcn/gcn.c (gcn_init_builtins): Override decls for + BUILT_IN_GOACC_SINGLE_START, BUILT_IN_GOACC_SINGLE_COPY_START, + BUILT_IN_GOACC_SINGLE_COPY_END and BUILT_IN_GOACC_BARRIER. + (gcn_goacc_validate_dims): Turn on worker partitioning unconditionally. + (gcn_fork_join): Update comment. + * config/gcn/gcn.opt (flag_worker_partitioning): Remove. + (macc_experimental_workers): Remove unused option. + +2021-08-09 Julian Brown <julian@codesourcery.com> + Nathan Sidwell <nathan@codesourcery.com> (via 'gcc/config/nvptx/nvptx.c' master) + Kwok Cheung Yeung <kcy@codesourcery.com> + Thomas Schwinge <thomas@codesourcery.com> + + * Makefile.in (OBJS): Add omp-oacc-neuter-broadcast.o. + * doc/tm.texi.in (TARGET_GOACC_CREATE_WORKER_BROADCAST_RECORD): + Add documentation hook. 
+ * doc/tm.texi: Regenerate. + * omp-oacc-neuter-broadcast.cc: New file. + * omp-builtins.def (BUILT_IN_GOACC_BARRIER) + (BUILT_IN_GOACC_SINGLE_START, BUILT_IN_GOACC_SINGLE_COPY_START) + (BUILT_IN_GOACC_SINGLE_COPY_END): New builtins. + * passes.def (pass_omp_oacc_neuter_broadcast): Add pass. + * target.def (goacc.create_worker_broadcast_record): Add target + hook. + * tree-pass.h (make_pass_omp_oacc_neuter_broadcast): Add + prototype. + * config/gcn/gcn-protos.h (gcn_goacc_adjust_propagation_record): + Rename prototype to... + (gcn_goacc_create_worker_broadcast_record): ... this. + * config/gcn/gcn-tree.c (gcn_goacc_adjust_propagation_record): Rename + function to... + (gcn_goacc_create_worker_broadcast_record): ... this. + * config/gcn/gcn.c (TARGET_GOACC_ADJUST_PROPAGATION_RECORD): + Rename to... + (TARGET_GOACC_CREATE_WORKER_BROADCAST_RECORD): ... this. + +2021-08-09 Tejas Belagod <tejas.belagod@arm.com> + + PR target/101609 + * config/aarch64/aarch64-simd.md (vlshr<mode>3, vashr<mode>3): Use + the right iterator. + +2021-08-09 Thomas Schwinge <thomas@codesourcery.com> + + * Makefile.in (GTFILES): Remove '$(srcdir)/omp-offload.c'. + +2021-08-09 Thomas Schwinge <thomas@codesourcery.com> + + * builtins.def (DEF_GOACC_BUILTIN, DEF_GOMP_BUILTIN): Don't + consider '-foffload-abi'. + * common.opt (-foffload-abi): Remove 'Var', 'Init'. + * opts.c (common_handle_option) <-foffload-abi> [ACCEL_COMPILER]: + Ignore. + +2021-08-09 Thomas Schwinge <thomas@codesourcery.com> + + * optc-gen.awk: Sanity check that 'Init' doesn't appear without + 'Var'. + +2021-08-09 Thomas Schwinge <thomas@codesourcery.com> + + * omp-builtins.def (BUILT_IN_ACC_GET_DEVICE_TYPE): Remove. + +2021-08-09 Thomas Schwinge <thomas@codesourcery.com> + + * doc/gty.texi (Files): Update. + +2021-08-09 Thomas Schwinge <thomas@codesourcery.com> + + * doc/gty.texi (Files): Fix GTY header file example. 
+ +2021-08-09 Roger Sayle <roger@nextmovesoftware.com> + + * tree-ssa-ccp.c (value_mask_to_min_max): Helper function to + determine the upper and lower bounds from a mask-value pair. + (bit_value_unop) [ABS_EXPR, ABSU_EXPR]: Add support for + absolute value and unsigned absolute value expressions. + (bit_value_binop): Initialize *VAL's precision. + [LT_EXPR, LE_EXPR]: Use value_mask_to_min_max to determine + upper and lower bounds of operands. Add LE_EXPR/GE_EXPR + support when the operands are unknown but potentially equal. + [MIN_EXPR, MAX_EXPR]: Support minimum/maximum expressions. + +2021-08-09 Bin Cheng <bin.cheng@linux.alibaba.com> + + * config/aarch64/aarch64.md + (*extend<SHORT:mode><GPI:mode>2_aarch64): Use %<GPI:w>0. + 2021-08-08 Sergei Trofimovich <siarheit@google.com> * lra-constraints.c: Fix s/otput/output/ typo. diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP index 859da5a..7eb9baf 100644 --- a/gcc/DATESTAMP +++ b/gcc/DATESTAMP @@ -1 +1 @@ -20210809 +20210810 diff --git a/gcc/Makefile.in b/gcc/Makefile.in index c0f6e0a..6653e9e 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -1513,6 +1513,7 @@ OBJS = \ omp-general.o \ omp-low.o \ omp-oacc-kernels-decompose.o \ + omp-oacc-neuter-broadcast.o \ omp-simd-clone.o \ opt-problem.o \ optabs.o \ @@ -2693,7 +2694,6 @@ GTFILES = $(CPPLIB_H) $(srcdir)/input.h $(srcdir)/coretypes.h \ $(srcdir)/tree-ssa-operands.h \ $(srcdir)/tree-profile.c $(srcdir)/tree-nested.c \ $(srcdir)/omp-offload.h \ - $(srcdir)/omp-offload.c \ $(srcdir)/omp-general.c \ $(srcdir)/omp-low.c \ $(srcdir)/targhooks.c $(out_file) $(srcdir)/passes.c \ diff --git a/gcc/builtins.def b/gcc/builtins.def index ec556df..45a09b4 100644 --- a/gcc/builtins.def +++ b/gcc/builtins.def @@ -205,14 +205,11 @@ along with GCC; see the file COPYING3. If not see /* Builtin used by the implementation of OpenACC and OpenMP. Few of these are actually implemented in the compiler; most are in libgomp. 
*/ -/* These builtins also need to be enabled in offloading compilers invoked from - mkoffload; for that purpose, we're checking the -foffload-abi flag here. */ #undef DEF_GOACC_BUILTIN #define DEF_GOACC_BUILTIN(ENUM, NAME, TYPE, ATTRS) \ DEF_BUILTIN (ENUM, "__builtin_" NAME, BUILT_IN_NORMAL, TYPE, TYPE, \ false, true, true, ATTRS, false, \ - (flag_openacc \ - || flag_offload_abi != OFFLOAD_ABI_UNSET)) + flag_openacc) #undef DEF_GOACC_BUILTIN_COMPILER #define DEF_GOACC_BUILTIN_COMPILER(ENUM, NAME, TYPE, ATTRS) \ DEF_BUILTIN (ENUM, "__builtin_" NAME, BUILT_IN_NORMAL, TYPE, TYPE, \ @@ -227,8 +224,7 @@ along with GCC; see the file COPYING3. If not see false, true, true, ATTRS, false, \ (flag_openacc \ || flag_openmp \ - || flag_tree_parallelize_loops > 1 \ - || flag_offload_abi != OFFLOAD_ABI_UNSET)) + || flag_tree_parallelize_loops > 1)) /* Builtin used by the implementation of GNU TM. These functions are mapped to the actual implementation of the STM library. */ diff --git a/gcc/c/c-typeck.c b/gcc/c/c-typeck.c index 5d6565b..c5bf337 100644 --- a/gcc/c/c-typeck.c +++ b/gcc/c/c-typeck.c @@ -2992,7 +2992,7 @@ c_expr_sizeof_expr (location_t loc, struct c_expr expr) c_last_sizeof_loc = loc; ret.original_code = SIZEOF_EXPR; ret.original_type = NULL; - if (c_vla_type_p (TREE_TYPE (folded_expr))) + if (C_TYPE_VARIABLE_SIZE (TREE_TYPE (folded_expr))) { /* sizeof is evaluated when given a vla (C99 6.5.3.4p2). */ ret.value = build2 (C_MAYBE_CONST_EXPR, TREE_TYPE (ret.value), diff --git a/gcc/common.opt b/gcc/common.opt index d9da113..ed8ab5f 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -2112,7 +2112,7 @@ Common Driver Joined MissingArgError(options or targets=options missing after %q -foffload-options=<targets>=<options> Specify options for the offloading targets. 
foffload-abi= -Common Joined RejectNegative Enum(offload_abi) Var(flag_offload_abi) Init(OFFLOAD_ABI_UNSET) +Common Joined RejectNegative Enum(offload_abi) -foffload-abi=[lp64|ilp32] Set the ABI to use in an offload compiler. Enum diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index c5638d0..48eddf6 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -1299,13 +1299,10 @@ DONE; }) -;; Using mode VDQ_BHSI as there is no V2DImode neg! -;; Negating individual lanes most certainly offsets the -;; gain from vectorization. (define_expand "vashr<mode>3" - [(match_operand:VDQ_BHSI 0 "register_operand") - (match_operand:VDQ_BHSI 1 "register_operand") - (match_operand:VDQ_BHSI 2 "register_operand")] + [(match_operand:VDQ_I 0 "register_operand") + (match_operand:VDQ_I 1 "register_operand") + (match_operand:VDQ_I 2 "register_operand")] "TARGET_SIMD" { rtx neg = gen_reg_rtx (<MODE>mode); @@ -1333,9 +1330,9 @@ ) (define_expand "vlshr<mode>3" - [(match_operand:VDQ_BHSI 0 "register_operand") - (match_operand:VDQ_BHSI 1 "register_operand") - (match_operand:VDQ_BHSI 2 "register_operand")] + [(match_operand:VDQ_I 0 "register_operand") + (match_operand:VDQ_I 1 "register_operand") + (match_operand:VDQ_I 2 "register_operand")] "TARGET_SIMD" { rtx neg = gen_reg_rtx (<MODE>mode); diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index eb8ccd4..7085cd4 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -1880,7 +1880,7 @@ "@ sxt<SHORT:size>\t%<GPI:w>0, %w1 ldrs<SHORT:size>\t%<GPI:w>0, %1 - smov\t%w0, %1.<SHORT:size>[0]" + smov\t%<GPI:w>0, %1.<SHORT:size>[0]" [(set_attr "type" "extend,load_4,neon_to_gp") (set_attr "arch" "*,*,fp")] ) diff --git a/gcc/config/gcn/gcn-protos.h b/gcc/config/gcn/gcn-protos.h index 8bd0b43..5d62a84 100644 --- a/gcc/config/gcn/gcn-protos.h +++ b/gcc/config/gcn/gcn-protos.h @@ -38,9 +38,10 @@ extern rtx gcn_full_exec (); extern rtx 
gcn_full_exec_reg (); extern rtx gcn_gen_undef (machine_mode); extern bool gcn_global_address_p (rtx); -extern tree gcn_goacc_adjust_propagation_record (tree record_type, bool sender, - const char *name); extern tree gcn_goacc_adjust_private_decl (location_t, tree var, int level); +extern tree gcn_goacc_create_worker_broadcast_record (tree record_type, + bool sender, + const char *name); extern void gcn_goacc_reduction (gcall *call); extern bool gcn_hard_regno_rename_ok (unsigned int from_reg, unsigned int to_reg); diff --git a/gcc/config/gcn/gcn-tree.c b/gcc/config/gcn/gcn-tree.c index 1eb8882..f722d2d 100644 --- a/gcc/config/gcn/gcn-tree.c +++ b/gcc/config/gcn/gcn-tree.c @@ -548,35 +548,6 @@ gcn_goacc_reduction (gcall *call) } } -/* Implement TARGET_GOACC_ADJUST_PROPAGATION_RECORD. - - Tweak (worker) propagation record, e.g. to put it in shared memory. */ - -tree -gcn_goacc_adjust_propagation_record (tree record_type, bool sender, - const char *name) -{ - tree type = record_type; - - TYPE_ADDR_SPACE (type) = ADDR_SPACE_LDS; - - if (!sender) - type = build_pointer_type (type); - - tree decl = create_tmp_var_raw (type, name); - - if (sender) - { - DECL_CONTEXT (decl) = NULL_TREE; - TREE_STATIC (decl) = 1; - } - - if (sender) - varpool_node::finalize_decl (decl); - - return decl; -} - tree gcn_goacc_adjust_private_decl (location_t, tree var, int level) { @@ -604,4 +575,33 @@ gcn_goacc_adjust_private_decl (location_t, tree var, int level) return var; } +/* Implement TARGET_GOACC_CREATE_WORKER_BROADCAST_RECORD. + + Create OpenACC worker state propagation record in shared memory. 
*/ + +tree +gcn_goacc_create_worker_broadcast_record (tree record_type, bool sender, + const char *name) +{ + tree type = record_type; + + TYPE_ADDR_SPACE (type) = ADDR_SPACE_LDS; + + if (!sender) + type = build_pointer_type (type); + + tree decl = create_tmp_var_raw (type, name); + + if (sender) + { + DECL_CONTEXT (decl) = NULL_TREE; + TREE_STATIC (decl) = 1; + } + + if (sender) + varpool_node::finalize_decl (decl); + + return decl; +} + /* }}} */ diff --git a/gcc/config/gcn/gcn.c b/gcc/config/gcn/gcn.c index d25c4e5..9df2827 100644 --- a/gcc/config/gcn/gcn.c +++ b/gcc/config/gcn/gcn.c @@ -3712,8 +3712,6 @@ gcn_init_builtins (void) TREE_NOTHROW (gcn_builtin_decls[i]) = 1; } -/* FIXME: remove the ifdef once OpenACC support is merged upstream. */ -#ifdef BUILT_IN_GOACC_SINGLE_START /* These builtins need to take/return an LDS pointer: override the generic versions here. */ @@ -3730,7 +3728,6 @@ gcn_init_builtins (void) set_builtin_decl (BUILT_IN_GOACC_BARRIER, gcn_builtin_decls[GCN_BUILTIN_ACC_BARRIER], false); -#endif } /* Implement TARGET_INIT_LIBFUNCS. */ @@ -5019,11 +5016,7 @@ gcn_goacc_validate_dims (tree decl, int dims[], int fn_level, unsigned /*used*/) { bool changed = false; - - /* FIXME: remove -facc-experimental-workers when they're ready. */ - int max_workers = flag_worker_partitioning ? 16 : 1; - - gcc_assert (!flag_worker_partitioning); + const int max_workers = 16; /* The vector size must appear to be 64, to the user, unless this is a SEQ routine. The real, internal value is always 1, which means use @@ -5060,8 +5053,7 @@ gcn_goacc_validate_dims (tree decl, int dims[], int fn_level, { dims[GOMP_DIM_VECTOR] = GCN_DEFAULT_VECTORS; if (dims[GOMP_DIM_WORKER] < 0) - dims[GOMP_DIM_WORKER] = (flag_worker_partitioning - ? 
GCN_DEFAULT_WORKERS : 1); + dims[GOMP_DIM_WORKER] = GCN_DEFAULT_WORKERS; if (dims[GOMP_DIM_GANG] < 0) dims[GOMP_DIM_GANG] = GCN_DEFAULT_GANGS; changed = true; @@ -5126,8 +5118,7 @@ static bool gcn_fork_join (gcall *ARG_UNUSED (call), const int *ARG_UNUSED (dims), bool ARG_UNUSED (is_fork)) { - /* GCN does not use the fork/join concept invented for NVPTX. - Instead we use standard autovectorization. */ + /* GCN does not need to expand fork/join markers at the RTL level. */ return false; } @@ -6513,11 +6504,11 @@ gcn_dwarf_register_span (rtx rtl) #define TARGET_GIMPLIFY_VA_ARG_EXPR gcn_gimplify_va_arg_expr #undef TARGET_OMP_DEVICE_KIND_ARCH_ISA #define TARGET_OMP_DEVICE_KIND_ARCH_ISA gcn_omp_device_kind_arch_isa -#undef TARGET_GOACC_ADJUST_PROPAGATION_RECORD -#define TARGET_GOACC_ADJUST_PROPAGATION_RECORD \ - gcn_goacc_adjust_propagation_record #undef TARGET_GOACC_ADJUST_PRIVATE_DECL #define TARGET_GOACC_ADJUST_PRIVATE_DECL gcn_goacc_adjust_private_decl +#undef TARGET_GOACC_CREATE_WORKER_BROADCAST_RECORD +#define TARGET_GOACC_CREATE_WORKER_BROADCAST_RECORD \ + gcn_goacc_create_worker_broadcast_record #undef TARGET_GOACC_FORK_JOIN #define TARGET_GOACC_FORK_JOIN gcn_fork_join #undef TARGET_GOACC_REDUCTION diff --git a/gcc/config/gcn/gcn.opt b/gcc/config/gcn/gcn.opt index b2b10b0..6faacca 100644 --- a/gcc/config/gcn/gcn.opt +++ b/gcc/config/gcn/gcn.opt @@ -62,11 +62,6 @@ bool flag_bypass_init_error = false mbypass-init-error Target RejectNegative Var(flag_bypass_init_error) -bool flag_worker_partitioning = false - -macc-experimental-workers -Target Var(flag_worker_partitioning) Init(0) - int stack_size_opt = -1 mstack-size= diff --git a/gcc/config/i386/constraints.md b/gcc/config/i386/constraints.md index 4aa28a5..87cceac 100644 --- a/gcc/config/i386/constraints.md +++ b/gcc/config/i386/constraints.md @@ -166,7 +166,8 @@ ;; s Sibcall memory operand, not valid for TARGET_X32 ;; w Call memory operand, not valid for TARGET_X32 ;; z Constant call address operand. 
-;; C SSE constant operand. +;; C Integer SSE constant with all bits set operand. +;; F Floating-point SSE constant with all bits set operand. (define_constraint "Bf" "@internal Flags register operand." @@ -216,11 +217,16 @@ (match_operand 0 "constant_call_address_operand")) (define_constraint "BC" - "@internal SSE constant -1 operand." + "@internal integer SSE constant with all bits set operand." (and (match_test "TARGET_SSE") (ior (match_test "op == constm1_rtx") (match_operand 0 "vector_all_ones_operand")))) +(define_constraint "BF" + "@internal floating-point SSE constant with all bits set operand." + (and (match_test "TARGET_SSE") + (match_operand 0 "float_vector_all_ones_operand"))) + ;; Integer constant constraints. (define_constraint "Wb" "Integer constant in the range 0 @dots{} 7, for 8-bit shifts." diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index aea224a..4d4ab6a 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -5073,7 +5073,11 @@ standard_sse_constant_p (rtx x, machine_mode pred_mode) if (x == const0_rtx || const0_operand (x, mode)) return 1; - if (x == constm1_rtx || vector_all_ones_operand (x, mode)) + if (x == constm1_rtx + || vector_all_ones_operand (x, mode) + || ((GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT + || GET_MODE_CLASS (pred_mode) == MODE_VECTOR_FLOAT) + && float_vector_all_ones_operand (x, mode))) { /* VOIDmode integer constant, get mode from the predicate. 
*/ if (mode == VOIDmode) @@ -5171,7 +5175,10 @@ standard_sse_constant_opcode (rtx_insn *insn, rtx *operands) gcc_unreachable (); } } - else if (x == constm1_rtx || vector_all_ones_operand (x, mode)) + else if (x == constm1_rtx + || vector_all_ones_operand (x, mode) + || (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT + && float_vector_all_ones_operand (x, mode))) { enum attr_mode insn_mode = get_attr_mode (insn); diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index 0984f7c..2d3b63f 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -952,7 +952,7 @@ (set_attr "prefix" "orig,vex") (set_attr "mode" "V4SF")]) -(define_insn "*mmx_<code>v2sf3" +(define_insn "<code>v2sf3" [(set (match_operand:V2SF 0 "register_operand" "=x,x") (any_logic:V2SF (match_operand:V2SF 1 "register_operand" "%0,x") diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md index 6aa1ea3..129205a 100644 --- a/gcc/config/i386/predicates.md +++ b/gcc/config/i386/predicates.md @@ -1161,6 +1161,10 @@ (ior (match_operand 0 "nonimmediate_operand") (match_code "const_vector"))) +(define_predicate "nonimmediate_or_const_vec_dup_operand" + (ior (match_operand 0 "nonimmediate_operand") + (match_test "const_vec_duplicate_p (op)"))) + ;; Return true when OP is either register operand, or any ;; CONST_VECTOR. 
(define_predicate "reg_or_const_vector_operand" diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index a46a237..2b0d10e 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -506,6 +506,13 @@ (V4DI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")]) +(define_mode_iterator VI248_AVX512VLBW + [(V32HI "TARGET_AVX512BW") + (V16HI "TARGET_AVX512VL && TARGET_AVX512BW") + (V8HI "TARGET_AVX512VL && TARGET_AVX512BW") + V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL") + V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")]) + (define_mode_iterator VI48_AVX2 [(V8SI "TARGET_AVX2") V4SI (V4DI "TARGET_AVX2") V2DI]) @@ -777,6 +784,15 @@ (V4SF "V4SF") (V2DF "V2DF") (TI "TI")]) +;; SSE constant -1 constraint +(define_mode_attr sseconstm1 + [(V64QI "BC") (V32HI "BC") (V16SI "BC") (V8DI "BC") (V4TI "BC") + (V32QI "BC") (V16HI "BC") (V8SI "BC") (V4DI "BC") (V2TI "BC") + (V16QI "BC") (V8HI "BC") (V4SI "BC") (V2DI "BC") (V1TI "BC") + (V16SF "BF") (V8DF "BF") + (V8SF "BF") (V4DF "BF") + (V4SF "BF") (V2DF "BF")]) + ;; Mapping of vector modes to corresponding mask size (define_mode_attr avx512fmaskmode [(V64QI "DI") (V32QI "SI") (V16QI "HI") @@ -1056,7 +1072,7 @@ [(set (match_operand:VMOVE 0 "nonimmediate_operand" "=v,v ,v ,m") (match_operand:VMOVE 1 "nonimmediate_or_sse_const_operand" - " C,BC,vm,v"))] + " C,<sseconstm1>,vm,v"))] "TARGET_SSE && (register_operand (operands[0], <MODE>mode) || register_operand (operands[1], <MODE>mode))" @@ -22786,6 +22802,35 @@ DONE; }) +(define_expand "cond_<insn><mode>" + [(set (match_operand:VI248_AVX512VLBW 0 "register_operand") + (vec_merge:VI248_AVX512VLBW + (any_shift:VI248_AVX512VLBW + (match_operand:VI248_AVX512VLBW 2 "register_operand") + (match_operand:VI248_AVX512VLBW 3 "nonimmediate_or_const_vec_dup_operand")) + (match_operand:VI248_AVX512VLBW 4 "nonimm_or_0_operand") + (match_operand:<avx512fmaskmode> 1 "register_operand")))] + "TARGET_AVX512F" +{ + if 
(const_vec_duplicate_p (operands[3])) + { + operands[3] = unwrap_const_vec_duplicate (operands[3]); + operands[3] = lowpart_subreg (DImode, operands[3], <ssescalarmode>mode); + emit_insn (gen_<insn><mode>3_mask (operands[0], + operands[2], + operands[3], + operands[4], + operands[1])); + } + else + emit_insn (gen_<avx2_avx512>_<insn>v<mode>_mask (operands[0], + operands[2], + operands[3], + operands[4], + operands[1])); + DONE; +}) + (define_insn "<avx2_avx512>_ashrv<mode><mask_name>" [(set (match_operand:VI48_AVX512F_AVX512VL 0 "register_operand" "=v") (ashiftrt:VI48_AVX512F_AVX512VL diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c index 6642bdf..4e4909e 100644 --- a/gcc/config/nvptx/nvptx.c +++ b/gcc/config/nvptx/nvptx.c @@ -3205,6 +3205,7 @@ nvptx_mach_vector_length () /* Loop structure of the function. The entire function is described as a NULL loop. */ +/* See also 'gcc/omp-oacc-neuter-broadcast.cc:struct parallel_g'. */ struct parallel { @@ -3282,6 +3283,7 @@ typedef auto_vec<insn_bb_t> insn_bb_vec_t; partitioning mode of the function as a whole. Populate MAP with head and tail blocks. We also clear the BB visited flag, which is used when finding partitions. */ +/* See also 'gcc/omp-oacc-neuter-broadcast.cc:omp_sese_split_blocks'. */ static void nvptx_split_blocks (bb_insn_map_t *map) @@ -3383,6 +3385,7 @@ nvptx_discover_pre (basic_block block, int expected) } /* Dump this parallel and all its inner parallels. */ +/* See also 'gcc/omp-oacc-neuter-broadcast.cc:omp_sese_dump_pars'. */ static void nvptx_dump_pars (parallel *par, unsigned depth) @@ -3408,6 +3411,7 @@ nvptx_dump_pars (parallel *par, unsigned depth) /* If BLOCK contains a fork/join marker, process it to create or terminate a loop structure. Add this block to the current loop, and then walk successor blocks. */ +/* See also 'gcc/omp-oacc-neuter-broadcast.cc:omp_sese_find_par'. 
*/ static parallel * nvptx_find_par (bb_insn_map_t *map, parallel *par, basic_block block) @@ -3488,6 +3492,7 @@ nvptx_find_par (bb_insn_map_t *map, parallel *par, basic_block block) to head & tail markers, discovered when splitting blocks. This speeds up the discovery. We rely on the BB visited flag having been cleared when splitting blocks. */ +/* See also 'gcc/omp-oacc-neuter-broadcast.cc:omp_sese_discover_pars'. */ static parallel * nvptx_discover_pars (bb_insn_map_t *map) diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 5b1c06b..60f406a 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -18363,7 +18363,12 @@ is_load_insn1 (rtx pat, rtx *load_mem) return false; if (GET_CODE (pat) == SET) - return find_mem_ref (SET_SRC (pat), load_mem); + { + if (REG_P (SET_DEST (pat))) + return find_mem_ref (SET_SRC (pat), load_mem); + else + return false; + } if (GET_CODE (pat) == PARALLEL) { @@ -18400,7 +18405,12 @@ is_store_insn1 (rtx pat, rtx *str_mem) return false; if (GET_CODE (pat) == SET) - return find_mem_ref (SET_DEST (pat), str_mem); + { + if (REG_P (SET_SRC (pat)) || SUBREG_P (SET_SRC (pat))) + return find_mem_ref (SET_DEST (pat), str_mem); + else + return false; + } if (GET_CODE (pat) == PARALLEL) { diff --git a/gcc/doc/gty.texi b/gcc/doc/gty.texi index aaf97ae..cf070c1 100644 --- a/gcc/doc/gty.texi +++ b/gcc/doc/gty.texi @@ -628,10 +628,9 @@ header file that should be included in the source file you just changed. The file will be called @file{gt-@var{path}.h} where @var{path} is the pathname relative to the @file{gcc} directory with slashes replaced by @verb{|-|}, so for example the header file to be included in -@file{cp/parser.c} is called @file{gt-cp-parser.c}. The +@file{cp/parser.c} is called @file{gt-cp-parser.h}. The generated header file should be included after everything else in the -source file. Don't forget to mention this file as a dependency in the -@file{Makefile}! +source file. 
@end enumerate diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi index 7de33b9..3e12ddf 100644 --- a/gcc/doc/tm.texi +++ b/gcc/doc/tm.texi @@ -6815,6 +6815,15 @@ private variables at OpenACC device-lowering time using the @end deftypefn @c hook-end +@deftypefn {Target Hook} tree TARGET_GOACC_CREATE_WORKER_BROADCAST_RECORD (tree @var{rec}, bool @var{sender}, const char *@var{name}) +Create a record used to propagate local-variable state from an active +worker to other workers. A possible implementation might adjust the type +of REC to place the new variable in shared GPU memory. + +Presence of this target hook indicates that middle end neutering/broadcasting +be used. +@end deftypefn + @node Anchored Addresses @section Anchored Addresses @cindex anchored addresses diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in index 4a522ae..611fc50 100644 --- a/gcc/doc/tm.texi.in +++ b/gcc/doc/tm.texi.in @@ -4223,6 +4223,8 @@ address; but often a machine-dependent strategy can generate better code. @hook TARGET_GOACC_EXPAND_VAR_DECL +@hook TARGET_GOACC_CREATE_WORKER_BROADCAST_RECORD + @node Anchored Addresses @section Anchored Addresses @cindex anchored addresses diff --git a/gcc/gimple-range-fold.cc b/gcc/gimple-range-fold.cc index 410bc4d..d3e3e14 100644 --- a/gcc/gimple-range-fold.cc +++ b/gcc/gimple-range-fold.cc @@ -894,6 +894,9 @@ fold_using_range::range_of_builtin_call (irange &r, gcall *call, case CFN_BUILT_IN_TOUPPER: { arg = gimple_call_arg (call, 0); + // If the argument isn't compatible with the LHS, do nothing. + if (!range_compatible_p (type, TREE_TYPE (arg))) + return false; if (!src.get_operand (r, arg)) return false; @@ -913,6 +916,9 @@ fold_using_range::range_of_builtin_call (irange &r, gcall *call, case CFN_BUILT_IN_TOLOWER: { arg = gimple_call_arg (call, 0); + // If the argument isn't compatible with the LHS, do nothing. 
+ if (!range_compatible_p (type, TREE_TYPE (arg))) + return false; if (!src.get_operand (r, arg)) return false; diff --git a/gcc/ipa-prop.c b/gcc/ipa-prop.c index 0afd05e..1c69d97 100644 --- a/gcc/ipa-prop.c +++ b/gcc/ipa-prop.c @@ -4204,7 +4204,7 @@ propagate_controlled_uses (struct cgraph_edge *cs) new_root->create_reference (n, IPA_REF_LOAD, NULL); if (dump_file) fprintf (dump_file, "ipa-prop: ...replaced it with " - " LOAD one from %s to %s.\n", + "LOAD one from %s to %s.\n", new_root->dump_name (), n->dump_name ()); } diff --git a/gcc/omp-builtins.def b/gcc/omp-builtins.def index 97964f8..05b555c 100644 --- a/gcc/omp-builtins.def +++ b/gcc/omp-builtins.def @@ -29,8 +29,6 @@ along with GCC; see the file COPYING3. If not see /* The reason why they aren't in gcc/builtins.def is that the Fortran front end doesn't source those. */ -DEF_GOACC_BUILTIN (BUILT_IN_ACC_GET_DEVICE_TYPE, "acc_get_device_type", - BT_FN_INT, ATTR_NOTHROW_LIST) DEF_GOACC_BUILTIN (BUILT_IN_GOACC_DATA_START, "GOACC_data_start", BT_FN_VOID_INT_SIZE_PTR_PTR_PTR, ATTR_NOTHROW_LIST) DEF_GOACC_BUILTIN (BUILT_IN_GOACC_DATA_END, "GOACC_data_end", @@ -61,6 +59,15 @@ DEF_GOACC_BUILTIN_ONLY (BUILT_IN_GOACC_PARLEVEL_ID, "goacc_parlevel_id", DEF_GOACC_BUILTIN_ONLY (BUILT_IN_GOACC_PARLEVEL_SIZE, "goacc_parlevel_size", BT_FN_INT_INT, ATTR_NOTHROW_LEAF_LIST) +DEF_GOACC_BUILTIN_ONLY (BUILT_IN_GOACC_BARRIER, "GOACC_barrier", + BT_FN_VOID, ATTR_NOTHROW_LEAF_LIST) +DEF_GOACC_BUILTIN_ONLY (BUILT_IN_GOACC_SINGLE_START, "GOACC_single_start", + BT_FN_BOOL, ATTR_NOTHROW_LEAF_LIST) +DEF_GOACC_BUILTIN_ONLY (BUILT_IN_GOACC_SINGLE_COPY_START, "GOACC_single_copy_start", + BT_FN_PTR, ATTR_NOTHROW_LEAF_LIST) +DEF_GOACC_BUILTIN_ONLY (BUILT_IN_GOACC_SINGLE_COPY_END, "GOACC_single_copy_end", + BT_FN_VOID_PTR, ATTR_NOTHROW_LEAF_LIST) + DEF_GOMP_BUILTIN (BUILT_IN_OMP_GET_THREAD_NUM, "omp_get_thread_num", BT_FN_INT, ATTR_CONST_NOTHROW_LEAF_LIST) DEF_GOMP_BUILTIN (BUILT_IN_OMP_GET_NUM_THREADS, "omp_get_num_threads", diff --git 
a/gcc/omp-low.c b/gcc/omp-low.c index 2f735bc..926087d 100644 --- a/gcc/omp-low.c +++ b/gcc/omp-low.c @@ -615,6 +615,8 @@ omp_copy_decl_1 (tree var, omp_context *ctx) /* Build COMPONENT_REF and set TREE_THIS_VOLATILE and TREE_READONLY on it as appropriate. */ +/* See also 'gcc/omp-oacc-neuter-broadcast.cc:oacc_build_component_ref'. */ + static tree omp_build_component_ref (tree obj, tree field) { diff --git a/gcc/omp-oacc-neuter-broadcast.cc b/gcc/omp-oacc-neuter-broadcast.cc new file mode 100644 index 0000000..f855538 --- /dev/null +++ b/gcc/omp-oacc-neuter-broadcast.cc @@ -0,0 +1,1522 @@ +/* OpenACC worker partitioning via middle end neutering/broadcasting scheme + + Copyright (C) 2015-2021 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. 
*/ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "backend.h" +#include "rtl.h" +#include "tree.h" +#include "gimple.h" +#include "tree-pass.h" +#include "ssa.h" +#include "cgraph.h" +#include "pretty-print.h" +#include "fold-const.h" +#include "gimplify.h" +#include "gimple-iterator.h" +#include "gimple-walk.h" +#include "tree-inline.h" +#include "langhooks.h" +#include "omp-general.h" +#include "omp-low.h" +#include "gimple-pretty-print.h" +#include "cfghooks.h" +#include "insn-config.h" +#include "recog.h" +#include "internal-fn.h" +#include "bitmap.h" +#include "tree-nested.h" +#include "stor-layout.h" +#include "tree-ssa-threadupdate.h" +#include "tree-into-ssa.h" +#include "splay-tree.h" +#include "target.h" +#include "cfgloop.h" +#include "tree-cfg.h" +#include "omp-offload.h" +#include "attribs.h" + +/* Loop structure of the function. The entire function is described as + a NULL loop. */ +/* Adapted from 'gcc/config/nvptx/nvptx.c:struct parallel'. */ + +struct parallel_g +{ + /* Parent parallel. */ + parallel_g *parent; + + /* Next sibling parallel. */ + parallel_g *next; + + /* First child parallel. */ + parallel_g *inner; + + /* Partitioning mask of the parallel. */ + unsigned mask; + + /* Partitioning used within inner parallels. */ + unsigned inner_mask; + + /* Location of parallel forked and join. The forked is the first + block in the parallel and the join is the first block after of + the partition. */ + basic_block forked_block; + basic_block join_block; + + gimple *forked_stmt; + gimple *join_stmt; + + gimple *fork_stmt; + gimple *joining_stmt; + + /* Basic blocks in this parallel, but not in child parallels. The + FORKED and JOINING blocks are in the partition. The FORK and JOIN + blocks are not. 
*/ + auto_vec<basic_block> blocks; + + tree record_type; + tree sender_decl; + tree receiver_decl; + +public: + parallel_g (parallel_g *parent, unsigned mode); + ~parallel_g (); +}; + +/* Constructor links the new parallel into it's parent's chain of + children. */ + +parallel_g::parallel_g (parallel_g *parent_, unsigned mask_) + :parent (parent_), next (0), inner (0), mask (mask_), inner_mask (0) +{ + forked_block = join_block = 0; + forked_stmt = join_stmt = NULL; + fork_stmt = joining_stmt = NULL; + + record_type = NULL_TREE; + sender_decl = NULL_TREE; + receiver_decl = NULL_TREE; + + if (parent) + { + next = parent->inner; + parent->inner = this; + } +} + +parallel_g::~parallel_g () +{ + delete inner; + delete next; +} + +static bool +local_var_based_p (tree decl) +{ + switch (TREE_CODE (decl)) + { + case VAR_DECL: + return !is_global_var (decl); + + case COMPONENT_REF: + case BIT_FIELD_REF: + case ARRAY_REF: + return local_var_based_p (TREE_OPERAND (decl, 0)); + + default: + return false; + } +} + +/* Map of basic blocks to gimple stmts. */ +typedef hash_map<basic_block, gimple *> bb_stmt_map_t; + +/* Calls to OpenACC routines are made by all workers/wavefronts/warps, since + the routine likely contains partitioned loops (else will do its own + neutering and variable propagation). Return TRUE if a function call CALL + should be made in (worker) single mode instead, rather than redundant + mode. */ + +static bool +omp_sese_active_worker_call (gcall *call) +{ +#define GOMP_DIM_SEQ GOMP_DIM_MAX + tree fndecl = gimple_call_fndecl (call); + + if (!fndecl) + return true; + + tree attrs = oacc_get_fn_attrib (fndecl); + + if (!attrs) + return true; + + int level = oacc_fn_attrib_level (attrs); + + /* Neither regular functions nor "seq" routines should be run by all threads + in worker-single mode. 
*/ + return level == -1 || level == GOMP_DIM_SEQ; +#undef GOMP_DIM_SEQ +} + +/* Split basic blocks such that each forked and join unspecs are at + the start of their basic blocks. Thus afterwards each block will + have a single partitioning mode. We also do the same for return + insns, as they are executed by every thread. Return the + partitioning mode of the function as a whole. Populate MAP with + head and tail blocks. We also clear the BB visited flag, which is + used when finding partitions. */ +/* Adapted from 'gcc/config/nvptx/nvptx.c:nvptx_split_blocks'. */ + +static void +omp_sese_split_blocks (bb_stmt_map_t *map) +{ + auto_vec<gimple *> worklist; + basic_block block; + + /* Locate all the reorg instructions of interest. */ + FOR_ALL_BB_FN (block, cfun) + { + /* Clear visited flag, for use by parallel locator */ + block->flags &= ~BB_VISITED; + + for (gimple_stmt_iterator gsi = gsi_start_bb (block); + !gsi_end_p (gsi); + gsi_next (&gsi)) + { + gimple *stmt = gsi_stmt (gsi); + + if (gimple_call_internal_p (stmt, IFN_UNIQUE)) + { + enum ifn_unique_kind k = ((enum ifn_unique_kind) + TREE_INT_CST_LOW (gimple_call_arg (stmt, 0))); + + if (k == IFN_UNIQUE_OACC_JOIN) + worklist.safe_push (stmt); + else if (k == IFN_UNIQUE_OACC_FORK) + { + gcc_assert (gsi_one_before_end_p (gsi)); + basic_block forked_block = single_succ (block); + gimple_stmt_iterator gsi2 = gsi_start_bb (forked_block); + + /* We push a NOP as a placeholder for the "forked" stmt. + This is then recognized in omp_sese_find_par. 
*/ + gimple *nop = gimple_build_nop (); + gsi_insert_before (&gsi2, nop, GSI_SAME_STMT); + + worklist.safe_push (nop); + } + } + else if (gimple_code (stmt) == GIMPLE_RETURN + || gimple_code (stmt) == GIMPLE_COND + || gimple_code (stmt) == GIMPLE_SWITCH + || (gimple_code (stmt) == GIMPLE_CALL + && !gimple_call_internal_p (stmt) + && !omp_sese_active_worker_call (as_a <gcall *> (stmt)))) + worklist.safe_push (stmt); + else if (is_gimple_assign (stmt)) + { + tree lhs = gimple_assign_lhs (stmt); + + /* Force assignments to components/fields/elements of local + aggregates into fully-partitioned (redundant) mode. This + avoids having to broadcast the whole aggregate. The RHS of + the assignment will be propagated using the normal + mechanism. */ + + switch (TREE_CODE (lhs)) + { + case COMPONENT_REF: + case BIT_FIELD_REF: + case ARRAY_REF: + { + tree aggr = TREE_OPERAND (lhs, 0); + + if (local_var_based_p (aggr)) + worklist.safe_push (stmt); + } + break; + + default: + ; + } + } + } + } + + /* Split blocks on the worklist. 
*/ + unsigned ix; + gimple *stmt; + + for (ix = 0; worklist.iterate (ix, &stmt); ix++) + { + basic_block block = gimple_bb (stmt); + + if (gimple_code (stmt) == GIMPLE_COND) + { + gcond *orig_cond = as_a <gcond *> (stmt); + tree_code code = gimple_expr_code (orig_cond); + tree pred = make_ssa_name (boolean_type_node); + gimple *asgn = gimple_build_assign (pred, code, + gimple_cond_lhs (orig_cond), + gimple_cond_rhs (orig_cond)); + gcond *new_cond + = gimple_build_cond (NE_EXPR, pred, boolean_false_node, + gimple_cond_true_label (orig_cond), + gimple_cond_false_label (orig_cond)); + + gimple_stmt_iterator gsi = gsi_for_stmt (stmt); + gsi_insert_before (&gsi, asgn, GSI_SAME_STMT); + gsi_replace (&gsi, new_cond, true); + + edge e = split_block (block, asgn); + block = e->dest; + map->get_or_insert (block) = new_cond; + } + else if ((gimple_code (stmt) == GIMPLE_CALL + && !gimple_call_internal_p (stmt)) + || is_gimple_assign (stmt)) + { + gimple_stmt_iterator gsi = gsi_for_stmt (stmt); + gsi_prev (&gsi); + + edge call = split_block (block, gsi_stmt (gsi)); + + gimple *call_stmt = gsi_stmt (gsi_start_bb (call->dest)); + + edge call_to_ret = split_block (call->dest, call_stmt); + + map->get_or_insert (call_to_ret->src) = call_stmt; + } + else + { + gimple_stmt_iterator gsi = gsi_for_stmt (stmt); + gsi_prev (&gsi); + + if (gsi_end_p (gsi)) + map->get_or_insert (block) = stmt; + else + { + /* Split block before insn. The insn is in the new block. 
 */
	      edge e = split_block (block, gsi_stmt (gsi));

	      block = e->dest;
	      map->get_or_insert (block) = stmt;
	    }
	}
    }
}

/* Return a human-readable name for partitioning mask MASK, a bitmask of
   GOMP_DIM_{GANG,WORKER,VECTOR} bits (so only values 0..7 are legal).
   Used for dump-file output only.  */

static const char *
mask_name (unsigned mask)
{
  switch (mask)
    {
    case 0: return "gang redundant";
    case 1: return "gang partitioned";
    case 2: return "worker partitioned";
    case 3: return "gang+worker partitioned";
    case 4: return "vector partitioned";
    case 5: return "gang+vector partitioned";
    case 6: return "worker+vector partitioned";
    case 7: return "fully partitioned";
    default: return "<illegal>";
    }
}

/* Dump this parallel and all its inner parallels.  Recurses into
   PAR->inner at DEPTH + 1 and iterates over the PAR->next sibling chain
   at the same DEPTH.  Writes to DUMP_FILE, which the caller must have
   checked is non-NULL.  */
/* Adapted from 'gcc/config/nvptx/nvptx.c:nvptx_dump_pars'.  */

static void
omp_sese_dump_pars (parallel_g *par, unsigned depth)
{
  fprintf (dump_file, "%u: mask %d (%s) head=%d, tail=%d\n",
	   depth, par->mask, mask_name (par->mask),
	   par->forked_block ? par->forked_block->index : -1,
	   par->join_block ? par->join_block->index : -1);

  fprintf (dump_file, "    blocks:");

  basic_block block;
  for (unsigned ix = 0; par->blocks.iterate (ix, &block); ix++)
    fprintf (dump_file, " %d", block->index);
  fprintf (dump_file, "\n");
  if (par->inner)
    omp_sese_dump_pars (par->inner, depth + 1);

  if (par->next)
    omp_sese_dump_pars (par->next, depth);
}

/* If BLOCK contains a fork/join marker, process it to create or
   terminate a loop structure.  Add this block to the current loop,
   and then walk successor blocks.   */
/* Adapted from 'gcc/config/nvptx/nvptx.c:nvptx_find_par.
*/ + +static parallel_g * +omp_sese_find_par (bb_stmt_map_t *map, parallel_g *par, basic_block block) +{ + if (block->flags & BB_VISITED) + return par; + block->flags |= BB_VISITED; + + if (gimple **stmtp = map->get (block)) + { + gimple *stmt = *stmtp; + + if (gimple_code (stmt) == GIMPLE_COND + || gimple_code (stmt) == GIMPLE_SWITCH + || gimple_code (stmt) == GIMPLE_RETURN + || (gimple_code (stmt) == GIMPLE_CALL + && !gimple_call_internal_p (stmt)) + || is_gimple_assign (stmt)) + { + /* A single block that is forced to be at the maximum partition + level. Make a singleton par for it. */ + par = new parallel_g (par, GOMP_DIM_MASK (GOMP_DIM_GANG) + | GOMP_DIM_MASK (GOMP_DIM_WORKER) + | GOMP_DIM_MASK (GOMP_DIM_VECTOR)); + par->forked_block = block; + par->forked_stmt = stmt; + par->blocks.safe_push (block); + par = par->parent; + goto walk_successors; + } + else if (gimple_nop_p (stmt)) + { + basic_block pred = single_pred (block); + gcc_assert (pred); + gimple_stmt_iterator gsi = gsi_last_bb (pred); + gimple *final_stmt = gsi_stmt (gsi); + + if (gimple_call_internal_p (final_stmt, IFN_UNIQUE)) + { + gcall *call = as_a <gcall *> (final_stmt); + enum ifn_unique_kind k = ((enum ifn_unique_kind) + TREE_INT_CST_LOW (gimple_call_arg (call, 0))); + + if (k == IFN_UNIQUE_OACC_FORK) + { + HOST_WIDE_INT dim + = TREE_INT_CST_LOW (gimple_call_arg (call, 2)); + unsigned mask = (dim >= 0) ? GOMP_DIM_MASK (dim) : 0; + + par = new parallel_g (par, mask); + par->forked_block = block; + par->forked_stmt = final_stmt; + par->fork_stmt = stmt; + } + else + gcc_unreachable (); + } + else + gcc_unreachable (); + } + else if (gimple_call_internal_p (stmt, IFN_UNIQUE)) + { + gcall *call = as_a <gcall *> (stmt); + enum ifn_unique_kind k = ((enum ifn_unique_kind) + TREE_INT_CST_LOW (gimple_call_arg (call, 0))); + if (k == IFN_UNIQUE_OACC_JOIN) + { + HOST_WIDE_INT dim = TREE_INT_CST_LOW (gimple_call_arg (stmt, 2)); + unsigned mask = (dim >= 0) ? 
GOMP_DIM_MASK (dim) : 0; + + gcc_assert (par->mask == mask); + par->join_block = block; + par->join_stmt = stmt; + par = par->parent; + } + else + gcc_unreachable (); + } + else + gcc_unreachable (); + } + + if (par) + /* Add this block onto the current loop's list of blocks. */ + par->blocks.safe_push (block); + else + /* This must be the entry block. Create a NULL parallel. */ + par = new parallel_g (0, 0); + +walk_successors: + /* Walk successor blocks. */ + edge e; + edge_iterator ei; + + FOR_EACH_EDGE (e, ei, block->succs) + omp_sese_find_par (map, par, e->dest); + + return par; +} + +/* DFS walk the CFG looking for fork & join markers. Construct + loop structures as we go. MAP is a mapping of basic blocks + to head & tail markers, discovered when splitting blocks. This + speeds up the discovery. We rely on the BB visited flag having + been cleared when splitting blocks. */ +/* Adapted from 'gcc/config/nvptx/nvptx.c:nvptx_discover_pars'. */ + +static parallel_g * +omp_sese_discover_pars (bb_stmt_map_t *map) +{ + basic_block block; + + /* Mark exit blocks as visited. */ + block = EXIT_BLOCK_PTR_FOR_FN (cfun); + block->flags |= BB_VISITED; + + /* And entry block as not. 
 */
  block = ENTRY_BLOCK_PTR_FOR_FN (cfun);
  block->flags &= ~BB_VISITED;

  parallel_g *par = omp_sese_find_par (map, 0, block);

  if (dump_file)
    {
      fprintf (dump_file, "\nLoops\n");
      omp_sese_dump_pars (par, 0);
      fprintf (dump_file, "\n");
    }

  return par;
}

/* Walk the parallel structure PAR (inner parallels and the sibling chain)
   and set, in WORKER_SINGLE resp. VECTOR_SINGLE, the bit for the index of
   every basic block whose accumulated partitioning mask does NOT include
   the worker resp. vector dimension -- i.e. every block that executes in
   worker-single resp. vector-single mode.  OUTER_MASK is the partitioning
   inherited from enclosing parallels.  DEPTH tracks nesting depth but is
   only threaded through the recursion, not otherwise used.  */

static void
populate_single_mode_bitmaps (parallel_g *par, bitmap worker_single,
			      bitmap vector_single, unsigned outer_mask,
			      int depth)
{
  unsigned mask = outer_mask | par->mask;

  basic_block block;

  for (unsigned i = 0; par->blocks.iterate (i, &block); i++)
    {
      if ((mask & GOMP_DIM_MASK (GOMP_DIM_WORKER)) == 0)
	bitmap_set_bit (worker_single, block->index);

      if ((mask & GOMP_DIM_MASK (GOMP_DIM_VECTOR)) == 0)
	bitmap_set_bit (vector_single, block->index);
    }

  if (par->inner)
    /* Inner parallels accumulate this parallel's mask.  */
    populate_single_mode_bitmaps (par->inner, worker_single, vector_single,
				  mask, depth + 1);
  if (par->next)
    /* Siblings only inherit the enclosing mask.  */
    populate_single_mode_bitmaps (par->next, worker_single, vector_single,
				  outer_mask, depth);
}

/* A map from SSA names or var decls to record fields.  */

typedef hash_map<tree, tree> field_map_t;

/* For each propagation record type, this is a map from SSA names or var decls
   to propagate, to the field in the record type that should be used for
   transmission and reception.
 */

typedef hash_map<tree, field_map_t *> record_field_map_t;

/* Map from each broadcast record type to its field map.  GTY-rooted so the
   garbage collector keeps the trees reachable from it alive across
   collections.  */

static GTY(()) record_field_map_t *field_map;

/* Create a FIELD_DECL for VAR (an SSA_NAME or VAR_DECL) and append it to
   RECORD_TYPE, recording the VAR -> field mapping in FIELD_MAP's entry for
   RECORD_TYPE (which must already exist).  Asserts VAR has not been
   installed before.  */

static void
install_var_field (tree var, tree record_type)
{
  field_map_t *fields = *field_map->get (record_type);
  tree name;
  /* Large enough for "D_" plus a 10-digit unsigned plus the NUL.  */
  char tmp[20];

  if (TREE_CODE (var) == SSA_NAME)
    {
      /* Prefer the user-visible identifier; anonymous SSA names get a
	 synthesized "_<version>" name.  */
      name = SSA_NAME_IDENTIFIER (var);
      if (!name)
	{
	  sprintf (tmp, "_%u", (unsigned) SSA_NAME_VERSION (var));
	  name = get_identifier (tmp);
	}
    }
  else if (TREE_CODE (var) == VAR_DECL)
    {
      name = DECL_NAME (var);
      if (!name)
	{
	  sprintf (tmp, "D_%u", (unsigned) DECL_UID (var));
	  name = get_identifier (tmp);
	}
    }
  else
    gcc_unreachable ();

  gcc_assert (!fields->get (var));

  tree type = TREE_TYPE (var);

  /* Strip "restrict" from pointer types: the no-alias promise made for the
     original variable does not carry over to the broadcast record field.  */
  if (POINTER_TYPE_P (type)
      && TYPE_RESTRICT (type))
    type = build_qualified_type (type, TYPE_QUALS (type) & ~TYPE_QUAL_RESTRICT);

  tree field = build_decl (BUILTINS_LOCATION, FIELD_DECL, name, type);

  if (TREE_CODE (var) == VAR_DECL && type == TREE_TYPE (var))
    {
      /* For unmodified VAR_DECL types, mirror the variable's alignment and
	 volatility on the field.  */
      SET_DECL_ALIGN (field, DECL_ALIGN (var));
      DECL_USER_ALIGN (field) = DECL_USER_ALIGN (var);
      TREE_THIS_VOLATILE (field) = TREE_THIS_VOLATILE (var);
    }
  else
    SET_DECL_ALIGN (field, TYPE_ALIGN (type));

  fields->put (var, field);

  insert_field_into_struct (record_type, field);
}

/* Sets of SSA_NAMES or VAR_DECLs to propagate.
*/ +typedef hash_set<tree> propagation_set; + +static void +find_ssa_names_to_propagate (parallel_g *par, unsigned outer_mask, + bitmap worker_single, bitmap vector_single, + vec<propagation_set *> *prop_set) +{ + unsigned mask = outer_mask | par->mask; + + if (par->inner) + find_ssa_names_to_propagate (par->inner, mask, worker_single, + vector_single, prop_set); + if (par->next) + find_ssa_names_to_propagate (par->next, outer_mask, worker_single, + vector_single, prop_set); + + if (mask & GOMP_DIM_MASK (GOMP_DIM_WORKER)) + { + basic_block block; + int ix; + + for (ix = 0; par->blocks.iterate (ix, &block); ix++) + { + for (gphi_iterator psi = gsi_start_phis (block); + !gsi_end_p (psi); gsi_next (&psi)) + { + gphi *phi = psi.phi (); + use_operand_p use; + ssa_op_iter iter; + + FOR_EACH_PHI_ARG (use, phi, iter, SSA_OP_USE) + { + tree var = USE_FROM_PTR (use); + + if (TREE_CODE (var) != SSA_NAME) + continue; + + gimple *def_stmt = SSA_NAME_DEF_STMT (var); + + if (gimple_nop_p (def_stmt)) + continue; + + basic_block def_bb = gimple_bb (def_stmt); + + if (bitmap_bit_p (worker_single, def_bb->index)) + { + if (!(*prop_set)[def_bb->index]) + (*prop_set)[def_bb->index] = new propagation_set; + + propagation_set *ws_prop = (*prop_set)[def_bb->index]; + + ws_prop->add (var); + } + } + } + + for (gimple_stmt_iterator gsi = gsi_start_bb (block); + !gsi_end_p (gsi); gsi_next (&gsi)) + { + use_operand_p use; + ssa_op_iter iter; + gimple *stmt = gsi_stmt (gsi); + + FOR_EACH_SSA_USE_OPERAND (use, stmt, iter, SSA_OP_USE) + { + tree var = USE_FROM_PTR (use); + + gimple *def_stmt = SSA_NAME_DEF_STMT (var); + + if (gimple_nop_p (def_stmt)) + continue; + + basic_block def_bb = gimple_bb (def_stmt); + + if (bitmap_bit_p (worker_single, def_bb->index)) + { + if (!(*prop_set)[def_bb->index]) + (*prop_set)[def_bb->index] = new propagation_set; + + propagation_set *ws_prop = (*prop_set)[def_bb->index]; + + ws_prop->add (var); + } + } + } + } + } +} + +/* Callback for walk_gimple_stmt to 
   find RHS VAR_DECLs (uses) in a
   statement.  */

static tree
find_partitioned_var_uses_1 (tree *node, int *, void *data)
{
  walk_stmt_info *wi = (walk_stmt_info *) data;
  hash_set<tree> *partitioned_var_uses = (hash_set<tree> *) wi->info;

  /* Only record uses (RHS operands): LHS occurrences are definitions, not
     uses, and are skipped via WI->is_lhs.  */
  if (!wi->is_lhs && VAR_P (*node))
    partitioned_var_uses->add (*node);

  return NULL_TREE;
}

/* Walk PAR (inner parallels and the sibling chain) and collect into
   PARTITIONED_VAR_USES every VAR_DECL used (read) by a statement in a
   worker-partitioned block.  OUTER_MASK is the partitioning inherited
   from enclosing parallels.  */

static void
find_partitioned_var_uses (parallel_g *par, unsigned outer_mask,
			   hash_set<tree> *partitioned_var_uses)
{
  unsigned mask = outer_mask | par->mask;

  if (par->inner)
    find_partitioned_var_uses (par->inner, mask, partitioned_var_uses);
  if (par->next)
    find_partitioned_var_uses (par->next, outer_mask, partitioned_var_uses);

  /* Only blocks executing in worker-partitioned mode are of interest.  */
  if (mask & GOMP_DIM_MASK (GOMP_DIM_WORKER))
    {
      basic_block block;
      int ix;

      for (ix = 0; par->blocks.iterate (ix, &block); ix++)
	for (gimple_stmt_iterator gsi = gsi_start_bb (block);
	     !gsi_end_p (gsi); gsi_next (&gsi))
	  {
	    walk_stmt_info wi;
	    memset (&wi, 0, sizeof (wi));
	    wi.info = (void *) partitioned_var_uses;
	    walk_gimple_stmt (&gsi, NULL, find_partitioned_var_uses_1, &wi);
	  }
    }
}

/* Gang-private variables (typically placed in a GPU's shared memory) do not
   need to be processed by the worker-propagation mechanism.  Populate the
   GANG_PRIVATE_VARS set with any such variables found in the current
   function.
*/ + +static void +find_gang_private_vars (hash_set<tree> *gang_private_vars) +{ + basic_block block; + + FOR_EACH_BB_FN (block, cfun) + { + for (gimple_stmt_iterator gsi = gsi_start_bb (block); + !gsi_end_p (gsi); + gsi_next (&gsi)) + { + gimple *stmt = gsi_stmt (gsi); + + if (gimple_call_internal_p (stmt, IFN_UNIQUE)) + { + enum ifn_unique_kind k = ((enum ifn_unique_kind) + TREE_INT_CST_LOW (gimple_call_arg (stmt, 0))); + if (k == IFN_UNIQUE_OACC_PRIVATE) + { + HOST_WIDE_INT level + = TREE_INT_CST_LOW (gimple_call_arg (stmt, 2)); + if (level != GOMP_DIM_GANG) + continue; + for (unsigned i = 3; i < gimple_call_num_args (stmt); i++) + { + tree arg = gimple_call_arg (stmt, i); + gcc_assert (TREE_CODE (arg) == ADDR_EXPR); + tree decl = TREE_OPERAND (arg, 0); + gang_private_vars->add (decl); + } + } + } + } + } +} + +static void +find_local_vars_to_propagate (parallel_g *par, unsigned outer_mask, + hash_set<tree> *partitioned_var_uses, + hash_set<tree> *gang_private_vars, + vec<propagation_set *> *prop_set) +{ + unsigned mask = outer_mask | par->mask; + + if (par->inner) + find_local_vars_to_propagate (par->inner, mask, partitioned_var_uses, + gang_private_vars, prop_set); + if (par->next) + find_local_vars_to_propagate (par->next, outer_mask, partitioned_var_uses, + gang_private_vars, prop_set); + + if (!(mask & GOMP_DIM_MASK (GOMP_DIM_WORKER))) + { + basic_block block; + int ix; + + for (ix = 0; par->blocks.iterate (ix, &block); ix++) + { + for (gimple_stmt_iterator gsi = gsi_start_bb (block); + !gsi_end_p (gsi); gsi_next (&gsi)) + { + gimple *stmt = gsi_stmt (gsi); + tree var; + unsigned i; + + FOR_EACH_LOCAL_DECL (cfun, i, var) + { + if (!VAR_P (var) + || is_global_var (var) + || AGGREGATE_TYPE_P (TREE_TYPE (var)) + || !partitioned_var_uses->contains (var) + || gang_private_vars->contains (var)) + continue; + + if (stmt_may_clobber_ref_p (stmt, var)) + { + if (dump_file) + { + fprintf (dump_file, "bb %u: local variable may be " + "clobbered in %s mode: ", 
block->index, + mask_name (mask)); + print_generic_expr (dump_file, var, TDF_SLIM); + fprintf (dump_file, "\n"); + } + + if (!(*prop_set)[block->index]) + (*prop_set)[block->index] = new propagation_set; + + propagation_set *ws_prop + = (*prop_set)[block->index]; + + ws_prop->add (var); + } + } + } + } + } +} + +/* Transform basic blocks FROM, TO (which may be the same block) into: + if (GOACC_single_start ()) + BLOCK; + GOACC_barrier (); + \ | / + +----+ + | | (new) predicate block + +----+-- + \ | / \ | / |t \ + +----+ +----+ +----+ | + | | | | ===> | | | f (old) from block + +----+ +----+ +----+ | + | t/ \f | / + +----+/ + (split (split before | | skip block + at end) condition) +----+ + t/ \f +*/ + +static void +worker_single_simple (basic_block from, basic_block to, + hash_set<tree> *def_escapes_block) +{ + gimple *call, *cond; + tree lhs, decl; + basic_block skip_block; + + gimple_stmt_iterator gsi = gsi_last_bb (to); + if (EDGE_COUNT (to->succs) > 1) + { + gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND); + gsi_prev (&gsi); + } + edge e = split_block (to, gsi_stmt (gsi)); + skip_block = e->dest; + + gimple_stmt_iterator start = gsi_after_labels (from); + + decl = builtin_decl_explicit (BUILT_IN_GOACC_SINGLE_START); + lhs = create_tmp_var (TREE_TYPE (TREE_TYPE (decl))); + call = gimple_build_call (decl, 0); + gimple_call_set_lhs (call, lhs); + gsi_insert_before (&start, call, GSI_NEW_STMT); + update_stmt (call); + + cond = gimple_build_cond (EQ_EXPR, lhs, + fold_convert_loc (UNKNOWN_LOCATION, + TREE_TYPE (lhs), + boolean_true_node), + NULL_TREE, NULL_TREE); + gsi_insert_after (&start, cond, GSI_NEW_STMT); + update_stmt (cond); + + edge et = split_block (from, cond); + et->flags &= ~EDGE_FALLTHRU; + et->flags |= EDGE_TRUE_VALUE; + /* Make the active worker the more probable path so we prefer fallthrough + (letting the idle workers jump around more). 
*/ + et->probability = profile_probability::likely (); + + edge ef = make_edge (from, skip_block, EDGE_FALSE_VALUE); + ef->probability = et->probability.invert (); + + basic_block neutered = split_edge (ef); + gimple_stmt_iterator neut_gsi = gsi_last_bb (neutered); + + for (gsi = gsi_start_bb (et->dest); !gsi_end_p (gsi); gsi_next (&gsi)) + { + gimple *stmt = gsi_stmt (gsi); + ssa_op_iter iter; + tree var; + + FOR_EACH_SSA_TREE_OPERAND (var, stmt, iter, SSA_OP_DEF) + { + if (def_escapes_block->contains (var)) + { + gphi *join_phi = create_phi_node (NULL_TREE, skip_block); + create_new_def_for (var, join_phi, + gimple_phi_result_ptr (join_phi)); + add_phi_arg (join_phi, var, e, UNKNOWN_LOCATION); + + tree neutered_def = copy_ssa_name (var, NULL); + /* We really want "don't care" or some value representing + undefined here, but optimizers will probably get rid of the + zero-assignments anyway. */ + gassign *zero = gimple_build_assign (neutered_def, + build_zero_cst (TREE_TYPE (neutered_def))); + + gsi_insert_after (&neut_gsi, zero, GSI_CONTINUE_LINKING); + update_stmt (zero); + + add_phi_arg (join_phi, neutered_def, single_succ_edge (neutered), + UNKNOWN_LOCATION); + update_stmt (join_phi); + } + } + } + + gsi = gsi_start_bb (skip_block); + + decl = builtin_decl_explicit (BUILT_IN_GOACC_BARRIER); + gimple *acc_bar = gimple_build_call (decl, 0); + + gsi_insert_before (&gsi, acc_bar, GSI_SAME_STMT); + update_stmt (acc_bar); +} + +/* Build COMPONENT_REF and set TREE_THIS_VOLATILE and TREE_READONLY on it + as appropriate. */ +/* Adapted from 'gcc/omp-low.c:omp_build_component_ref'. 
*/ + +static tree +oacc_build_component_ref (tree obj, tree field) +{ + tree field_type = TREE_TYPE (field); + tree obj_type = TREE_TYPE (obj); + if (!ADDR_SPACE_GENERIC_P (TYPE_ADDR_SPACE (obj_type))) + field_type = build_qualified_type + (field_type, + KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (obj_type))); + + tree ret = build3 (COMPONENT_REF, field_type, obj, field, NULL); + if (TREE_THIS_VOLATILE (field)) + TREE_THIS_VOLATILE (ret) |= 1; + if (TREE_READONLY (field)) + TREE_READONLY (ret) |= 1; + return ret; +} + +static tree +build_receiver_ref (tree record_type, tree var, tree receiver_decl) +{ + field_map_t *fields = *field_map->get (record_type); + tree x = build_simple_mem_ref (receiver_decl); + tree field = *fields->get (var); + TREE_THIS_NOTRAP (x) = 1; + x = oacc_build_component_ref (x, field); + return x; +} + +static tree +build_sender_ref (tree record_type, tree var, tree sender_decl) +{ + field_map_t *fields = *field_map->get (record_type); + tree field = *fields->get (var); + return oacc_build_component_ref (sender_decl, field); +} + +static int +sort_by_ssa_version_or_uid (const void *p1, const void *p2) +{ + const tree t1 = *(const tree *)p1; + const tree t2 = *(const tree *)p2; + + if (TREE_CODE (t1) == SSA_NAME && TREE_CODE (t2) == SSA_NAME) + return SSA_NAME_VERSION (t1) - SSA_NAME_VERSION (t2); + else if (TREE_CODE (t1) == SSA_NAME && TREE_CODE (t2) != SSA_NAME) + return -1; + else if (TREE_CODE (t1) != SSA_NAME && TREE_CODE (t2) == SSA_NAME) + return 1; + else + return DECL_UID (t1) - DECL_UID (t2); +} + +static int +sort_by_size_then_ssa_version_or_uid (const void *p1, const void *p2) +{ + const tree t1 = *(const tree *)p1; + const tree t2 = *(const tree *)p2; + unsigned HOST_WIDE_INT s1 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (t1))); + unsigned HOST_WIDE_INT s2 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (t2))); + if (s1 != s2) + return s2 - s1; + else + return sort_by_ssa_version_or_uid (p1, p2); +} + +static void +worker_single_copy (basic_block from, 
basic_block to, + hash_set<tree> *def_escapes_block, + hash_set<tree> *worker_partitioned_uses, + tree record_type) +{ + /* If we only have virtual defs, we'll have no record type, but we still want + to emit single_copy_start and (particularly) single_copy_end to act as + a vdef source on the neutered edge representing memory writes on the + non-neutered edge. */ + if (!record_type) + record_type = char_type_node; + + tree sender_decl + = targetm.goacc.create_worker_broadcast_record (record_type, true, + ".oacc_worker_o"); + tree receiver_decl + = targetm.goacc.create_worker_broadcast_record (record_type, false, + ".oacc_worker_i"); + + gimple_stmt_iterator gsi = gsi_last_bb (to); + if (EDGE_COUNT (to->succs) > 1) + gsi_prev (&gsi); + edge e = split_block (to, gsi_stmt (gsi)); + basic_block barrier_block = e->dest; + + gimple_stmt_iterator start = gsi_after_labels (from); + + tree decl = builtin_decl_explicit (BUILT_IN_GOACC_SINGLE_COPY_START); + + tree lhs = create_tmp_var (TREE_TYPE (TREE_TYPE (decl))); + + gimple *call = gimple_build_call (decl, 1, + build_fold_addr_expr (sender_decl)); + gimple_call_set_lhs (call, lhs); + gsi_insert_before (&start, call, GSI_NEW_STMT); + update_stmt (call); + + tree conv_tmp = make_ssa_name (TREE_TYPE (receiver_decl)); + + gimple *conv = gimple_build_assign (conv_tmp, + fold_convert (TREE_TYPE (receiver_decl), + lhs)); + update_stmt (conv); + gsi_insert_after (&start, conv, GSI_NEW_STMT); + gimple *asgn = gimple_build_assign (receiver_decl, conv_tmp); + gsi_insert_after (&start, asgn, GSI_NEW_STMT); + update_stmt (asgn); + + tree zero_ptr = build_int_cst (TREE_TYPE (receiver_decl), 0); + + tree recv_tmp = make_ssa_name (TREE_TYPE (receiver_decl)); + asgn = gimple_build_assign (recv_tmp, receiver_decl); + gsi_insert_after (&start, asgn, GSI_NEW_STMT); + update_stmt (asgn); + + gimple *cond = gimple_build_cond (EQ_EXPR, recv_tmp, zero_ptr, NULL_TREE, + NULL_TREE); + update_stmt (cond); + + gsi_insert_after (&start, cond, 
GSI_NEW_STMT); + + edge et = split_block (from, cond); + et->flags &= ~EDGE_FALLTHRU; + et->flags |= EDGE_TRUE_VALUE; + /* Make the active worker the more probable path so we prefer fallthrough + (letting the idle workers jump around more). */ + et->probability = profile_probability::likely (); + + basic_block body = et->dest; + + edge ef = make_edge (from, barrier_block, EDGE_FALSE_VALUE); + ef->probability = et->probability.invert (); + + decl = builtin_decl_explicit (BUILT_IN_GOACC_BARRIER); + gimple *acc_bar = gimple_build_call (decl, 0); + + gimple_stmt_iterator bar_gsi = gsi_start_bb (barrier_block); + gsi_insert_before (&bar_gsi, acc_bar, GSI_NEW_STMT); + + cond = gimple_build_cond (NE_EXPR, recv_tmp, zero_ptr, NULL_TREE, NULL_TREE); + gsi_insert_after (&bar_gsi, cond, GSI_NEW_STMT); + + edge et2 = split_block (barrier_block, cond); + et2->flags &= ~EDGE_FALLTHRU; + et2->flags |= EDGE_TRUE_VALUE; + et2->probability = profile_probability::unlikely (); + + basic_block exit_block = et2->dest; + + basic_block copyout_block = split_edge (et2); + edge ef2 = make_edge (barrier_block, exit_block, EDGE_FALSE_VALUE); + ef2->probability = et2->probability.invert (); + + gimple_stmt_iterator copyout_gsi = gsi_start_bb (copyout_block); + + edge copyout_to_exit = single_succ_edge (copyout_block); + + gimple_seq sender_seq = NULL; + + /* Make sure we iterate over definitions in a stable order. 
*/ + auto_vec<tree> escape_vec (def_escapes_block->elements ()); + for (hash_set<tree>::iterator it = def_escapes_block->begin (); + it != def_escapes_block->end (); ++it) + escape_vec.quick_push (*it); + escape_vec.qsort (sort_by_ssa_version_or_uid); + + for (unsigned i = 0; i < escape_vec.length (); i++) + { + tree var = escape_vec[i]; + + if (TREE_CODE (var) == SSA_NAME && SSA_NAME_IS_VIRTUAL_OPERAND (var)) + continue; + + tree barrier_def = 0; + + if (TREE_CODE (var) == SSA_NAME) + { + gimple *def_stmt = SSA_NAME_DEF_STMT (var); + + if (gimple_nop_p (def_stmt)) + continue; + + /* The barrier phi takes one result from the actual work of the + block we're neutering, and the other result is constant zero of + the same type. */ + + gphi *barrier_phi = create_phi_node (NULL_TREE, barrier_block); + barrier_def = create_new_def_for (var, barrier_phi, + gimple_phi_result_ptr (barrier_phi)); + + add_phi_arg (barrier_phi, var, e, UNKNOWN_LOCATION); + add_phi_arg (barrier_phi, build_zero_cst (TREE_TYPE (var)), ef, + UNKNOWN_LOCATION); + + update_stmt (barrier_phi); + } + else + gcc_assert (TREE_CODE (var) == VAR_DECL); + + /* If we had no record type, we will have no fields map. */ + field_map_t **fields_p = field_map->get (record_type); + field_map_t *fields = fields_p ? *fields_p : NULL; + + if (worker_partitioned_uses->contains (var) + && fields + && fields->get (var)) + { + tree neutered_def = make_ssa_name (TREE_TYPE (var)); + + /* Receive definition from shared memory block. */ + + tree receiver_ref = build_receiver_ref (record_type, var, + receiver_decl); + gassign *recv = gimple_build_assign (neutered_def, + receiver_ref); + gsi_insert_after (©out_gsi, recv, GSI_CONTINUE_LINKING); + update_stmt (recv); + + if (TREE_CODE (var) == VAR_DECL) + { + /* If it's a VAR_DECL, we only copied to an SSA temporary. Copy + to the final location now. 
*/ + gassign *asgn = gimple_build_assign (var, neutered_def); + gsi_insert_after (©out_gsi, asgn, GSI_CONTINUE_LINKING); + update_stmt (asgn); + } + else + { + /* If it's an SSA name, create a new phi at the join node to + represent either the output from the active worker (the + barrier) or the inactive workers (the copyout block). */ + gphi *join_phi = create_phi_node (NULL_TREE, exit_block); + create_new_def_for (barrier_def, join_phi, + gimple_phi_result_ptr (join_phi)); + add_phi_arg (join_phi, barrier_def, ef2, UNKNOWN_LOCATION); + add_phi_arg (join_phi, neutered_def, copyout_to_exit, + UNKNOWN_LOCATION); + update_stmt (join_phi); + } + + /* Send definition to shared memory block. */ + + tree sender_ref = build_sender_ref (record_type, var, sender_decl); + + if (TREE_CODE (var) == SSA_NAME) + { + gassign *send = gimple_build_assign (sender_ref, var); + gimple_seq_add_stmt (&sender_seq, send); + update_stmt (send); + } + else if (TREE_CODE (var) == VAR_DECL) + { + tree tmp = make_ssa_name (TREE_TYPE (var)); + gassign *send = gimple_build_assign (tmp, var); + gimple_seq_add_stmt (&sender_seq, send); + update_stmt (send); + send = gimple_build_assign (sender_ref, tmp); + gimple_seq_add_stmt (&sender_seq, send); + update_stmt (send); + } + else + gcc_unreachable (); + } + } + + /* It's possible for the ET->DEST block (the work done by the active thread) + to finish with a control-flow insn, e.g. a UNIQUE function call. Split + the block and add SENDER_SEQ in the latter part to avoid having control + flow in the middle of a BB. 
*/ + + decl = builtin_decl_explicit (BUILT_IN_GOACC_SINGLE_COPY_END); + call = gimple_build_call (decl, 1, build_fold_addr_expr (sender_decl)); + gimple_seq_add_stmt (&sender_seq, call); + + gsi = gsi_last_bb (body); + gimple *last = gsi_stmt (gsi); + basic_block sender_block = split_block (body, last)->dest; + gsi = gsi_last_bb (sender_block); + gsi_insert_seq_after (&gsi, sender_seq, GSI_CONTINUE_LINKING); +} + +static void +neuter_worker_single (parallel_g *par, unsigned outer_mask, + bitmap worker_single, bitmap vector_single, + vec<propagation_set *> *prop_set, + hash_set<tree> *partitioned_var_uses) +{ + unsigned mask = outer_mask | par->mask; + + if ((mask & GOMP_DIM_MASK (GOMP_DIM_WORKER)) == 0) + { + basic_block block; + + for (unsigned i = 0; par->blocks.iterate (i, &block); i++) + { + bool has_defs = false; + hash_set<tree> def_escapes_block; + hash_set<tree> worker_partitioned_uses; + unsigned j; + tree var; + + FOR_EACH_SSA_NAME (j, var, cfun) + { + if (SSA_NAME_IS_VIRTUAL_OPERAND (var)) + { + has_defs = true; + continue; + } + + gimple *def_stmt = SSA_NAME_DEF_STMT (var); + + if (gimple_nop_p (def_stmt)) + continue; + + if (gimple_bb (def_stmt)->index != block->index) + continue; + + gimple *use_stmt; + imm_use_iterator use_iter; + bool uses_outside_block = false; + bool worker_partitioned_use = false; + + FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, var) + { + int blocknum = gimple_bb (use_stmt)->index; + + /* Don't propagate SSA names that are only used in the + current block, unless the usage is in a phi node: that + means the name left the block, then came back in at the + top. 
*/ + if (blocknum != block->index + || gimple_code (use_stmt) == GIMPLE_PHI) + uses_outside_block = true; + if (!bitmap_bit_p (worker_single, blocknum)) + worker_partitioned_use = true; + } + + if (uses_outside_block) + def_escapes_block.add (var); + + if (worker_partitioned_use) + { + worker_partitioned_uses.add (var); + has_defs = true; + } + } + + propagation_set *ws_prop = (*prop_set)[block->index]; + + if (ws_prop) + { + for (propagation_set::iterator it = ws_prop->begin (); + it != ws_prop->end (); + ++it) + { + tree var = *it; + if (TREE_CODE (var) == VAR_DECL) + { + def_escapes_block.add (var); + if (partitioned_var_uses->contains (var)) + { + worker_partitioned_uses.add (var); + has_defs = true; + } + } + } + + delete ws_prop; + (*prop_set)[block->index] = 0; + } + + tree record_type = (tree) block->aux; + + if (has_defs) + worker_single_copy (block, block, &def_escapes_block, + &worker_partitioned_uses, record_type); + else + worker_single_simple (block, block, &def_escapes_block); + } + } + + if ((outer_mask & GOMP_DIM_MASK (GOMP_DIM_WORKER)) == 0) + { + basic_block block; + + for (unsigned i = 0; par->blocks.iterate (i, &block); i++) + for (gimple_stmt_iterator gsi = gsi_start_bb (block); + !gsi_end_p (gsi); + gsi_next (&gsi)) + { + gimple *stmt = gsi_stmt (gsi); + + if (gimple_code (stmt) == GIMPLE_CALL + && !gimple_call_internal_p (stmt) + && !omp_sese_active_worker_call (as_a <gcall *> (stmt))) + { + /* If we have an OpenACC routine call in worker-single mode, + place barriers before and afterwards to prevent + clobbering re-used shared memory regions (as are used + for AMDGCN at present, for example). 
*/ + tree decl = builtin_decl_explicit (BUILT_IN_GOACC_BARRIER); + gsi_insert_before (&gsi, gimple_build_call (decl, 0), + GSI_SAME_STMT); + gsi_insert_after (&gsi, gimple_build_call (decl, 0), + GSI_NEW_STMT); + } + } + } + + if (par->inner) + neuter_worker_single (par->inner, mask, worker_single, vector_single, + prop_set, partitioned_var_uses); + if (par->next) + neuter_worker_single (par->next, outer_mask, worker_single, vector_single, + prop_set, partitioned_var_uses); +} + +static int +execute_omp_oacc_neuter_broadcast () +{ + bb_stmt_map_t bb_stmt_map; + auto_bitmap worker_single, vector_single; + + omp_sese_split_blocks (&bb_stmt_map); + + if (dump_file) + { + fprintf (dump_file, "\n\nAfter splitting:\n\n"); + dump_function_to_file (current_function_decl, dump_file, dump_flags); + } + + unsigned mask = 0; + + /* If this is a routine, calculate MASK as if the outer levels are already + partitioned. */ + tree attr = oacc_get_fn_attrib (current_function_decl); + if (attr) + { + tree dims = TREE_VALUE (attr); + unsigned ix; + for (ix = 0; ix != GOMP_DIM_MAX; ix++, dims = TREE_CHAIN (dims)) + { + tree allowed = TREE_PURPOSE (dims); + if (allowed && integer_zerop (allowed)) + mask |= GOMP_DIM_MASK (ix); + } + } + + parallel_g *par = omp_sese_discover_pars (&bb_stmt_map); + populate_single_mode_bitmaps (par, worker_single, vector_single, mask, 0); + + basic_block bb; + FOR_ALL_BB_FN (bb, cfun) + bb->aux = NULL; + + field_map = record_field_map_t::create_ggc (40); + + vec<propagation_set *> prop_set; + prop_set.create (last_basic_block_for_fn (cfun)); + + for (int i = 0; i < last_basic_block_for_fn (cfun); i++) + prop_set.quick_push (0); + + find_ssa_names_to_propagate (par, mask, worker_single, vector_single, + &prop_set); + + hash_set<tree> partitioned_var_uses; + hash_set<tree> gang_private_vars; + + find_gang_private_vars (&gang_private_vars); + find_partitioned_var_uses (par, mask, &partitioned_var_uses); + find_local_vars_to_propagate (par, mask, 
&partitioned_var_uses, + &gang_private_vars, &prop_set); + + FOR_ALL_BB_FN (bb, cfun) + { + propagation_set *ws_prop = prop_set[bb->index]; + if (ws_prop) + { + tree record_type = lang_hooks.types.make_type (RECORD_TYPE); + tree name = create_tmp_var_name (".oacc_ws_data_s"); + name = build_decl (UNKNOWN_LOCATION, TYPE_DECL, name, record_type); + DECL_ARTIFICIAL (name) = 1; + DECL_NAMELESS (name) = 1; + TYPE_NAME (record_type) = name; + TYPE_ARTIFICIAL (record_type) = 1; + + auto_vec<tree> field_vec (ws_prop->elements ()); + for (hash_set<tree>::iterator it = ws_prop->begin (); + it != ws_prop->end (); ++it) + field_vec.quick_push (*it); + + field_vec.qsort (sort_by_size_then_ssa_version_or_uid); + + field_map->put (record_type, field_map_t::create_ggc (17)); + + /* Insert var fields in reverse order, so the last inserted element + is the first in the structure. */ + for (int i = field_vec.length () - 1; i >= 0; i--) + install_var_field (field_vec[i], record_type); + + layout_type (record_type); + + bb->aux = (tree) record_type; + } + } + + neuter_worker_single (par, mask, worker_single, vector_single, &prop_set, + &partitioned_var_uses); + + prop_set.release (); + + /* This doesn't seem to make a difference. */ + loops_state_clear (LOOP_CLOSED_SSA); + + /* Neutering worker-single neutered blocks will invalidate dominance info. + It may be possible to incrementally update just the affected blocks, but + obliterate everything for now. 
*/ + free_dominance_info (CDI_DOMINATORS); + free_dominance_info (CDI_POST_DOMINATORS); + + if (dump_file) + { + fprintf (dump_file, "\n\nAfter neutering:\n\n"); + dump_function_to_file (current_function_decl, dump_file, dump_flags); + } + + return 0; +} + +namespace { + +const pass_data pass_data_omp_oacc_neuter_broadcast = +{ + GIMPLE_PASS, /* type */ + "omp_oacc_neuter_broadcast", /* name */ + OPTGROUP_OMP, /* optinfo_flags */ + TV_NONE, /* tv_id */ + PROP_cfg, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_update_ssa | TODO_cleanup_cfg, /* todo_flags_finish */ +}; + +class pass_omp_oacc_neuter_broadcast : public gimple_opt_pass +{ +public: + pass_omp_oacc_neuter_broadcast (gcc::context *ctxt) + : gimple_opt_pass (pass_data_omp_oacc_neuter_broadcast, ctxt) + {} + + /* opt_pass methods: */ + virtual bool gate (function *) + { + return (flag_openacc + && targetm.goacc.create_worker_broadcast_record); + }; + + virtual unsigned int execute (function *) + { + return execute_omp_oacc_neuter_broadcast (); + } + +}; // class pass_omp_oacc_neuter_broadcast + +} // anon namespace + +gimple_opt_pass * +make_pass_omp_oacc_neuter_broadcast (gcc::context *ctxt) +{ + return new pass_omp_oacc_neuter_broadcast (ctxt); +} diff --git a/gcc/optc-gen.awk b/gcc/optc-gen.awk index 880ac77..77e598e 100644 --- a/gcc/optc-gen.awk +++ b/gcc/optc-gen.awk @@ -195,10 +195,14 @@ for (i = 0; i < n_extra_vars; i++) { } for (i = 0; i < n_opts; i++) { name = var_name(flags[i]); - if (name == "") + init = opt_args("Init", flags[i]) + + if (name == "") { + if (init != "") + print "#error " opts[i] " must specify Var to use Init" continue; + } - init = opt_args("Init", flags[i]) if (init != "") { if (name in var_init && var_init[name] != init) print "#error multiple initializers for " name @@ -2737,12 +2737,14 @@ common_handle_option (struct gcc_options *opts, /* Deferred. 
*/ break; -#ifndef ACCEL_COMPILER case OPT_foffload_abi_: +#ifdef ACCEL_COMPILER + /* Handled in the 'mkoffload's. */ +#else error_at (loc, "%<-foffload-abi%> option can be specified only for " "offload compiler"); - break; #endif + break; case OPT_fpack_struct_: if (value <= 0 || (value & (value - 1)) || value > 16) diff --git a/gcc/passes.def b/gcc/passes.def index 26d86df..d7a1f8c 100644 --- a/gcc/passes.def +++ b/gcc/passes.def @@ -184,6 +184,7 @@ along with GCC; see the file COPYING3. If not see NEXT_PASS (pass_fixup_cfg); NEXT_PASS (pass_lower_eh_dispatch); NEXT_PASS (pass_oacc_loop_designation); + NEXT_PASS (pass_omp_oacc_neuter_broadcast); NEXT_PASS (pass_oacc_device_lower); NEXT_PASS (pass_omp_device_lower); NEXT_PASS (pass_omp_target_link); diff --git a/gcc/target.def b/gcc/target.def index 543ac78..2652d8c 100644 --- a/gcc/target.def +++ b/gcc/target.def @@ -1742,6 +1742,17 @@ private variables at OpenACC device-lowering time using the\n\ rtx, (tree var), NULL) +DEFHOOK +(create_worker_broadcast_record, +"Create a record used to propagate local-variable state from an active\n\ +worker to other workers. A possible implementation might adjust the type\n\ +of REC to place the new variable in shared GPU memory.\n\ +\n\ +Presence of this target hook indicates that middle end neutering/broadcasting\n\ +be used.", +tree, (tree rec, bool sender, const char *name), +NULL) + HOOK_VECTOR_END (goacc) /* Functions relating to vectorization. */ diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 034fc30..d0d2584 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,70 @@ +2021-08-09 Andrew MacLeod <amacleod@redhat.com> + + * gcc.dg/pr101741.c: New. + +2021-08-09 Martin Jambor <mjambor@suse.cz> + + PR testsuite/101654 + * gcc.dg/ipa/remref-6.c: Added missing -fdump-ipa-inline option. + +2021-08-09 Uroš Bizjak <ubizjak@gmail.com> + + PR target/101812 + * gcc.target/i386/pr101812.c: New test. 
+ +2021-08-09 Tejas Belagod <tejas.belagod@arm.com> + + * gcc.target/aarch64/vect-shr-reg.c: New testcase. + * gcc.target/aarch64/vect-shr-reg-run.c: Likewise. + +2021-08-09 Roger Sayle <roger@nextmovesoftware.com> + + * gcc.dg/pr68217.c: Add -fno-tree-ccp option. + * gcc.dg/tree-ssa/vrp24.c: Add -fno-tree-ccp option. + * g++.dg/ipa/pure-const-3.C: Add -fno-tree-ccp option. + +2021-08-09 Tobias Burnus <tobias@codesourcery.com> + + PR libfortran/101305 + PR fortran/101660 + * lib/gfortran.exp (gfortran_init): Add -I $specdir/libgfortran to + GFORTRAN_UNDER_TEST; update it when set by previous gfortran_init call. + * gfortran.dg/ISO_Fortran_binding_1.c: Use <...> not "..." for + ISO_Fortran_binding.h's #include. + * gfortran.dg/ISO_Fortran_binding_10.c: Likewise. + * gfortran.dg/ISO_Fortran_binding_11.c: Likewise. + * gfortran.dg/ISO_Fortran_binding_12.c: Likewise. + * gfortran.dg/ISO_Fortran_binding_15.c: Likewise. + * gfortran.dg/ISO_Fortran_binding_16.c: Likewise. + * gfortran.dg/ISO_Fortran_binding_17.c: Likewise. + * gfortran.dg/ISO_Fortran_binding_18.c: Likewise. + * gfortran.dg/ISO_Fortran_binding_3.c: Likewise. + * gfortran.dg/ISO_Fortran_binding_5.c: Likewise. + * gfortran.dg/ISO_Fortran_binding_6.c: Likewise. + * gfortran.dg/ISO_Fortran_binding_7.c: Likewise. + * gfortran.dg/ISO_Fortran_binding_8.c: Likewise. + * gfortran.dg/ISO_Fortran_binding_9.c: Likewise. + * gfortran.dg/PR94327.c: Likewise. + * gfortran.dg/PR94331.c: Likewise. + * gfortran.dg/bind_c_array_params_3_aux.c: Likewise. + * gfortran.dg/iso_fortran_binding_uint8_array_driver.c: Likewise. + * gfortran.dg/pr93524.c: Likewise. + +2021-08-09 Jonathan Wright <jonathan.wright@arm.com> + + * gcc.target/aarch64/sve/dup_lane_1.c: Don't split + scan-assembler tests over multiple lines. Expect 32-bit + result values in 'w' registers. + * gcc.target/aarch64/sve/extract_1.c: Likewise. + * gcc.target/aarch64/sve/extract_2.c: Likewise. + * gcc.target/aarch64/sve/extract_3.c: Likewise. 
+ * gcc.target/aarch64/sve/extract_4.c: Likewise. + +2021-08-09 Jonathan Wright <jonathan.wright@arm.com> + + * gcc.target/aarch64/vector_structure_intrinsics.c: Restrict + tests to little-endian targets. + 2021-08-08 Jeff Law <jlaw@localhost.localdomain> * gcc.target/tic6x/rotdi16-scan.c: Pull rotate into its own function. diff --git a/gcc/testsuite/g++.dg/ipa/pure-const-3.C b/gcc/testsuite/g++.dg/ipa/pure-const-3.C index 4cf9a6a..172a36b 100644 --- a/gcc/testsuite/g++.dg/ipa/pure-const-3.C +++ b/gcc/testsuite/g++.dg/ipa/pure-const-3.C @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -fno-ipa-vrp -fdump-tree-optimized" } */ +/* { dg-options "-O2 -fno-ipa-vrp -fdump-tree-optimized -fno-tree-ccp" } */ int *ptr; static int barvar; static int b(int a); diff --git a/gcc/testsuite/gcc.dg/ipa/remref-6.c b/gcc/testsuite/gcc.dg/ipa/remref-6.c index de36493..7deae31 100644 --- a/gcc/testsuite/gcc.dg/ipa/remref-6.c +++ b/gcc/testsuite/gcc.dg/ipa/remref-6.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -fno-early-inlining -fno-ipa-cp -fdump-tree-optimized" } */ +/* { dg-options "-O2 -fno-early-inlining -fno-ipa-cp -fdump-ipa-inline -fdump-tree-optimized" } */ static double global = 0.0; diff --git a/gcc/testsuite/gcc.dg/pr101741.c b/gcc/testsuite/gcc.dg/pr101741.c new file mode 100644 index 0000000..6587dca --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr101741.c @@ -0,0 +1,16 @@ +/* PR tree-optimization/101741 */ +/* { dg-do compile } */ +/* { dg-options "-O2 " } */ + +int +foo (void); + +unsigned int +toupper (int c) +{ + c = foo (); + while (c) + c = toupper (c); + + return c; +} diff --git a/gcc/testsuite/gcc.dg/pr68217.c b/gcc/testsuite/gcc.dg/pr68217.c index c5b0d1f..eb4f15e 100644 --- a/gcc/testsuite/gcc.dg/pr68217.c +++ b/gcc/testsuite/gcc.dg/pr68217.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -fdisable-tree-evrp -fdump-tree-vrp1" } */ +/* { dg-options "-O2 -fdisable-tree-evrp -fdump-tree-vrp1 -fno-tree-ccp" } */ int foo 
(void) { diff --git a/gcc/testsuite/gcc.dg/tree-ssa/vrp24.c b/gcc/testsuite/gcc.dg/tree-ssa/vrp24.c index dfe44b3..91015da 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/vrp24.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/vrp24.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -fno-tree-forwprop -fdump-tree-evrp-details -fdump-tree-optimized" } */ +/* { dg-options "-O2 -fno-tree-forwprop -fdump-tree-evrp-details -fdump-tree-optimized -fno-tree-ccp" } */ struct rtx_def; diff --git a/gcc/testsuite/gcc.dg/vla-stexp-1.c b/gcc/testsuite/gcc.dg/vla-stexp-1.c new file mode 100644 index 0000000..97d6693 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vla-stexp-1.c @@ -0,0 +1,18 @@ +/* PR29970*/ +/* { dg-do run } */ +/* { dg-options "-Wall -O0" } */ + +int foo(void) +{ + int n = 0; + return sizeof(*({ n = 10; struct foo { int x[n]; } x; &x; })); +} + + +int main() +{ + if (sizeof(struct foo { int x[10]; }) != foo()) + __builtin_abort(); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/vect-shr-reg-run.c b/gcc/testsuite/gcc.target/aarch64/vect-shr-reg-run.c new file mode 100644 index 0000000..3190448 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/vect-shr-reg-run.c @@ -0,0 +1,53 @@ +/* { dg-do run } */ +/* { dg-options "-O3 -march=armv8.2-a" } */ + +#include "vect-shr-reg.c" + +int +main(void) +{ + int64_t a[16]; + int64_t b[16]; + int64_t c[17]; + + uint64_t ua[16]; + uint64_t ub[16]; + uint64_t uc[17]; + + int64_t res_a[16]; + uint64_t res_ua[16]; + + int i; + + /* Set up inputs. */ + for (i = 0; i < 16; i++) + { + b[i] = -2; + c[i] = 34; + ub[i] = 0xffffffffffffffff; + uc[i] = 52; + } + + /* Set up reference values. */ + for (i = 0; i < 16; i++) + { + res_a[i] = -1LL; + res_ua[i] = 0x0fffLL; + } + + /* Do the shifts. */ + f (ua, ub, uc); + g (a, b, c); + + /* Compare outputs against reference values. 
*/ + for (i = 0; i < 16; i++) + { + if (a[i] != res_a[i]) + __builtin_abort (); + + if (ua[i] != res_ua[i]) + __builtin_abort (); + } + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/vect-shr-reg.c b/gcc/testsuite/gcc.target/aarch64/vect-shr-reg.c new file mode 100644 index 0000000..5736daf --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/vect-shr-reg.c @@ -0,0 +1,30 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=armv8.2-a" } */ + +#include <stdint.h> +#include <stdio.h> + +#pragma GCC target "+nosve" + +int __attribute__((noinline)) +f(uint64_t *__restrict a, uint64_t *__restrict b, uint64_t *__restrict c) +{ + int i; + + for (i = 0; i < 16; i++) + a[i] = b[i] >> c[i]; +} + + +int __attribute__((noinline)) +g(int64_t *__restrict a, int64_t *__restrict b, int64_t *__restrict c) +{ + int i; + + for (i = 0; i < 16; i++) + a[i] = b[i] >> c[i]; +} + +/* { dg-final { scan-assembler "neg\\tv" } } */ +/* { dg-final { scan-assembler "ushl\\tv" } } */ +/* { dg-final { scan-assembler "sshl\\tv" } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx2-gather-2.c b/gcc/testsuite/gcc.target/i386/avx2-gather-2.c index 1a704af..ad5ef73 100644 --- a/gcc/testsuite/gcc.target/i386/avx2-gather-2.c +++ b/gcc/testsuite/gcc.target/i386/avx2-gather-2.c @@ -1,6 +1,7 @@ /* { dg-do compile } */ -/* { dg-options "-O3 -mavx2 -fdump-tree-vect-details -mtune=skylake" } */ +/* { dg-options "-O3 -fdump-tree-vect-details -march=skylake" } */ #include "avx2-gather-1.c" /* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 16 "vect" } } */ +/* { dg-final { scan-assembler "vpcmpeqd" } } */ diff --git a/gcc/testsuite/gcc.target/i386/cond_op_shift_d-1.c b/gcc/testsuite/gcc.target/i386/cond_op_shift_d-1.c new file mode 100644 index 0000000..af047b6 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/cond_op_shift_d-1.c @@ -0,0 +1,56 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=skylake-avx512 -fdump-tree-optimized" } */ +/* { dg-final { 
scan-tree-dump-times ".COND_SHR" 2 "optimized" } } */ +/* { dg-final { scan-tree-dump-times ".COND_SHL" 2 "optimized" } } */ +/* { dg-final { scan-assembler-times "vpsrad" 1 } } */ +/* { dg-final { scan-assembler-times "vpslld" 1 } } */ +/* { dg-final { scan-assembler-times "vpsravd" 1 } } */ +/* { dg-final { scan-assembler-times "vpsllvd" 1 } } */ + + +typedef short int16; +typedef unsigned short uint16; +typedef int int32; +typedef unsigned int uint32; +typedef long long int64; +typedef unsigned long long uint64; + +#ifndef NUM +#define NUM 800 +#endif +#ifndef TYPE +#define TYPE int +#endif + +TYPE a[NUM], b[NUM], c[NUM], d[NUM], e[NUM], j[NUM]; +#define MIN(X,Y) ((X) < (Y) ? (X) : (Y)) +#define MAX(X,Y) ((X) < (Y) ? (Y) : (X)) + +#define BINC(OPNAME, OP) \ + void \ + __attribute__ ((noipa,optimize ("O3"))) \ + foo_##OPNAME##_const () \ + { \ + for (int i = 0; i != NUM; i++) \ + if (b[i] < c[i]) \ + a[i] = d[i] OP 3; \ + else \ + a[i] = MAX(d[i], e[i]); \ + } + +#define BINV(OPNAME, OP) \ + void \ + __attribute__ ((noipa,optimize ("O3"))) \ + foo_##OPNAME##_variable () \ + { \ + for (int i = 0; i != NUM; i++) \ + if (b[i] < c[i]) \ + a[i] = d[i] OP e[i]; \ + else \ + a[i] = MAX(d[i], e[i]); \ + } + +BINC (shl, <<); +BINC (shr, >>); +BINV (shl, <<); +BINV (shr, >>); diff --git a/gcc/testsuite/gcc.target/i386/cond_op_shift_d-2.c b/gcc/testsuite/gcc.target/i386/cond_op_shift_d-2.c new file mode 100644 index 0000000..449e5b4 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/cond_op_shift_d-2.c @@ -0,0 +1,102 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx512vl -mprefer-vector-width=256" } */ +/* { dg-require-effective-target avx512vl } */ + +#define AVX512VL +#ifndef CHECK +#define CHECK "avx512f-helper.h" +#endif + +#include CHECK + +#include "cond_op_shift_d-1.c" + +#define BINO2C(OPNAME, OP) \ + void \ + __attribute__ ((noipa,optimize ("O2"))) \ + foo_o2_##OPNAME##_const () \ + { \ + for (int i = 0; i != NUM; i++) \ + if (b[i] < c[i]) \ + j[i] = d[i] OP 3; \ 
+ else \ + j[i] = MAX(d[i], e[i]); \ + } + +#define BINO2V(OPNAME, OP) \ + void \ + __attribute__ ((noipa,optimize ("O2"))) \ + foo_o2_##OPNAME##_variable () \ + { \ + for (int i = 0; i != NUM; i++) \ + if (b[i] < c[i]) \ + j[i] = d[i] OP e[i]; \ + else \ + j[i] = MAX(d[i], e[i]); \ + } + +BINO2C (shl, <<); +BINO2C (shr, >>); +BINO2V (shl, <<); +BINO2V (shr, >>); + +static void +test_256 (void) +{ + int sign = -1; + for (int i = 0; i != NUM; i++) + { + a[i] = 0; + d[i] = i * 2; + e[i] = (i * i * 3 - i * 9 + 6)%8; + b[i] = i * 83; + c[i] = b[i] + sign; + sign *= -1; + j[i] = 1; + } + foo_shl_const (); + foo_o2_shl_const (); + for (int i = 0; i != NUM; i++) + { + if (a[i] != j[i]) + abort (); + a[i] = 0; + b[i] = 1; + } + + foo_shr_const (); + foo_o2_shr_const (); + for (int i = 0; i != NUM; i++) + { + if (a[i] != j[i]) + abort (); + a[i] = 0; + j[i] = 1; + } + + foo_shl_variable (); + foo_o2_shl_variable (); + for (int i = 0; i != NUM; i++) + { + if (a[i] != j[i]) + abort (); + a[i] = 0; + b[i] = 1; + } + + foo_shr_variable (); + foo_o2_shr_variable (); + for (int i = 0; i != NUM; i++) + { + if (a[i] != j[i]) + abort (); + a[i] = 0; + j[i] = 1; + } +} + +static void +test_128 () +{ + +} diff --git a/gcc/testsuite/gcc.target/i386/cond_op_shift_q-1.c b/gcc/testsuite/gcc.target/i386/cond_op_shift_q-1.c new file mode 100644 index 0000000..1b981b5 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/cond_op_shift_q-1.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=skylake-avx512 -fdump-tree-optimized -DTYPE=int64" } */ +/* { dg-final { scan-tree-dump-times ".COND_SHR" 2 "optimized" } } */ +/* { dg-final { scan-tree-dump-times ".COND_SHL" 2 "optimized" } } */ +/* { dg-final { scan-assembler-times "vpsravq" 1 } } */ +/* { dg-final { scan-assembler-times "vpsllvq" 1 } } */ +/* { dg-final { scan-assembler-times "vpsravq" 1 } } */ +/* { dg-final { scan-assembler-times "vpsllvq" 1 } } */ + + +#include "cond_op_shift_d-1.c" diff --git 
a/gcc/testsuite/gcc.target/i386/cond_op_shift_q-2.c b/gcc/testsuite/gcc.target/i386/cond_op_shift_q-2.c new file mode 100644 index 0000000..94f1d71 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/cond_op_shift_q-2.c @@ -0,0 +1,5 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx512vl -mprefer-vector-width=256 -DTYPE=int64" } */ +/* { dg-require-effective-target avx512vl } */ + +#include "cond_op_shift_d-2.c" diff --git a/gcc/testsuite/gcc.target/i386/cond_op_shift_ud-1.c b/gcc/testsuite/gcc.target/i386/cond_op_shift_ud-1.c new file mode 100644 index 0000000..eea0f67 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/cond_op_shift_ud-1.c @@ -0,0 +1,10 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=skylake-avx512 -fdump-tree-optimized -DTYPE=uint32" } */ +/* { dg-final { scan-tree-dump-times ".COND_SHR" 2 "optimized" } } */ +/* { dg-final { scan-tree-dump-times ".COND_SHL" 2 "optimized" } } */ +/* { dg-final { scan-assembler-times "vpsrlvd" 1 } } */ +/* { dg-final { scan-assembler-times "vpsllvd" 1 } } */ +/* { dg-final { scan-assembler-times "vpsrlvd" 1 } } */ +/* { dg-final { scan-assembler-times "vpsllvd" 1 } } */ + +#include "cond_op_shift_d-1.c" diff --git a/gcc/testsuite/gcc.target/i386/cond_op_shift_ud-2.c b/gcc/testsuite/gcc.target/i386/cond_op_shift_ud-2.c new file mode 100644 index 0000000..b18c568 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/cond_op_shift_ud-2.c @@ -0,0 +1,5 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx512vl -mprefer-vector-width=256 -DTYPE=uint32" } */ +/* { dg-require-effective-target avx512vl } */ + +#include "cond_op_shift_d-2.c" diff --git a/gcc/testsuite/gcc.target/i386/cond_op_shift_uq-1.c b/gcc/testsuite/gcc.target/i386/cond_op_shift_uq-1.c new file mode 100644 index 0000000..77a0388 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/cond_op_shift_uq-1.c @@ -0,0 +1,10 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=skylake-avx512 -fdump-tree-optimized -DTYPE=uint64" } */ +/* { dg-final { 
scan-tree-dump-times ".COND_SHR" 2 "optimized" } } */ +/* { dg-final { scan-tree-dump-times ".COND_SHL" 2 "optimized" } } */ +/* { dg-final { scan-assembler-times "vpsrlq" 1 } } */ +/* { dg-final { scan-assembler-times "vpsllq" 1 } } */ +/* { dg-final { scan-assembler-times "vpsrlq" 1 } } */ +/* { dg-final { scan-assembler-times "vpsllq" 1 } } */ + +#include "cond_op_shift_d-1.c" diff --git a/gcc/testsuite/gcc.target/i386/cond_op_shift_uq-2.c b/gcc/testsuite/gcc.target/i386/cond_op_shift_uq-2.c new file mode 100644 index 0000000..a9e0acf --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/cond_op_shift_uq-2.c @@ -0,0 +1,5 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx512vl -mprefer-vector-width=256 -DTYPE=uint64" } */ +/* { dg-require-effective-target avx512vl } */ + +#include "cond_op_shift_d-2.c" diff --git a/gcc/testsuite/gcc.target/i386/cond_op_shift_uw-1.c b/gcc/testsuite/gcc.target/i386/cond_op_shift_uw-1.c new file mode 100644 index 0000000..b84cdd89 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/cond_op_shift_uw-1.c @@ -0,0 +1,8 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=skylake-avx512 -fdump-tree-optimized -DTYPE=uint16" } */ +/* { dg-final { scan-tree-dump-times ".COND_SHR" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times ".COND_SHL" 1 "optimized" } } */ +/* { dg-final { scan-assembler-times "vpsrlw" 1 } } */ +/* { dg-final { scan-assembler-times "vpsllw" 1 } } */ + +#include "cond_op_shift_d-1.c" diff --git a/gcc/testsuite/gcc.target/i386/cond_op_shift_uw-2.c b/gcc/testsuite/gcc.target/i386/cond_op_shift_uw-2.c new file mode 100644 index 0000000..cfdece9 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/cond_op_shift_uw-2.c @@ -0,0 +1,6 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx512vl -mavx512bw -mprefer-vector-width=256 -DTYPE=uint16" } */ +/* { dg-require-effective-target avx512vl } */ +/* { dg-require-effective-target avx512bw } */ + +#include "cond_op_shift_d-2.c" diff --git 
a/gcc/testsuite/gcc.target/i386/cond_op_shift_w-1.c b/gcc/testsuite/gcc.target/i386/cond_op_shift_w-1.c new file mode 100644 index 0000000..54c854f --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/cond_op_shift_w-1.c @@ -0,0 +1,8 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=skylake-avx512 -fdump-tree-optimized -DTYPE=int16" } */ +/* { dg-final { scan-tree-dump-times ".COND_SHR" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times ".COND_SHL" 1 "optimized" } } */ +/* { dg-final { scan-assembler-times "vpsraw" 1 } } */ +/* { dg-final { scan-assembler-times "vpsllw" 1 } } */ + +#include "cond_op_shift_d-1.c" diff --git a/gcc/testsuite/gcc.target/i386/cond_op_shift_w-2.c b/gcc/testsuite/gcc.target/i386/cond_op_shift_w-2.c new file mode 100644 index 0000000..5776826 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/cond_op_shift_w-2.c @@ -0,0 +1,6 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx512vl -mavx512bw -mprefer-vector-width=256 -DTYPE=int16" } */ +/* { dg-require-effective-target avx512vl } */ +/* { dg-require-effective-target avx512bw } */ + +#include "cond_op_shift_d-2.c" diff --git a/gcc/testsuite/gcc.target/i386/pr101812.c b/gcc/testsuite/gcc.target/i386/pr101812.c new file mode 100644 index 0000000..07e84a3 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr101812.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-O1 -ftree-loop-vectorize -fvect-cost-model=unlimited" } */ + +#define LTGT(a, b) (__builtin_islessgreater (a, b) ? a : b) +void foo (int ilast,float* w, float* w2) +{ + int i; + for (i = 0; i < ilast; ++i) + { + w[i] = LTGT (0.0f, w2[i]); + } +} diff --git a/gcc/testsuite/gfortran.dg/ISO_Fortran_binding_1.c b/gcc/testsuite/gfortran.dg/ISO_Fortran_binding_1.c index bb56ca0..d0d036a 100644 --- a/gcc/testsuite/gfortran.dg/ISO_Fortran_binding_1.c +++ b/gcc/testsuite/gfortran.dg/ISO_Fortran_binding_1.c @@ -1,6 +1,6 @@ /* Test F2008 18.5: ISO_Fortran_binding.h functions. 
*/ -#include "ISO_Fortran_binding.h" +#include <ISO_Fortran_binding.h> #include <assert.h> #include <stdio.h> #include <stdlib.h> diff --git a/gcc/testsuite/gfortran.dg/ISO_Fortran_binding_10.c b/gcc/testsuite/gfortran.dg/ISO_Fortran_binding_10.c index c3954e4..91222ff 100644 --- a/gcc/testsuite/gfortran.dg/ISO_Fortran_binding_10.c +++ b/gcc/testsuite/gfortran.dg/ISO_Fortran_binding_10.c @@ -2,7 +2,7 @@ /* Contributed by Reinhold Bader <Bader@lrz.de> */ -#include "ISO_Fortran_binding.h" +#include <ISO_Fortran_binding.h> #include <stdlib.h> #include <stdio.h> #include <stdbool.h> diff --git a/gcc/testsuite/gfortran.dg/ISO_Fortran_binding_11.c b/gcc/testsuite/gfortran.dg/ISO_Fortran_binding_11.c index c2d4e11..e013011 100644 --- a/gcc/testsuite/gfortran.dg/ISO_Fortran_binding_11.c +++ b/gcc/testsuite/gfortran.dg/ISO_Fortran_binding_11.c @@ -5,7 +5,7 @@ Contributed by Reinhold Bader <Bader@lrz.de>#include <stdio.h> */ #include <stdlib.h> #include <stddef.h> #include <stdio.h> -#include "ISO_Fortran_binding.h" +#include <ISO_Fortran_binding.h> typedef struct { diff --git a/gcc/testsuite/gfortran.dg/ISO_Fortran_binding_12.c b/gcc/testsuite/gfortran.dg/ISO_Fortran_binding_12.c index 078c5de..0a41576 100644 --- a/gcc/testsuite/gfortran.dg/ISO_Fortran_binding_12.c +++ b/gcc/testsuite/gfortran.dg/ISO_Fortran_binding_12.c @@ -2,7 +2,7 @@ #include <stdio.h> #include <math.h> -#include "ISO_Fortran_binding.h" +#include <ISO_Fortran_binding.h> /* Contributed by Reinhold Bader <Bader@lrz.de> */ diff --git a/gcc/testsuite/gfortran.dg/ISO_Fortran_binding_15.c b/gcc/testsuite/gfortran.dg/ISO_Fortran_binding_15.c index 622f2de..fc70da4 100644 --- a/gcc/testsuite/gfortran.dg/ISO_Fortran_binding_15.c +++ b/gcc/testsuite/gfortran.dg/ISO_Fortran_binding_15.c @@ -4,7 +4,7 @@ #include <stdlib.h> #include <stdio.h> -#include "ISO_Fortran_binding.h" +#include <ISO_Fortran_binding.h> // Prototype for Fortran functions extern void Fsub(CFI_cdesc_t *); diff --git 
a/gcc/testsuite/gfortran.dg/ISO_Fortran_binding_16.c b/gcc/testsuite/gfortran.dg/ISO_Fortran_binding_16.c index 50b92ec..915b6e7 100644 --- a/gcc/testsuite/gfortran.dg/ISO_Fortran_binding_16.c +++ b/gcc/testsuite/gfortran.dg/ISO_Fortran_binding_16.c @@ -1,6 +1,6 @@ /* Test the fix for PR92142. */ -#include "ISO_Fortran_binding.h" +#include <ISO_Fortran_binding.h> #include <stdlib.h> diff --git a/gcc/testsuite/gfortran.dg/ISO_Fortran_binding_17.c b/gcc/testsuite/gfortran.dg/ISO_Fortran_binding_17.c index b0893cc..fa75268 100644 --- a/gcc/testsuite/gfortran.dg/ISO_Fortran_binding_17.c +++ b/gcc/testsuite/gfortran.dg/ISO_Fortran_binding_17.c @@ -2,7 +2,7 @@ #include <stdio.h> #include <assert.h> -#include "ISO_Fortran_binding.h" +#include <ISO_Fortran_binding.h> void Csub(const CFI_cdesc_t *, size_t, CFI_index_t invalid); diff --git a/gcc/testsuite/gfortran.dg/ISO_Fortran_binding_18.c b/gcc/testsuite/gfortran.dg/ISO_Fortran_binding_18.c index ef40134..5a3952c 100644 --- a/gcc/testsuite/gfortran.dg/ISO_Fortran_binding_18.c +++ b/gcc/testsuite/gfortran.dg/ISO_Fortran_binding_18.c @@ -1,6 +1,6 @@ #include <stdlib.h> #include <string.h> -#include "ISO_Fortran_binding.h" +#include <ISO_Fortran_binding.h> extern int do_loop(CFI_cdesc_t* array); diff --git a/gcc/testsuite/gfortran.dg/ISO_Fortran_binding_3.c b/gcc/testsuite/gfortran.dg/ISO_Fortran_binding_3.c index 9f35b0d..33d1bc3 100644 --- a/gcc/testsuite/gfortran.dg/ISO_Fortran_binding_3.c +++ b/gcc/testsuite/gfortran.dg/ISO_Fortran_binding_3.c @@ -1,4 +1,4 @@ -#include "ISO_Fortran_binding.h" +#include <ISO_Fortran_binding.h> #include <stdio.h> #include <stdlib.h> diff --git a/gcc/testsuite/gfortran.dg/ISO_Fortran_binding_5.c b/gcc/testsuite/gfortran.dg/ISO_Fortran_binding_5.c index 116f548..b18a899 100644 --- a/gcc/testsuite/gfortran.dg/ISO_Fortran_binding_5.c +++ b/gcc/testsuite/gfortran.dg/ISO_Fortran_binding_5.c @@ -4,7 +4,7 @@ #include <stdio.h> #include <math.h> -#include "ISO_Fortran_binding.h" +#include 
<ISO_Fortran_binding.h> typedef struct { int i; diff --git a/gcc/testsuite/gfortran.dg/ISO_Fortran_binding_6.c b/gcc/testsuite/gfortran.dg/ISO_Fortran_binding_6.c index 704b27c..c7981c5 100644 --- a/gcc/testsuite/gfortran.dg/ISO_Fortran_binding_6.c +++ b/gcc/testsuite/gfortran.dg/ISO_Fortran_binding_6.c @@ -4,7 +4,7 @@ #include <stdio.h> #include <math.h> -#include "ISO_Fortran_binding.h" +#include <ISO_Fortran_binding.h> #define DEBUG 0 diff --git a/gcc/testsuite/gfortran.dg/ISO_Fortran_binding_7.c b/gcc/testsuite/gfortran.dg/ISO_Fortran_binding_7.c index 26b4ab5..8162451 100644 --- a/gcc/testsuite/gfortran.dg/ISO_Fortran_binding_7.c +++ b/gcc/testsuite/gfortran.dg/ISO_Fortran_binding_7.c @@ -2,7 +2,7 @@ /* Contributed by Reinhold Bader <Bader@lrz.de> */ -#include "ISO_Fortran_binding.h" +#include <ISO_Fortran_binding.h> #include <stdio.h> #include <stdlib.h> #include <math.h> diff --git a/gcc/testsuite/gfortran.dg/ISO_Fortran_binding_8.c b/gcc/testsuite/gfortran.dg/ISO_Fortran_binding_8.c index a0d1bdc..d3dce3a 100644 --- a/gcc/testsuite/gfortran.dg/ISO_Fortran_binding_8.c +++ b/gcc/testsuite/gfortran.dg/ISO_Fortran_binding_8.c @@ -2,7 +2,7 @@ /* Contributed by Reinhold Bader <Bader@lrz.de> */ -#include "ISO_Fortran_binding.h" +#include <ISO_Fortran_binding.h> #include <stdio.h> float Cxgl[] = { 1.1, 2.3, 5.1, 4.2 }; diff --git a/gcc/testsuite/gfortran.dg/ISO_Fortran_binding_9.c b/gcc/testsuite/gfortran.dg/ISO_Fortran_binding_9.c index 632604f..cb17077 100644 --- a/gcc/testsuite/gfortran.dg/ISO_Fortran_binding_9.c +++ b/gcc/testsuite/gfortran.dg/ISO_Fortran_binding_9.c @@ -2,7 +2,7 @@ /* Contributed by Gilles Gouaillardet <gilles@rist.or.jp> */ -#include "ISO_Fortran_binding.h" +#include <ISO_Fortran_binding.h> #include <stdlib.h> int cdesc_c(CFI_cdesc_t* x, long *expected) diff --git a/gcc/testsuite/gfortran.dg/PR94327.c b/gcc/testsuite/gfortran.dg/PR94327.c index 4ce408d..9d22681 100644 --- a/gcc/testsuite/gfortran.dg/PR94327.c +++ 
b/gcc/testsuite/gfortran.dg/PR94327.c @@ -4,7 +4,7 @@ #include <stdbool.h> #include <stdlib.h> -#include "ISO_Fortran_binding.h" +#include <ISO_Fortran_binding.h> bool c_vrfy (const CFI_cdesc_t *restrict); diff --git a/gcc/testsuite/gfortran.dg/PR94331.c b/gcc/testsuite/gfortran.dg/PR94331.c index 2fbfe0e..df571c7 100644 --- a/gcc/testsuite/gfortran.dg/PR94331.c +++ b/gcc/testsuite/gfortran.dg/PR94331.c @@ -4,7 +4,7 @@ #include <stdbool.h> #include <stdlib.h> -#include "ISO_Fortran_binding.h" +#include <ISO_Fortran_binding.h> bool c_vrfy (const CFI_cdesc_t *restrict); diff --git a/gcc/testsuite/gfortran.dg/bind_c_array_params_3_aux.c b/gcc/testsuite/gfortran.dg/bind_c_array_params_3_aux.c index 5176d8b..4594185 100644 --- a/gcc/testsuite/gfortran.dg/bind_c_array_params_3_aux.c +++ b/gcc/testsuite/gfortran.dg/bind_c_array_params_3_aux.c @@ -5,7 +5,7 @@ #include <errno.h> #include <stdio.h> -#include "ISO_Fortran_binding.h" +#include <ISO_Fortran_binding.h> void arr_set_c(CFI_cdesc_t*); diff --git a/gcc/testsuite/gfortran.dg/iso_fortran_binding_uint8_array_driver.c b/gcc/testsuite/gfortran.dg/iso_fortran_binding_uint8_array_driver.c index bfd567b..9c2b5fb 100644 --- a/gcc/testsuite/gfortran.dg/iso_fortran_binding_uint8_array_driver.c +++ b/gcc/testsuite/gfortran.dg/iso_fortran_binding_uint8_array_driver.c @@ -1,7 +1,7 @@ #include <stdlib.h> #include <stdio.h> #include <inttypes.h> -#include "ISO_Fortran_binding.h" +#include <ISO_Fortran_binding.h> extern void fsub(CFI_cdesc_t *); diff --git a/gcc/testsuite/gfortran.dg/pr93524.c b/gcc/testsuite/gfortran.dg/pr93524.c index ba40d00..8a6c066 100644 --- a/gcc/testsuite/gfortran.dg/pr93524.c +++ b/gcc/testsuite/gfortran.dg/pr93524.c @@ -2,7 +2,7 @@ sm incorrectly for dimensions > 2. 
*/ #include <stdlib.h> // For size_t -#include "ISO_Fortran_binding.h" +#include <ISO_Fortran_binding.h> void my_fortran_sub_1 (CFI_cdesc_t *dv); void my_fortran_sub_2 (CFI_cdesc_t *dv); diff --git a/gcc/testsuite/lib/gfortran.exp b/gcc/testsuite/lib/gfortran.exp index 1e7da11..cae6738 100644 --- a/gcc/testsuite/lib/gfortran.exp +++ b/gcc/testsuite/lib/gfortran.exp @@ -151,6 +151,7 @@ proc gfortran_init { args } { global gcc_warning_prefix global gcc_error_prefix global TEST_ALWAYS_FLAGS + global gfortran_init_set_GFORTRAN_UNDER_TEST # We set LC_ALL and LANG to C so that we get the same error messages as expected. setenv LC_ALL C @@ -166,7 +167,11 @@ proc gfortran_init { args } { setenv LANG C.ASCII } - if ![info exists GFORTRAN_UNDER_TEST] then { + # GFORTRAN_UNDER_TEST as set below contains $specpath, which depends on + # the used multilib config. Thus, its value may need to be reset; + # that's tracked via gfortran_init_set_GFORTRAN_UNDER_TEST. + if { ![info exists GFORTRAN_UNDER_TEST] + || [info exists gfortran_init_set_GFORTRAN_UNDER_TEST] } then { if [info exists TOOL_EXECUTABLE] { set GFORTRAN_UNDER_TEST $TOOL_EXECUTABLE } else { @@ -178,7 +183,8 @@ proc gfortran_init { args } { } else { set specpath [get_multilibs] } - set GFORTRAN_UNDER_TEST [findfile $base_dir/../../gfortran "$base_dir/../../gfortran -B$base_dir/../../ -B$specpath/libgfortran/" [findfile $base_dir/gfortran "$base_dir/gfortran -B$base_dir/" [transform gfortran]]] + set gfortran_init_set_GFORTRAN_UNDER_TEST 1 + set GFORTRAN_UNDER_TEST [findfile $base_dir/../../gfortran "$base_dir/../../gfortran -B$base_dir/../../ -B$specpath/libgfortran/ -I$specpath/libgfortran" [findfile $base_dir/gfortran "$base_dir/gfortran -B$base_dir/" [transform gfortran]]] } } } diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h index 5484ad5..83941bc 100644 --- a/gcc/tree-pass.h +++ b/gcc/tree-pass.h @@ -425,6 +425,7 @@ extern gimple_opt_pass *make_pass_expand_omp (gcc::context *ctxt); extern gimple_opt_pass 
*make_pass_expand_omp_ssa (gcc::context *ctxt); extern gimple_opt_pass *make_pass_omp_target_link (gcc::context *ctxt); extern gimple_opt_pass *make_pass_oacc_loop_designation (gcc::context *ctxt); +extern gimple_opt_pass *make_pass_omp_oacc_neuter_broadcast (gcc::context *ctxt); extern gimple_opt_pass *make_pass_oacc_device_lower (gcc::context *ctxt); extern gimple_opt_pass *make_pass_omp_device_lower (gcc::context *ctxt); extern gimple_opt_pass *make_pass_object_sizes (gcc::context *ctxt); diff --git a/gcc/tree-ssa-ccp.c b/gcc/tree-ssa-ccp.c index 9ce6214..003c9c2 100644 --- a/gcc/tree-ssa-ccp.c +++ b/gcc/tree-ssa-ccp.c @@ -1293,6 +1293,28 @@ ccp_fold (gimple *stmt) } } +/* Determine the minimum and maximum values, *MIN and *MAX respectively, + represented by the mask pair VAL and MASK with signedness SGN and + precision PRECISION. */ + +void +value_mask_to_min_max (widest_int *min, widest_int *max, + const widest_int &val, const widest_int &mask, + signop sgn, int precision) +{ + *min = wi::bit_and_not (val, mask); + *max = val | mask; + if (sgn == SIGNED && wi::neg_p (mask)) + { + widest_int sign_bit = wi::lshift (1, precision - 1); + *min ^= sign_bit; + *max ^= sign_bit; + /* MAX is zero extended, and MIN is sign extended. */ + *min = wi::ext (*min, precision, sgn); + *max = wi::ext (*max, precision, sgn); + } +} + /* Apply the operation CODE in type TYPE to the value, mask pair RVAL and RMASK representing a value of type RTYPE and set the value, mask pair *VAL and *MASK to the result. */ @@ -1334,6 +1356,33 @@ bit_value_unop (enum tree_code code, signop type_sgn, int type_precision, break; } + case ABS_EXPR: + case ABSU_EXPR: + if (wi::sext (rmask, rtype_precision) == -1) + *mask = -1; + else if (wi::neg_p (rmask)) + { + /* Result is either rval or -rval. */ + widest_int temv, temm; + bit_value_unop (NEGATE_EXPR, rtype_sgn, rtype_precision, &temv, + &temm, type_sgn, type_precision, rval, rmask); + temm |= (rmask | (rval ^ temv)); + /* Extend the result. 
*/ + *mask = wi::ext (temm, type_precision, type_sgn); + *val = wi::ext (temv, type_precision, type_sgn); + } + else if (wi::neg_p (rval)) + { + bit_value_unop (NEGATE_EXPR, type_sgn, type_precision, val, mask, + type_sgn, type_precision, rval, rmask); + } + else + { + *mask = rmask; + *val = rval; + } + break; + default: *mask = -1; break; @@ -1357,6 +1406,8 @@ bit_value_binop (enum tree_code code, signop sgn, int width, /* Assume we'll get a constant result. Use an initial non varying value, we fall back to varying in the end if necessary. */ *mask = -1; + /* Ensure that VAL is initialized (to any value). */ + *val = 0; switch (code) { @@ -1527,6 +1578,7 @@ bit_value_binop (enum tree_code code, signop sgn, int width, case LT_EXPR: case LE_EXPR: { + widest_int min1, max1, min2, max2; int minmax, maxmin; const widest_int &o1val = swap_p ? r2val : r1val; @@ -1534,26 +1586,21 @@ bit_value_binop (enum tree_code code, signop sgn, int width, const widest_int &o2val = swap_p ? r1val : r2val; const widest_int &o2mask = swap_p ? r1mask : r2mask; - /* If the most significant bits are not known we know nothing. */ - if (wi::neg_p (o1mask) || wi::neg_p (o2mask)) - break; + value_mask_to_min_max (&min1, &max1, o1val, o1mask, + r1type_sgn, r1type_precision); + value_mask_to_min_max (&min2, &max2, o2val, o2mask, + r1type_sgn, r1type_precision); /* For comparisons the signedness is in the comparison operands. */ - sgn = r1type_sgn; - - /* If we know the most significant bits we know the values - value ranges by means of treating varying bits as zero - or one. Do a cross comparison of the max/min pairs. */ - maxmin = wi::cmp (o1val | o1mask, - wi::bit_and_not (o2val, o2mask), sgn); - minmax = wi::cmp (wi::bit_and_not (o1val, o1mask), - o2val | o2mask, sgn); - if (maxmin < 0) /* o1 is less than o2. */ + /* Do a cross comparison of the max/min pairs. */ + maxmin = wi::cmp (max1, min2, r1type_sgn); + minmax = wi::cmp (min1, max2, r1type_sgn); + if (maxmin < (code == LE_EXPR ? 
1: 0)) /* o1 < or <= o2. */ { *mask = 0; *val = 1; } - else if (minmax > 0) /* o1 is not less or equal to o2. */ + else if (minmax > (code == LT_EXPR ? -1 : 0)) /* o1 >= or > o2. */ { *mask = 0; *val = 0; @@ -1574,6 +1621,49 @@ bit_value_binop (enum tree_code code, signop sgn, int width, break; } + case MIN_EXPR: + case MAX_EXPR: + { + widest_int min1, max1, min2, max2; + + value_mask_to_min_max (&min1, &max1, r1val, r1mask, sgn, width); + value_mask_to_min_max (&min2, &max2, r2val, r2mask, sgn, width); + + if (wi::cmp (max1, min2, sgn) <= 0) /* r1 is less than r2. */ + { + if (code == MIN_EXPR) + { + *mask = r1mask; + *val = r1val; + } + else + { + *mask = r2mask; + *val = r2val; + } + } + else if (wi::cmp (min1, max2, sgn) >= 0) /* r2 is less than r1. */ + { + if (code == MIN_EXPR) + { + *mask = r2mask; + *val = r2val; + } + else + { + *mask = r1mask; + *val = r1val; + } + } + else + { + /* The result is either r1 or r2. */ + *mask = r1mask | r2mask | (r1val ^ r2val); + *val = r1val; + } + break; + } + default:; } } |