diff options
Diffstat (limited to 'gcc')
62 files changed, 6930 insertions, 104 deletions
diff --git a/gcc/Makefile.in b/gcc/Makefile.in index f5ee1e3..4a4bfc2 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -183,6 +183,8 @@ dfp.o-warn = -Wno-error bitmap.o-warn = -Wno-error # dominance.c contains a -Wc++compat warning. dominance.o-warn = -Wno-error +# graphite.c contains code calling cloog that has problems. +graphite.o-warn = -Wno-error # All warnings have to be shut off in stage1 if the compiler used then # isn't gcc; configure determines that. WARN_CFLAGS will be either @@ -281,6 +283,14 @@ ZLIBINC = @zlibinc@ GMPLIBS = @GMPLIBS@ GMPINC = @GMPINC@ +# How to find PPL +PPLLIBS = @PPLLIBS@ +PPLINC = @PPLINC@ + +# How to find CLOOG +CLOOGLIBS = @CLOOGLIBS@ +CLOOGINC = @CLOOGINC@ + CPPLIB = ../libcpp/libcpp.a CPPINC = -I$(srcdir)/../libcpp/include @@ -891,7 +901,8 @@ BUILD_LIBDEPS= $(BUILD_LIBIBERTY) # How to link with both our special library facilities # and the system's installed libraries. -LIBS = @LIBS@ $(CPPLIB) $(LIBINTL) $(LIBICONV) $(LIBIBERTY) $(LIBDECNUMBER) +LIBS = @LIBS@ $(CPPLIB) $(LIBINTL) $(LIBICONV) $(LIBIBERTY) $(LIBDECNUMBER) \ + $(GMPLIBS) $(CLOOGLIBS) $(PPLLIBS) # Any system libraries needed just for GNAT. SYSLIBS = @GNAT_LIBEXC@ @@ -920,7 +931,8 @@ BUILD_ERRORS = build/errors.o # libintl.h will be found in ../intl if we are using the included libintl. INCLUDES = -I. -I$(@D) -I$(srcdir) -I$(srcdir)/$(@D) \ -I$(srcdir)/../include @INCINTL@ \ - $(CPPINC) $(GMPINC) $(DECNUMINC) + $(CPPINC) $(GMPINC) $(DECNUMINC) \ + $(PPLINC) $(CLOOGINC) .c.o: $(CC) -c $(ALL_CFLAGS) $(ALL_CPPFLAGS) $< $(OUTPUT_OPTION) @@ -1094,6 +1106,7 @@ OBJS-common = \ global.o \ graph.o \ graphds.o \ + graphite.o \ gtype-desc.o \ haifa-sched.o \ hooks.o \ @@ -2340,6 +2353,10 @@ tree-data-ref.o: tree-data-ref.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ $(GGC_H) $(TREE_H) $(RTL_H) $(BASIC_BLOCK_H) $(DIAGNOSTIC_H) \ $(TREE_FLOW_H) $(TREE_DUMP_H) $(TIMEVAR_H) $(CFGLOOP_H) \ $(TREE_DATA_REF_H) $(SCEV_H) tree-pass.h tree-chrec.h langhooks.h +graphite.o: graphite.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ + $(GGC_H) $(TREE_H) $(RTL_H) $(BASIC_BLOCK_H) $(DIAGNOSTIC_H) $(TOPLEV_H) \ + $(TREE_FLOW_H) $(TREE_DUMP_H) $(TIMEVAR_H) $(CFGLOOP_H) $(GIMPLE_H) domwalk.h \ + $(TREE_DATA_REF_H) $(SCEV_H) tree-pass.h tree-chrec.h graphite.h pointer-set.h tree-vect-analyze.o: tree-vect-analyze.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ $(TM_H) $(GGC_H) $(OPTABS_H) $(TREE_H) $(RECOG_H) $(BASIC_BLOCK_H) \ $(DIAGNOSTIC_H) $(TREE_FLOW_H) $(TREE_DUMP_H) $(TIMEVAR_H) $(CFGLOOP_H) \ diff --git a/gcc/cfgloop.c b/gcc/cfgloop.c index 0e95323..f6fe623 100644 --- a/gcc/cfgloop.c +++ b/gcc/cfgloop.c @@ -1620,3 +1620,18 @@ single_exit (const struct loop *loop) else return NULL; } + +/* Returns true when BB has an edge exiting LOOP. */ + +bool +is_loop_exit (struct loop *loop, basic_block bb) +{ + edge e; + edge_iterator ei; + + FOR_EACH_EDGE (e, ei, bb->preds) + if (loop_exit_edge_p (loop, e)) + return true; + + return false; +} diff --git a/gcc/cfgloop.h b/gcc/cfgloop.h index aa3e611..37bf993 100644 --- a/gcc/cfgloop.h +++ b/gcc/cfgloop.h @@ -228,6 +228,7 @@ extern int num_loop_insns (const struct loop *); extern int average_num_loop_insns (const struct loop *); extern unsigned get_loop_level (const struct loop *); extern bool loop_exit_edge_p (const struct loop *, const_edge); +extern bool is_loop_exit (struct loop *, basic_block); extern void mark_loop_exit_edges (void); /* Loops & cfg manipulation. */ @@ -284,6 +285,9 @@ extern bool can_duplicate_loop_p (const struct loop *loop); #define DLTHE_FLAG_COMPLETTE_PEEL 4 /* Update frequencies expecting a complete peeling. */ +extern edge create_empty_if_region_on_edge (edge, tree); +extern struct loop *create_empty_loop_on_edge (edge, tree, tree, tree, tree, + tree *, struct loop *); extern struct loop * duplicate_loop (struct loop *, struct loop *); extern bool duplicate_loop_to_header_edge (struct loop *, edge, unsigned, sbitmap, edge, diff --git a/gcc/cfgloopmanip.c b/gcc/cfgloopmanip.c index 025b5be10..d8979b4 100644 --- a/gcc/cfgloopmanip.c +++ b/gcc/cfgloopmanip.c @@ -30,6 +30,7 @@ along with GCC; see the file COPYING3. If not see #include "cfglayout.h" #include "cfghooks.h" #include "output.h" +#include "tree-flow.h" static void duplicate_subloops (struct loop *, struct loop *); static void copy_loops_to (struct loop **, int, @@ -466,6 +467,243 @@ scale_loop_frequencies (struct loop *loop, int num, int den) free (bbs); } +/* Recompute dominance information for basic blocks outside LOOP. */ + +static void +update_dominators_in_loop (struct loop *loop) +{ + VEC (basic_block, heap) *dom_bbs = NULL; + sbitmap seen; + basic_block *body; + unsigned i; + + seen = sbitmap_alloc (last_basic_block); + sbitmap_zero (seen); + body = get_loop_body (loop); + + for (i = 0; i < loop->num_nodes; i++) + SET_BIT (seen, body[i]->index); + + for (i = 0; i < loop->num_nodes; i++) + { + basic_block ldom; + + for (ldom = first_dom_son (CDI_DOMINATORS, body[i]); + ldom; + ldom = next_dom_son (CDI_DOMINATORS, ldom)) + if (!TEST_BIT (seen, ldom->index)) + { + SET_BIT (seen, ldom->index); + VEC_safe_push (basic_block, heap, dom_bbs, ldom); + } + } + + iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, false); + free (body); + free (seen); + VEC_free (basic_block, heap, dom_bbs); +} + +/* Creates an if region as shown above. CONDITION is used to create + the test for the if. + + | + | ------------- ------------- + | | pred_bb | | pred_bb | + | ------------- ------------- + | | | + | | | ENTRY_EDGE + | | ENTRY_EDGE V + | | ====> ------------- + | | | cond_bb | + | | | CONDITION | + | | ------------- + | V / \ + | ------------- e_false / \ e_true + | | succ_bb | V V + | ------------- ----------- ----------- + | | false_bb | | true_bb | + | ----------- ----------- + | \ / + | \ / + | V V + | ------------- + | | join_bb | + | ------------- + | | exit_edge (result) + | V + | ----------- + | | succ_bb | + | ----------- + | + */ + +edge +create_empty_if_region_on_edge (edge entry_edge, tree condition) +{ + + basic_block succ_bb, cond_bb, true_bb, false_bb, join_bb; + edge e_true, e_false, exit_edge; + gimple cond_stmt; + tree simple_cond; + gimple_stmt_iterator gsi; + + succ_bb = entry_edge->dest; + cond_bb = split_edge (entry_edge); + + /* Insert condition in cond_bb. */ + gsi = gsi_last_bb (cond_bb); + simple_cond = + force_gimple_operand_gsi (&gsi, condition, true, NULL, + false, GSI_NEW_STMT); + cond_stmt = gimple_build_cond_from_tree (simple_cond, NULL_TREE, NULL_TREE); + gsi = gsi_last_bb (cond_bb); + gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT); + + join_bb = split_edge (single_succ_edge (cond_bb)); + + e_true = single_succ_edge (cond_bb); + true_bb = split_edge (e_true); + + e_false = make_edge (cond_bb, join_bb, 0); + false_bb = split_edge (e_false); + + e_true->flags &= ~EDGE_FALLTHRU; + e_true->flags |= EDGE_TRUE_VALUE; + e_false->flags &= ~EDGE_FALLTHRU; + e_false->flags |= EDGE_FALSE_VALUE; + + set_immediate_dominator (CDI_DOMINATORS, cond_bb, entry_edge->src); + set_immediate_dominator (CDI_DOMINATORS, true_bb, cond_bb); + set_immediate_dominator (CDI_DOMINATORS, false_bb, cond_bb); + set_immediate_dominator (CDI_DOMINATORS, join_bb, cond_bb); + + exit_edge = single_succ_edge (join_bb); + + if (single_pred_p (exit_edge->dest)) + set_immediate_dominator (CDI_DOMINATORS, exit_edge->dest, join_bb); + + return exit_edge; +} + +/* create_empty_loop_on_edge + | + | ------------- ------------------------ + | | pred_bb | | pred_bb | + | ------------- | IV_0 = INITIAL_VALUE | + | | ------------------------ + | | ______ | ENTRY_EDGE + | | ENTRY_EDGE / V V + | | ====> | ----------------------------- + | | | | IV_BEFORE = phi (IV_0, IV) | + | | | | loop_header | + | V | | IV_BEFORE <= UPPER_BOUND | + | ------------- | -----------------------\----- + | | succ_bb | | | \ + | ------------- | | \ exit_e + | | V V--------- + | | -------------- | succ_bb | + | | | loop_latch | ---------- + | | |IV = IV_BEFORE + STRIDE + | | -------------- + | \ / + | \ ___ / + + Creates an empty loop as shown above, the IV_BEFORE is the SSA_NAME + that is used before the increment of IV. IV_BEFORE should be used for + adding code to the body that uses the IV. OUTER is the outer loop in + which the new loop should be inserted. */ + +struct loop * +create_empty_loop_on_edge (edge entry_edge, + tree initial_value, + tree stride, tree upper_bound, + tree iv, + tree *iv_before, + struct loop *outer) +{ + basic_block loop_header, loop_latch, succ_bb, pred_bb; + struct loop *loop; + int freq; + gcov_type cnt; + gimple_stmt_iterator gsi; + bool insert_after; + gimple_seq stmts; + gimple cond_expr; + tree exit_test; + edge exit_e; + int prob; + tree upper_bound_gimplified; + + gcc_assert (entry_edge && initial_value && stride && upper_bound && iv); + + /* Create header, latch and wire up the loop. */ + pred_bb = entry_edge->src; + loop_header = split_edge (entry_edge); + loop_latch = split_edge (single_succ_edge (loop_header)); + succ_bb = single_succ (loop_latch); + make_edge (loop_header, succ_bb, 0); + redirect_edge_succ_nodup (single_succ_edge (loop_latch), loop_header); + + /* Set immediate dominator information. */ + set_immediate_dominator (CDI_DOMINATORS, loop_header, pred_bb); + set_immediate_dominator (CDI_DOMINATORS, loop_latch, loop_header); + set_immediate_dominator (CDI_DOMINATORS, succ_bb, loop_header); + + /* Initialize a loop structure and put it in a loop hierarchy. */ + loop = alloc_loop (); + loop->header = loop_header; + loop->latch = loop_latch; + add_loop (loop, outer); + + /* TODO: Fix frequencies and counts. */ + freq = EDGE_FREQUENCY (entry_edge); + cnt = entry_edge->count; + + prob = REG_BR_PROB_BASE / 2; + + scale_loop_frequencies (loop, REG_BR_PROB_BASE - prob, REG_BR_PROB_BASE); + + /* Update dominators. */ + update_dominators_in_loop (loop); + + /* Construct IV code in loop. */ + initial_value = force_gimple_operand (initial_value, &stmts, true, iv); + if (stmts) + { + gsi_insert_seq_on_edge (loop_preheader_edge (loop), stmts); + gsi_commit_edge_inserts (); + } + + standard_iv_increment_position (loop, &gsi, &insert_after); + create_iv (initial_value, stride, iv, loop, &gsi, insert_after, + iv_before, NULL); + + /* Modify edge flags. */ + exit_e = single_exit (loop); + exit_e->flags = EDGE_LOOP_EXIT | EDGE_FALSE_VALUE; + single_pred_edge (loop_latch)->flags = EDGE_TRUE_VALUE; + + gsi = gsi_last_bb (exit_e->src); + + upper_bound_gimplified = + force_gimple_operand_gsi (&gsi, upper_bound, true, NULL, + false, GSI_NEW_STMT); + gsi = gsi_last_bb (exit_e->src); + + cond_expr = gimple_build_cond + (LE_EXPR, *iv_before, upper_bound_gimplified, NULL_TREE, NULL_TREE); + + exit_test = gimple_cond_lhs (cond_expr); + exit_test = force_gimple_operand_gsi (&gsi, exit_test, true, NULL, + false, GSI_NEW_STMT); + gimple_cond_set_lhs (cond_expr, exit_test); + gsi = gsi_last_bb (exit_e->src); + gsi_insert_after (&gsi, cond_expr, GSI_NEW_STMT); + + return loop; +} + /* Make area between HEADER_EDGE and LATCH_EDGE a loop by connecting latch to header and update loop tree and dominators accordingly. Everything between them plus LATCH_EDGE destination must @@ -483,10 +721,6 @@ loopify (edge latch_edge, edge header_edge, { basic_block succ_bb = latch_edge->dest; basic_block pred_bb = header_edge->src; - basic_block *body; - VEC (basic_block, heap) *dom_bbs; - unsigned i; - sbitmap seen; struct loop *loop = alloc_loop (); struct loop *outer = loop_outer (succ_bb->loop_father); int freq; @@ -538,35 +772,7 @@ loopify (edge latch_edge, edge header_edge, } scale_loop_frequencies (loop, false_scale, REG_BR_PROB_BASE); scale_loop_frequencies (succ_bb->loop_father, true_scale, REG_BR_PROB_BASE); - - /* Update dominators of blocks outside of LOOP. */ - dom_bbs = NULL; - seen = sbitmap_alloc (last_basic_block); - sbitmap_zero (seen); - body = get_loop_body (loop); - - for (i = 0; i < loop->num_nodes; i++) - SET_BIT (seen, body[i]->index); - - for (i = 0; i < loop->num_nodes; i++) - { - basic_block ldom; - - for (ldom = first_dom_son (CDI_DOMINATORS, body[i]); - ldom; - ldom = next_dom_son (CDI_DOMINATORS, ldom)) - if (!TEST_BIT (seen, ldom->index)) - { - SET_BIT (seen, ldom->index); - VEC_safe_push (basic_block, heap, dom_bbs, ldom); - } - } - - iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, false); - - free (body); - free (seen); - VEC_free (basic_block, heap, dom_bbs); + update_dominators_in_loop (loop); return loop; } diff --git a/gcc/common.opt b/gcc/common.opt index 87824f5..e7f8159 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -547,6 +547,22 @@ Common Report Var(flag_gcse_after_reload) Optimization Perform global common subexpression elimination after register allocation has finished +fgraphite +Common Report Var(flag_graphite) +Enable in and out of Graphite representation + +floop-strip-mine +Common Report Var(flag_loop_strip_mine) Optimization +Enable Loop Strip Mining transformation + +floop-interchange +Common Report Var(flag_loop_interchange) Optimization +Enable Loop Interchange transformation + +floop-block +Common Report Var(flag_loop_block) Optimization +Enable Loop Blocking transformation + fguess-branch-probability Common Report Var(flag_guess_branch_prob) Optimization Enable guessing of branch probabilities diff --git a/gcc/config.in b/gcc/config.in index f4604d2..17bdeb4 100644 --- a/gcc/config.in +++ b/gcc/config.in @@ -1327,6 +1327,12 @@ #endif +/* Define if cloog is in use. */ +#ifndef USED_FOR_TARGET +#undef HAVE_cloog +#endif + + /* Define as const if the declaration of iconv() needs const. */ #ifndef USED_FOR_TARGET #undef ICONV_CONST diff --git a/gcc/configure b/gcc/configure index 6605def..aff51d6 100755 --- a/gcc/configure +++ b/gcc/configure @@ -458,7 +458,7 @@ ac_includes_default="\ # include <unistd.h> #endif" -ac_subst_vars='SHELL PATH_SEPARATOR PACKAGE_NAME PACKAGE_TARNAME PACKAGE_VERSION PACKAGE_STRING PACKAGE_BUGREPORT exec_prefix prefix program_transform_name bindir sbindir libexecdir datadir sysconfdir sharedstatedir localstatedir libdir includedir oldincludedir infodir mandir build_alias host_alias target_alias DEFS ECHO_C ECHO_N ECHO_T LIBS build build_cpu build_vendor build_os host host_cpu host_vendor host_os target target_cpu target_vendor target_os target_noncanonical build_libsubdir build_subdir host_subdir target_subdir GENINSRC CC CFLAGS LDFLAGS CPPFLAGS ac_ct_CC EXEEXT OBJEXT GNATBIND ac_ct_GNATBIND GNATMAKE ac_ct_GNATMAKE NO_MINUS_C_MINUS_O OUTPUT_OPTION CPP EGREP loose_warn strict_warn warn_cflags nocommon_flag TREEBROWSER valgrind_path valgrind_path_defines valgrind_command coverage_flags enable_multilib enable_decimal_float enable_fixed_point enable_shared TARGET_SYSTEM_ROOT TARGET_SYSTEM_ROOT_DEFINE CROSS_SYSTEM_HEADER_DIR onestep PKGVERSION REPORT_BUGS_TO REPORT_BUGS_TEXI datarootdir docdir htmldir SET_MAKE AWK LN_S LN RANLIB ac_ct_RANLIB ranlib_flags INSTALL INSTALL_PROGRAM INSTALL_DATA make_compare_target have_mktemp_command MAKEINFO BUILD_INFO GENERATED_MANPAGES FLEX BISON NM AR COLLECT2_LIBS GNAT_LIBEXC LDEXP_LIB TARGET_GETGROUPS_T LIBICONV LTLIBICONV LIBICONV_DEP manext objext gthread_flags extra_modes_file extra_opt_files USE_NLS LIBINTL LIBINTL_DEP INCINTL XGETTEXT GMSGFMT POSUB CATALOGS DATADIRNAME INSTOBJEXT GENCAT CATOBJEXT CROSS ALL SYSTEM_HEADER_DIR inhibit_libc CC_FOR_BUILD BUILD_CFLAGS BUILD_LDFLAGS STMP_FIXINC STMP_FIXPROTO collect2 LIBTOOL SED FGREP GREP LD DUMPBIN ac_ct_DUMPBIN ac_ct_AR STRIP ac_ct_STRIP lt_ECHO objdir enable_fast_install gcc_cv_as ORIGINAL_AS_FOR_TARGET gcc_cv_ld ORIGINAL_LD_FOR_TARGET gcc_cv_nm ORIGINAL_NM_FOR_TARGET gcc_cv_objdump libgcc_visibility GGC zlibdir zlibinc MAINT gcc_tooldir dollar slibdir subdirs srcdir all_compilers all_gtfiles all_lang_makefrags all_lang_makefiles all_languages all_selected_languages build_exeext build_install_headers_dir build_xm_file_list build_xm_include_list build_xm_defines build_file_translate check_languages cpp_install_dir xmake_file tmake_file extra_gcc_objs extra_headers_list extra_objs extra_parts extra_passes extra_programs float_h_file gcc_config_arguments gcc_gxx_include_dir host_exeext host_xm_file_list host_xm_include_list host_xm_defines out_host_hook_obj install lang_opt_files lang_specs_files lang_tree_files local_prefix md_file objc_boehm_gc out_file out_object_file thread_file tm_file_list tm_include_list tm_defines tm_p_file_list tm_p_include_list xm_file_list xm_include_list xm_defines c_target_objs cxx_target_objs fortran_target_objs target_cpu_default GMPLIBS GMPINC LIBOBJS LTLIBOBJS' +ac_subst_vars='SHELL PATH_SEPARATOR PACKAGE_NAME PACKAGE_TARNAME PACKAGE_VERSION PACKAGE_STRING PACKAGE_BUGREPORT exec_prefix prefix program_transform_name bindir sbindir libexecdir datadir sysconfdir sharedstatedir localstatedir libdir includedir oldincludedir infodir mandir build_alias host_alias target_alias DEFS ECHO_C ECHO_N ECHO_T LIBS build build_cpu build_vendor build_os host host_cpu host_vendor host_os target target_cpu target_vendor target_os target_noncanonical build_libsubdir build_subdir host_subdir target_subdir GENINSRC CC CFLAGS LDFLAGS CPPFLAGS ac_ct_CC EXEEXT OBJEXT GNATBIND ac_ct_GNATBIND GNATMAKE ac_ct_GNATMAKE NO_MINUS_C_MINUS_O OUTPUT_OPTION CPP EGREP loose_warn strict_warn warn_cflags nocommon_flag TREEBROWSER valgrind_path valgrind_path_defines valgrind_command coverage_flags enable_multilib enable_decimal_float enable_fixed_point enable_shared TARGET_SYSTEM_ROOT TARGET_SYSTEM_ROOT_DEFINE CROSS_SYSTEM_HEADER_DIR onestep PKGVERSION REPORT_BUGS_TO REPORT_BUGS_TEXI datarootdir docdir htmldir SET_MAKE AWK LN_S LN RANLIB ac_ct_RANLIB ranlib_flags INSTALL INSTALL_PROGRAM INSTALL_DATA make_compare_target have_mktemp_command MAKEINFO BUILD_INFO GENERATED_MANPAGES FLEX BISON NM AR COLLECT2_LIBS GNAT_LIBEXC LDEXP_LIB TARGET_GETGROUPS_T LIBICONV LTLIBICONV LIBICONV_DEP manext objext gthread_flags extra_modes_file extra_opt_files USE_NLS LIBINTL LIBINTL_DEP INCINTL XGETTEXT GMSGFMT POSUB CATALOGS DATADIRNAME INSTOBJEXT GENCAT CATOBJEXT CROSS ALL SYSTEM_HEADER_DIR inhibit_libc CC_FOR_BUILD BUILD_CFLAGS BUILD_LDFLAGS STMP_FIXINC STMP_FIXPROTO collect2 LIBTOOL SED FGREP GREP LD DUMPBIN ac_ct_DUMPBIN ac_ct_AR STRIP ac_ct_STRIP lt_ECHO objdir enable_fast_install gcc_cv_as ORIGINAL_AS_FOR_TARGET gcc_cv_ld ORIGINAL_LD_FOR_TARGET gcc_cv_nm ORIGINAL_NM_FOR_TARGET gcc_cv_objdump libgcc_visibility GGC zlibdir zlibinc MAINT gcc_tooldir dollar slibdir subdirs srcdir all_compilers all_gtfiles all_lang_makefrags all_lang_makefiles all_languages all_selected_languages build_exeext build_install_headers_dir build_xm_file_list build_xm_include_list build_xm_defines build_file_translate check_languages cpp_install_dir xmake_file tmake_file extra_gcc_objs extra_headers_list extra_objs extra_parts extra_passes extra_programs float_h_file gcc_config_arguments gcc_gxx_include_dir host_exeext host_xm_file_list host_xm_include_list host_xm_defines out_host_hook_obj install lang_opt_files lang_specs_files lang_tree_files local_prefix md_file objc_boehm_gc out_file out_object_file thread_file tm_file_list tm_include_list tm_defines tm_p_file_list tm_p_include_list xm_file_list xm_include_list xm_defines c_target_objs cxx_target_objs fortran_target_objs target_cpu_default GMPLIBS GMPINC PPLLIBS PPLINC CLOOGLIBS CLOOGINC GRAPHITE LIBOBJS LTLIBOBJS' ac_subst_files='language_hooks' ac_pwd=`pwd` @@ -928,6 +928,22 @@ ac_env_GMPINC_set=${GMPINC+set} ac_env_GMPINC_value=$GMPINC ac_cv_env_GMPINC_set=${GMPINC+set} ac_cv_env_GMPINC_value=$GMPINC +ac_env_PPLLIBS_set=${PPLLIBS+set} +ac_env_PPLLIBS_value=$PPLLIBS +ac_cv_env_PPLLIBS_set=${PPLLIBS+set} +ac_cv_env_PPLLIBS_value=$PPLLIBS +ac_env_PPLINC_set=${PPLINC+set} +ac_env_PPLINC_value=$PPLINC +ac_cv_env_PPLINC_set=${PPLINC+set} +ac_cv_env_PPLINC_value=$PPLINC +ac_env_CLOOGLIBS_set=${CLOOGLIBS+set} +ac_env_CLOOGLIBS_value=$CLOOGLIBS +ac_cv_env_CLOOGLIBS_set=${CLOOGLIBS+set} +ac_cv_env_CLOOGLIBS_value=$CLOOGLIBS +ac_env_CLOOGINC_set=${CLOOGINC+set} +ac_env_CLOOGINC_value=$CLOOGINC +ac_cv_env_CLOOGINC_set=${CLOOGINC+set} +ac_cv_env_CLOOGINC_value=$CLOOGINC # # Report the --help message. @@ -1117,6 +1133,10 @@ Some influential environment variables: CPP C preprocessor GMPLIBS How to link GMP GMPINC How to find GMP include files + PPLLIBS How to link PPL + PPLINC How to find PPL include files + CLOOGLIBS How to link CLOOG + CLOOGINC How to find CLOOG include files Use these variables to override the choices made by `configure' or to help it to find libraries and programs with nonstandard names/locations. @@ -14721,13 +14741,13 @@ if test "${lt_cv_nm_interface+set}" = set; then else lt_cv_nm_interface="BSD nm" echo "int some_variable = 0;" > conftest.$ac_ext - (eval echo "\"\$as_me:14724: $ac_compile\"" >&5) + (eval echo "\"\$as_me:14744: $ac_compile\"" >&5) (eval "$ac_compile" 2>conftest.err) cat conftest.err >&5 - (eval echo "\"\$as_me:14727: $NM \\\"conftest.$ac_objext\\\"\"" >&5) + (eval echo "\"\$as_me:14747: $NM \\\"conftest.$ac_objext\\\"\"" >&5) (eval "$NM \"conftest.$ac_objext\"" 2>conftest.err > conftest.out) cat conftest.err >&5 - (eval echo "\"\$as_me:14730: output\"" >&5) + (eval echo "\"\$as_me:14750: output\"" >&5) cat conftest.out >&5 if $GREP 'External.*some_variable' conftest.out > /dev/null; then lt_cv_nm_interface="MS dumpbin" @@ -15782,7 +15802,7 @@ ia64-*-hpux*) ;; *-*-irix6*) # Find out which ABI we are using. - echo '#line 15785 "configure"' > conftest.$ac_ext + echo '#line 15805 "configure"' > conftest.$ac_ext if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 (eval $ac_compile) 2>&5 ac_status=$? @@ -16402,11 +16422,11 @@ else -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` - (eval echo "\"\$as_me:16405: $lt_compile\"" >&5) + (eval echo "\"\$as_me:16425: $lt_compile\"" >&5) (eval "$lt_compile" 2>conftest.err) ac_status=$? cat conftest.err >&5 - echo "$as_me:16409: \$? = $ac_status" >&5 + echo "$as_me:16429: \$? = $ac_status" >&5 if (exit $ac_status) && test -s "$ac_outfile"; then # The compiler can only warn and ignore the option if not recognized # So say no if there are warnings other than the usual output. @@ -16724,11 +16744,11 @@ else -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` - (eval echo "\"\$as_me:16727: $lt_compile\"" >&5) + (eval echo "\"\$as_me:16747: $lt_compile\"" >&5) (eval "$lt_compile" 2>conftest.err) ac_status=$? cat conftest.err >&5 - echo "$as_me:16731: \$? = $ac_status" >&5 + echo "$as_me:16751: \$? = $ac_status" >&5 if (exit $ac_status) && test -s "$ac_outfile"; then # The compiler can only warn and ignore the option if not recognized # So say no if there are warnings other than the usual output. @@ -16829,11 +16849,11 @@ else -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` - (eval echo "\"\$as_me:16832: $lt_compile\"" >&5) + (eval echo "\"\$as_me:16852: $lt_compile\"" >&5) (eval "$lt_compile" 2>out/conftest.err) ac_status=$? cat out/conftest.err >&5 - echo "$as_me:16836: \$? = $ac_status" >&5 + echo "$as_me:16856: \$? = $ac_status" >&5 if (exit $ac_status) && test -s out/conftest2.$ac_objext then # The compiler can only warn and ignore the option if not recognized @@ -16884,11 +16904,11 @@ else -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` - (eval echo "\"\$as_me:16887: $lt_compile\"" >&5) + (eval echo "\"\$as_me:16907: $lt_compile\"" >&5) (eval "$lt_compile" 2>out/conftest.err) ac_status=$? cat out/conftest.err >&5 - echo "$as_me:16891: \$? = $ac_status" >&5 + echo "$as_me:16911: \$? = $ac_status" >&5 if (exit $ac_status) && test -s out/conftest2.$ac_objext then # The compiler can only warn and ignore the option if not recognized @@ -19681,7 +19701,7 @@ else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <<_LT_EOF -#line 19684 "configure" +#line 19704 "configure" #include "confdefs.h" #if HAVE_DLFCN_H @@ -19781,7 +19801,7 @@ else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <<_LT_EOF -#line 19784 "configure" +#line 19804 "configure" #include "confdefs.h" #if HAVE_DLFCN_H @@ -23997,6 +24017,21 @@ fi + + + + + +if test "x${CLOOGLIBS}" != "x" ; then + +cat >>confdefs.h <<\_ACEOF +#define HAVE_cloog 1 +_ACEOF + + GRAPHITE=graphite.o +fi + + # Configure the subdirectories # AC_CONFIG_SUBDIRS($subdirs) @@ -24825,6 +24860,11 @@ s,@fortran_target_objs@,$fortran_target_objs,;t t s,@target_cpu_default@,$target_cpu_default,;t t s,@GMPLIBS@,$GMPLIBS,;t t s,@GMPINC@,$GMPINC,;t t +s,@PPLLIBS@,$PPLLIBS,;t t +s,@PPLINC@,$PPLINC,;t t +s,@CLOOGLIBS@,$CLOOGLIBS,;t t +s,@CLOOGINC@,$CLOOGINC,;t t +s,@GRAPHITE@,$GRAPHITE,;t t s,@LIBOBJS@,$LIBOBJS,;t t s,@LTLIBOBJS@,$LTLIBOBJS,;t t /@language_hooks@/r $language_hooks diff --git a/gcc/configure.ac b/gcc/configure.ac index a42ac5c..f4cff3d 100644 --- a/gcc/configure.ac +++ b/gcc/configure.ac @@ -3883,6 +3883,15 @@ fi AC_ARG_VAR(GMPLIBS,[How to link GMP]) AC_ARG_VAR(GMPINC,[How to find GMP include files]) +AC_ARG_VAR(PPLLIBS,[How to link PPL]) +AC_ARG_VAR(PPLINC,[How to find PPL include files]) + +AC_ARG_VAR(CLOOGLIBS,[How to link CLOOG]) +AC_ARG_VAR(CLOOGINC,[How to find CLOOG include files]) +if test "x${CLOOGLIBS}" != "x" ; then + AC_DEFINE(HAVE_cloog, 1, [Define if cloog is in use.]) +fi + # Configure the subdirectories # AC_CONFIG_SUBDIRS($subdirs) diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index aa73f82..5768f08 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -338,6 +338,7 @@ Objective-C and Objective-C++ Dialects}. -fira-coalesce -fno-ira-share-save-slots @gol -fno-ira-share-spill-slots -fira-verbose=@var{n} @gol -fivopts -fkeep-inline-functions -fkeep-static-consts @gol +-floop-block -floop-interchange -floop-strip-mine @gol -fmerge-all-constants -fmerge-constants -fmodulo-sched @gol -fmodulo-sched-allow-regmoves -fmove-loop-invariants -fmudflap @gol -fmudflapir -fmudflapth -fno-branch-count-reg -fno-default-inline @gol @@ -6012,6 +6013,81 @@ at @option{-O} and higher. Perform linear loop transformations on tree. This flag can improve cache performance and allow further loop optimizations to take place. +@item -floop-interchange +Perform loop interchange transformations on loops. Interchanging two +nested loops switches the inner and outer loops. For example, given a +loop like: +@smallexample +DO J = 1, M + DO I = 1, N + A(J, I) = A(J, I) * C + ENDDO +ENDDO +@end smallexample +loop interchange will transform the loop as if the user had written: +@smallexample +DO I = 1, N + DO J = 1, M + A(J, I) = A(J, I) * C + ENDDO +ENDDO +@end smallexample +which can be beneficial when @code{N} is larger than the caches, +because in Fortran, the elements of an array are stored in memory +contiguously by column, and the original loop iterates over rows, +potentially creating at each access a cache miss. This optimization +applies to all the languages supported by GCC and is not limited to +Fortran. + +@item -floop-strip-mine +Perform loop strip mining transformations on loops. Strip mining +splits a loop into two nested loops. The outer loop has strides +equal to the strip size and the inner loop has strides of the +original loop within a strip. For example, given a loop like: +@smallexample +DO I = 1, N + A(I) = A(I) + C +ENDDO +@end smallexample +loop strip mining will transform the loop as if the user had written: +@smallexample +DO II = 1, N, 4 + DO I = II, min (II + 4, N) + A(I) = A(I) + C + ENDDO +ENDDO +@end smallexample +This optimization applies to all the languages supported by GCC and is +not limited to Fortran. + +@item -floop-block +Perform loop blocking transformations on loops. Blocking strip mines +each loop in the loop nest such that the memory accesses of the +element loops fit inside caches. For example, given a loop like: +@smallexample +DO I = 1, N + DO J = 1, M + A(J, I) = B(I) + C(J) + ENDDO +ENDDO +@end smallexample +loop blocking will transform the loop as if the user had written: +@smallexample +DO II = 1, N, 64 + DO JJ = 1, M, 64 + DO I = II, min (II + 64, N) + DO J = JJ, min (JJ + 64, M) + A(J, I) = B(I) + C(J) + ENDDO + ENDDO + ENDDO +ENDDO +@end smallexample +which can be beneficial when @code{M} is larger than the caches, +because the innermost loop will iterate over a smaller amount of data +that can be kept in the caches. This optimization applies to all the +languages supported by GCC and is not limited to Fortran. + @item -fcheck-data-deps @opindex fcheck-data-deps Compare the results of several data dependence analyzers. This option diff --git a/gcc/gdbinit.in b/gcc/gdbinit.in index e8d10e2..9df289c 100644 --- a/gcc/gdbinit.in +++ b/gcc/gdbinit.in @@ -40,6 +40,15 @@ Print the tree that is $ in C syntax. Works only when an inferior is executing. end +define pgg +set debug_gimple_stmt ($) +end + +document pgg +Print the Gimple statement that is $ in C syntax. +Works only when an inferior is executing. +end + define pgs set debug_generic_stmt ($) end diff --git a/gcc/gimple.h b/gcc/gimple.h index e8c0ad6..ca8e644 100644 --- a/gcc/gimple.h +++ b/gcc/gimple.h @@ -38,6 +38,12 @@ DEF_VEC_P(gimple_seq); DEF_VEC_ALLOC_P(gimple_seq,gc); DEF_VEC_ALLOC_P(gimple_seq,heap); +/* For each block, the PHI nodes that need to be rewritten are stored into + these vectors. */ +typedef VEC(gimple, heap) *gimple_vec; +DEF_VEC_P (gimple_vec); +DEF_VEC_ALLOC_P (gimple_vec, heap); + enum gimple_code { #define DEFGSCODE(SYM, STRING, STRUCT) SYM, #include "gimple.def" diff --git a/gcc/graphite.c b/gcc/graphite.c new file mode 100644 index 0000000..86b0eae --- /dev/null +++ b/gcc/graphite.c @@ -0,0 +1,4806 @@ +/* Gimple Represented as Polyhedra. + Copyright (C) 2006, 2007, 2008 Free Software Foundation, Inc. + Contributed by Sebastian Pop <sebastian.pop@inria.fr>. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +/* This pass converts GIMPLE to GRAPHITE, performs some loop + transformations and then converts the resulting representation back + to GIMPLE. + + An early description of this pass can be found in the GCC Summit'06 + paper "GRAPHITE: Polyhedral Analyses and Optimizations for GCC". + The wiki page http://gcc.gnu.org/wiki/Graphite contains pointers to + the related work. + + One important document to read is CLooG's internal manual: + http://repo.or.cz/w/cloog-ppl.git?a=blob_plain;f=doc/cloog.texi;hb=HEAD + that describes the data structure of loops used in this file, and + the functions that are used for transforming the code. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "ggc.h" +#include "tree.h" +#include "rtl.h" +#include "basic-block.h" +#include "diagnostic.h" +#include "tree-flow.h" +#include "toplev.h" +#include "tree-dump.h" +#include "timevar.h" +#include "cfgloop.h" +#include "tree-chrec.h" +#include "tree-data-ref.h" +#include "tree-scalar-evolution.h" +#include "tree-pass.h" +#include "domwalk.h" +#include "pointer-set.h" +#include "gimple.h" + +#ifdef HAVE_cloog +#include "cloog/cloog.h" +#include "graphite.h" + +static VEC (scop_p, heap) *current_scops; + +/* Debug the list of old induction variables for this SCOP. */ + +void +debug_oldivs (scop_p scop) +{ + int i; + name_tree oldiv; + + fprintf (stderr, "Old IVs:"); + + for (i = 0; VEC_iterate (name_tree, SCOP_OLDIVS (scop), i, oldiv); i++) + { + fprintf (stderr, "("); + print_generic_expr (stderr, oldiv->t, 0); + fprintf (stderr, ", %s, %d)\n", oldiv->name, oldiv->loop->num); + } + fprintf (stderr, "\n"); +} + +/* Debug the loops around basic block GB. */ + +void +debug_loop_vec (graphite_bb_p gb) +{ + int i; + loop_p loop; + + fprintf (stderr, "Loop Vec:"); + + for (i = 0; VEC_iterate (loop_p, GBB_LOOPS (gb), i, loop); i++) + fprintf (stderr, "%d: %d, ", i, loop ? loop->num : -1); + + fprintf (stderr, "\n"); +} + +/* Push (IV, NAME) on STACK. */ + +static void +loop_iv_stack_push (loop_iv_stack stack, tree iv, const char *name) +{ + name_tree named_iv = XNEW (struct name_tree); + + named_iv->t = iv; + named_iv->name = name; + VEC_safe_push (name_tree, heap, *stack, named_iv); +} + +/* Pops an element out of STACK. */ + +static void +loop_iv_stack_pop (loop_iv_stack stack) +{ + VEC_pop (name_tree, *stack); +} + +/* Get the IV at INDEX in STACK. */ + +static tree +loop_iv_stack_get_iv (loop_iv_stack stack, int index) +{ + name_tree named_iv = VEC_index (name_tree, *stack, index); + + return named_iv->t; +} + +/* Get the IV from its NAME in STACK. */ + +static tree +loop_iv_stack_get_iv_from_name (loop_iv_stack stack, const char* name) +{ + int i; + name_tree iv; + + for (i = 0; VEC_iterate (name_tree, *stack, i, iv); i++) + if (!strcmp (name, iv->name)) + return iv->t; + + return NULL; +} + +/* Prints on stderr the contents of STACK. */ + +void +loop_iv_stack_debug (loop_iv_stack stack) +{ + int i; + name_tree iv; + bool first = true; + + fprintf (stderr, "("); + + for (i = 0; VEC_iterate (name_tree, *stack, i, iv); i++) + { + if (first) + first = false; + else + fprintf (stderr, " "); + fprintf (stderr, "%s:", iv->name); + print_generic_expr (stderr, iv->t, 0); + } + + fprintf (stderr, ")\n"); +} + +/* In SCOP, get the induction variable from NAME. OLD is the original + loop that contained the definition of NAME. */ + +static name_tree +get_old_iv_from_ssa_name (scop_p scop, loop_p old, tree name) +{ + tree var = SSA_NAME_VAR (name); + int i; + name_tree oldiv; + + for (i = 0; VEC_iterate (name_tree, SCOP_OLDIVS (scop), i, oldiv); i++) + { + loop_p current = old; + + while (current) + { + if (var == oldiv->t + && oldiv->loop == current) + return oldiv; + + current = loop_outer (current); + } + } + return NULL; + +} + +/* Returns a new loop_to_cloog_loop_str structure. */ + +static inline struct loop_to_cloog_loop_str * +new_loop_to_cloog_loop_str (int loop_num, + int loop_position, + CloogLoop *cloog_loop) +{ + struct loop_to_cloog_loop_str *result; + + result = XNEW (struct loop_to_cloog_loop_str); + result->loop_num = loop_num; + result->cloog_loop = cloog_loop; + result->loop_position = loop_position; + + return result; +} + +/* Hash function for SCOP_LOOP2CLOOG_LOOP hash table. */ + +static hashval_t +hash_loop_to_cloog_loop (const void *elt) +{ + return ((const struct loop_to_cloog_loop_str *) elt)->loop_num; +} + +/* Equality function for SCOP_LOOP2CLOOG_LOOP hash table. */ + +static int +eq_loop_to_cloog_loop (const void *el1, const void *el2) +{ + const struct loop_to_cloog_loop_str *elt1, *elt2; + + elt1 = (const struct loop_to_cloog_loop_str *) el1; + elt2 = (const struct loop_to_cloog_loop_str *) el2; + return elt1->loop_num == elt2->loop_num; +} + +/* Compares two graphite bbs and returns an integer less than, equal to, or + greater than zero if the first argument is considered to be respectively + less than, equal to, or greater than the second. + We compare using the lexicographic order of the static schedules. */ + +static int +gbb_compare (const void *p_1, const void *p_2) +{ + const struct graphite_bb *const gbb_1 + = *(const struct graphite_bb *const*) p_1; + const struct graphite_bb *const gbb_2 + = *(const struct graphite_bb *const*) p_2; + + return lambda_vector_compare (GBB_STATIC_SCHEDULE (gbb_1), + gbb_nb_loops (gbb_1) + 1, + GBB_STATIC_SCHEDULE (gbb_2), + gbb_nb_loops (gbb_2) + 1); +} + +/* Sort graphite bbs in SCOP. */ + +static void +graphite_sort_gbbs (scop_p scop) +{ + VEC (graphite_bb_p, heap) *bbs = SCOP_BBS (scop); + + qsort (VEC_address (graphite_bb_p, bbs), + VEC_length (graphite_bb_p, bbs), + sizeof (graphite_bb_p), gbb_compare); +} + +/* Dump conditions of a graphite basic block GBB on FILE. */ + +static void +dump_gbb_conditions (FILE *file, graphite_bb_p gbb) +{ + int i; + gimple stmt; + VEC (gimple, heap) *conditions = GBB_CONDITIONS (gbb); + + if (VEC_empty (gimple, conditions)) + return; + + fprintf (file, "\tbb %d\t: cond = {", GBB_BB (gbb)->index); + + for (i = 0; VEC_iterate (gimple, conditions, i, stmt); i++) + print_gimple_stmt (file, stmt, 0, 0); + + fprintf (file, "}\n"); +} + +/* Converts the graphite scheduling function into a cloog scattering + matrix. This scattering matrix is used to limit the possible cloog + output to valid programs in respect to the scheduling function. + + SCATTERING_DIMENSIONS specifies the dimensionality of the scattering + matrix. CLooG 0.14.0 and previous versions require, that all scattering + functions of one CloogProgram have the same dimensionality, therefore we + allow to specify it. (Should be removed in future versions) */ + +static CloogMatrix * +schedule_to_scattering (graphite_bb_p gb, int scattering_dimensions) +{ + int i; + scop_p scop = GBB_SCOP (gb); + + int nb_iterators = gbb_nb_loops (gb); + + /* The cloog scattering matrix consists of these colums: + 1 col = Eq/Inq, + scattering_dimensions cols = Scattering dimensions, + nb_iterators cols = bb's iterators, + scop_nb_params cols = Parameters, + 1 col = Constant 1. + + Example: + + scattering_dimensions = 5 + max_nb_iterators = 2 + nb_iterators = 1 + scop_nb_params = 2 + + Schedule: + ? i + 4 5 + + Scattering Matrix: + s1 s2 s3 s4 s5 i p1 p2 1 + 1 0 0 0 0 0 0 0 -4 = 0 + 0 1 0 0 0 -1 0 0 0 = 0 + 0 0 1 0 0 0 0 0 -5 = 0 */ + int nb_params = scop_nb_params (scop); + int nb_cols = 1 + scattering_dimensions + nb_iterators + nb_params + 1; + int col_const = nb_cols - 1; + int col_iter_offset = 1 + scattering_dimensions; + + CloogMatrix *scat = cloog_matrix_alloc (scattering_dimensions, nb_cols); + + gcc_assert (scattering_dimensions >= nb_iterators * 2 + 1); + + /* Initialize the identity matrix. */ + for (i = 0; i < scattering_dimensions; i++) + value_set_si (scat->p[i][i + 1], 1); + + /* Textual order outside the first loop */ + value_set_si (scat->p[0][col_const], -GBB_STATIC_SCHEDULE (gb)[0]); + + /* For all surrounding loops. */ + for (i = 0; i < nb_iterators; i++) + { + int schedule = GBB_STATIC_SCHEDULE (gb)[i + 1]; + + /* Iterations of this loop. */ + value_set_si (scat->p[2 * i + 1][col_iter_offset + i], -1); + + /* Textual order inside this loop. */ + value_set_si (scat->p[2 * i + 2][col_const], -schedule); + } + + return scat; +} + +/* Print the schedules of GB to FILE with INDENT white spaces before. + VERBOSITY determines how verbose the code pretty printers are. */ + +void +print_graphite_bb (FILE *file, graphite_bb_p gb, int indent, int verbosity) +{ + CloogMatrix *scattering; + int i; + loop_p loop; + fprintf (file, "\nGBB (\n"); + + print_loops_bb (file, GBB_BB (gb), indent+2, verbosity); + + if (GBB_DOMAIN (gb)) + { + fprintf (file, " (domain: \n"); + cloog_matrix_print (dump_file, GBB_DOMAIN (gb)); + fprintf (file, " )\n"); + } + + if (GBB_STATIC_SCHEDULE (gb)) + { + fprintf (file, " (static schedule: "); + print_lambda_vector (file, GBB_STATIC_SCHEDULE (gb), + gbb_nb_loops (gb) + 1); + fprintf (file, " )\n"); + } + + if (GBB_LOOPS (gb)) + { + fprintf (file, " (contained loops: \n"); + for (i = 0; VEC_iterate (loop_p, GBB_LOOPS (gb), i, loop); i++) + if (loop == NULL) + fprintf (file, " iterator %d => NULL \n", i); + else + fprintf (file, " iterator %d => loop %d \n", i, + loop->num); + fprintf (file, " )\n"); + } + + if (GBB_DATA_REFS (gb)) + dump_data_references (file, GBB_DATA_REFS (gb)); + + if (GBB_CONDITIONS (gb)) + { + fprintf (file, " (conditions: \n"); + dump_gbb_conditions (dump_file, gb); + fprintf (file, " )\n"); + } + + if (GBB_SCOP (gb) + && GBB_STATIC_SCHEDULE (gb)) + { + fprintf (file, " (scattering: \n"); + scattering = schedule_to_scattering (gb, 2 * gbb_nb_loops (gb) + 1); + cloog_matrix_print (file, scattering); + cloog_matrix_free (scattering); + fprintf (file, " )\n"); + } + + fprintf (file, ")\n"); +} + +/* Print to STDERR the schedules of GB with VERBOSITY level. */ + +void +debug_gbb (graphite_bb_p gb, int verbosity) +{ + print_graphite_bb (stderr, gb, 0, verbosity); +} + + +/* Print SCOP to FILE. VERBOSITY determines how verbose the pretty + printers are. */ + +static void +print_scop (FILE *file, scop_p scop, int verbosity) +{ + if (scop == NULL) + return; + + fprintf (file, "\nSCoP_%d_%d (\n", + SCOP_ENTRY (scop)->index, SCOP_EXIT (scop)->index); + + fprintf (file, " (cloog: \n"); + cloog_program_print (file, SCOP_PROG (scop)); + fprintf (file, " )\n"); + + if (SCOP_BBS (scop)) + { + graphite_bb_p gb; + int i; + + for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gb); i++) + print_graphite_bb (file, gb, 0, verbosity); + } + + fprintf (file, ")\n"); +} + +/* Print all the SCOPs to FILE. VERBOSITY determines how verbose the + code pretty printers are. */ + +static void +print_scops (FILE *file, int verbosity) +{ + int i; + scop_p scop; + + for (i = 0; VEC_iterate (scop_p, current_scops, i, scop); i++) + print_scop (file, scop, verbosity); +} + +/* Debug SCOP. VERBOSITY determines how verbose the code pretty + printers are. */ + +void +debug_scop (scop_p scop, int verbosity) +{ + print_scop (stderr, scop, verbosity); +} + +/* Debug all SCOPs from CURRENT_SCOPS. VERBOSITY determines how + verbose the code pretty printers are. */ + +void +debug_scops (int verbosity) +{ + print_scops (stderr, verbosity); +} + +/* Return true when BB is contained in SCOP. */ + +static inline bool +bb_in_scop_p (basic_block bb, scop_p scop) +{ + return bitmap_bit_p (SCOP_BBS_B (scop), bb->index); +} + +/* Pretty print to FILE the SCOP in DOT format. */ + +static void +dot_scop_1 (FILE *file, scop_p scop) +{ + edge e; + edge_iterator ei; + basic_block bb; + basic_block entry = SCOP_ENTRY (scop); + basic_block exit = SCOP_EXIT (scop); + + fprintf (file, "digraph SCoP_%d_%d {\n", entry->index, + exit->index); + + FOR_ALL_BB (bb) + { + if (bb == entry) + fprintf (file, "%d [shape=triangle];\n", bb->index); + + if (bb == exit) + fprintf (file, "%d [shape=box];\n", bb->index); + + if (bb_in_scop_p (bb, scop)) + fprintf (file, "%d [color=red];\n", bb->index); + + FOR_EACH_EDGE (e, ei, bb->succs) + fprintf (file, "%d -> %d;\n", bb->index, e->dest->index); + } + + fputs ("}\n\n", file); +} + +/* Display SCOP using dotty. */ + +void +dot_scop (scop_p scop) +{ + dot_scop_1 (stderr, scop); +} + +/* Pretty print all SCoPs in DOT format and mark them with different colors. + If there are not enough colors, paint later SCoPs gray. + Special nodes: + - "*" after the node number: entry of a SCoP, + - "#" after the node number: exit of a SCoP, + - "()" entry or exit not part of SCoP. */ + +static void +dot_all_scops_1 (FILE *file) +{ + basic_block bb; + edge e; + edge_iterator ei; + scop_p scop; + const char* color; + int i; + + /* Disable debugging while printing graph. */ + int tmp_dump_flags = dump_flags; + dump_flags = 0; + + fprintf (file, "digraph all {\n"); + + FOR_ALL_BB (bb) + { + int part_of_scop = false; + + /* Use HTML for every bb label. So we are able to print bbs + which are part of two different SCoPs, with two different + background colors. */ + fprintf (file, "%d [label=<\n <TABLE BORDER=\"0\" CELLBORDER=\"1\" ", + bb->index); + fprintf (file, "CELLSPACING=\"0\">\n"); + + /* Select color for SCoP. */ + for (i = 0; VEC_iterate (scop_p, current_scops, i, scop); i++) + if (bb_in_scop_p (bb, scop) + || (SESE_EXIT (SCOP_REGION (scop)) && SCOP_EXIT (scop) == bb) + || (SESE_ENTRY (SCOP_REGION (scop)) && SCOP_ENTRY (scop) == bb)) + { + switch (i % 17) + { + case 0: /* red */ + color = "#e41a1c"; + break; + case 1: /* blue */ + color = "#377eb8"; + break; + case 2: /* green */ + color = "#4daf4a"; + break; + case 3: /* purple */ + color = "#984ea3"; + break; + case 4: /* orange */ + color = "#ff7f00"; + break; + case 5: /* yellow */ + color = "#ffff33"; + break; + case 6: /* brown */ + color = "#a65628"; + break; + case 7: /* rose */ + color = "#f781bf"; + break; + case 8: + color = "#8dd3c7"; + break; + case 9: + color = "#ffffb3"; + break; + case 10: + color = "#bebada"; + break; + case 11: + color = "#fb8072"; + break; + case 12: + color = "#80b1d3"; + break; + case 13: + color = "#fdb462"; + break; + case 14: + color = "#b3de69"; + break; + case 15: + color = "#fccde5"; + break; + case 16: + color = "#bc80bd"; + break; + default: /* gray */ + color = "#999999"; + } + + fprintf (file, " <TR><TD WIDTH=\"50\" BGCOLOR=\"%s\">", color); + + if (!bb_in_scop_p (bb, scop)) + fprintf (file, " ("); + + if (SESE_ENTRY (SCOP_REGION (scop)) + && SESE_EXIT (SCOP_REGION (scop)) + && bb == SCOP_ENTRY (scop) + && bb == SCOP_EXIT (scop)) + fprintf (file, " %d*# ", bb->index); + else if (SESE_ENTRY (SCOP_REGION (scop)) + && bb == SCOP_ENTRY (scop)) + fprintf (file, " %d* ", bb->index); + else if (SESE_EXIT (SCOP_REGION (scop)) + && bb == SCOP_EXIT (scop)) + fprintf (file, " %d# ", bb->index); + else + fprintf (file, " %d ", bb->index); + + if (!bb_in_scop_p (bb, scop)) + fprintf (file, ")"); + + fprintf (file, "</TD></TR>\n"); + + part_of_scop = true; + } + + if (!part_of_scop) + { + fprintf (file, " <TR><TD WIDTH=\"50\" BGCOLOR=\"#ffffff\">"); + fprintf (file, " %d </TD></TR>\n", bb->index); + } + + fprintf (file, " </TABLE>>, shape=box, style=\"setlinewidth(0)\"]\n"); + } + + FOR_ALL_BB (bb) + { + FOR_EACH_EDGE (e, ei, bb->succs) + fprintf (file, "%d -> %d;\n", bb->index, e->dest->index); + } + + fputs ("}\n\n", file); + + /* Enable debugging again. */ + dump_flags = tmp_dump_flags; +} + +/* Display all SCoPs using dotty. */ + +void +dot_all_scops (void) +{ + /* When debugging, enable the following code. This cannot be used + in production compilers because it calls "system". */ +#if 0 + FILE *stream = fopen ("/tmp/allscops.dot", "w"); + gcc_assert (stream); + + dot_all_scops_1 (stream); + fclose (stream); + + system ("dotty /tmp/allscops.dot"); +#else + dot_all_scops_1 (stderr); +#endif +} + +/* Returns true when LOOP is in SCOP. */ + +static inline bool +loop_in_scop_p (struct loop *loop, scop_p scop) +{ + return (bb_in_scop_p (loop->header, scop) + && bb_in_scop_p (loop->latch, scop)); +} + +/* Returns the outermost loop in SCOP that contains BB. */ + +static struct loop * +outermost_loop_in_scop (scop_p scop, basic_block bb) +{ + struct loop *nest; + + nest = bb->loop_father; + while (loop_outer (nest) && loop_in_scop_p (loop_outer (nest), scop)) + nest = loop_outer (nest); + + return nest; +} + +/* Return true when EXPR is an affine function in LOOP with parameters + instantiated relative to outermost_loop. */ + +static bool +loop_affine_expr (struct loop *outermost_loop, struct loop *loop, tree expr) +{ + int n = outermost_loop->num; + tree scev = analyze_scalar_evolution (loop, expr); + + scev = instantiate_scev (outermost_loop, loop, scev); + + return (evolution_function_is_invariant_p (scev, n) + || evolution_function_is_affine_multivariate_p (scev, n)); +} + +/* Return true if the operand OP is simple. */ + +static bool +is_simple_operand (loop_p loop, gimple stmt, tree op) +{ + /* It is not a simple operand when it is a declaration, */ + if (DECL_P (op) + /* or a structure, */ + || AGGREGATE_TYPE_P (TREE_TYPE (op)) + /* or a memory access that cannot be analyzed by the data + reference analysis. */ + || ((handled_component_p (op) || INDIRECT_REF_P (op)) + && !stmt_simple_memref_p (loop, stmt, op))) + return false; + + return true; +} + +/* Return true only when STMT is simple enough for being handled by + Graphite. This depends on OUTERMOST_LOOP, as the parametetrs are + initialized relative to this loop. */ + +static bool +stmt_simple_for_scop_p (struct loop *outermost_loop, gimple stmt) +{ + basic_block bb = gimple_bb (stmt); + struct loop *loop = bb->loop_father; + + /* GIMPLE_ASM and GIMPLE_CALL may embed arbitrary side effects. + Calls have side-effects, except those to const or pure + functions. */ + if (gimple_has_volatile_ops (stmt) + || (gimple_code (stmt) == GIMPLE_CALL + && !(gimple_call_flags (stmt) & (ECF_CONST | ECF_PURE))) + || (gimple_code (stmt) == GIMPLE_ASM)) + return false; + + switch (gimple_code (stmt)) + { + case GIMPLE_RETURN: + case GIMPLE_LABEL: + return true; + + case GIMPLE_COND: + { + tree op; + ssa_op_iter op_iter; + enum tree_code code = gimple_cond_code (stmt); + + /* We can only handle this kind of conditional expressions. + For inequalities like "if (i != 3 * k)" we need unions of + polyhedrons. Expressions like "if (a)" or "if (a == 15)" need + them for the else branch. */ + if (!(code == LT_EXPR + || code == GT_EXPR + || code == LE_EXPR + || code == GE_EXPR)) + return false; + + if (!outermost_loop) + return false; + + FOR_EACH_SSA_TREE_OPERAND (op, stmt, op_iter, SSA_OP_ALL_USES) + if (!loop_affine_expr (outermost_loop, loop, op)) + return false; + + return true; + } + + case GIMPLE_ASSIGN: + { + enum tree_code code = gimple_assign_rhs_code (stmt); + + switch (get_gimple_rhs_class (code)) + { + case GIMPLE_UNARY_RHS: + case GIMPLE_SINGLE_RHS: + return (is_simple_operand (loop, stmt, gimple_assign_lhs (stmt)) + && is_simple_operand (loop, stmt, gimple_assign_rhs1 (stmt))); + + case GIMPLE_BINARY_RHS: + return (is_simple_operand (loop, stmt, gimple_assign_lhs (stmt)) + && is_simple_operand (loop, stmt, gimple_assign_rhs1 (stmt)) + && is_simple_operand (loop, stmt, gimple_assign_rhs2 (stmt))); + + case GIMPLE_INVALID_RHS: + default: + gcc_unreachable (); + } + } + + case GIMPLE_CALL: + { + size_t i; + size_t n = gimple_call_num_args (stmt); + tree lhs = gimple_call_lhs (stmt); + + for (i = 0; i < n; i++) + { + tree arg = gimple_call_arg (stmt, i); + + if (!(is_simple_operand (loop, stmt, lhs) + && is_simple_operand (loop, stmt, arg))) + return false; + } + + return true; + } + + default: + /* These nodes cut a new scope. */ + return false; + } + + return false; +} + +/* Returns the statement of BB that contains a harmful operation: that + can be a function call with side effects, data dependences that + cannot be computed in OUTERMOST_LOOP, the induction variables are + not linear with respect to OUTERMOST_LOOP, etc. The current open + scop should end before this statement. */ + +static gimple +harmful_stmt_in_bb (struct loop *outermost_loop, basic_block bb) +{ + gimple_stmt_iterator gsi; + + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + if (!stmt_simple_for_scop_p (outermost_loop, gsi_stmt (gsi))) + return gsi_stmt (gsi); + + return NULL; +} + +/* Store the GRAPHITE representation of BB. */ + +static void +new_graphite_bb (scop_p scop, basic_block bb) +{ + struct graphite_bb *gbb = XNEW (struct graphite_bb); + + bb->aux = gbb; + GBB_BB (gbb) = bb; + GBB_SCOP (gbb) = scop; + GBB_DATA_REFS (gbb) = NULL; + GBB_DOMAIN (gbb) = NULL; + GBB_CONDITIONS (gbb) = NULL; + GBB_CONDITION_CASES (gbb) = NULL; + GBB_LOOPS (gbb) = NULL; + VEC_safe_push (graphite_bb_p, heap, SCOP_BBS (scop), gbb); + bitmap_set_bit (SCOP_BBS_B (scop), bb->index); +} + +/* Frees GBB. */ + +static void +free_graphite_bb (struct graphite_bb *gbb) +{ + if (GBB_DOMAIN (gbb)) + cloog_matrix_free (GBB_DOMAIN (gbb)); + + free_data_refs (GBB_DATA_REFS (gbb)); + VEC_free (gimple, heap, GBB_CONDITIONS (gbb)); + VEC_free (gimple, heap, GBB_CONDITION_CASES (gbb)); + VEC_free (loop_p, heap, GBB_LOOPS (gbb)); + + GBB_BB (gbb)->aux = 0; + XDELETE (gbb); +} + +/* Creates a new scop starting with ENTRY. */ + +static scop_p +new_scop (edge entry) +{ + scop_p scop = XNEW (struct scop); + + SCOP_REGION (scop) = XNEW (struct sese); + SESE_ENTRY (SCOP_REGION (scop)) = entry; + SESE_EXIT (SCOP_REGION (scop)) = NULL; + SCOP_BBS (scop) = VEC_alloc (graphite_bb_p, heap, 3); + SCOP_OLDIVS (scop) = VEC_alloc (name_tree, heap, 3); + SCOP_BBS_B (scop) = BITMAP_ALLOC (NULL); + SCOP_LOOPS (scop) = BITMAP_ALLOC (NULL); + SCOP_LOOP_NEST (scop) = VEC_alloc (loop_p, heap, 3); + SCOP_PARAMS (scop) = VEC_alloc (name_tree, heap, 3); + SCOP_PROG (scop) = cloog_program_malloc (); + cloog_program_set_names (SCOP_PROG (scop), cloog_names_malloc ()); + SCOP_LOOP2CLOOG_LOOP (scop) = htab_create (10, hash_loop_to_cloog_loop, + eq_loop_to_cloog_loop, + free); + return scop; +} + +/* Deletes SCOP. */ + +static void +free_scop (scop_p scop) +{ + int i; + name_tree p; + struct graphite_bb *gb; + + for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gb); i++) + free_graphite_bb (gb); + + VEC_free (graphite_bb_p, heap, SCOP_BBS (scop)); + BITMAP_FREE (SCOP_BBS_B (scop)); + BITMAP_FREE (SCOP_LOOPS (scop)); + VEC_free (loop_p, heap, SCOP_LOOP_NEST (scop)); + VEC_free (name_tree, heap, SCOP_OLDIVS (scop)); + + for (i = 0; VEC_iterate (name_tree, SCOP_PARAMS (scop), i, p); i++) + free (p); + + VEC_free (name_tree, heap, SCOP_PARAMS (scop)); + cloog_program_free (SCOP_PROG (scop)); + htab_delete (SCOP_LOOP2CLOOG_LOOP (scop)); + XDELETE (SCOP_REGION (scop)); + XDELETE (scop); +} + +/* Deletes all scops in SCOPS. */ + +static void +free_scops (VEC (scop_p, heap) *scops) +{ + int i; + scop_p scop; + + for (i = 0; VEC_iterate (scop_p, scops, i, scop); i++) + free_scop (scop); + + VEC_free (scop_p, heap, scops); +} + +typedef enum gbb_type { + GBB_UNKNOWN, + GBB_LOOP_SING_EXIT_HEADER, + GBB_LOOP_MULT_EXIT_HEADER, + GBB_LOOP_EXIT, + GBB_COND_HEADER, + GBB_SIMPLE, + GBB_LAST +} gbb_type; + +/* Detect the type of BB. Loop headers are only marked, if they are + new. This means their loop_father is different to LAST_LOOP. + Otherwise they are treated like any other bb and their type can be + any other type. */ + +static gbb_type +get_bb_type (basic_block bb, struct loop *last_loop) +{ + VEC (basic_block, heap) *dom; + int nb_dom, nb_suc; + struct loop *loop = bb->loop_father; + + /* Check, if we entry into a new loop. */ + if (loop != last_loop) + { + if (single_exit (loop) != NULL) + return GBB_LOOP_SING_EXIT_HEADER; + else if (loop->num != 0) + return GBB_LOOP_MULT_EXIT_HEADER; + else + return GBB_COND_HEADER; + } + + dom = get_dominated_by (CDI_DOMINATORS, bb); + nb_dom = VEC_length (basic_block, dom); + VEC_free (basic_block, heap, dom); + if (nb_dom == 0) + return GBB_LAST; + + nb_suc = VEC_length (edge, bb->succs); + if (nb_dom == 1 && nb_suc == 1) + return GBB_SIMPLE; + + return GBB_COND_HEADER; +} + +/* Moves the scops from SOURCE to TARGET and clean up SOURCE. */ + +static void +move_scops (VEC (scop_p, heap) **source, VEC (scop_p, heap) **target) +{ + scop_p s; + int i; + + for (i = 0; VEC_iterate (scop_p, *source, i, s); i++) + VEC_safe_push (scop_p, heap, *target, s); + + VEC_free (scop_p, heap, *source); +} + +/* Store information needed by scopdet_* functions. */ + +struct scopdet_info +{ + /* Where the last open scop would stop if the current BB is harmful. */ + edge last; + + /* Where the next scop would start if the current BB is harmful. */ + edge next; + + /* The bb or one of its children contains open loop exits. That means + loop exit nodes that are not surrounded by a loop dominated by bb. */ + bool exits; + + /* The bb or one of its children contains only structures we can handle. */ + bool difficult; +}; + +static struct scopdet_info build_scops_1 (edge, VEC (scop_p, heap) **, + loop_p, loop_p); + +/* Checks, if a bb can be added to a SCoP. */ + +static struct scopdet_info +scopdet_edge_info (edge ee, loop_p outermost_loop, + VEC (scop_p, heap) **scops, gbb_type type, gimple *stmt) + +{ + basic_block bb = ee->dest; + struct loop *loop = bb->loop_father; + struct scopdet_info result; + + *stmt = harmful_stmt_in_bb (outermost_loop, bb); + result.difficult = (*stmt != NULL); + result.last = NULL; + + switch (type) + { + case GBB_LAST: + result.next = NULL; + result.exits = false; + result.last = ee; + break; + + case GBB_SIMPLE: + result.next = single_succ_edge (bb); + result.exits = false; + result.last = ee; + break; + + case GBB_LOOP_SING_EXIT_HEADER: + { + VEC (scop_p, heap) *tmp_scops = VEC_alloc (scop_p, heap, 3); + struct scopdet_info sinfo; + + sinfo = build_scops_1 (ee, &tmp_scops, loop, outermost_loop); + + result.last = single_exit (bb->loop_father); + + if (single_succ_p (result.last->dest) + && get_bb_type (result.last->dest, loop) == GBB_SIMPLE) + result.next = single_succ_edge (result.last->dest); + else + result.next = split_block (result.last->dest, NULL); + + /* If we do not dominate result.next, remove it. It's either + the EXIT_BLOCK_PTR, or another bb dominates it and will + call the scop detection for this bb. */ + if (!dominated_by_p (CDI_DOMINATORS, result.next->dest, bb)) + result.next = NULL; + + if (TREE_CODE (number_of_latch_executions (loop)) + == SCEV_NOT_KNOWN) + result.difficult = true; + + if (sinfo.difficult) + move_scops (&tmp_scops, scops); + else + free_scops (tmp_scops); + + result.exits = false; + result.difficult |= sinfo.difficult; + break; + } + + case GBB_LOOP_MULT_EXIT_HEADER: + { + /* XXX: Handle loop nests with the same header. */ + /* XXX: Handle iterative optimization of outermost_loop. */ + /* XXX: For now we just do not join loops with multiple exits. If the + exits lead to the same bb it may be possible to join the loop. */ + VEC (scop_p, heap) *tmp_scops = VEC_alloc (scop_p, heap, 3); + VEC (edge, heap) *exits = get_loop_exit_edges (loop); + edge e; + int i; + build_scops_1 (ee, &tmp_scops, loop, outermost_loop); + + for (i = 0; VEC_iterate (edge, exits, i, e); i++) + if (dominated_by_p (CDI_DOMINATORS, e->dest, e->src) + && e->dest->loop_father == loop_outer (loop)) + build_scops_1 (e, &tmp_scops, e->dest->loop_father, + outermost_loop); + + result.next = NULL; + result.last = NULL; + result.difficult = true; + result.exits = false; + move_scops (&tmp_scops, scops); + VEC_free (edge, heap, exits); + break; + } + case GBB_COND_HEADER: + { + VEC (scop_p, heap) *tmp_scops = VEC_alloc (scop_p, heap, 3); + struct scopdet_info sinfo; + VEC (basic_block, heap) *dominated; + int i; + basic_block dom_bb; + basic_block last_bb = NULL; + edge last_e = NULL; + edge e; + result.exits = false; + + /* First check the successors of BB, and check if it is possible to join + the different branches. */ + for (i = 0; VEC_iterate (edge, bb->succs, i, e); i++) + { + /* Ignore loop exits. They will be handled after the loop body. */ + if (is_loop_exit (loop, e->dest)) + { + result.exits = true; + continue; + } + + /* Do not follow edges that lead to the end of the + conditions block. For example, in + + | 0 + | /|\ + | 1 2 | + | | | | + | 3 4 | + | \|/ + | 6 + + the edge from 0 => 6. Only check if all paths lead to + the same node 6. */ + + if (!single_pred_p (e->dest)) + { + /* Check, if edge leads directly to the end of this + condition. */ + if (!last_bb) + { + last_bb = e->dest; + last_e = e; + } + + if (e->dest != last_bb) + result.difficult = true; + + continue; + } + + if (!dominated_by_p (CDI_DOMINATORS, e->dest, bb)) + { + result.difficult = true; + continue; + } + + sinfo = build_scops_1 (e, &tmp_scops, loop, outermost_loop); + + result.exits |= sinfo.exits; + result.last = sinfo.last; + result.difficult |= sinfo.difficult; + + /* Checks, if all branches end at the same point. + If that is true, the condition stays joinable. + Have a look at the example above. */ + if (sinfo.last && single_succ_p (sinfo.last->dest)) + { + basic_block next_tmp = single_succ (sinfo.last->dest); + + if (!last_bb) + { + last_bb = next_tmp; + last_e = single_succ_edge (sinfo.last->dest); + } + + if (next_tmp != last_bb) + result.difficult = true; + } + else + result.difficult = true; + } + + /* If the condition is joinable. */ + if (!result.exits && !result.difficult) + { + /* Only return a next pointer if we dominate this pointer. + Otherwise it will be handled by the bb dominating it. */ + if (dominated_by_p (CDI_DOMINATORS, last_bb, bb) && last_bb != bb) + result.next = last_e; + else + result.next = NULL; + + move_scops (&tmp_scops, scops); + break; + } + + /* Scan remaining bbs dominated by BB. */ + dominated = get_dominated_by (CDI_DOMINATORS, bb); + + for (i = 0; VEC_iterate (basic_block, dominated, i, dom_bb); i++) + { + /* Ignore loop exits: they will be handled after the loop body. */ + if (is_loop_exit (loop, dom_bb)) + { + result.exits = true; + continue; + } + + /* Ignore the bbs processed above. */ + if (single_pred_p (dom_bb) && single_pred (dom_bb) == bb) + continue; + + if (single_pred_p (dom_bb)) + e = single_pred_edge (dom_bb); + else + e = split_block (dom_bb, NULL); + + if (loop_depth (loop) > loop_depth (dom_bb->loop_father)) + sinfo = build_scops_1 (e, &tmp_scops, loop_outer (loop), + outermost_loop); + else + sinfo = build_scops_1 (e, &tmp_scops, loop, outermost_loop); + + + result.exits |= sinfo.exits; + result.difficult = true; + result.last = NULL; + } + + VEC_free (basic_block, heap, dominated); + + result.next = NULL; + move_scops (&tmp_scops, scops); + + break; + } + + default: + gcc_unreachable (); + } + + return result; +} + +/* Split EXIT before STMT when STMT is non NULL. */ + +static edge +split_difficult_bb (basic_block exit, edge *last, gimple stmt) +{ + if (stmt && VEC_length (edge, exit->preds) == 1) + { + edge e; + + if (stmt == gsi_stmt (gsi_after_labels (exit))) + stmt = NULL; + else + { + gimple_stmt_iterator gsi = gsi_for_stmt (stmt); + gsi_prev (&gsi); + stmt = gsi_stmt (gsi); + } + + e = split_block (exit, stmt); + set_immediate_dominator (CDI_DOMINATORS, e->dest, e->src); + set_immediate_dominator (CDI_POST_DOMINATORS, e->src, e->dest); + exit = e->dest; + + if (last) + *last = e; + + return e; + } + + return NULL; +} + +/* End SCOP with edge EXIT. */ + +static void +end_scop (scop_p scop, edge exit, bool split_entry) +{ + if (split_entry + && !single_pred_p (SCOP_ENTRY (scop)) + && exit->dest->loop_father == SCOP_ENTRY (scop)->loop_father) + SESE_ENTRY (SCOP_REGION (scop)) = split_block (SCOP_ENTRY (scop), NULL); + + SESE_EXIT (SCOP_REGION (scop)) = exit; +} + +/* Creates the SCoPs and writes entry and exit points for every SCoP. */ + +static struct scopdet_info +build_scops_1 (edge start, VEC (scop_p, heap) **scops, loop_p loop, + loop_p outermost_loop) +{ + edge current = start; + + bool in_scop = false; + scop_p open_scop = NULL; + gimple stmt; + struct scopdet_info sinfo; + + /* Initialize result. */ + struct scopdet_info result; + result.exits = false; + result.difficult = false; + result.next = NULL; + result.last = NULL; + + /* Loop over the dominance tree. If we meet a difficult bb, close + the current SCoP. Loop and condition header start a new layer, + and can only be added if all bbs in deeper layers are simple. */ + while (current != NULL) + { + sinfo = scopdet_edge_info (current, outermost_loop, scops, + get_bb_type (current->dest, loop), &stmt); + + if (!in_scop && !(sinfo.exits || sinfo.difficult)) + { + open_scop = new_scop (current); + + VEC_safe_push (scop_p, heap, *scops, open_scop); + in_scop = true; + } + else if (in_scop && (sinfo.exits || sinfo.difficult)) + { + edge exit = split_difficult_bb (current->dest, &sinfo.last, stmt); + + if (!exit) + exit = current; + + end_scop (open_scop, exit, sinfo.difficult); + in_scop = false; + } + + result.difficult |= sinfo.difficult; + result.exits |= sinfo.exits; + + current = sinfo.next; + } + + /* Finish the SCOP, if it is left open. The exit is the bb, that + postdominates sinfo.last. If no such bb exists, we use info.last + or delete the scop. */ + if (in_scop) + { + int i; + edge e; + + for (i = 0; VEC_iterate (edge, sinfo.last->dest->succs, i, e); i++) + if (dominated_by_p (CDI_POST_DOMINATORS, sinfo.last->dest, e->dest)) + { + edge exit = split_difficult_bb (e->dest, &sinfo.last, stmt); + + if (exit) + end_scop (open_scop, exit, sinfo.difficult); + else + end_scop (open_scop, e, sinfo.difficult); + + goto done; + } + + if (SCOP_ENTRY (open_scop) != sinfo.last->dest) + { + edge exit = split_difficult_bb (sinfo.last->dest, NULL, stmt); + + if (exit) + end_scop (open_scop, exit, sinfo.difficult); + else + end_scop (open_scop, sinfo.last, sinfo.difficult); + } + else + { + VEC_pop (scop_p, *scops); + free_scop (open_scop); + } + } + + done: + result.last = sinfo.last; + + return result; +} + +/* Find static control parts. */ + +static void +build_scops (void) +{ + struct loop *loop = current_loops->tree_root; + build_scops_1 (single_succ_edge (ENTRY_BLOCK_PTR), ¤t_scops, loop, loop); +} + +/* Gather the basic blocks belonging to the SCOP. */ + +static void +build_scop_bbs (scop_p scop) +{ + basic_block *stack = XNEWVEC (basic_block, n_basic_blocks + 1); + sbitmap visited = sbitmap_alloc (last_basic_block); + int sp = 0; + + sbitmap_zero (visited); + stack[sp++] = SCOP_ENTRY (scop); + + while (sp) + { + basic_block bb = stack[--sp]; + int depth = loop_depth (bb->loop_father); + int num = bb->loop_father->num; + edge_iterator ei; + edge e; + + /* Scop's exit is not in the scop. Exclude also bbs, which are + dominated by the SCoP exit. These are e.g. loop latches. */ + if (TEST_BIT (visited, bb->index) + || dominated_by_p (CDI_DOMINATORS, bb, SCOP_EXIT (scop)) + /* Every block in the scop is dominated by scop's entry. */ + || !dominated_by_p (CDI_DOMINATORS, bb, SCOP_ENTRY (scop))) + continue; + + new_graphite_bb (scop, bb); + SET_BIT (visited, bb->index); + + /* First push the blocks that have to be processed last. Note + that this means that the order in which the code is organized + below is important: do not reorder the following code. */ + FOR_EACH_EDGE (e, ei, bb->succs) + if (! TEST_BIT (visited, e->dest->index) + && (int) loop_depth (e->dest->loop_father) < depth) + stack[sp++] = e->dest; + + FOR_EACH_EDGE (e, ei, bb->succs) + if (! TEST_BIT (visited, e->dest->index) + && (int) loop_depth (e->dest->loop_father) == depth + && e->dest->loop_father->num != num) + stack[sp++] = e->dest; + + FOR_EACH_EDGE (e, ei, bb->succs) + if (! TEST_BIT (visited, e->dest->index) + && (int) loop_depth (e->dest->loop_father) == depth + && e->dest->loop_father->num == num + && EDGE_COUNT (e->dest->preds) > 1) + stack[sp++] = e->dest; + + FOR_EACH_EDGE (e, ei, bb->succs) + if (! TEST_BIT (visited, e->dest->index) + && (int) loop_depth (e->dest->loop_father) == depth + && e->dest->loop_father->num == num + && EDGE_COUNT (e->dest->preds) == 1) + stack[sp++] = e->dest; + + FOR_EACH_EDGE (e, ei, bb->succs) + if (! TEST_BIT (visited, e->dest->index) + && (int) loop_depth (e->dest->loop_father) > depth) + stack[sp++] = e->dest; + } + + free (stack); + sbitmap_free (visited); +} + + +/* Record LOOP as occuring in SCOP. */ + +static void +scop_record_loop (scop_p scop, struct loop *loop) +{ + loop_p parent; + tree induction_var; + + if (bitmap_bit_p (SCOP_LOOPS (scop), loop->num)) + return; + + parent = loop_outer (loop); + induction_var = find_induction_var_from_exit_cond (loop); + + if (!bb_in_scop_p (parent->latch, scop)) + parent = NULL; + + if (induction_var != NULL_TREE) + { + name_tree oldiv = XNEW (struct name_tree); + oldiv->t = SSA_NAME_VAR (induction_var); + if (DECL_NAME (oldiv->t)) + oldiv->name = IDENTIFIER_POINTER (DECL_NAME (oldiv->t)); + else + { + char *n = XNEWVEC (char, 16); + sprintf (n, "D.%u", DECL_UID (oldiv->t)); + oldiv->name = n; + } + oldiv->loop = loop; + + VEC_safe_push (name_tree, heap, SCOP_OLDIVS (scop), oldiv); + } + + bitmap_set_bit (SCOP_LOOPS (scop), loop->num); + VEC_safe_push (loop_p, heap, SCOP_LOOP_NEST (scop), loop); +} + +/* Build the loop nests contained in SCOP. */ + +static void +build_scop_loop_nests (scop_p scop) +{ + unsigned i; + graphite_bb_p gb; + struct loop *loop0, *loop1; + + for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gb); i++) + { + struct loop *loop = gbb_loop (gb); + + /* Only add loops, if they are completely contained in the SCoP. */ + if (loop->header == GBB_BB (gb) + && bb_in_scop_p (loop->latch, scop)) + scop_record_loop (scop, gbb_loop (gb)); + } + + /* Make sure that the loops in the SCOP_LOOP_NEST are ordered. It + can be the case that an inner loop is inserted before an outer + loop. To avoid this, semi-sort once. */ + for (i = 0; VEC_iterate (loop_p, SCOP_LOOP_NEST (scop), i, loop0); i++) + { + if (VEC_length (loop_p, SCOP_LOOP_NEST (scop)) == i + 1) + break; + + loop1 = VEC_index (loop_p, SCOP_LOOP_NEST (scop), i + 1); + if (loop0->num > loop1->num) + { + VEC_replace (loop_p, SCOP_LOOP_NEST (scop), i, loop1); + VEC_replace (loop_p, SCOP_LOOP_NEST (scop), i + 1, loop0); + } + } +} + +/* Calculate the number of loops around GB in the current SCOP. */ + +static inline int +nb_loops_around_gb (graphite_bb_p gb) +{ + scop_p scop = GBB_SCOP (gb); + struct loop *l = gbb_loop (gb); + int d = 0; + + for (; loop_in_scop_p (l, scop); d++, l = loop_outer (l)); + + return d; +} + +/* Build for BB the static schedule. + + The STATIC_SCHEDULE is defined like this: + + A + for (i: ...) + { + for (j: ...) + { + B + C + } + + for (k: ...) + { + D + E + } + } + F + + Static schedules for A to F: + + DEPTH + 0 1 2 + A 0 + B 1 0 0 + C 1 0 1 + D 1 1 0 + E 1 1 1 + F 2 +*/ + +static void +build_scop_canonical_schedules (scop_p scop) +{ + int i, j; + graphite_bb_p gb; + int nb = scop_nb_loops (scop) + 1; + + SCOP_STATIC_SCHEDULE (scop) = lambda_vector_new (nb); + + for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gb); i++) + { + int offset = nb_loops_around_gb (gb); + + /* After leaving a loop, it is possible that the schedule is not + set at zero. This loop reinitializes components located + after OFFSET. */ + + for (j = offset + 1; j < nb; j++) + if (SCOP_STATIC_SCHEDULE (scop)[j]) + { + memset (&(SCOP_STATIC_SCHEDULE (scop)[j]), 0, + sizeof (int) * (nb - j)); + ++SCOP_STATIC_SCHEDULE (scop)[offset]; + break; + } + + GBB_STATIC_SCHEDULE (gb) = lambda_vector_new (offset + 1); + lambda_vector_copy (SCOP_STATIC_SCHEDULE (scop), + GBB_STATIC_SCHEDULE (gb), offset + 1); + + ++SCOP_STATIC_SCHEDULE (scop)[offset]; + } +} + +/* Build the LOOPS vector for all bbs in SCOP. */ + +static void +build_bb_loops (scop_p scop) +{ + graphite_bb_p gb; + int i; + + for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gb); i++) + { + loop_p loop; + int depth; + + depth = nb_loops_around_gb (gb) - 1; + + GBB_LOOPS (gb) = VEC_alloc (loop_p, heap, 3); + VEC_safe_grow_cleared (loop_p, heap, GBB_LOOPS (gb), depth + 1); + + loop = GBB_BB (gb)->loop_father; + + while (scop_contains_loop (scop, loop)) + { + VEC_replace (loop_p, GBB_LOOPS (gb), depth, loop); + loop = loop_outer (loop); + depth--; + } + } +} + +/* Get the index for parameter VAR in SCOP. */ + +static int +param_index (tree var, scop_p scop) +{ + int i; + name_tree p; + name_tree nvar; + + gcc_assert (TREE_CODE (var) == SSA_NAME); + + for (i = 0; VEC_iterate (name_tree, SCOP_PARAMS (scop), i, p); i++) + if (p->t == var) + return i; + + nvar = XNEW (struct name_tree); + nvar->t = var; + nvar->name = NULL; + VEC_safe_push (name_tree, heap, SCOP_PARAMS (scop), nvar); + return VEC_length (name_tree, SCOP_PARAMS (scop)) - 1; +} + +/* Scan EXPR and translate it to an inequality vector INEQ that will + be added, or subtracted, in the constraint domain matrix C at row + R. K is the number of columns for loop iterators in C. */ + +static void +scan_tree_for_params (scop_p s, tree e, CloogMatrix *c, int r, Value k, + bool subtract) +{ + int cst_col, param_col; + + if (e == chrec_dont_know) + return; + + switch (TREE_CODE (e)) + { + case POLYNOMIAL_CHREC: + { + tree left = CHREC_LEFT (e); + tree right = CHREC_RIGHT (e); + int var = CHREC_VARIABLE (e); + + if (TREE_CODE (right) != INTEGER_CST) + return; + + if (c) + { + int loop_col = scop_gimple_loop_depth (s, get_loop (var)) + 1; + + if (subtract) + value_sub_int (c->p[r][loop_col], c->p[r][loop_col], + int_cst_value (right)); + else + value_add_int (c->p[r][loop_col], c->p[r][loop_col], + int_cst_value (right)); + } + + switch (TREE_CODE (left)) + { + case POLYNOMIAL_CHREC: + scan_tree_for_params (s, left, c, r, k, subtract); + return; + + case INTEGER_CST: + /* Constant part. */ + if (c) + { + int v = int_cst_value (left); + cst_col = c->NbColumns - 1; + + if (v < 0) + { + v = -v; + subtract = subtract ? false : true; + } + + if (subtract) + value_sub_int (c->p[r][cst_col], c->p[r][cst_col], v); + else + value_add_int (c->p[r][cst_col], c->p[r][cst_col], v); + } + return; + + default: + scan_tree_for_params (s, left, c, r, k, subtract); + return; + } + } + break; + + case MULT_EXPR: + if (chrec_contains_symbols (TREE_OPERAND (e, 0))) + { + Value val; + + gcc_assert (host_integerp (TREE_OPERAND (e, 1), 0)); + + value_init (val); + value_set_si (val, int_cst_value (TREE_OPERAND (e, 1))); + value_multiply (k, k, val); + value_clear (val); + scan_tree_for_params (s, TREE_OPERAND (e, 0), c, r, k, subtract); + } + else + { + Value val; + + gcc_assert (host_integerp (TREE_OPERAND (e, 0), 0)); + + value_init (val); + value_set_si (val, int_cst_value (TREE_OPERAND (e, 0))); + value_multiply (k, k, val); + value_clear (val); + scan_tree_for_params (s, TREE_OPERAND (e, 1), c, r, k, subtract); + } + break; + + case PLUS_EXPR: + scan_tree_for_params (s, TREE_OPERAND (e, 0), c, r, k, subtract); + scan_tree_for_params (s, TREE_OPERAND (e, 1), c, r, k, subtract); + break; + + case MINUS_EXPR: + scan_tree_for_params (s, TREE_OPERAND (e, 0), c, r, k, subtract); + value_oppose (k, k); + scan_tree_for_params (s, TREE_OPERAND (e, 1), c, r, k, subtract); + break; + + case NEGATE_EXPR: + value_oppose (k, k); + scan_tree_for_params (s, TREE_OPERAND (e, 0), c, r, k, subtract); + break; + + case SSA_NAME: + param_col = param_index (e, s); + + if (c) + { + param_col += c->NbColumns - scop_nb_params (s) - 1; + + if (subtract) + value_subtract (c->p[r][param_col], c->p[r][param_col], k); + else + value_addto (c->p[r][param_col], c->p[r][param_col], k); + } + break; + + case INTEGER_CST: + if (c) + { + int v = int_cst_value (e); + cst_col = c->NbColumns - 1; + + if (v < 0) + { + v = -v; + subtract = subtract ? false : true; + } + + if (subtract) + value_sub_int (c->p[r][cst_col], c->p[r][cst_col], v); + else + value_add_int (c->p[r][cst_col], c->p[r][cst_col], v); + } + break; + + case NOP_EXPR: + case CONVERT_EXPR: + case NON_LVALUE_EXPR: + scan_tree_for_params (s, TREE_OPERAND (e, 0), c, r, k, subtract); + break; + + default: + gcc_unreachable (); + break; + } +} + +/* Data structure for idx_record_params. */ + +struct irp_data +{ + struct loop *loop; + scop_p scop; +}; + +/* For a data reference with an ARRAY_REF as its BASE, record the + parameters occurring in IDX. DTA is passed in as complementary + information, and is used by the automatic walker function. This + function is a callback for for_each_index. */ + +static bool +idx_record_params (tree base, tree *idx, void *dta) +{ + struct irp_data *data = (struct irp_data *) dta; + + if (TREE_CODE (base) != ARRAY_REF) + return true; + + if (TREE_CODE (*idx) == SSA_NAME) + { + tree scev; + scop_p scop = data->scop; + struct loop *loop = data->loop; + + scev = analyze_scalar_evolution (loop, *idx); + scev = instantiate_scev (outermost_loop_in_scop (scop, loop->header), + loop, scev); + + { + Value one; + + value_init (one); + value_set_si (one, 1); + scan_tree_for_params (scop, scev, NULL, 0, one, false); + value_clear (one); + } + } + + return true; +} + +/* Find parameters with respect to SCOP in BB. We are looking in memory + access functions, conditions and loop bounds. */ + +static void +find_params_in_bb (scop_p scop, basic_block bb) +{ + int i; + data_reference_p dr; + VEC (data_reference_p, heap) *drs; + gimple_stmt_iterator gsi; + struct loop *nest = outermost_loop_in_scop (scop, bb); + + /* Find the parameters used in the memory access functions. */ + drs = VEC_alloc (data_reference_p, heap, 5); + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + find_data_references_in_stmt (nest, gsi_stmt (gsi), &drs); + + for (i = 0; VEC_iterate (data_reference_p, drs, i, dr); i++) + { + struct irp_data irp; + + irp.loop = bb->loop_father; + irp.scop = scop; + for_each_index (&dr->ref, idx_record_params, &irp); + free_data_ref (dr); + } + + VEC_free (data_reference_p, heap, drs); + + /* Find parameters in conditional statements. */ + gsi = gsi_last_bb (bb); + if (!gsi_end_p (gsi)) + { + gimple stmt = gsi_stmt (gsi); + + if (gimple_code (stmt) == GIMPLE_COND) + { + Value one; + loop_p loop = bb->loop_father; + + tree lhs, rhs; + + lhs = gimple_cond_lhs (stmt); + lhs = analyze_scalar_evolution (loop, lhs); + lhs = instantiate_scev (nest, loop, lhs); + + rhs = gimple_cond_rhs (stmt); + rhs = analyze_scalar_evolution (loop, rhs); + rhs = instantiate_scev (nest, loop, rhs); + + value_init (one); + scan_tree_for_params (scop, lhs, NULL, 0, one, false); + value_set_si (one, 1); + scan_tree_for_params (scop, rhs, NULL, 0, one, false); + value_clear (one); + } + } +} + +/* Saves in NV the name of variable P->T. */ + +static void +save_var_name (char **nv, int i, name_tree p) +{ + const char *name = get_name (SSA_NAME_VAR (p->t)); + + if (name) + { + nv[i] = XNEWVEC (char, strlen (name) + 12); + sprintf (nv[i], "%s_%12d", name, SSA_NAME_VERSION (p->t)); + } + else + { + nv[i] = XNEWVEC (char, 12); + sprintf (nv[i], "T_%12d", SSA_NAME_VERSION (p->t)); + } + + p->name = nv[i]; +} + +/* Return the maximal loop depth in SCOP. */ + +static int +scop_max_loop_depth (scop_p scop) +{ + int i; + graphite_bb_p gbb; + int max_nb_loops = 0; + + for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gbb); i++) + { + int nb_loops = gbb_nb_loops (gbb); + if (max_nb_loops < nb_loops) + max_nb_loops = nb_loops; + } + + return max_nb_loops; +} + +/* Initialize Cloog's parameter names from the names used in GIMPLE. + Initialize Cloog's iterator names, using 'graphite_iterator_%d' + from 0 to scop_nb_loops (scop). */ + +static void +initialize_cloog_names (scop_p scop) +{ + int i, nb_params = VEC_length (name_tree, SCOP_PARAMS (scop)); + char **params = XNEWVEC (char *, nb_params); + int nb_iterators = scop_max_loop_depth (scop); + int nb_scattering= cloog_program_nb_scattdims (SCOP_PROG (scop)); + char **iterators = XNEWVEC (char *, nb_iterators * 2); + char **scattering = XNEWVEC (char *, nb_scattering); + name_tree p; + + for (i = 0; VEC_iterate (name_tree, SCOP_PARAMS (scop), i, p); i++) + save_var_name (params, i, p); + + cloog_names_set_nb_parameters (cloog_program_names (SCOP_PROG (scop)), + nb_params); + cloog_names_set_parameters (cloog_program_names (SCOP_PROG (scop)), + params); + + for (i = 0; i < nb_iterators; i++) + { + iterators[i] = XNEWVEC (char, 18 + 12); + sprintf (iterators[i], "graphite_iterator_%d", i); + } + + cloog_names_set_nb_iterators (cloog_program_names (SCOP_PROG (scop)), + nb_iterators); + cloog_names_set_iterators (cloog_program_names (SCOP_PROG (scop)), + iterators); + + for (i = 0; i < nb_scattering; i++) + { + scattering[i] = XNEWVEC (char, 2 + 12); + sprintf (scattering[i], "s_%d", i); + } + + cloog_names_set_nb_scattering (cloog_program_names (SCOP_PROG (scop)), + nb_scattering); + cloog_names_set_scattering (cloog_program_names (SCOP_PROG (scop)), + scattering); +} + +/* Record the parameters used in the SCOP. A variable is a parameter + in a scop if it does not vary during the execution of that scop. */ + +static void +find_scop_parameters (scop_p scop) +{ + graphite_bb_p gb; + unsigned i; + struct loop *loop; + Value one; + + value_init (one); + value_set_si (one, 1); + + /* Find the parameters used in the loop bounds. */ + for (i = 0; VEC_iterate (loop_p, SCOP_LOOP_NEST (scop), i, loop); i++) + { + tree nb_iters = number_of_latch_executions (loop); + + if (!chrec_contains_symbols (nb_iters)) + continue; + + nb_iters = analyze_scalar_evolution (loop, nb_iters); + nb_iters = instantiate_scev (outermost_loop_in_scop (scop, loop->header), + loop, nb_iters); + scan_tree_for_params (scop, nb_iters, NULL, 0, one, false); + } + + value_clear (one); + + /* Find the parameters used in data accesses. */ + for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gb); i++) + find_params_in_bb (scop, GBB_BB (gb)); +} + +/* Build the context constraints for SCOP: constraints and relations + on parameters. */ + +static void +build_scop_context (scop_p scop) +{ + int nb_params = scop_nb_params (scop); + CloogMatrix *matrix = cloog_matrix_alloc (1, nb_params + 2); + + /* Insert '0 >= 0' in the context matrix, as it is not allowed to be + empty. */ + + value_set_si (matrix->p[0][0], 1); + + value_set_si (matrix->p[0][nb_params + 1], 0); + + cloog_program_set_context (SCOP_PROG (scop), + cloog_domain_matrix2domain (matrix)); + cloog_matrix_free (matrix); +} + +/* Returns a graphite_bb from BB. */ + +static inline graphite_bb_p +gbb_from_bb (basic_block bb) +{ + return (graphite_bb_p) bb->aux; +} + +/* Add DOMAIN to all the basic blocks in LOOP. */ + +static void +add_bb_domains (struct loop *loop, CloogMatrix *domain) +{ + basic_block *bbs = get_loop_body (loop); + unsigned i; + + for (i = 0; i < loop->num_nodes; i++) + if (bbs[i]->loop_father == loop) + { + graphite_bb_p gbb = gbb_from_bb (bbs[i]); + GBB_DOMAIN (gbb) = cloog_matrix_copy (domain); + } + + free (bbs); +} + +/* Builds the constraint matrix for LOOP in SCOP. NB_OUTER_LOOPS is the + number of loops surrounding LOOP in SCOP. OUTER_CSTR gives the + constraints matrix for the surrounding loops. */ + +static void +build_loop_iteration_domains (scop_p scop, struct loop *loop, + CloogMatrix *outer_cstr, int nb_outer_loops) +{ + int i, j, row; + CloogMatrix *cstr; + + int nb_rows = outer_cstr->NbRows + 1; + int nb_cols = outer_cstr->NbColumns + 1; + + /* Last column of CSTR is the column of constants. */ + int cst_col = nb_cols - 1; + + /* The column for the current loop is just after the columns of + other outer loops. */ + int loop_col = nb_outer_loops + 1; + + tree nb_iters = number_of_latch_executions (loop); + + /* When the number of iterations is a constant or a parameter, we + add a constraint for the upper bound of the loop. So add a row + to the constraint matrix before allocating it. */ + if (TREE_CODE (nb_iters) == INTEGER_CST + || !chrec_contains_undetermined (nb_iters)) + nb_rows++; + + cstr = cloog_matrix_alloc (nb_rows, nb_cols); + + /* Copy the outer constraints. */ + for (i = 0; i < outer_cstr->NbRows; i++) + { + /* Copy the eq/ineq and loops columns. */ + for (j = 0; j < loop_col; j++) + value_assign (cstr->p[i][j], outer_cstr->p[i][j]); + + /* Leave an empty column in CSTR for the current loop, and then + copy the parameter columns. */ + for (j = loop_col; j < outer_cstr->NbColumns; j++) + value_assign (cstr->p[i][j + 1], outer_cstr->p[i][j]); + } + + /* 0 <= loop_i */ + row = outer_cstr->NbRows; + value_set_si (cstr->p[row][0], 1); + value_set_si (cstr->p[row][loop_col], 1); + + /* loop_i <= nb_iters */ + if (TREE_CODE (nb_iters) == INTEGER_CST) + { + row++; + value_set_si (cstr->p[row][0], 1); + value_set_si (cstr->p[row][loop_col], -1); + + value_set_si (cstr->p[row][cst_col], + int_cst_value (nb_iters)); + } + else if (!chrec_contains_undetermined (nb_iters)) + { + /* Otherwise nb_iters contains parameters: scan the nb_iters + expression and build its matrix representation. */ + Value one; + + row++; + value_set_si (cstr->p[row][0], 1); + value_set_si (cstr->p[row][loop_col], -1); + nb_iters = analyze_scalar_evolution (loop, nb_iters); + nb_iters = + instantiate_scev (outermost_loop_in_scop (scop, loop->header), + loop, nb_iters); + value_init (one); + value_set_si (one, 1); + scan_tree_for_params (scop, nb_iters, cstr, row, one, false); + value_clear (one); + } + else + gcc_unreachable (); + + if (loop->inner && loop_in_scop_p (loop->inner, scop)) + build_loop_iteration_domains (scop, loop->inner, cstr, nb_outer_loops + 1); + + /* Only go to the next loops, if we are not at the outermost layer. These + have to be handled seperately, as we can be sure, that the chain at this + layer will be connected. */ + if (nb_outer_loops != 0 && loop->next && loop_in_scop_p (loop->next, scop)) + build_loop_iteration_domains (scop, loop->next, outer_cstr, nb_outer_loops); + + add_bb_domains (loop, cstr); + + cloog_matrix_free (cstr); +} + +/* Add conditions to the domain of GB. */ + +static void +add_conditions_to_domain (graphite_bb_p gb) +{ + unsigned int i,j; + gimple stmt; + VEC (gimple, heap) *conditions = GBB_CONDITIONS (gb); + CloogMatrix *domain = GBB_DOMAIN (gb); + scop_p scop = GBB_SCOP (gb); + + unsigned nb_rows; + unsigned nb_cols; + unsigned nb_new_rows = 0; + unsigned row; + + if (VEC_empty (gimple, conditions)) + return; + + if (domain) + { + nb_rows = domain->NbRows; + nb_cols = domain->NbColumns; + } + else + { + nb_rows = 0; + nb_cols = scop_nb_params (scop) + 2; + } + + /* Count number of necessary new rows to add the conditions to the + domain. */ + for (i = 0; VEC_iterate (gimple, conditions, i, stmt); i++) + { + switch (gimple_code (stmt)) + { + case GIMPLE_COND: + { + enum tree_code code = gimple_cond_code (stmt); + + switch (code) + { + case NE_EXPR: + case EQ_EXPR: + /* NE and EQ statements are not supported right know. */ + gcc_unreachable (); + break; + case LT_EXPR: + case GT_EXPR: + case LE_EXPR: + case GE_EXPR: + nb_new_rows++; + break; + default: + gcc_unreachable (); + break; + } + break; + } + case SWITCH_EXPR: + /* Switch statements are not supported right know. */ + gcc_unreachable (); + break; + + default: + gcc_unreachable (); + break; + } + } + + + /* Enlarge the matrix. */ + { + CloogMatrix *new_domain; + new_domain = cloog_matrix_alloc (nb_rows + nb_new_rows, nb_cols); + + for (i = 0; i < nb_rows; i++) + for (j = 0; j < nb_cols; j++) + value_assign (new_domain->p[i][j], domain->p[i][j]); + + cloog_matrix_free (domain); + domain = new_domain; + GBB_DOMAIN (gb) = new_domain; + } + + /* Add the conditions to the new enlarged domain matrix. */ + row = nb_rows; + for (i = 0; VEC_iterate (gimple, conditions, i, stmt); i++) + { + switch (gimple_code (stmt)) + { + case GIMPLE_COND: + { + Value one; + enum tree_code code; + tree left; + tree right; + loop_p loop = GBB_BB (gb)->loop_father; + loop_p outermost = outermost_loop_in_scop (scop, GBB_BB (gb)); + + left = gimple_cond_lhs (stmt); + right = gimple_cond_rhs (stmt); + + left = analyze_scalar_evolution (loop, left); + right = analyze_scalar_evolution (loop, right); + left = instantiate_scev (outermost, loop, left); + right = instantiate_scev (outermost, loop, right); + + code = gimple_cond_code (stmt); + + /* The conditions for ELSE-branches are inverted. */ + if (VEC_index (gimple, gb->condition_cases, i) == NULL) + code = invert_tree_comparison (code, false); + + switch (code) + { + case NE_EXPR: + /* NE statements are not supported right know. */ + gcc_unreachable (); + break; + case EQ_EXPR: + value_set_si (domain->p[row][0], 1); + value_init (one); + value_set_si (one, 1); + scan_tree_for_params (scop, left, domain, row, one, true); + value_set_si (one, 1); + scan_tree_for_params (scop, right, domain, row, one, false); + row++; + value_set_si (domain->p[row][0], 1); + value_set_si (one, 1); + scan_tree_for_params (scop, left, domain, row, one, false); + value_set_si (one, 1); + scan_tree_for_params (scop, right, domain, row, one, true); + value_clear (one); + row++; + break; + case LT_EXPR: + value_set_si (domain->p[row][0], 1); + value_init (one); + value_set_si (one, 1); + scan_tree_for_params (scop, left, domain, row, one, true); + value_set_si (one, 1); + scan_tree_for_params (scop, right, domain, row, one, false); + value_sub_int (domain->p[row][nb_cols - 1], + domain->p[row][nb_cols - 1], 1); + value_clear (one); + row++; + break; + case GT_EXPR: + value_set_si (domain->p[row][0], 1); + value_init (one); + value_set_si (one, 1); + scan_tree_for_params (scop, left, domain, row, one, false); + value_set_si (one, 1); + scan_tree_for_params (scop, right, domain, row, one, true); + value_sub_int (domain->p[row][nb_cols - 1], + domain->p[row][nb_cols - 1], 1); + value_clear (one); + row++; + break; + case LE_EXPR: + value_set_si (domain->p[row][0], 1); + value_init (one); + value_set_si (one, 1); + scan_tree_for_params (scop, left, domain, row, one, true); + value_set_si (one, 1); + scan_tree_for_params (scop, right, domain, row, one, false); + value_clear (one); + row++; + break; + case GE_EXPR: + value_set_si (domain->p[row][0], 1); + value_init (one); + value_set_si (one, 1); + scan_tree_for_params (scop, left, domain, row, one, false); + value_set_si (one, 1); + scan_tree_for_params (scop, right, domain, row, one, true); + value_clear (one); + row++; + break; + default: + gcc_unreachable (); + break; + } + break; + } + case GIMPLE_SWITCH: + /* Switch statements are not supported right know. */ + gcc_unreachable (); + break; + + default: + gcc_unreachable (); + break; + } + } +} + +/* Helper recursive function. */ + +static void +build_scop_conditions_1 (VEC (gimple, heap) **conditions, + VEC (gimple, heap) **cases, basic_block bb, + scop_p scop) +{ + int i, j; + graphite_bb_p gbb; + gimple_stmt_iterator gsi; + basic_block bb_child, bb_iter; + VEC (basic_block, heap) *dom; + + /* Make sure we are in the SCoP. */ + if (!bb_in_scop_p (bb, scop)) + return; + + /* Record conditions in graphite_bb. */ + gbb = gbb_from_bb (bb); + GBB_CONDITIONS (gbb) = VEC_copy (gimple, heap, *conditions); + GBB_CONDITION_CASES (gbb) = VEC_copy (gimple, heap, *cases); + + add_conditions_to_domain (gbb); + + dom = get_dominated_by (CDI_DOMINATORS, bb); + + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + { + gimple stmt = gsi_stmt (gsi); + VEC (edge, gc) *edges; + edge e; + + switch (gimple_code (stmt)) + { + case GIMPLE_COND: + edges = bb->succs; + for (i = 0; VEC_iterate (edge, edges, i, e); i++) + if ((dominated_by_p (CDI_DOMINATORS, e->dest, bb)) + && VEC_length (edge, e->dest->preds) == 1) + { + /* Remove the scanned block from the dominator successors. */ + for (j = 0; VEC_iterate (basic_block, dom, j, bb_iter); j++) + if (bb_iter == e->dest) + { + VEC_unordered_remove (basic_block, dom, j); + break; + } + + /* Recursively scan the then or else part. */ + if (e->flags & EDGE_TRUE_VALUE) + VEC_safe_push (gimple, heap, *cases, stmt); + else if (e->flags & EDGE_FALSE_VALUE) + VEC_safe_push (gimple, heap, *cases, NULL); + else + gcc_unreachable (); + + VEC_safe_push (gimple, heap, *conditions, stmt); + build_scop_conditions_1 (conditions, cases, e->dest, scop); + VEC_pop (gimple, *conditions); + VEC_pop (gimple, *cases); + } + break; + + case GIMPLE_SWITCH: + { + unsigned i; + gimple_stmt_iterator gsi_search_gimple_label; + + for (i = 0; i < gimple_switch_num_labels (stmt); ++i) + { + basic_block bb_iter; + size_t k; + size_t n_cases = VEC_length (gimple, *conditions); + unsigned n = gimple_switch_num_labels (stmt); + + bb_child = label_to_block + (CASE_LABEL (gimple_switch_label (stmt, i))); + + /* Do not handle multiple values for the same block. */ + for (k = 0; k < n; k++) + if (i != k + && label_to_block + (CASE_LABEL (gimple_switch_label (stmt, k))) == bb_child) + break; + + if (k != n) + continue; + + /* Switch cases with more than one predecessor are not + handled. */ + if (VEC_length (edge, bb_child->preds) != 1) + continue; + + /* Recursively scan the corresponding 'case' block. */ + + for (gsi_search_gimple_label = gsi_start_bb (bb_child); + !gsi_end_p (gsi_search_gimple_label); + gsi_next (&gsi_search_gimple_label)) + { + gimple stmt_gimple_label + = gsi_stmt (gsi_search_gimple_label); + + if (gimple_code (stmt_gimple_label) == GIMPLE_LABEL) + { + tree t = gimple_label_label (stmt_gimple_label); + + if (t == gimple_switch_label (stmt, i)) + VEC_replace (gimple, *cases, n_cases, + stmt_gimple_label); + else + gcc_unreachable (); + } + } + + build_scop_conditions_1 (conditions, cases, bb_child, scop); + + /* Remove the scanned block from the dominator successors. */ + for (j = 0; VEC_iterate (basic_block, dom, j, bb_iter); j++) + if (bb_iter == bb_child) + { + VEC_unordered_remove (basic_block, dom, j); + break; + } + } + + VEC_pop (gimple, *conditions); + VEC_pop (gimple, *cases); + break; + } + default: + break; + } + } + + /* Scan all immediate dominated successors. */ + for (i = 0; VEC_iterate (basic_block, dom, i, bb_child); i++) + build_scop_conditions_1 (conditions, cases, bb_child, scop); + + VEC_free (basic_block, heap, dom); +} + +/* Record all 'if' and 'switch' conditions in each gbb of SCOP. */ + +static void +build_scop_conditions (scop_p scop) +{ + VEC (gimple, heap) *conditions = NULL; + VEC (gimple, heap) *cases = NULL; + + build_scop_conditions_1 (&conditions, &cases, SCOP_ENTRY (scop), scop); + + VEC_free (gimple, heap, conditions); + VEC_free (gimple, heap, cases); +} + +/* Build the current domain matrix: the loops belonging to the current + SCOP, and that vary for the execution of the current basic block. + Returns false if there is no loop in SCOP. */ + +static bool +build_scop_iteration_domain (scop_p scop) +{ + struct loop *loop; + CloogMatrix *outer_cstr; + int i; + + /* Build cloog loop for all loops, that are in the uppermost loop layer of + this SCoP. */ + for (i = 0; VEC_iterate (loop_p, SCOP_LOOP_NEST (scop), i, loop); i++) + if (!loop_in_scop_p (loop_outer (loop), scop)) + { + /* The outermost constraints is a matrix that has: + -first column: eq/ineq boolean + -last column: a constant + -scop_nb_params columns for the parameters used in the scop. */ + outer_cstr = cloog_matrix_alloc (0, scop_nb_params (scop) + 2); + build_loop_iteration_domains (scop, loop, outer_cstr, 0); + cloog_matrix_free (outer_cstr); + } + + return (i != 0); +} + +/* Initializes an equation CY of the access matrix using the + information for a subscript from ACCESS_FUN, relatively to the loop + indexes from LOOP_NEST and parameter indexes from PARAMS. NDIM is + the dimension of the array access, i.e. the number of + subscripts. Returns true when the operation succeeds. */ + +static bool +build_access_matrix_with_af (tree access_fun, lambda_vector cy, + scop_p scop, int ndim) +{ + switch (TREE_CODE (access_fun)) + { + case POLYNOMIAL_CHREC: + { + tree left = CHREC_LEFT (access_fun); + tree right = CHREC_RIGHT (access_fun); + int var; + + if (TREE_CODE (right) != INTEGER_CST) + return false; + + var = loop_iteration_vector_dim (CHREC_VARIABLE (access_fun), scop); + cy[var] = int_cst_value (right); + + switch (TREE_CODE (left)) + { + case POLYNOMIAL_CHREC: + return build_access_matrix_with_af (left, cy, scop, ndim); + + case INTEGER_CST: + cy[ndim - 1] = int_cst_value (left); + return true; + + default: + /* FIXME: access_fn can have parameters. */ + return false; + } + } + case INTEGER_CST: + cy[ndim - 1] = int_cst_value (access_fun); + return true; + + default: + /* FIXME: access_fn can have parameters. */ + return false; + } +} + +/* Initialize the access matrix in the data reference REF with respect + to the loop nesting LOOP_NEST. Return true when the operation + succeeded. */ + +static bool +build_access_matrix (data_reference_p ref, graphite_bb_p gb) +{ + int i, ndim = DR_NUM_DIMENSIONS (ref); + struct access_matrix *am = GGC_NEW (struct access_matrix); + + AM_MATRIX (am) = VEC_alloc (lambda_vector, heap, ndim); + DR_SCOP (ref) = GBB_SCOP (gb); + + for (i = 0; i < ndim; i++) + { + lambda_vector v = lambda_vector_new (ref_nb_loops (ref)); + scop_p scop = GBB_SCOP (gb); + tree af = DR_ACCESS_FN (ref, i); + + if (!build_access_matrix_with_af (af, v, scop, ref_nb_loops (ref))) + return false; + + VEC_safe_push (lambda_vector, heap, AM_MATRIX (am), v); + } + + DR_ACCESS_MATRIX (ref) = am; + return true; +} + +/* Build the access matrices for the data references in the SCOP. */ + +static void +build_scop_data_accesses (scop_p scop) +{ + int i; + graphite_bb_p gb; + + for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gb); i++) + { + int j; + gimple_stmt_iterator gsi; + data_reference_p dr; + struct loop *nest = outermost_loop_in_scop (scop, GBB_BB (gb)); + + /* On each statement of the basic block, gather all the occurences + to read/write memory. */ + GBB_DATA_REFS (gb) = VEC_alloc (data_reference_p, heap, 5); + for (gsi = gsi_start_bb (GBB_BB (gb)); !gsi_end_p (gsi); gsi_next (&gsi)) + find_data_references_in_stmt (nest, gsi_stmt (gsi), + &GBB_DATA_REFS (gb)); + + /* FIXME: Construction of access matrix is disabled until some + pass, like the data dependence analysis, is using it. */ + continue; + + /* Construct the access matrix for each data ref, with respect to + the loop nest of the current BB in the considered SCOP. */ + for (j = 0; + VEC_iterate (data_reference_p, GBB_DATA_REFS (gb), j, dr); + j++) + { + bool res = build_access_matrix (dr, gb); + + /* FIXME: At this point the DRs should always have an affine + form. For the moment this fails as build_access_matrix + does not build matrices with parameters. */ + gcc_assert (res); + } + } +} + +/* Converts a GMP constant value to a tree and returns it. */ + +static tree +gmp_cst_to_tree (Value v) +{ + return build_int_cst (integer_type_node, value_get_si (v)); +} + +/* Returns the tree variable from the name NAME that was given in + Cloog representation. All the parameters are stored in PARAMS, and + all the loop induction variables are stored in IVSTACK. + + FIXME: This is a hack, and Cloog should be fixed to not work with + variable names represented as "char *string", but with void + pointers that could be casted back to a tree. The only problem in + doing that is that Cloog's pretty printer still assumes that + variable names are char *strings. The solution would be to have a + function pointer for pretty-printing that can be redirected to be + print_generic_stmt in our case, or fprintf by default. + ??? Too ugly to live. */ + +static tree +clast_name_to_gcc (const char *name, VEC (name_tree, heap) *params, + loop_iv_stack ivstack) +{ + int i; + name_tree t; + tree iv; + + for (i = 0; VEC_iterate (name_tree, params, i, t); i++) + if (!strcmp (name, t->name)) + return t->t; + + iv = loop_iv_stack_get_iv_from_name (ivstack, name); + if (iv) + return iv; + + gcc_unreachable (); +} + +/* Converts a Cloog AST expression E back to a GCC expression tree. */ + +static tree +clast_to_gcc_expression (struct clast_expr *e, + VEC (name_tree, heap) *params, + loop_iv_stack ivstack) +{ + tree type = integer_type_node; + + gcc_assert (e); + + switch (e->type) + { + case expr_term: + { + struct clast_term *t = (struct clast_term *) e; + + if (t->var) + { + if (value_one_p (t->val)) + return clast_name_to_gcc (t->var, params, ivstack); + + else if (value_mone_p (t->val)) + return fold_build1 (NEGATE_EXPR, type, + clast_name_to_gcc (t->var, params, ivstack)); + else + return fold_build2 (MULT_EXPR, type, + gmp_cst_to_tree (t->val), + clast_name_to_gcc (t->var, params, ivstack)); + } + else + return gmp_cst_to_tree (t->val); + } + + case expr_red: + { + struct clast_reduction *r = (struct clast_reduction *) e; + tree left, right; + + switch (r->type) + { + case clast_red_sum: + if (r->n == 1) + return clast_to_gcc_expression (r->elts[0], params, ivstack); + + else + { + gcc_assert (r->n >= 1 + && r->elts[0]->type == expr_term + && r->elts[1]->type == expr_term); + + left = clast_to_gcc_expression (r->elts[0], params, ivstack); + right = clast_to_gcc_expression (r->elts[1], params, ivstack); + return fold_build2 (PLUS_EXPR, type, left, right); + } + + break; + + case clast_red_min: + if (r->n == 1) + return clast_to_gcc_expression (r->elts[0], params, ivstack); + + else if (r->n == 2) + { + left = clast_to_gcc_expression (r->elts[0], params, ivstack); + right = clast_to_gcc_expression (r->elts[1], params, ivstack); + return fold_build2 (MIN_EXPR, type, left, right); + } + + else + gcc_unreachable(); + + break; + + case clast_red_max: + if (r->n == 1) + return clast_to_gcc_expression (r->elts[0], params, ivstack); + + else if (r->n == 2) + { + left = clast_to_gcc_expression (r->elts[0], params, ivstack); + right = clast_to_gcc_expression (r->elts[1], params, ivstack); + return fold_build2 (MAX_EXPR, type, left, right); + } + + else + gcc_unreachable(); + + break; + + default: + gcc_unreachable (); + } + break; + } + + case expr_bin: + { + struct clast_binary *b = (struct clast_binary *) e; + struct clast_expr *lhs = (struct clast_expr *) b->LHS; + struct clast_expr *rhs = (struct clast_expr *) b->RHS; + tree tl = clast_to_gcc_expression (lhs, params, ivstack); + + /* FIXME: The next statement produces a warning: Cloog assumes + that the RHS is a constant, but this is a "void *" pointer + that should be casted into a Value, but this cast cannot be + done as Value is a GMP type, that is an array. Cloog must + be fixed for removing this warning. */ + tree tr = gmp_cst_to_tree (rhs); + + switch (b->type) + { + case clast_bin_fdiv: + return fold_build2 (FLOOR_DIV_EXPR, type, tl, tr); + + case clast_bin_cdiv: + return fold_build2 (CEIL_DIV_EXPR, type, tl, tr); + + case clast_bin_div: + return fold_build2 (EXACT_DIV_EXPR, type, tl, tr); + + case clast_bin_mod: + return fold_build2 (TRUNC_MOD_EXPR, type, tl, tr); + + default: + gcc_unreachable (); + } + } + + default: + gcc_unreachable (); + } + + return NULL_TREE; +} + +/* Translates a clast equation CLEQ to a tree. */ + +static tree +graphite_translate_clast_equation (scop_p scop, + struct clast_equation *cleq, + loop_iv_stack ivstack) +{ + enum tree_code comp; + tree lhs = clast_to_gcc_expression (cleq->LHS, SCOP_PARAMS (scop), ivstack); + tree rhs = clast_to_gcc_expression (cleq->RHS, SCOP_PARAMS (scop), ivstack); + + if (cleq->sign == 0) + comp = EQ_EXPR; + + else if (cleq->sign > 0) + comp = GE_EXPR; + + else + comp = LE_EXPR; + + return fold_build2 (comp, integer_type_node, lhs, rhs); +} + +/* Creates the test for the condition in STMT. */ + +static tree +graphite_create_guard_cond_expr (scop_p scop, struct clast_guard *stmt, + loop_iv_stack ivstack) +{ + tree cond = NULL; + int i; + + for (i = 0; i < stmt->n; i++) + { + tree eq = graphite_translate_clast_equation (scop, &stmt->eq[i], ivstack); + + if (cond) + cond = fold_build2 (TRUTH_AND_EXPR, integer_type_node, cond, eq); + else + cond = eq; + } + + return cond; +} + +/* Creates a new if region corresponding to Cloog's guard. */ + +static edge +graphite_create_new_guard (scop_p scop, edge entry_edge, + struct clast_guard *stmt, + loop_iv_stack ivstack) +{ + tree cond_expr = graphite_create_guard_cond_expr (scop, stmt, ivstack); + edge exit_edge = create_empty_if_region_on_edge (entry_edge, cond_expr); + return exit_edge; +} + + +/* Creates a new LOOP corresponding to Cloog's STMT. Inserts an induction + variable for the new LOOP. New LOOP is attached to CFG starting at + ENTRY_EDGE. LOOP is inserted into the loop tree and becomes the child + loop of the OUTER_LOOP. */ + +static struct loop * +graphite_create_new_loop (scop_p scop, edge entry_edge, + struct clast_for *stmt, loop_iv_stack ivstack, + loop_p outer) +{ + struct loop *loop; + tree ivvar; + tree stride, lowb, upb; + tree iv_before; + + gcc_assert (stmt->LB + && stmt->UB); + + stride = gmp_cst_to_tree (stmt->stride); + lowb = clast_to_gcc_expression (stmt->LB, SCOP_PARAMS (scop), ivstack); + ivvar = create_tmp_var (integer_type_node, "graphiteIV"); + add_referenced_var (ivvar); + + upb = clast_to_gcc_expression (stmt->UB, SCOP_PARAMS (scop), ivstack); + loop = create_empty_loop_on_edge (entry_edge, lowb, stride, upb, ivvar, + &iv_before, outer ? outer + : entry_edge->src->loop_father); + + loop_iv_stack_push (ivstack, iv_before, stmt->iterator); + + return loop; +} + +/* Remove all the edges from EDGES except the edge KEEP. */ + +static void +remove_all_edges_1 (VEC (edge, gc) *edges, edge keep) +{ + edge e; + edge_iterator ei; + + for (ei = ei_start (edges); (e = ei_safe_edge (ei)); ) + { + if (e != keep) + { + remove_edge (e); + e = ei_safe_edge (ei); + } + else + ei_next (&ei); + } +} + +/* Remove all the edges from BB except the edge KEEP. */ + +static void +remove_all_edges (basic_block bb, edge keep) +{ + remove_all_edges_1 (bb->succs, keep); + remove_all_edges_1 (bb->preds, keep); +} + +/* Rename the SSA_NAMEs used in STMT and that appear in IVSTACK. */ + +static void +graphite_rename_ivs_stmt (gimple stmt, graphite_bb_p gbb, scop_p scop, + loop_p old, loop_iv_stack ivstack) +{ + ssa_op_iter iter; + use_operand_p use_p; + + FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE) + { + tree use = USE_FROM_PTR (use_p); + tree new_iv = NULL; + name_tree old_iv = get_old_iv_from_ssa_name (scop, old, use); + + if (old_iv) + new_iv = loop_iv_stack_get_iv (ivstack, + gbb_loop_index (gbb, old_iv->loop)); + + if (new_iv) + SET_USE (use_p, new_iv); + } +} + +/* Returns true if SSA_NAME is a parameter of SCOP. */ + +static bool +is_parameter (scop_p scop, tree ssa_name) +{ + int i; + VEC (name_tree, heap) *params = SCOP_PARAMS (scop); + name_tree param; + + for (i = 0; VEC_iterate (name_tree, params, i, param); i++) + if (param->t == ssa_name) + return true; + + return false; +} + +/* Returns true if NAME is an old induction variable in SCOP. OLD is + the original loop that contained the definition of NAME. */ + +static bool +is_old_iv (scop_p scop, loop_p old, tree name) +{ + return get_old_iv_from_ssa_name (scop, old, name) != NULL; + +} + +static void expand_scalar_variables_stmt (gimple, graphite_bb_p, scop_p, loop_p, + loop_iv_stack); + +/* Constructs a tree which only contains old_ivs and parameters. Any + other variables that are defined outside GBB will be eliminated by + using their definitions in the constructed tree. OLD_LOOP_FATHER + is the original loop that contained GBB. */ + +static tree +expand_scalar_variables_expr (tree type, tree op0, enum tree_code code, + tree op1, graphite_bb_p gbb, scop_p scop, + loop_p old_loop_father, loop_iv_stack ivstack) +{ + if (TREE_CODE_CLASS (code) == tcc_constant + && code == INTEGER_CST) + return op0; + + if (TREE_CODE_CLASS (code) == tcc_unary) + { + tree op0_type = TREE_TYPE (op0); + enum tree_code op0_code = TREE_CODE (op0); + tree op0_expr = + expand_scalar_variables_expr (op0_type, op0, op0_code, + NULL, gbb, scop, old_loop_father, + ivstack); + + return fold_build1 (code, type, op0_expr); + } + + if (TREE_CODE_CLASS (code) == tcc_binary) + { + tree op0_type = TREE_TYPE (op0); + enum tree_code op0_code = TREE_CODE (op0); + tree op0_expr = + expand_scalar_variables_expr (op0_type, op0, op0_code, + NULL, gbb, scop, old_loop_father, + ivstack); + tree op1_type = TREE_TYPE (op1); + enum tree_code op1_code = TREE_CODE (op1); + tree op1_expr = + expand_scalar_variables_expr (op1_type, op1, op1_code, + NULL, gbb, scop, old_loop_father, + ivstack); + + return fold_build2 (code, type, op0_expr, op1_expr); + } + + if (code == SSA_NAME) + { + tree var0, var1; + gimple def_stmt; + enum tree_code subcode; + + if(is_parameter (scop, op0) || + is_old_iv (scop, old_loop_father, op0)) + return op0; + + def_stmt = SSA_NAME_DEF_STMT (op0); + + if (gimple_bb (def_stmt) == GBB_BB (gbb)) + { + /* If the defining statement is in the basic block already + we do not need to create a new expression for it, we + only need to ensure its operands are expanded. */ + expand_scalar_variables_stmt (def_stmt, gbb, scop, + old_loop_father, ivstack); + return op0; + + } + else + { + if (gimple_code (def_stmt) != GIMPLE_ASSIGN) + return op0; + + var0 = gimple_assign_rhs1 (def_stmt); + subcode = gimple_assign_rhs_code (def_stmt); + var1 = gimple_assign_rhs2 (def_stmt); + + return expand_scalar_variables_expr (type, var0, subcode, var1, + gbb, scop, old_loop_father, + ivstack); + } + } + + gcc_unreachable (); + return NULL; +} + +/* Replicates any uses of non-parameters and non-old-ivs variablesthat + are defind outside GBB with code that is inserted in GBB. + OLD_LOOP_FATHER is the original loop that contained STMT. */ + +static void +expand_scalar_variables_stmt (gimple stmt, graphite_bb_p gbb, scop_p scop, + loop_p old_loop_father, loop_iv_stack ivstack) +{ + ssa_op_iter iter; + use_operand_p use_p; + basic_block bb = GBB_BB (gbb); + + FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE) + { + tree use = USE_FROM_PTR (use_p); + tree type = TREE_TYPE (use); + enum tree_code code = TREE_CODE (use); + tree use_expr = expand_scalar_variables_expr (type, use, code, NULL, + gbb, scop, old_loop_father, + ivstack); + if (use_expr != use) + { + gimple_stmt_iterator gsi = gsi_after_labels (bb); + tree new_use = + force_gimple_operand_gsi (&gsi, use_expr, true, NULL, + true, GSI_NEW_STMT); + SET_USE (use_p, new_use); + } + } +} + +/* Copies the definitions outside of GBB of variables that are not + induction variables nor parameters. GBB must only contain + "external" references to these types of variables. OLD_LOOP_FATHER + is the original loop that contained GBB. */ + +static void +expand_scalar_variables (graphite_bb_p gbb, scop_p scop, + loop_p old_loop_father, loop_iv_stack ivstack) +{ + basic_block bb = GBB_BB (gbb); + gimple_stmt_iterator gsi; + + for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);) + { + gimple stmt = gsi_stmt (gsi); + expand_scalar_variables_stmt (stmt, gbb, scop, old_loop_father, + ivstack); + gsi_next (&gsi); + } +} + +/* Rename all the SSA_NAMEs from block GBB that appear in IVSTACK in + terms of new induction variables. OLD_LOOP_FATHER is the original + loop that contained GBB. */ + +static void +graphite_rename_ivs (graphite_bb_p gbb, scop_p scop, loop_p old_loop_father, + loop_iv_stack ivstack) +{ + basic_block bb = GBB_BB (gbb); + gimple_stmt_iterator gsi; + + for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);) + { + gimple stmt = gsi_stmt (gsi); + + if (gimple_get_lhs (stmt) + && TREE_CODE (gimple_get_lhs (stmt)) == SSA_NAME + && get_old_iv_from_ssa_name (scop, old_loop_father, + gimple_get_lhs (stmt))) + gsi_remove (&gsi, false); + else + { + graphite_rename_ivs_stmt (stmt, gbb, scop, old_loop_father, ivstack); + gsi_next (&gsi); + } + } +} + +/* Move all the PHI nodes from block FROM to block TO. + OLD_LOOP_FATHER is the original loop that contained FROM. */ + +static void +move_phi_nodes (scop_p scop, loop_p old_loop_father, basic_block from, + basic_block to) +{ + gimple_stmt_iterator gsi; + + for (gsi = gsi_start_phis (from); !gsi_end_p (gsi);) + { + gimple phi = gsi_stmt (gsi); + tree op = gimple_phi_result (phi); + + if (get_old_iv_from_ssa_name (scop, old_loop_father, op) == NULL) + { + gimple new_phi = make_phi_node (op, 0); + add_phi_node_to_bb (new_phi, to); + } + remove_phi_node (&gsi, false); + } +} + +/* Remove condition from BB. */ + +static void +remove_condition (basic_block bb) +{ + gimple last = last_stmt (bb); + + if (last && gimple_code (last) == GIMPLE_COND) + { + gimple_stmt_iterator gsi = gsi_last_bb (bb); + gsi_remove (&gsi, true); + } +} + +/* Returns the first successor edge of BB with EDGE_TRUE_VALUE flag set. */ + +static edge +get_true_edge_from_guard_bb (basic_block bb) +{ + edge e; + edge_iterator ei; + + FOR_EACH_EDGE (e, ei, bb->succs) + if (e->flags & EDGE_TRUE_VALUE) + return e; + + gcc_unreachable (); + return NULL; +} + +/* Translates a CLAST statement STMT to GCC representation. NEXT_E is + the edge where new generated code should be attached. BB_EXIT is the last + basic block that defines the scope of code generation. CONTEXT_LOOP is the + loop in which the generated code will be placed (might be NULL). */ + +static edge +translate_clast (scop_p scop, struct loop *context_loop, + struct clast_stmt *stmt, edge next_e, loop_iv_stack ivstack) +{ + if (!stmt) + return next_e; + + if (CLAST_STMT_IS_A (stmt, stmt_root)) + return translate_clast (scop, context_loop, stmt->next, next_e, ivstack); + + if (CLAST_STMT_IS_A (stmt, stmt_user)) + { + CloogStatement *cs = ((struct clast_user_stmt *) stmt)->statement; + graphite_bb_p gbb = (graphite_bb_p) cloog_statement_usr (cs); + basic_block bb = gbb->bb; + loop_p old_loop_father = bb->loop_father; + + if (bb == ENTRY_BLOCK_PTR) + return next_e; + + remove_condition (bb); + expand_scalar_variables (gbb, scop, old_loop_father, ivstack); + remove_all_edges (bb, next_e); + move_phi_nodes (scop, old_loop_father, bb, next_e->src); + redirect_edge_succ_nodup (next_e, bb); + + if (context_loop) + { + remove_bb_from_loops (bb); + add_bb_to_loop (bb, context_loop); + } + + set_immediate_dominator (CDI_DOMINATORS, next_e->dest, next_e->src); + mark_virtual_ops_in_bb (bb); + next_e = make_edge (bb, + context_loop ? context_loop->latch : EXIT_BLOCK_PTR, + EDGE_FALLTHRU);; + graphite_rename_ivs (gbb, scop, old_loop_father, ivstack); + return translate_clast (scop, context_loop, stmt->next, next_e, ivstack); + } + + if (CLAST_STMT_IS_A (stmt, stmt_for)) + { + struct loop *loop + = graphite_create_new_loop (scop, next_e, (struct clast_for *) stmt, + ivstack, context_loop ? context_loop + : get_loop (0)); + edge last_e = single_exit (loop); + + next_e = translate_clast (scop, loop, ((struct clast_for *) stmt)->body, + single_pred_edge (loop->latch), ivstack); + redirect_edge_succ_nodup (next_e, loop->latch); + + set_immediate_dominator (CDI_DOMINATORS, next_e->dest, next_e->src); + loop_iv_stack_pop (ivstack); + + return translate_clast (scop, context_loop, stmt->next, last_e, ivstack); + } + + if (CLAST_STMT_IS_A (stmt, stmt_guard)) + { + edge last_e = graphite_create_new_guard (scop, next_e, + ((struct clast_guard *) stmt), + ivstack); + edge true_e = get_true_edge_from_guard_bb (next_e->dest); + next_e = translate_clast (scop, context_loop, + ((struct clast_guard *) stmt)->then, + true_e, ivstack); + redirect_edge_succ_nodup (next_e, last_e->src); + return translate_clast (scop, context_loop, stmt->next, last_e, ivstack); + } + + if (CLAST_STMT_IS_A (stmt, stmt_block)) + { + next_e = translate_clast (scop, context_loop, + ((struct clast_block *) stmt)->body, + next_e, ivstack); + return translate_clast (scop, context_loop, stmt->next, next_e, ivstack); + } + + gcc_unreachable (); +} + +/* Build cloog program for SCoP. */ + +static void +build_cloog_prog (scop_p scop) +{ + int i; + int max_nb_loops = scop_max_loop_depth (scop); + graphite_bb_p gbb; + CloogLoop *loop_list = NULL; + CloogBlockList *block_list = NULL; + CloogDomainList *scattering = NULL; + CloogProgram *prog = SCOP_PROG (scop); + int nbs = 2 * max_nb_loops + 1; + int *scaldims = (int *) xmalloc (nbs * (sizeof (int))); + + cloog_program_set_nb_scattdims (prog, nbs); + initialize_cloog_names (scop); + + for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gbb); i++) + { + /* Build new block. */ + CloogMatrix *domain = GBB_DOMAIN (gbb); + CloogStatement *stmt = cloog_statement_alloc (GBB_BB (gbb)->index); + CloogBlock *block = cloog_block_alloc (stmt, 0, NULL, + nb_loops_around_gb (gbb)); + cloog_statement_set_usr (stmt, gbb); + + /* Add empty domain to all bbs, which do not yet have a domain, as they + are not part of any loop. */ + if (domain == NULL) + { + domain = cloog_matrix_alloc (0, scop_nb_params (scop) + 2); + GBB_DOMAIN (gbb) = domain; + } + + /* Build loop list. */ + { + CloogLoop *new_loop_list = cloog_loop_malloc (); + cloog_loop_set_next (new_loop_list, loop_list); + cloog_loop_set_domain (new_loop_list, + cloog_domain_matrix2domain (domain)); + cloog_loop_set_block (new_loop_list, block); + loop_list = new_loop_list; + } + + /* Build block list. */ + { + CloogBlockList *new_block_list = cloog_block_list_malloc (); + + cloog_block_list_set_next (new_block_list, block_list); + cloog_block_list_set_block (new_block_list, block); + block_list = new_block_list; + } + + /* Build scattering list. */ + { + /* XXX: Replace with cloog_domain_list_alloc(), when available. */ + CloogDomainList *new_scattering + = (CloogDomainList *) xmalloc (sizeof (CloogDomainList)); + CloogMatrix *scat_mat = schedule_to_scattering (gbb, nbs); + + cloog_set_next_domain (new_scattering, scattering); + cloog_set_domain (new_scattering, + cloog_domain_matrix2domain (scat_mat)); + scattering = new_scattering; + cloog_matrix_free (scat_mat); + } + } + + cloog_program_set_loop (prog, loop_list); + cloog_program_set_blocklist (prog, block_list); + + for (i = 0; i < nbs; i++) + scaldims[i] = 0 ; + + cloog_program_set_scaldims (prog, scaldims); + + /* Extract scalar dimensions to simplify the code generation problem. */ + cloog_program_extract_scalars (prog, scattering); + + /* Apply scattering. */ + cloog_program_scatter (prog, scattering); + + /* Iterators corresponding to scalar dimensions have to be extracted. */ + cloog_names_scalarize (cloog_program_names (prog), nbs, + cloog_program_scaldims (prog)); + + /* Free blocklist. */ + { + CloogBlockList *next = cloog_program_blocklist (prog); + + while (next) + { + CloogBlockList *toDelete = next; + next = cloog_block_list_next (next); + cloog_block_list_set_next (toDelete, NULL); + cloog_block_list_set_block (toDelete, NULL); + cloog_block_list_free (toDelete); + } + cloog_program_set_blocklist (prog, NULL); + } +} + +/* Return the options that will be used in GLOOG. */ + +static CloogOptions * +set_cloog_options (void) +{ + CloogOptions *options = cloog_options_malloc (); + + /* Change cloog output language to C. If we do use FORTRAN instead, cloog + will stop e.g. with "ERROR: unbounded loops not allowed in FORTRAN.", if + we pass an incomplete program to cloog. */ + options->language = LANGUAGE_C; + + /* Enable complex equality spreading: removes dummy statements + (assignments) in the generated code which repeats the + substitution equations for statements. This is useless for + GLooG. */ + options->esp = 1; + + /* Enable C pretty-printing mode: normalizes the substitution + equations for statements. */ + options->cpp = 1; + + /* Allow cloog to build strides with a stride width different to one. + This example has stride = 4: + + for (i = 0; i < 20; i += 4) + A */ + options->strides = 1; + + /* Disable optimizations and make cloog generate source code closer to the + input. This is useful for debugging, but later we want the optimized + code. + + XXX: We can not disable optimizations, as loop blocking is not working + without them. */ + if (0) + { + options->f = -1; + options->l = INT_MAX; + } + + return options; +} + +/* Prints STMT to STDERR. */ + +void +debug_clast_stmt (struct clast_stmt *stmt) +{ + CloogOptions *options = set_cloog_options (); + + pprint (stderr, stmt, 0, options); +} + +/* Find the right transform for the SCOP, and return a Cloog AST + representing the new form of the program. */ + +static struct clast_stmt * +find_transform (scop_p scop) +{ + CloogProgram *prog; + struct clast_stmt *stmt; + CloogOptions *options = set_cloog_options (); + + /* Connect new cloog prog generation to graphite. */ + build_cloog_prog (scop); + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Cloog Input [\n"); + cloog_program_print (dump_file, SCOP_PROG(scop)); + fprintf (dump_file, "]\n"); + } + + prog = cloog_program_generate (SCOP_PROG (scop), options); + stmt = cloog_clast_create (prog, options); + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Cloog Output[\n"); + pprint (dump_file, stmt, 0, options); + cloog_program_dump_cloog (dump_file, prog); + fprintf (dump_file, "]\n"); + } + + cloog_options_free (options); + return stmt; +} + +/* Return a vector of all the virtual phi nodes in the current + function. */ + +static VEC (gimple, heap) * +collect_virtual_phis (void) +{ + gimple_stmt_iterator si; + gimple_vec phis = VEC_alloc (gimple, heap, 3); + basic_block bb; + + FOR_EACH_BB (bb) + for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si)) + /* The phis we moved will have 0 arguments because the + original edges were removed. */ + if (gimple_phi_num_args (gsi_stmt (si)) == 0) + VEC_safe_push (gimple, heap, phis, gsi_stmt (si)); + + /* Deallocate if we did not find any. */ + if (VEC_length (gimple, phis) == 0) + { + VEC_free (gimple, heap, phis); + phis = NULL; + } + + return phis; +} + +/* Find a virtual definition for variable VAR in BB. */ + +static tree +find_vdef_for_var_in_bb (basic_block bb, tree var) +{ + gimple_stmt_iterator gsi; + gimple phi; + def_operand_p def_var; + vuse_vec_p vv; + ssa_op_iter op_iter; + + for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi)) + FOR_EACH_SSA_VDEF_OPERAND (def_var, vv, gsi_stmt (gsi), op_iter) + if (SSA_NAME_VAR (*def_var) == var) + return *def_var; + + for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi)) + FOR_EACH_SSA_DEF_OPERAND (def_var, gsi_stmt (gsi), op_iter, SSA_OP_DEF) + if (SSA_NAME_VAR (*def_var) == var) + return *def_var; + + for (gsi = gsi_start_phis (bb); !gsi_end_p(gsi); gsi_next (&gsi)) + { + phi = gsi_stmt (gsi); + if (SSA_NAME_VAR (PHI_RESULT (phi)) == var) + return PHI_RESULT (phi); + } + + return NULL; +} + +/* Recursive helper. */ + +static tree +find_vdef_for_var_1 (basic_block bb, struct pointer_set_t *visited, tree var) +{ + tree result = NULL; + edge_iterator ei; + edge pred_edge; + + if (pointer_set_contains (visited, bb)) + return NULL; + + pointer_set_insert (visited, bb); + result = find_vdef_for_var_in_bb (bb, var); + + if (!result) + FOR_EACH_EDGE (pred_edge, ei, bb->preds) + if (!result) + result = find_vdef_for_var_1 (pred_edge->src, visited, var); + + return result; +} + +/* Finds a virtual definition for variable VAR. */ + +static tree +find_vdef_for_var (basic_block bb, tree var) +{ + struct pointer_set_t *visited = pointer_set_create (); + tree def = find_vdef_for_var_1 (bb, visited, var); + + pointer_set_destroy (visited); + return def; +} + +/* Update the virtual phis after loop bodies are moved to new + loops. */ + +static void +patch_phis_for_virtual_defs (void) +{ + int i; + gimple phi; + VEC (gimple, heap) *virtual_phis = collect_virtual_phis (); + + for (i = 0; VEC_iterate (gimple, virtual_phis, i, phi); i++) + { + basic_block bb = gimple_bb (phi); + edge_iterator ei; + edge pred_edge; + gimple_stmt_iterator gsi; + gimple new_phi; + tree phi_result = PHI_RESULT (phi); + tree var = SSA_NAME_VAR (phi_result); + + new_phi = create_phi_node (phi_result, bb); + SSA_NAME_DEF_STMT (phi_result) = new_phi; + + FOR_EACH_EDGE (pred_edge, ei, bb->preds) + { + tree def = find_vdef_for_var (pred_edge->src, var); + + if (def) + add_phi_arg (new_phi, def, pred_edge); + else + add_phi_arg (new_phi, gimple_default_def (cfun, var), pred_edge); + } + + gsi = gsi_for_stmt (phi); + remove_phi_node (&gsi, false); + } +} + +/* Mark the original loops of SCOP for removal, replacing their header + field with NULL. */ + +static void +mark_old_loops (scop_p scop) +{ + int i; + struct loop *loop; + + for (i = 0; VEC_iterate (loop_p, SCOP_LOOP_NEST (scop), i, loop); i++) + { + loop->header = NULL; + loop->latch = NULL; + } +} + +/* Scan the loops and remove the ones that have been marked for + removal. */ + +static void +remove_dead_loops (void) +{ + struct loop *loop, *ploop; + loop_iterator li; + + FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST) + { + /* Remove only those loops that we marked to be removed with + mark_old_loops. */ + if (loop->header) + continue; + + while (loop->inner) + { + ploop = loop->inner; + flow_loop_tree_node_remove (ploop); + flow_loop_tree_node_add (loop_outer (loop), ploop); + } + + /* Remove the loop and free its data. */ + delete_loop (loop); + } +} + +/* Returns true when it is possible to generate code for this STMT. + For the moment we cannot generate code when Cloog decides to + duplicate a statement, as we do not do a copy, but a move. + USED_BASIC_BLOCKS records the blocks that have already been seen. + We return false if we have to generate code twice for the same + block. */ + +static bool +can_generate_code_stmt (struct clast_stmt *stmt, + struct pointer_set_t *used_basic_blocks) +{ + if (!stmt) + return true; + + if (CLAST_STMT_IS_A (stmt, stmt_root)) + return can_generate_code_stmt (stmt->next, used_basic_blocks); + + if (CLAST_STMT_IS_A (stmt, stmt_user)) + { + CloogStatement *cs = ((struct clast_user_stmt *) stmt)->statement; + graphite_bb_p gbb = (graphite_bb_p) cloog_statement_usr (cs); + + if (pointer_set_contains (used_basic_blocks, gbb)) + return false; + pointer_set_insert (used_basic_blocks, gbb); + return can_generate_code_stmt (stmt->next, used_basic_blocks); + } + + if (CLAST_STMT_IS_A (stmt, stmt_for)) + return can_generate_code_stmt (((struct clast_for *) stmt)->body, + used_basic_blocks) + && can_generate_code_stmt (stmt->next, used_basic_blocks); + + if (CLAST_STMT_IS_A (stmt, stmt_guard)) + return can_generate_code_stmt (((struct clast_guard *) stmt)->then, + used_basic_blocks); + + if (CLAST_STMT_IS_A (stmt, stmt_block)) + return can_generate_code_stmt (((struct clast_block *) stmt)->body, + used_basic_blocks) + && can_generate_code_stmt (stmt->next, used_basic_blocks); + + return false; +} + +/* Returns true when it is possible to generate code for this STMT. */ + +static bool +can_generate_code (struct clast_stmt *stmt) +{ + bool result; + struct pointer_set_t *used_basic_blocks = pointer_set_create (); + + result = can_generate_code_stmt (stmt, used_basic_blocks); + pointer_set_destroy (used_basic_blocks); + return result; +} + +/* Skip any definition that is a phi node with a single phi def. */ + +static tree +skip_phi_defs (tree ssa_name) +{ + tree result = ssa_name; + gimple def_stmt = SSA_NAME_DEF_STMT (ssa_name); + + if (gimple_code (def_stmt) == GIMPLE_PHI + && gimple_phi_num_args (def_stmt) == 1) + result = skip_phi_defs (gimple_phi_arg(def_stmt,0)->def); + + return result; +} + +/* Returns a VEC containing the phi-arg defs coming from SCOP_EXIT in + the destination block of SCOP_EXIT. */ + +static VEC (tree, heap) * +collect_scop_exit_phi_args (edge scop_exit) +{ + VEC (tree, heap) *phi_args = VEC_alloc (tree, heap, 1); + gimple_stmt_iterator gsi; + + for (gsi = gsi_start_phis (scop_exit->dest); !gsi_end_p (gsi); gsi_next (&gsi)) + { + gimple phi = gsi_stmt (gsi); + tree phi_arg = skip_phi_defs(PHI_ARG_DEF_FROM_EDGE (phi, scop_exit)); + + VEC_safe_push (tree, heap, phi_args, phi_arg); + } + + return phi_args; +} + +/* Patches (adds) PHI_ARGS to the phi nodes in SCOP_EXIT destination. */ + +static void +patch_scop_exit_phi_args (edge scop_exit, + VEC (tree, heap) *phi_args) +{ + int i = 0; + gimple_stmt_iterator gsi; + + for (gsi = gsi_start_phis (scop_exit->dest); !gsi_end_p (gsi); + gsi_next (&gsi), i++) + { + tree def = VEC_index (tree, phi_args, i); + gimple phi = gsi_stmt (gsi); + + gcc_assert (PHI_ARG_DEF_FROM_EDGE (phi, scop_exit) == NULL); + + add_phi_arg (phi, def, scop_exit); + } +} + +/* GIMPLE Loop Generator: generates loops from STMT in GIMPLE form for + the given SCOP. */ + +static void +gloog (scop_p scop, struct clast_stmt *stmt) +{ + edge new_scop_exit_edge = NULL; + basic_block scop_exit = SCOP_EXIT (scop); + VEC (tree, heap)* phi_args = + collect_scop_exit_phi_args (SESE_EXIT (SCOP_REGION (scop))); + VEC (name_tree, heap) *ivstack = VEC_alloc (name_tree, heap, 10); + edge construction_edge = SESE_ENTRY (SCOP_REGION (scop)); + basic_block old_scop_exit_idom = get_immediate_dominator (CDI_DOMINATORS, + scop_exit); + + if (!can_generate_code (stmt)) + { + cloog_clast_free (stmt); + return; + } + + new_scop_exit_edge = translate_clast (scop, + construction_edge->src->loop_father, + stmt, construction_edge, &ivstack); + redirect_edge_succ (new_scop_exit_edge, scop_exit); + if (!old_scop_exit_idom + || !dominated_by_p (CDI_DOMINATORS, SCOP_ENTRY (scop), + old_scop_exit_idom) + || SCOP_ENTRY (scop) == old_scop_exit_idom) + set_immediate_dominator (CDI_DOMINATORS, + new_scop_exit_edge->dest, + new_scop_exit_edge->src); + + cloog_clast_free (stmt); + + if (new_scop_exit_edge->dest == EXIT_BLOCK_PTR) + new_scop_exit_edge->flags = 0; + + find_unreachable_blocks (); + delete_unreachable_blocks (); + patch_phis_for_virtual_defs (); + patch_scop_exit_phi_args (new_scop_exit_edge, phi_args); + mark_old_loops (scop); + remove_dead_loops (); + rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa); + +#ifdef ENABLE_CHECKING + verify_loop_structure (); + verify_dominators (CDI_DOMINATORS); + verify_ssa (false); +#endif +} + +/* Returns the number of data references in SCOP. */ + +static int +nb_data_refs_in_scop (scop_p scop) +{ + int i; + graphite_bb_p gbb; + int res = 0; + + for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gbb); i++) + res += VEC_length (data_reference_p, GBB_DATA_REFS (gbb)); + + return res; +} + +/* Check if a graphite bb can be ignored in graphite. We ignore all + bbs, that only contain code, that will be eliminated later. + + TODO: - Move PHI nodes and scalar variables out of these bbs, that only + remain conditions and induction variables. */ + +static bool +gbb_can_be_ignored (graphite_bb_p gb) +{ + gimple_stmt_iterator gsi; + scop_p scop = GBB_SCOP (gb); + loop_p loop = GBB_BB (gb)->loop_father; + + if (VEC_length (data_reference_p, GBB_DATA_REFS(gb))) + return false; + + /* Check statements. */ + for (gsi = gsi_start_bb (GBB_BB (gb)); !gsi_end_p (gsi); gsi_next (&gsi)) + { + gimple stmt = gsi_stmt (gsi); + switch (gimple_code (stmt)) + { + /* Control flow expressions can be ignored, as they are + represented in the iteration domains and will be + regenerated by graphite. */ + case GIMPLE_COND: + case GIMPLE_GOTO: + case GIMPLE_SWITCH: + break; + + /* Scalar variables can be ignored, if we can regenerate + them later using their scalar evolution function. + XXX: Just a heuristic, that needs further investigation. */ + case GIMPLE_ASSIGN: + { + tree var = gimple_assign_lhs (stmt); + var = analyze_scalar_evolution (loop, var); + var = instantiate_scev (outermost_loop_in_scop (scop, + GBB_BB (gb)), + loop, var); + if (TREE_CODE (var) == SCEV_NOT_KNOWN) + return false; + break; + } + /* Otherwise not ignoreable. */ + default: + return false; + } + } + + return true; +} + +/* Remove all ignoreable gbbs from SCOP. */ + +static void +scop_remove_ignoreable_gbbs (scop_p scop) +{ + graphite_bb_p gb; + int i; + + int max_schedule = scop_max_loop_depth (scop) + 1; + lambda_vector last_schedule = lambda_vector_new (max_schedule); + lambda_vector_clear (last_schedule, max_schedule); + + /* Update schedules. */ + for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gb); i++) + { + int nb_loops = gbb_nb_loops (gb); + + if (GBB_STATIC_SCHEDULE (gb) [nb_loops] == 0) + last_schedule [nb_loops] = 0; + + if (gbb_can_be_ignored (gb)) + { + /* Mark gbb for remove. */ + bitmap_clear_bit (SCOP_BBS_B (scop), gb->bb->index); + GBB_SCOP (gb) = NULL; + last_schedule [nb_loops]--; + } + else + lambda_vector_add (GBB_STATIC_SCHEDULE (gb), last_schedule, + GBB_STATIC_SCHEDULE (gb), nb_loops + 1); + } + + /* Remove gbbs. */ + for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gb); i++) + if (GBB_SCOP (gb) == NULL) + { + VEC_unordered_remove (graphite_bb_p, SCOP_BBS (scop), i); + free_graphite_bb (gb); + /* XXX: Hackish? But working. */ + i--; + } + + graphite_sort_gbbs (scop); +} + +/* Move the loop at index LOOP and insert it before index NEW_LOOP_POS. + This transformartion is only valid, if the loop nest between i and k is + perfectly nested. Therefore we do not need to change the static schedule. + + Example: + + for (i = 0; i < 50; i++) + for (j ...) + for (k = 5; k < 100; k++) + A + + To move k before i use: + + graphite_trans_bb_move_loop (A, 2, 0) + + for (k = 5; k < 100; k++) + for (i = 0; i < 50; i++) + for (j ...) + A + + And to move k back: + + graphite_trans_bb_move_loop (A, 0, 2) + + This function does not check the validity of interchanging loops. + This should be checked before calling this function. */ + +static void +graphite_trans_bb_move_loop (graphite_bb_p gb, int loop, + int new_loop_pos) +{ + CloogMatrix *domain = GBB_DOMAIN (gb); + int row, j; + loop_p tmp_loop_p; + + gcc_assert (loop < gbb_nb_loops (gb) + && new_loop_pos < gbb_nb_loops (gb)); + + /* Update LOOPS vector. */ + tmp_loop_p = VEC_index (loop_p, GBB_LOOPS (gb), loop); + VEC_ordered_remove (loop_p, GBB_LOOPS (gb), loop); + VEC_safe_insert (loop_p, heap, GBB_LOOPS (gb), new_loop_pos, tmp_loop_p); + + /* Move the domain columns. */ + if (loop < new_loop_pos) + for (row = 0; row < domain->NbRows; row++) + { + Value tmp; + value_init (tmp); + value_assign (tmp, domain->p[row][loop + 1]); + + for (j = loop ; j < new_loop_pos - 1; j++) + value_assign (domain->p[row][j + 1], domain->p[row][j + 2]); + + value_assign (domain->p[row][new_loop_pos], tmp); + value_clear (tmp); + } + else + for (row = 0; row < domain->NbRows; row++) + { + Value tmp; + value_init (tmp); + value_assign (tmp, domain->p[row][loop + 1]); + + for (j = loop ; j > new_loop_pos; j--) + value_assign (domain->p[row][j + 1], domain->p[row][j]); + + value_assign (domain->p[row][new_loop_pos + 1], tmp); + value_clear (tmp); + } +} + +/* Get the index of the column representing constants in the DOMAIN + matrix. */ + +static int +const_column_index (CloogMatrix *domain) +{ + return domain->NbColumns - 1; +} + + +/* Get the first index that is positive or negative, determined + following the value of POSITIVE, in matrix DOMAIN in COLUMN. */ + +static int +get_first_matching_sign_row_index (CloogMatrix *domain, int column, + bool positive) +{ + int row; + + for (row = 0; row < domain->NbRows; row++) + { + int val = value_get_si (domain->p[row][column]); + + if (val > 0 && positive) + return row; + + else if (val < 0 && !positive) + return row; + } + + gcc_unreachable (); +} + +/* Get the lower bound of COLUMN in matrix DOMAIN. */ + +static int +get_lower_bound_row (CloogMatrix *domain, int column) +{ + return get_first_matching_sign_row_index (domain, column, true); +} + +/* Get the upper bound of COLUMN in matrix DOMAIN. */ + +static int +get_upper_bound_row (CloogMatrix *domain, int column) +{ + return get_first_matching_sign_row_index (domain, column, false); +} + +/* Get the lower bound of LOOP. */ + +static void +get_lower_bound (CloogMatrix *domain, int loop, Value lower_bound_result) +{ + int lower_bound_row = get_lower_bound_row (domain, loop); + value_init (lower_bound_result); + value_assign (lower_bound_result, + domain->p[lower_bound_row][const_column_index(domain)]); +} + +/* Get the upper bound of LOOP. */ + +static void +get_upper_bound (CloogMatrix *domain, int loop, Value upper_bound_result) +{ + int upper_bound_row = get_upper_bound_row (domain, loop); + value_init (upper_bound_result); + value_assign (upper_bound_result, + domain->p[upper_bound_row][const_column_index(domain)]); +} + +/* Strip mines the loop of BB at the position LOOP_DEPTH with STRIDE. + Always valid, but not always a performance improvement. */ + +static void +graphite_trans_bb_strip_mine (graphite_bb_p gb, int loop_depth, int stride) +{ + int row, col; + + CloogMatrix *domain = GBB_DOMAIN (gb); + CloogMatrix *new_domain = cloog_matrix_alloc (domain->NbRows + 3, + domain->NbColumns + 1); + + int col_loop_old = loop_depth + 2; + int col_loop_strip = col_loop_old - 1; + + Value old_lower_bound; + Value old_upper_bound; + + + gcc_assert (loop_depth <= gbb_nb_loops (gb) - 1); + + VEC_safe_insert (loop_p, heap, GBB_LOOPS (gb), loop_depth, NULL); + + GBB_DOMAIN (gb) = new_domain; + + /* + nrows = 4, ncols = 4 + eq i j c + 1 1 0 0 + 1 -1 0 99 + 1 0 1 0 + 1 0 -1 99 + */ + + /* Move domain. */ + for (row = 0; row < domain->NbRows; row++) + for (col = 0; col < domain->NbColumns; col++) + if (col <= loop_depth) + { + value_assign (new_domain->p[row][col], domain->p[row][col]); + } + else + { + value_assign (new_domain->p[row][col + 1], domain->p[row][col]); + } + + + /* + nrows = 6, ncols = 5 + outer inner + eq i jj j c + 1 1 0 0 0 + 1 -1 0 0 99 + 1 0 0 1 0 + 1 0 0 -1 99 + 0 0 0 0 0 + 0 0 0 0 0 + 0 0 0 0 0 + */ + + row = domain->NbRows; + + /* Add outer loop. */ + + get_lower_bound (new_domain, col_loop_old, old_lower_bound); + get_upper_bound (new_domain, col_loop_old, old_upper_bound); + + /* Set Lower Bound */ + value_set_si (new_domain->p[row][0], 1); + value_set_si (new_domain->p[row][col_loop_strip], 1); + value_assign (new_domain->p[row][const_column_index (new_domain)], + old_lower_bound); + row++; + + + /* + 6 5 + eq i jj j c + 1 1 0 0 0 + 1 -1 0 0 99 + 1 0 0 1 0 - + 1 0 0 -1 99 | copy old lower bound + 1 0 1 0 0 <- + 0 0 0 0 0 + 0 0 0 0 0 + */ + + { + Value new_upper_bound; + Value strip_size_value; + + value_init (new_upper_bound); + + value_init (strip_size_value); + value_set_si (strip_size_value, (int) stride); + + + value_pdivision(new_upper_bound,old_upper_bound,strip_size_value); + value_add_int (new_upper_bound, new_upper_bound, 1); + + /* Set Upper Bound */ + value_set_si (new_domain->p[row][0], 1); + value_set_si (new_domain->p[row][col_loop_strip], -1); + value_assign (new_domain->p[row][const_column_index (new_domain)], + new_upper_bound); + row++; + } + /* + 6 5 + eq i jj j c + 1 1 0 0 0 + 1 -1 0 0 99 + 1 0 0 1 0 + 1 0 0 -1 99 + 1 0 1 0 0 + 1 0 -1 0 25 (divide old upper bound with stride) + 0 0 0 0 0 + */ + + { + row = get_lower_bound_row (new_domain, col_loop_old); + /* Add local variable to keep linear representation. */ + value_set_si (new_domain->p[row][0], 1); + value_set_si (new_domain->p[row][const_column_index (new_domain)],0); + value_set_si (new_domain->p[row][col_loop_old], 1); + value_set_si (new_domain->p[row][col_loop_strip], -1*((int)stride)); + } + + /* + 6 5 + eq i jj j c + 1 1 0 0 0 + 1 -1 0 0 99 + 1 0 -1 1 0 + 1 0 0 -1 99 + 1 0 1 0 0 + 1 0 -1 0 25 (divide old upper bound with stride) + 0 0 0 0 0 + */ + + { + row = new_domain->NbRows-1; + + value_set_si (new_domain->p[row][0], 1); + value_set_si (new_domain->p[row][col_loop_old], -1); + value_set_si (new_domain->p[row][col_loop_strip], stride); + value_set_si (new_domain->p[row][const_column_index (new_domain)], + stride-1); + } + + /* + 6 5 + eq i jj j c + 1 1 0 0 0 i >= 0 + 1 -1 0 0 99 99 >= i + 1 0 -4 1 0 j >= 4*jj + 1 0 0 -1 99 99 >= j + 1 0 1 0 0 jj >= 0 + 1 0 -1 0 25 25 >= jj + 0 0 4 -1 3 jj+3 >= j + */ + + cloog_matrix_free (domain); + + /* Update static schedule. */ + { + int i; + int nb_loops = gbb_nb_loops (gb); + lambda_vector new_schedule = lambda_vector_new (nb_loops + 1); + + for (i = 0; i <= loop_depth; i++) + new_schedule[i] = GBB_STATIC_SCHEDULE (gb)[i]; + + for (i = loop_depth + 1; i <= nb_loops - 2; i++) + new_schedule[i + 2] = GBB_STATIC_SCHEDULE (gb)[i]; + + GBB_STATIC_SCHEDULE (gb) = new_schedule; + } +} + +/* Returns true when the strip mining of LOOP_INDEX by STRIDE is + profitable or undecidable. GB is the statement around which the + loops will be strip mined. */ + +static bool +strip_mine_profitable_p (graphite_bb_p gb, int stride, + int loop_index) +{ + bool res = true; + edge exit = NULL; + tree niter; + loop_p loop; + long niter_val; + + loop = VEC_index (loop_p, GBB_LOOPS (gb), loop_index); + exit = single_exit (loop); + + niter = find_loop_niter (loop, &exit); + if (niter == chrec_dont_know + || TREE_CODE (niter) != INTEGER_CST) + return true; + + niter_val = int_cst_value (niter); + + if (niter_val < stride) + { + res = false; + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "\nStrip Mining is not profitable for loop %d:", + loop_index); + fprintf (dump_file, "number of iterations is too low.\n"); + } + } + + return res; +} + +/* Determines when the interchange of LOOP_A and LOOP_B belonging to + SCOP is legal. */ + +static bool +is_interchange_valid (scop_p scop, int loop_a, int loop_b) +{ + bool res; + VEC (ddr_p, heap) *dependence_relations; + VEC (data_reference_p, heap) *datarefs; + + struct loop *nest = VEC_index (loop_p, SCOP_LOOP_NEST (scop), loop_a); + int depth = perfect_loop_nest_depth (nest); + lambda_trans_matrix trans; + + gcc_assert (loop_a < loop_b); + + dependence_relations = VEC_alloc (ddr_p, heap, 10 * 10); + datarefs = VEC_alloc (data_reference_p, heap, 10); + + if (!compute_data_dependences_for_loop (nest, true, &datarefs, + &dependence_relations)) + return false; + + if (dump_file && (dump_flags & TDF_DETAILS)) + dump_ddrs (dump_file, dependence_relations); + + trans = lambda_trans_matrix_new (depth, depth); + lambda_matrix_id (LTM_MATRIX (trans), depth); + + lambda_matrix_row_exchange (LTM_MATRIX (trans), 0, loop_b - loop_a); + + if (!lambda_transform_legal_p (trans, depth, dependence_relations)) + { + lambda_matrix_row_exchange (LTM_MATRIX (trans), 0, loop_b - loop_a); + res = false; + } + else + res = true; + + free_dependence_relations (dependence_relations); + free_data_refs (datarefs); + return res; +} + +/* Loop block the LOOPS innermost loops of GB with stride size STRIDE. + + Example + + for (i = 0; i <= 50; i++=4) + for (k = 0; k <= 100; k++=4) + for (l = 0; l <= 200; l++=4) + A + + To strip mine the two inner most loops with stride = 4 call: + + graphite_trans_bb_block (A, 4, 2) + + for (i = 0; i <= 50; i++) + for (kk = 0; kk <= 100; kk+=4) + for (ll = 0; ll <= 200; ll+=4) + for (k = kk; k <= min (100, kk + 3); k++) + for (l = ll; l <= min (200, ll + 3); l++) + A +*/ + +static bool +graphite_trans_bb_block (graphite_bb_p gb, int stride, int loops) +{ + int i, j; + int nb_loops = gbb_nb_loops (gb); + int start = nb_loops - loops; + scop_p scop = GBB_SCOP (gb); + + gcc_assert (scop_contains_loop (scop, gbb_loop (gb))); + + for (i = start ; i < nb_loops; i++) + for (j = i + 1; j < nb_loops; j++) + if (!is_interchange_valid (scop, i, j)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, + "\nInterchange not valid for loops %d and %d:\n", i, j); + return false; + } + else if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, + "\nInterchange valid for loops %d and %d:\n", i, j); + + /* Check if strip mining is profitable for every loop. */ + for (i = 0; i < nb_loops - start; i++) + if (!strip_mine_profitable_p (gb, stride, start + i)) + return false; + + /* Strip mine loops. */ + for (i = 0; i < nb_loops - start; i++) + graphite_trans_bb_strip_mine (gb, start + 2 * i, stride); + + /* Interchange loops. */ + for (i = 1; i < nb_loops - start; i++) + graphite_trans_bb_move_loop (gb, start + 2 * i, start + i); + + return true; +} + +/* Loop block LOOPS innermost loops of a loop nest. BBS represent the + basic blocks that belong to the loop nest to be blocked. */ + +static bool +graphite_trans_loop_block (VEC (graphite_bb_p, heap) *bbs, int loops) +{ + graphite_bb_p gb; + int i; + bool transform_done = false; + + /* TODO: - Calculate the stride size automatically. */ + int stride_size = 64; + + for (i = 0; VEC_iterate (graphite_bb_p, bbs, i, gb); i++) + transform_done |= graphite_trans_bb_block (gb, stride_size, loops); + + return transform_done; +} + +/* Loop block all basic blocks of SCOP. */ + +static bool +graphite_trans_scop_block (scop_p scop) +{ + graphite_bb_p gb; + int i, j; + int last_nb_loops; + int nb_loops; + bool perfect = true; + bool transform_done = false; + + VEC (graphite_bb_p, heap) *bbs = VEC_alloc (graphite_bb_p, heap, 3); + int max_schedule = scop_max_loop_depth (scop) + 1; + lambda_vector last_schedule = lambda_vector_new (max_schedule); + + if (VEC_length (graphite_bb_p, SCOP_BBS (scop)) == 0) + return transform_done; + + /* Get the data of the first bb. */ + gb = VEC_index (graphite_bb_p, SCOP_BBS (scop), 0); + last_nb_loops = gbb_nb_loops (gb); + lambda_vector_copy (GBB_STATIC_SCHEDULE (gb), last_schedule, + last_nb_loops + 1); + VEC_safe_push (graphite_bb_p, heap, bbs, gb); + + for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gb); i++) + { + /* We did the first bb before. */ + if (i == 0) + continue; + + nb_loops = gbb_nb_loops (gb); + + /* If the number of loops is unchanged and only the last element of the + schedule changes, we stay in the loop nest. */ + if (nb_loops == last_nb_loops + && (last_schedule [nb_loops + 1] + != GBB_STATIC_SCHEDULE (gb)[nb_loops + 1])) + { + VEC_safe_push (graphite_bb_p, heap, bbs, gb); + continue; + } + + /* Otherwise, we left the innermost loop. So check, if the last bb was in + a perfect loop nest and how many loops are contained in this perfect + loop nest. + + Count the number of zeros from the end of the schedule. They are the + number of surrounding loops. + + Example: + last_bb 2 3 2 0 0 0 0 3 + bb 2 4 0 + <------ j = 4 + + last_bb 2 3 2 0 0 0 0 3 + bb 2 3 2 0 1 + <-- j = 2 + + If there is no zero, there were other bbs in outer loops and the loop + nest is not perfect. */ + for (j = last_nb_loops - 1; j >= 0; j--) + { + if (last_schedule [j] != 0 + || (j <= nb_loops && GBB_STATIC_SCHEDULE (gb)[j] == 1)) + { + j--; + break; + } + } + + j++; + + /* Found perfect loop nest. */ + if (perfect && last_nb_loops - j > 0) + transform_done |= graphite_trans_loop_block (bbs, last_nb_loops - j); + + /* Check if we start with a new loop. + + Example: + + last_bb 2 3 2 0 0 0 0 3 + bb 2 3 2 0 0 1 0 + + Here we start with the loop "2 3 2 0 0 1" + + last_bb 2 3 2 0 0 0 0 3 + bb 2 3 2 0 0 1 + + But here not, so the loop nest can never be perfect. */ + + perfect = (GBB_STATIC_SCHEDULE (gb)[nb_loops] == 0); + + /* Update the last_bb infos. We do not do that for the bbs in the same + loop, as the data we use is not changed. */ + last_nb_loops = nb_loops; + lambda_vector_copy (GBB_STATIC_SCHEDULE (gb), last_schedule, + nb_loops + 1); + VEC_truncate (graphite_bb_p, bbs, 0); + VEC_safe_push (graphite_bb_p, heap, bbs, gb); + } + + /* Check if the last loop nest was perfect. It is the same check as above, + but the comparison with the next bb is missing. */ + for (j = last_nb_loops - 1; j >= 0; j--) + if (last_schedule [j] != 0) + { + j--; + break; + } + + j++; + + /* Found perfect loop nest. */ + if (last_nb_loops - j > 0) + transform_done |= graphite_trans_loop_block (bbs, last_nb_loops - j); + VEC_free (graphite_bb_p, heap, bbs); + + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "\nLoop blocked.\n"); + + return transform_done; +} + +/* Apply graphite transformations to all the basic blocks of SCOP. */ + +static bool +graphite_apply_transformations (scop_p scop) +{ + bool transform_done = false; + + /* Sort the list of bbs. Keep them always sorted. */ + graphite_sort_gbbs (scop); + scop_remove_ignoreable_gbbs (scop); + + if (flag_loop_block) + transform_done = graphite_trans_scop_block (scop); + +#if 0 && ENABLE_CHECKING + /* When the compiler is configured with ENABLE_CHECKING, always + generate code, even if we did not apply any transformation. This + provides better code coverage of the backend code generator. + + This also allows to check the performance for an identity + transform: GIMPLE -> GRAPHITE -> GIMPLE; and the output of CLooG + is never an identity: if CLooG optimizations are not disabled, + the CLooG output is always optimized in control flow. */ + transform_done = true; +#endif + + return transform_done; +} + +/* We limit all SCoPs to SCoPs, that are completely surrounded by a loop. + + Example: + + for (i | + { | + for (j | SCoP 1 + for (k | + } | + + * SCoP frontier, as this line is not surrounded by any loop. * + + for (l | SCoP 2 + + This is necessary as scalar evolution and parameter detection need a + outermost loop to initialize parameters correctly. + + TODO: FIX scalar evolution and parameter detection to allow mor flexible + SCoP frontiers. */ + +static void +limit_scops (void) +{ + VEC (scop_p, heap) *new_scops = VEC_alloc (scop_p, heap, 3); + int i; + scop_p scop; + + for (i = 0; VEC_iterate (scop_p, current_scops, i, scop); i++) + { + int j; + loop_p loop; + build_scop_bbs (scop); + build_scop_loop_nests (scop); + + for (j = 0; VEC_iterate (loop_p, SCOP_LOOP_NEST (scop), j, loop); j++) + if (!loop_in_scop_p (loop_outer (loop), scop)) + { + scop_p n_scop = new_scop (loop_preheader_edge (loop)); + end_scop (n_scop, single_exit (loop), false); + VEC_safe_push (scop_p, heap, new_scops, n_scop); + } + } + + free_scops (current_scops); + current_scops = new_scops; +} + +/* Perform a set of linear transforms on the loops of the current + function. */ + +void +graphite_transform_loops (void) +{ + int i; + scop_p scop; + + if (number_of_loops () <= 1) + return; + + current_scops = VEC_alloc (scop_p, heap, 3); + + calculate_dominance_info (CDI_DOMINATORS); + calculate_dominance_info (CDI_POST_DOMINATORS); + + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Graphite loop transformations \n"); + + cloog_initialize (); + build_scops (); + limit_scops (); + + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "\nnumber of SCoPs: %d\n", + VEC_length (scop_p, current_scops)); + + for (i = 0; VEC_iterate (scop_p, current_scops, i, scop); i++) + { + build_scop_bbs (scop); + build_scop_loop_nests (scop); + build_scop_canonical_schedules (scop); + build_bb_loops (scop); + find_scop_parameters (scop); + build_scop_context (scop); + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "\n(In SCoP %d:\n", i); + fprintf (dump_file, "\nnumber of bbs: %d\n", + VEC_length (graphite_bb_p, SCOP_BBS (scop))); + fprintf (dump_file, "\nnumber of loops: %d)\n", + VEC_length (loop_p, SCOP_LOOP_NEST (scop))); + } + + if (!build_scop_iteration_domain (scop)) + continue; + + build_scop_conditions (scop); + build_scop_data_accesses (scop); + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + int nbrefs = nb_data_refs_in_scop (scop); + fprintf (dump_file, "\nnumber of data refs: %d\n", nbrefs); + } + + if (graphite_apply_transformations (scop)) + gloog (scop, find_transform (scop)); + } + + /* Cleanup. */ + free_scops (current_scops); + cloog_finalize (); +} + +#else /* If Cloog is not available: #ifndef HAVE_cloog. */ + +void +graphite_transform_loops (void) +{ + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Graphite loop optimizations cannot be used.\n"); + fprintf (dump_file, "GCC has not been configured with the required " + "libraries for Graphite loop optimizations."); + } + sorry ("Graphite loop optimizations cannot be used"); +} + +#endif diff --git a/gcc/graphite.h b/gcc/graphite.h new file mode 100644 index 0000000..1a3cf6c --- /dev/null +++ b/gcc/graphite.h @@ -0,0 +1,516 @@ +/* Gimple Represented as Polyhedra. + Copyright (C) 2006, 2007, 2008 Free Software Foundation, Inc. + Contributed by Sebastian Pop <sebastian.pop@inria.fr>. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#include "tree-data-ref.h" + +typedef struct graphite_bb *graphite_bb_p; +DEF_VEC_P(graphite_bb_p); +DEF_VEC_ALLOC_P (graphite_bb_p, heap); + +DEF_VEC_P(scop_p); +DEF_VEC_ALLOC_P (scop_p, heap); + +static inline int scop_nb_loops (scop_p scop); +static inline unsigned scop_nb_params (scop_p scop); +static inline bool scop_contains_loop (scop_p scop, struct loop *loop); + +struct graphite_bb +{ + basic_block bb; + scop_p scop; + + /* The static schedule contains the textual order for every loop layer. + + Example: + + S0 + for (i ...) + { + S1 + for (j ...) + { + S2 + S3 + } + S4 + } + S5 + for (k ...) + { + S6 + S7 + for (l ...) + { + S8 + } + S9 + } + S10 + + Schedules: + + | Depth + BB | 0 1 2 + ------------ + S0 | 0 + S1 | 1, 0 + S2 | 1, 1, 0 + S3 | 1, 1, 1 + S4 | 1, 2 + S5 | 2 + S6 | 3, 0 + S7 | 3, 1 + S8 | 3, 2, 0 + S9 | 3, 3 + S10| 4 + + Normalization rules: + - One SCoP can never contain two bbs with the same schedule timestamp. + - All bbs at the same loop depth have a consecutive ordering (no gaps). */ + lambda_vector static_schedule; + + /* The iteration domain of this bb. It contains this columns: + - In/Eq: If this line is a equation or inequation. + - For every loop iterator one column. + - One column for every parameter in this SCoP. + - The constant column to add integers to the (in)equations. + + Example: + + for (i = a - 7*b + 8; i <= 3*a + 13*b + 20; i++) + for (j = 2; j <= 2*i + 5; j++) + for (k = 0; k <= 5; k++) + S (i,j,k) + + Loop iterators: i, j, k + Parameters: a, b + + (I)eq i j k a b 1 + + 1 1 0 0 -1 7 -8 # i >= a - 7b + 8 + 1 -1 0 0 3 13 20 # i <= 3a + 13b + 20 + 1 0 1 0 0 0 -2 # j >= 2 + 1 2 -1 0 0 0 5 # j <= 2i + 5 + 1 0 0 1 0 0 0 # k >= 0 + 1 0 0 -1 0 0 5 # k <= 5 + + The number of loop iterators may change and is not connected to the + number of loops, that surrounded this bb in the gimple code. */ + CloogMatrix *domain; + + /* Lists containing the restrictions of the conditional statements + dominating this bb. This bb can only be executed, if all conditions + are true. + + Example: + + for (i = 0; i <= 20; i++) + { + A + + if (2i <= 8) + B + } + + So for B there is a additional condition (2i <= 8). + + TODO: Add this restrictions to the domain matrix. + + List of COND_EXPR and SWITCH_EXPR. A COND_EXPR is true only if the + corresponding element in CONDITION_CASES is not NULL_TREE. For a + SWITCH_EXPR the corresponding element in CONDITION_CASES is a + CASE_LABEL_EXPR. */ + VEC (gimple, heap) *conditions; + VEC (gimple, heap) *condition_cases; + + /* LOOPS contains for every column in the graphite domain the corresponding + gimple loop. If there exists no corresponding gimple loop LOOPS contains + NULL. + + Example: + + Original code: + + for (i = 0; i <= 20; i++) + for (j = 5; j <= 10; j++) + A + + Original domain: + + (I)eq i j 1 + 1 1 0 0 # i >= 0 + 1 -1 0 20 # i <= 20 + 1 0 1 0 # j >= 0 + 1 0 -1 10 # j <= 10 + + Original loops vector: + 0 1 + Loop i Loop j + + After some changes (Exchange i and j, strip-mine i): + + Domain: + + (I)eq j ii i k 1 + 1 0 0 1 0 0 # i >= 0 + 1 0 0 -1 0 20 # i <= 20 + 1 1 0 0 0 0 # j >= 0 + 1 -1 0 0 0 10 # j <= 10 + 1 0 -1 1 0 0 # ii <= i + 1 0 1 -1 0 1 # ii + 1 >= i + 1 0 -1 0 2 0 # ii <= 2k + 1 0 1 0 -2 0 # ii >= 2k + + Iterator vector: + 0 1 2 3 + Loop j NULL Loop i NULL + + Means the original loop i is now at column two of the domain and + loop j in the original loop nest is now at column 0. Column 1 and + 3 are emtpy. */ + VEC (loop_p, heap) *loops; + + lambda_vector compressed_alpha_matrix; + CloogMatrix *dynamic_schedule; + VEC (data_reference_p, heap) *data_refs; +}; + +#define GBB_BB(GBB) GBB->bb +#define GBB_SCOP(GBB) GBB->scop +#define GBB_STATIC_SCHEDULE(GBB) GBB->static_schedule +#define GBB_DATA_REFS(GBB) GBB->data_refs +#define GBB_ALPHA(GBB) GBB->compressed_alpha_matrix +#define GBB_DYNAMIC_SCHEDULE(GBB) GBB->dynamic_schedule +#define GBB_DOMAIN(GBB) GBB->domain +#define GBB_CONDITIONS(GBB) GBB->conditions +#define GBB_CONDITION_CASES(GBB) GBB->condition_cases +#define GBB_LOOPS(GBB) GBB->loops + +/* Return the loop that contains the basic block GBB. */ + +static inline struct loop * +gbb_loop (struct graphite_bb *gbb) +{ + return GBB_BB (gbb)->loop_father; +} + +/* Calculate the number of loops around GB in the current SCOP. Only + works if GBB_DOMAIN is built. */ + +static inline int +gbb_nb_loops (const struct graphite_bb *gb) +{ + scop_p scop = GBB_SCOP (gb); + + if (GBB_DOMAIN (gb) == NULL) + return 0; + + return GBB_DOMAIN (gb)->NbColumns - scop_nb_params (scop) - 2; +} + +/* Returns the gimple loop, that corresponds to the loop_iterator_INDEX. + If there is no corresponding gimple loop, we return NULL. */ + +static inline loop_p +gbb_loop_at_index (graphite_bb_p gb, int index) +{ + return VEC_index (loop_p, GBB_LOOPS (gb), index); +} + +/* Returns the corresponding loop iterator index for a gimple loop. */ + +static inline int +gbb_loop_index (graphite_bb_p gb, loop_p loop) +{ + int i; + loop_p l; + + for (i = 0; VEC_iterate (loop_p, GBB_LOOPS (gb), i, l); i++) + if (loop == l) + return i; + + gcc_unreachable(); +} + +struct loop_to_cloog_loop_str +{ + unsigned int loop_num; + unsigned int loop_position; /* The column that represents this loop. */ + CloogLoop *cloog_loop; +}; + +typedef struct name_tree +{ + tree t; + const char *name; + struct loop* loop; +} *name_tree; + +DEF_VEC_P(name_tree); +DEF_VEC_ALLOC_P (name_tree, heap); + +/* A Single Entry, Single Exit region is a part of the CFG delimited + by two edges. */ +typedef struct sese +{ + edge entry, exit; +} *sese; + +#define SESE_ENTRY(S) (S->entry) +#define SESE_EXIT(S) (S->exit) + +/* A SCOP is a Static Control Part of the program, simple enough to be + represented in polyhedral form. */ +struct scop +{ + /* A SCOP is defined as a SESE region. */ + sese region; + + /* All the basic blocks in this scop. They have extra information + attached to them, in the graphite_bb structure. */ + VEC (graphite_bb_p, heap) *bbs; + + /* Set for a basic block index when it belongs to this SCOP. */ + bitmap bbs_b; + + lambda_vector static_schedule; + + /* Parameters used within the SCOP. */ + VEC (name_tree, heap) *params; + + /* A collection of old induction variables*/ + VEC (name_tree, heap) *old_ivs; + + /* Loops completely contained in the SCOP. */ + bitmap loops; + VEC (loop_p, heap) *loop_nest; + + /* ??? It looks like a global mapping loop_id -> cloog_loop would work. */ + htab_t loop2cloog_loop; + + /* CLooG representation of this SCOP. */ + CloogProgram *program; +}; + +#define SCOP_BBS(S) S->bbs +#define SCOP_BBS_B(S) S->bbs_b +#define SCOP_REGION(S) S->region +/* SCOP_ENTRY bb dominates all the bbs of the scop. SCOP_EXIT bb + post-dominates all the bbs of the scop. SCOP_EXIT potentially + contains non affine data accesses, side effect statements or + difficult constructs, and thus is not considered part of the scop, + but just a boundary. SCOP_ENTRY is considered part of the scop. */ +#define SCOP_ENTRY(S) (SESE_ENTRY (SCOP_REGION (S))->dest) +#define SCOP_EXIT(S) (SESE_EXIT (SCOP_REGION (S))->dest) +#define SCOP_STATIC_SCHEDULE(S) S->static_schedule +#define SCOP_LOOPS(S) S->loops +#define SCOP_LOOP_NEST(S) S->loop_nest +#define SCOP_PARAMS(S) S->params +#define SCOP_OLDIVS(S) S->old_ivs +#define SCOP_PROG(S) S->program +#define SCOP_LOOP2CLOOG_LOOP(S) S->loop2cloog_loop +#define SCOP_LOOPS_MAPPING(S) S->loops_mapping + +extern void debug_scop (scop_p, int); +extern void debug_scops (int); +extern void print_graphite_bb (FILE *, graphite_bb_p, int, int); +extern void debug_gbb (graphite_bb_p, int); +extern void dot_scop (scop_p); +extern void dot_all_scops (void); +extern void debug_clast_stmt (struct clast_stmt *); + + +extern void debug_loop_vec (graphite_bb_p gb); +extern void debug_oldivs (scop_p); + +typedef VEC(name_tree, heap) **loop_iv_stack; +extern void loop_iv_stack_debug (loop_iv_stack); + + +/* Return the number of gimple loops contained in SCOP. */ + +static inline int +scop_nb_loops (scop_p scop) +{ + return VEC_length (loop_p, SCOP_LOOP_NEST (scop)); +} + +/* Returns the number of parameters for SCOP. */ + +static inline unsigned +scop_nb_params (scop_p scop) +{ + return VEC_length (name_tree, SCOP_PARAMS (scop)); +} + +/* Return the dimension of the domains for SCOP. */ + +static inline int +scop_dim_domain (scop_p scop) +{ + return scop_nb_loops (scop) + scop_nb_params (scop) + 1; +} + +/* Return the dimension of the domains for GB. */ + +static inline int +gbb_dim_domain (graphite_bb_p gb) +{ + return scop_dim_domain (GBB_SCOP (gb)); +} + +/* Returns the dimensionality of a loop iteration domain for a given + loop, identified by LOOP_NUM, with respect to SCOP. */ + +static inline int +loop_domain_dim (unsigned int loop_num, scop_p scop) +{ + struct loop_to_cloog_loop_str tmp, *slot; + htab_t tab = SCOP_LOOP2CLOOG_LOOP (scop); + + tmp.loop_num = loop_num; + slot = (struct loop_to_cloog_loop_str *) htab_find (tab, &tmp); + + /* The loop containing the entry of the scop is not always part of + the SCoP, and it is not registered in SCOP_LOOP2CLOOG_LOOP. */ + if (!slot) + return scop_nb_params (scop) + 2; + + return cloog_domain_dim (cloog_loop_domain (slot->cloog_loop)) + 2; +} + +/* Returns the dimensionality of an enclosing loop iteration domain + with respect to enclosing SCoP for a given data reference REF. */ + +static inline int +ref_nb_loops (data_reference_p ref) +{ + return loop_domain_dim (loop_containing_stmt (DR_STMT (ref))->num, DR_SCOP (ref)); +} + +/* Returns the dimensionality of a loop iteration vector in a loop + iteration domain for a given loop (identified by LOOP_NUM) with + respect to SCOP. */ + +static inline int +loop_iteration_vector_dim (unsigned int loop_num, scop_p scop) +{ + return loop_domain_dim (loop_num, scop) - 2 - scop_nb_params (scop); +} + +/* Checks, if SCOP contains LOOP. */ + +static inline bool +scop_contains_loop (scop_p scop, struct loop *loop) +{ + return bitmap_bit_p (SCOP_LOOPS (scop), loop->num); +} + +/* Returns the index of LOOP in the domain matrix for the SCOP. */ + +static inline int +scop_loop_index (scop_p scop, struct loop *loop) +{ + unsigned i; + struct loop *l; + + gcc_assert (scop_contains_loop (scop, loop)); + + for (i = 0; VEC_iterate (loop_p, SCOP_LOOP_NEST (scop), i, l); i++) + if (l == loop) + return i; + + gcc_unreachable(); +} + +/* Return the index of innermost loop that contains the basic block + GBB. */ + +static inline int +gbb_inner_most_loop_index (scop_p scop, graphite_bb_p gb) +{ + return scop_loop_index(scop, gbb_loop (gb)); +} + +/* Return the outermost loop that contains the loop LOOP. The outer + loops are searched until a sibling for the outer loop is found. */ + +static struct loop * +outer_most_loop_1 (scop_p scop, struct loop* loop, struct loop* current_outer) +{ + return (!scop_contains_loop (scop, loop)) ? current_outer : + (loop->next != NULL) ? loop : + outer_most_loop_1 (scop, loop_outer (loop), loop); +} + +/* Return the outermost loop that contains the loop LOOP. */ + +static struct loop * +outer_most_loop (scop_p scop, struct loop *loop) +{ + return outer_most_loop_1 (scop, loop, NULL); +} + +/* Return the index of the outermost loop that contains the basic + block BB. */ + +static inline int +gbb_outer_most_loop_index (scop_p scop, graphite_bb_p gb) +{ + return scop_loop_index (scop, outer_most_loop (scop, gbb_loop (gb))); +} + +/* Return the loop depth of LOOP in SCOP. */ + +static inline unsigned int +scop_gimple_loop_depth (scop_p scop, loop_p loop) +{ + unsigned int depth = 0; + + loop = loop_outer (loop); + + while (scop_contains_loop (scop, loop)) + { + depth++; + loop = loop_outer (loop); + } + + return depth; +} + +/* Associate a POLYHEDRON dependence description to two data + references A and B. */ +struct data_dependence_polyhedron +{ + struct data_reference *a; + struct data_reference *b; + bool reversed_p; + bool loop_carried; /*TODO:konrad get rid of this, make level signed */ + signed level; + CloogDomain *polyhedron; +}; + +#define RDGE_DDP(E) ((struct data_dependence_polyhedron*) ((E)->data)) + +typedef struct data_dependence_polyhedron *ddp_p; + +DEF_VEC_P(ddp_p); +DEF_VEC_ALLOC_P(ddp_p,heap); + diff --git a/gcc/lambda-code.c b/gcc/lambda-code.c index 21bc184..2fdd898 100644 --- a/gcc/lambda-code.c +++ b/gcc/lambda-code.c @@ -147,7 +147,6 @@ static lambda_lattice lambda_lattice_new (int, int, struct obstack *); static lambda_lattice lambda_lattice_compute_base (lambda_loopnest, struct obstack *); -static tree find_induction_var_from_exit_cond (struct loop *); static bool can_convert_to_perfect_nest (struct loop *); /* Create a new lambda body vector. */ @@ -1434,7 +1433,7 @@ gcc_loop_to_lambda_loop (struct loop *loop, int depth, /* Given a LOOP, find the induction variable it is testing against in the exit condition. Return the induction variable if found, NULL otherwise. */ -static tree +tree find_induction_var_from_exit_cond (struct loop *loop) { gimple expr = get_loop_exit_condition (loop); diff --git a/gcc/lambda.h b/gcc/lambda.h index 66fbac7..2d321fb 100644 --- a/gcc/lambda.h +++ b/gcc/lambda.h @@ -39,6 +39,9 @@ DEF_VEC_ALLOC_P (lambda_vector_vec_p, heap); all vectors are the same length). */ typedef lambda_vector *lambda_matrix; +DEF_VEC_P (lambda_matrix); +DEF_VEC_ALLOC_P (lambda_matrix, heap); + /* A transformation matrix, which is a self-contained ROWSIZE x COLSIZE matrix. Rather than use floats, we simply keep a single DENOMINATOR that represents the denominator for every element in the matrix. */ @@ -213,6 +216,7 @@ void lambda_loopnest_to_gcc_loopnest (struct loop *, lambda_loopnest, lambda_trans_matrix, struct obstack *); void remove_iv (gimple); +tree find_induction_var_from_exit_cond (struct loop *); static inline void lambda_vector_negate (lambda_vector, lambda_vector, int); static inline void lambda_vector_mult_const (lambda_vector, lambda_vector, int, int); @@ -374,6 +378,33 @@ lambda_vector_matrix_mult (lambda_vector vect, int m, lambda_matrix mat, dest[i] += mat[j][i] * vect[j]; } +/* Compare two vectors returning an integer less than, equal to, or + greater than zero if the first argument is considered to be respectively + less than, equal to, or greater than the second. + We use the lexicographic order. */ + +static inline int +lambda_vector_compare (lambda_vector vec1, int length1, lambda_vector vec2, + int length2) +{ + int min_length; + int i; + + if (length1 < length2) + min_length = length1; + else + min_length = length2; + + for (i = 0; i < min_length; i++) + if (vec1[i] < vec2[i]) + return -1; + else if (vec1[i] > vec2[i]) + return 1; + else + continue; + + return length1 - length2; +} /* Print out a vector VEC of length N to OUTFILE. */ diff --git a/gcc/passes.c b/gcc/passes.c index 7c76747..36c107e 100644 --- a/gcc/passes.c +++ b/gcc/passes.c @@ -655,6 +655,7 @@ init_optimization_passes (void) NEXT_PASS (pass_check_data_deps); NEXT_PASS (pass_loop_distribution); NEXT_PASS (pass_linear_transform); + NEXT_PASS (pass_graphite_transforms); NEXT_PASS (pass_iv_canon); NEXT_PASS (pass_if_conversion); NEXT_PASS (pass_vectorize); diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index fa07075..5c75e1f 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,23 @@ +2008-09-02 Sebastian Pop <sebastian.pop@amd.com> + Tobias Grosser <grosser@fim.uni-passau.de> + Jan Sjodin <jan.sjodin@amd.com> + Harsha Jagasia <harsha.jagasia@amd.com> + Dwarakanath Rajagopal <dwarak.rajagopal@amd.com> + Konrad Trifunovic <konrad.trifunovic@inria.fr> + Adrien Eliche <aeliche@isty.uvsq.fr> + + Merge from graphite branch. + * gcc.dg/graphite/scop-{0,1,2,3,4,5,6,7,8,9, + 10,11,12,13,14,15,16,17,18}.c: New. + * gcc.dg/graphite/graphite.exp: New. + * gcc.dg/graphite/scop-matmult.c: New. + * gcc.dg/graphite/block-0.c: New. + * lib/target-supports.exp (check_effective_target_fgraphite): New. + * gfortran.dg/graphite/block-1.f90: New. + * gfortran.dg/graphite/scop-{1,2}.f: New. + * gfortran.dg/graphite/block-{1,3,4}.f90: New. + * gfortran.dg/graphite/graphite.exp: New. + 2008-09-02 Richard Guenther <rguenther@suse.de> PR tree-optimization/37327 diff --git a/gcc/testsuite/gcc.dg/graphite/block-0.c b/gcc/testsuite/gcc.dg/graphite/block-0.c new file mode 100644 index 0000000..f277f05 --- /dev/null +++ b/gcc/testsuite/gcc.dg/graphite/block-0.c @@ -0,0 +1,25 @@ +/* { dg-options "-O -floop-block -fdump-tree-graphite-all" } */ + +#define N 1000 + +int toto() +{ + int j; + int i; + int a[N]; + int b[N]; + + for (i = 0; i < N; i++) + for (j = 0; j < N; j++) + a[j] = a[i] + 1; + + return a[0]; +} + +main() +{ + return toto(); +} + +/* { dg-final { scan-tree-dump-times "Loop blocked" 1 "graphite"} } */ +/* { dg-final { cleanup-tree-dump "graphite" } } */ diff --git a/gcc/testsuite/gcc.dg/graphite/block-1.c b/gcc/testsuite/gcc.dg/graphite/block-1.c new file mode 100644 index 0000000..039b974 --- /dev/null +++ b/gcc/testsuite/gcc.dg/graphite/block-1.c @@ -0,0 +1,31 @@ +/* { dg-options "-O2 -floop-block -fdump-tree-graphite-all" } */ + +#define MAX 8192 + +int main() +{ + int i, j; + int sum = 0; + int A[MAX * MAX]; + int B[MAX * MAX]; + + for (i = 0; i < MAX; i++) + for (j = 0; j < MAX; j++) + { + A[i*MAX + j] = j; + B[i*MAX + j] = j; + } + + for (i = 0; i < MAX; i++) + for (j = 0; j < MAX; j++) + A[i*MAX + j] += B[j*MAX + i]; + + for(i = 0; i < MAX; i++) + for(j = 0; j < MAX; j++) + sum += A[i*MAX + j]; + + return sum; +} + +/* { dg-final { scan-tree-dump-times "Loop blocked" 3 "graphite"} } */ +/* { dg-final { cleanup-tree-dump "graphite" } } */ diff --git a/gcc/testsuite/gcc.dg/graphite/graphite.exp b/gcc/testsuite/gcc.dg/graphite/graphite.exp new file mode 100644 index 0000000..a125717 --- /dev/null +++ b/gcc/testsuite/gcc.dg/graphite/graphite.exp @@ -0,0 +1,48 @@ +# Copyright (C) 2006, 2007, 2008 Free Software Foundation, Inc. + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# <http://www.gnu.org/licenses/>. + +# GCC testsuite that uses the `dg.exp' driver. + +# Load support procs. +load_lib gcc-dg.exp + +if ![check_effective_target_fgraphite] { + return +} + +# The default action for a test is 'compile'. Save current default. +global dg-do-what-default +set save-dg-do-what-default ${dg-do-what-default} +set dg-do-what-default compile + +# If a testcase doesn't have special options, use these. +global DEFAULT_CFLAGS +if ![info exists DEFAULT_CFLAGS] then { + set DEFAULT_CFLAGS " -ansi -pedantic-errors" +} + +# Initialize `dg'. +dg-init + +# Main loop. +dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cS\]]] \ + "" $DEFAULT_CFLAGS + +# Clean up. +set dg-do-what-default ${save-dg-do-what-default} + +# All done. +dg-finish diff --git a/gcc/testsuite/gcc.dg/graphite/scop-0.c b/gcc/testsuite/gcc.dg/graphite/scop-0.c new file mode 100644 index 0000000..ea3ae06 --- /dev/null +++ b/gcc/testsuite/gcc.dg/graphite/scop-0.c @@ -0,0 +1,24 @@ +/* { dg-options "-O2 -fgraphite -fdump-tree-graphite-all" } */ + +int foo (void); +void bar (void); + +int toto() +{ + /* Scop 1. */ + int i, j, k; + int a[100][100]; + int b[100]; + int N = foo (); + + for (i = 0; i < 2*N+ 100; i++) + for (j = 0; j < 200; j++) + a[j][i] = a[j+1][10] + 2; + + return a[3][5] + b[1]; + /* End scop 1. */ +} + +/* { dg-final { scan-tree-dump-times "number of SCoPs: 1" 1 "graphite"} } */ +/* { dg-final { cleanup-tree-dump "graphite" } } */ + diff --git a/gcc/testsuite/gcc.dg/graphite/scop-1.c b/gcc/testsuite/gcc.dg/graphite/scop-1.c new file mode 100644 index 0000000..ed6159f --- /dev/null +++ b/gcc/testsuite/gcc.dg/graphite/scop-1.c @@ -0,0 +1,33 @@ +/* { dg-options "-O2 -fgraphite -fdump-tree-graphite-all" } */ + +void bar (void); + +int toto() +{ + int i, j, k; + int a[100][100]; + int b[100]; + + for (i = 1; i < 100; i++) + { + for (j = 1; j < 100; j++) + a[j][i] = a[j+1][i-1] + 2; + + b[i] = b[i-1] + 2; + + bar (); + + for (j = 1; j < 100; j++) + a[j][i] = a[j+1][i-1] + 2; + + b[i] = a[i-1][i] + 2; + + for (j = 1; j < 100; j++) + a[j][i] = a[j+1][i-1] + 2; + } + + return a[3][5] + b[1]; +} + +/* { dg-final { scan-tree-dump-times "number of SCoPs: 3" 1 "graphite"} } */ +/* { dg-final { cleanup-tree-dump "graphite" } } */ diff --git a/gcc/testsuite/gcc.dg/graphite/scop-10.c b/gcc/testsuite/gcc.dg/graphite/scop-10.c new file mode 100644 index 0000000..8aff2c7 --- /dev/null +++ b/gcc/testsuite/gcc.dg/graphite/scop-10.c @@ -0,0 +1,33 @@ +/* { dg-options "-O2 -fgraphite -fdump-tree-graphite-all" } */ + +void bar (void); + +int toto() +{ + int i, j, k; + int a[100][100]; + int b[100]; + + for (i = 1; i < 100; i++) + { + for (j = 1; j < 100; j++) + b[i+j] = b[i+j-1] + 2; + + if (i * 2 == i + 8) + bar (); + else + { + for (j = 1; j < 100; j++) + b[i+j] = b[i+j-1] + 2; + a[i][i] = 2; + } + + for (k = 1; k < 100; k++) + b[i+k] = b[i+k-5] + 2; + } + + return a[3][5] + b[1]; +} + +/* { dg-final { scan-tree-dump-times "number of SCoPs: 3" 1 "graphite"} } */ +/* { dg-final { cleanup-tree-dump "graphite" } } */ diff --git a/gcc/testsuite/gcc.dg/graphite/scop-11.c b/gcc/testsuite/gcc.dg/graphite/scop-11.c new file mode 100644 index 0000000..e5a0fdb --- /dev/null +++ b/gcc/testsuite/gcc.dg/graphite/scop-11.c @@ -0,0 +1,34 @@ +/* { dg-options "-O2 -fgraphite -fdump-tree-graphite-all" } */ + +void bar (); + +int toto() +{ + int i,j, b; + int a[100]; + + if (i == 20) + { + for (j = 0; j <= 20; j++) + a[j] = b + i; + b = 3; + bar(); + } + else + { + if (i == 30) + { + for (j = 0; j <= 20; j++) + a[j] = b + i; + b = 5; + } + } + + for (j = 0; j <= 20; j++) + a[j] = b + i; + + return a[b]; +} + +/* { dg-final { scan-tree-dump-times "number of SCoPs: 3" 1 "graphite"} } */ +/* { dg-final { cleanup-tree-dump "graphite" } } */ diff --git a/gcc/testsuite/gcc.dg/graphite/scop-12.c b/gcc/testsuite/gcc.dg/graphite/scop-12.c new file mode 100644 index 0000000..0c13033 --- /dev/null +++ b/gcc/testsuite/gcc.dg/graphite/scop-12.c @@ -0,0 +1,38 @@ +/* { dg-options "-O2 -fgraphite -fdump-tree-graphite-all" } */ + +void bar (); + +int toto() +{ + int i,j, b; + int a[100]; + + switch (i) + { + + case 5: + for (j = 0; j <= 20; j++) + a[j] = b + i + 12; + break; + case 8: + for (j = 0; j <= 20; j++) + a[j] = b + i + 122; + break; + case 15: + for (j = 0; j <= 20; j++) + a[j] = b + i + 12; + break; + case 18: + for (j = 0; j <= 20; j++) + a[j] = b + i + 4; + break; + default: + for (j = 0; j <= 20; j++) + a[j] = b + i + 3; + } + + return a[b]; +} + +/* { dg-final { scan-tree-dump-times "number of SCoPs: 5" 1 "graphite"} } */ +/* { dg-final { cleanup-tree-dump "graphite" } } */ diff --git a/gcc/testsuite/gcc.dg/graphite/scop-13.c b/gcc/testsuite/gcc.dg/graphite/scop-13.c new file mode 100644 index 0000000..aa55e10 --- /dev/null +++ b/gcc/testsuite/gcc.dg/graphite/scop-13.c @@ -0,0 +1,43 @@ +/* { dg-options "-O2 -fgraphite -fdump-tree-graphite-all" } */ + +void bar (); + +int toto() +{ + int i,j, b; + int a[100]; + + if (i == 20) + { + b = 3; + goto B; + } + else + { + if (i == 30) + { + a[i] = b; + + + for (j = 0; j <= 20; j++) + a[j] = b + i; + + B: + + for (j = 0; j <= 20; j++) + a[j+b] = b + i; + + bar (); + } + else + { + a[i] = b + 3; + } + } + + + return a[b]; +} + +/* { dg-final { scan-tree-dump-times "number of SCoPs: 2" 1 "graphite"} } */ +/* { dg-final { cleanup-tree-dump "graphite" } } */ diff --git a/gcc/testsuite/gcc.dg/graphite/scop-14.c b/gcc/testsuite/gcc.dg/graphite/scop-14.c new file mode 100644 index 0000000..a707b01 --- /dev/null +++ b/gcc/testsuite/gcc.dg/graphite/scop-14.c @@ -0,0 +1,27 @@ +/* { dg-options "-O2 -fgraphite -fdump-tree-graphite-all" } */ + +void bar (); + +int toto() +{ + int i,j, b; + int a[100]; + + for (j = 0; j <= 20; j++) + { + a[j] = b + i; + + if (j * i == b) + break; + + a[j+b] = b + i; + } + + a[i] = b + 3; + + + return a[b]; +} + +/* { dg-final { scan-tree-dump-times "number of SCoPs: 0" 1 "graphite"} } */ +/* { dg-final { cleanup-tree-dump "graphite" } } */ diff --git a/gcc/testsuite/gcc.dg/graphite/scop-15.c b/gcc/testsuite/gcc.dg/graphite/scop-15.c new file mode 100644 index 0000000..7e24253 --- /dev/null +++ b/gcc/testsuite/gcc.dg/graphite/scop-15.c @@ -0,0 +1,52 @@ +/* { dg-options "-O2 -fgraphite -fdump-tree-graphite-all" } */ + +# define EXTERN(type, array) extern type array[] +typedef unsigned char uch; +typedef unsigned short ush; +EXTERN(uch, window); +EXTERN(ush, prev); +#ifndef WSIZE +# define WSIZE 0x8000 +#endif +#define MIN_MATCH 3 +#define MAX_MATCH 258 +#define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1) +#define MAX_DIST (WSIZE-MIN_LOOKAHEAD) +#define near +typedef unsigned IPos; +unsigned near max_chain_length; +extern unsigned near strstart; +unsigned int near prev_length; +#define NIL 0 +unsigned near good_match; +int near nice_match; +#define WMASK (WSIZE-1) +int longest_match(IPos cur_match) +{ + unsigned chain_length = max_chain_length; + register uch *scan = window + strstart; + register uch *match; + register int len; + int best_len = prev_length; + IPos limit = strstart > (IPos)MAX_DIST ? strstart - (IPos)MAX_DIST : NIL; + register uch *strend = window + strstart + MAX_MATCH; + register uch scan_end = scan[best_len]; + if (prev_length >= good_match) { + } + do { + if (match[best_len] != scan_end || + *++match != scan[1]) continue; + do { + } while (*++scan == *++match && *++scan == *++match && + scan < strend); + len = MAX_MATCH - (int)(strend - scan); + if (len > best_len) { + best_len = len; + if (len >= nice_match) break; + } + } while ((cur_match = prev[cur_match & WMASK]) > limit + && --chain_length != 0); + return best_len; +} +/* { dg-final { scan-tree-dump-times "number of SCoPs: 0" 1 "graphite"} } */ +/* { dg-final { cleanup-tree-dump "graphite" } } */ diff --git a/gcc/testsuite/gcc.dg/graphite/scop-16.c b/gcc/testsuite/gcc.dg/graphite/scop-16.c new file mode 100644 index 0000000..42f7b6a --- /dev/null +++ b/gcc/testsuite/gcc.dg/graphite/scop-16.c @@ -0,0 +1,25 @@ +/* { dg-options "-O2 -floop-block -fdump-tree-graphite-all" } */ +#define N 10000 +void foo (int); +int test () +{ + int a[N][N]; + int b[N][N]; + unsigned i, j; + + for (i = 0; i < N; i++) + for (j = 0; j < N; j++) + a[i][j] = i*j; + + for (j = 1; j < N; j++) + for (i = 0; i < N; i++) + a[i][j] = a[i][j-1] + b[i][j]; + + for (i = 0; i < N; i++) + for (j = 0; j < N; j++) + foo (a[i][j]); +} + +/* Interchange is legal for loops 0 and 1 of the first two SCoPs */ +/* { dg-final { scan-tree-dump-times "Interchange valid for loops 0 and 1:" 2 "graphite"} } */ +/* { dg-final { cleanup-tree-dump "graphite" } } */ diff --git a/gcc/testsuite/gcc.dg/graphite/scop-17.c b/gcc/testsuite/gcc.dg/graphite/scop-17.c new file mode 100644 index 0000000..4c1b0ca --- /dev/null +++ b/gcc/testsuite/gcc.dg/graphite/scop-17.c @@ -0,0 +1,24 @@ +/* { dg-options "-O2 -floop-block -fdump-tree-graphite-all" } */ +#define N 10000 +void foo (int); +int test () +{ + int a[N][N]; + unsigned i, j; + + for (i = 0; i < N; i++) + for (j = 0; j < N; j++) + a[i][j] = i*j; + + for (i = 1; i < N; i++) + for (j = 1; j < (N-1) ; j++) + a[i][j] = a[i-1][j+1] * a[i-1][j+1]/2; + + for (i = 0; i < N; i++) + for (j = 0; j < N; j++) + foo (a[i][j]); +} + +/* Interchange is not legal for loops 0 and 1 of SCoP 2. */ +/* { dg-final { scan-tree-dump-times "Interchange not valid for loops 0 and 1:" 1 "graphite"} } */ +/* { dg-final { cleanup-tree-dump "graphite" } } */ diff --git a/gcc/testsuite/gcc.dg/graphite/scop-18.c b/gcc/testsuite/gcc.dg/graphite/scop-18.c new file mode 100644 index 0000000..fe2d5f4 --- /dev/null +++ b/gcc/testsuite/gcc.dg/graphite/scop-18.c @@ -0,0 +1,26 @@ +/* { dg-options "-O2 -floop-block -fdump-tree-graphite-all" } */ + +#define N 24 +#define M 1000 + +float A[1000][1000], B[1000][1000], C[1000][1000]; + +void test (void) +{ + int i, j, k; + + /* These loops contain too few iterations for being strip-mined by 64. */ + for (i = 0; i < 24; i++) + for (j = 0; j < 24; j++) + for (k = 0; k < 24; k++) + A[i][j] += B[i][k] * C[k][j]; + + /* These loops should still be strip mined. */ + for (i = 0; i < 1000; i++) + for (j = 0; j < 1000; j++) + for (k = 0; k < 1000; k++) + A[i][j] += B[i][k] * C[k][j]; +} + +/* { dg-final { scan-tree-dump-times "Strip Mining is not profitable" 3 "graphite" } } */ +/* { dg-final { cleanup-tree-dump "graphite" } } */ diff --git a/gcc/testsuite/gcc.dg/graphite/scop-2.c b/gcc/testsuite/gcc.dg/graphite/scop-2.c new file mode 100644 index 0000000..cf25dcd --- /dev/null +++ b/gcc/testsuite/gcc.dg/graphite/scop-2.c @@ -0,0 +1,41 @@ +/* { dg-options "-O2 -fgraphite -fdump-tree-graphite-all" } */ + +void bar (void); + +int toto() +{ + int i, j, k; + int a[100][100]; + int b[100]; + + for (i = 1; i < 100; i++) + { + for (j = 1; j < 100; j++) + for (k = 1; k < 100; k++) + a[j][k] = a[j+1][i-1] + 2; + + b[i] = b[i-1] + 2; + + bar (); + + for (j = 1; j < 100; j++) + a[j][i] = a[j+1][i-1] + 2; + + b[i] = b[i-1] + 2; + + bar (); + + for (j = 1; j < 100; j++) + a[j][i] = a[j+1][i-1] + 2; + + b[i] = a[i-1][i] + 2; + + for (j = 1; j < 100; j++) + a[j][i] = a[j+1][i-1] + 2; + } + + return a[3][5] + b[1]; +} + +/* { dg-final { scan-tree-dump-times "number of SCoPs: 4" 1 "graphite"} } */ +/* { dg-final { cleanup-tree-dump "graphite" } } */ diff --git a/gcc/testsuite/gcc.dg/graphite/scop-3.c b/gcc/testsuite/gcc.dg/graphite/scop-3.c new file mode 100644 index 0000000..1789e6b --- /dev/null +++ b/gcc/testsuite/gcc.dg/graphite/scop-3.c @@ -0,0 +1,30 @@ +/* { dg-options "-O2 -fgraphite -fdump-tree-graphite-all" } */ + +int toto() +{ + int i, j, k; + int a[100][100]; + int b[100]; + + for (i = 1; i < 100; i++) + { + for (j = 1; j < 80; j++) + a[j][i] = a[j+1][2*i-1*j] + 12; + + b[i] = b[i-1] + 10; + + for (j = 1; j < 60; j++) + a[j][i] = a[j+1][i-1] + 8; + + if (i == 23) + b[i] = a[i-1][i] + 6; + + for (j = 1; j < 40; j++) + a[j][i] = a[j+1][i-1] + 4; + } + + return a[3][5] + b[1]; +} + +/* { dg-final { scan-tree-dump-times "number of SCoPs: 3" 1 "graphite"} } */ +/* { dg-final { cleanup-tree-dump "graphite" } } */ diff --git a/gcc/testsuite/gcc.dg/graphite/scop-4.c b/gcc/testsuite/gcc.dg/graphite/scop-4.c new file mode 100644 index 0000000..515c53a --- /dev/null +++ b/gcc/testsuite/gcc.dg/graphite/scop-4.c @@ -0,0 +1,31 @@ +/* { dg-options "-O2 -fgraphite -fdump-tree-graphite-all" } */ + +void bar (); + +int toto() +{ + int i, j, k; + int a[100][100]; + int b[100]; + + for (i = 1; i < 100; i++) + { + for (j = 1; j < 80; j++) + a[j][i] = a[j+1][2*i-1*j] + 12; + + b[i] = b[i-1] + 10; + + for (j = 1; j < 60; j++) + a[j][i] = a[j+1][i-1] + 8; + + bar (); + + if (i == 23) + b[i] = a[i-1][i] + 6; + } + + return a[3][5] + b[1]; +} + +/* { dg-final { scan-tree-dump-times "number of SCoPs: 2" 1 "graphite"} } */ +/* { dg-final { cleanup-tree-dump "graphite" } } */ diff --git a/gcc/testsuite/gcc.dg/graphite/scop-5.c b/gcc/testsuite/gcc.dg/graphite/scop-5.c new file mode 100644 index 0000000..697a28e --- /dev/null +++ b/gcc/testsuite/gcc.dg/graphite/scop-5.c @@ -0,0 +1,37 @@ +/* { dg-options "-O2 -fgraphite -fdump-tree-graphite-all" } */ + +void bar (); + +int toto() +{ + int i,j, b; + int a[100]; + + if (i == 20) + { + for (j = 0; j <= 20; j++) + a[j] = b + i; + b = 3; + bar(); + } + else + { + if (i == 30) + { + for (j = 0; j <= 20; j++) + a[j] = b + i; + b = 5; + } + else + { + for (j = 0; j <= 20; j++) + a[j] = b + i; + b = 8; + } + } + + return a[b]; +} + +/* { dg-final { scan-tree-dump-times "number of SCoPs: 3" 1 "graphite"} } */ +/* { dg-final { cleanup-tree-dump "graphite" } } */ diff --git a/gcc/testsuite/gcc.dg/graphite/scop-6.c b/gcc/testsuite/gcc.dg/graphite/scop-6.c new file mode 100644 index 0000000..d262320 --- /dev/null +++ b/gcc/testsuite/gcc.dg/graphite/scop-6.c @@ -0,0 +1,33 @@ +/* { dg-options "-O2 -fgraphite -fdump-tree-graphite-all" } */ + +void bar (void); + +int toto() +{ + int i, j, k; + int a[100][100]; + int b[100]; + + for (i = 1; i < 100; i++) + { + for (j = 1; j < 100; j++) + b[i+j] = b[i+j-1] + 2; + + if (i * 2 == i + 8) + b[i+k] = b[i+k-1] + 2; + else + { + for (k = 1; k < 100; k++) + b[i+k] = b[i+k-1] + 2; + bar (); + } + + for (k = 1; k < 100; k++) + b[i+k] = b[i+k-5] + 2; + } + + return a[3][5] + b[1]; +} + +/* { dg-final { scan-tree-dump-times "number of SCoPs: 3" 1 "graphite"} } */ +/* { dg-final { cleanup-tree-dump "graphite" } } */ diff --git a/gcc/testsuite/gcc.dg/graphite/scop-7.c b/gcc/testsuite/gcc.dg/graphite/scop-7.c new file mode 100644 index 0000000..1187ce1 --- /dev/null +++ b/gcc/testsuite/gcc.dg/graphite/scop-7.c @@ -0,0 +1,33 @@ +/* { dg-options "-O2 -fgraphite -fdump-tree-graphite-all" } */ + +void bar (void); + +int toto() +{ + int i, j, k; + int a[100][100]; + int b[100]; + + for (i = 1; i < 100; i++) + { + for (j = 1; j < 100; j++) + b[i+j] = b[i+j-1] + 2; + + if (i * 2 == i + 8) + { + bar (); + for (j = 1; j < 100; j++) + b[i+j] = b[i+j-1] + 2; + } + else + a[i][i] = 2; + + for (k = 1; k < 100; k++) + b[i+k] = b[i+k-5] + 2; + } + + return a[3][5] + b[1]; +} + +/* { dg-final { scan-tree-dump-times "number of SCoPs: 3" 1 "graphite"} } */ +/* { dg-final { cleanup-tree-dump "graphite" } } */ diff --git a/gcc/testsuite/gcc.dg/graphite/scop-8.c b/gcc/testsuite/gcc.dg/graphite/scop-8.c new file mode 100644 index 0000000..491ad37 --- /dev/null +++ b/gcc/testsuite/gcc.dg/graphite/scop-8.c @@ -0,0 +1,33 @@ +/* { dg-options "-O2 -fgraphite -fdump-tree-graphite-all" } */ + +int bar (void); + +int toto() +{ + int i, j, k; + int a[100][100]; + int b[100]; + + for (i = 1; i < 100; i++) + { + for (j = 1; j < 100; j++) + b[i+j] = b[i+j-1] + 2; + + if (i * 2 == i + 8) + { + for (j = 1; j < 100; j++) + if (bar ()) + b[i+j] = b[i+j-1] + 2; + } + else + a[i][i] = 2; + + for (k = 1; k < 100; k++) + b[i+k] = b[i+k-5] + 2; + } + + return a[3][5] + b[1]; +} + +/* { dg-final { scan-tree-dump-times "number of SCoPs: 2" 1 "graphite"} } */ +/* { dg-final { cleanup-tree-dump "graphite" } } */ diff --git a/gcc/testsuite/gcc.dg/graphite/scop-9.c b/gcc/testsuite/gcc.dg/graphite/scop-9.c new file mode 100644 index 0000000..871b86b --- /dev/null +++ b/gcc/testsuite/gcc.dg/graphite/scop-9.c @@ -0,0 +1,29 @@ +/* { dg-options "-O2 -fgraphite -fdump-tree-graphite-all" } */ + +void bar (void); + +int toto() +{ + int i, j, k; + int a[100][100]; + int b[100]; + + for (i = 1; i < 100; i++) + { + for (j = 1; j < 100; j++) + b[i+j] = b[i+j-1] + 2; + + if (i * 2 == i + 8) + bar (); + else + a[i][i] = 2; + + for (k = 1; k < 100; k++) + b[i+k] = b[i+k-5] + 2; + } + + return a[3][5] + b[1]; +} + +/* { dg-final { scan-tree-dump-times "number of SCoPs: 2" 1 "graphite"} } */ +/* { dg-final { cleanup-tree-dump "graphite" } } */ diff --git a/gcc/testsuite/gcc.dg/graphite/scop-matmult.c b/gcc/testsuite/gcc.dg/graphite/scop-matmult.c new file mode 100644 index 0000000..61a5be1 --- /dev/null +++ b/gcc/testsuite/gcc.dg/graphite/scop-matmult.c @@ -0,0 +1,20 @@ +/* { dg-options "-O2 -fgraphite -fdump-tree-graphite-all" } */ + +float A[1000][1000], B[1000][1000], C[1000][1000]; + +/* Multiply two n x n matrices A and B and store the result in C. */ + +void matmult (int n) +{ + int i,j,k; + + for (i = 0; i < n; i++) + for (j = 0; j < n; j++) + for (k = 0; k < n; k++) + A[i][j] += B[i][k] * C[k][j]; +} + +/* This one fails because the number of iterations cannot be + determined anymore for the outermost loop. */ +/* { dg-final { scan-tree-dump-times "number of SCoPs: 1" 1 "graphite" { xfail *-*-* } } } */ +/* { dg-final { cleanup-tree-dump "graphite" } } */ diff --git a/gcc/testsuite/gfortran.dg/graphite/block-1.f90 b/gcc/testsuite/gfortran.dg/graphite/block-1.f90 new file mode 100644 index 0000000..124f06d --- /dev/null +++ b/gcc/testsuite/gfortran.dg/graphite/block-1.f90 @@ -0,0 +1,14 @@ +! { dg-options "-O2 -floop-block -fdump-tree-graphite-all" } + +subroutine matrix_multiply(a,b,c,n) + +real(8), dimension(n,n) :: a,b,c + +! The following code is disabled for the moment. +! c=0.d0 + +end subroutine matrix_multiply + +! { dg-final { scan-tree-dump-times "Loop blocked" 2 "graphite" { xfail *-*-* } } } +! { dg-final { cleanup-tree-dump "graphite" } } + diff --git a/gcc/testsuite/gfortran.dg/graphite/block-2.f b/gcc/testsuite/gfortran.dg/graphite/block-2.f new file mode 100644 index 0000000..af966ec --- /dev/null +++ b/gcc/testsuite/gfortran.dg/graphite/block-2.f @@ -0,0 +1,22 @@ +! { dg-options "-O2 -floop-block -fdump-tree-graphite-all" } + + SUBROUTINE MATRIX_MUL_UNROLLED (A, B, C, L, M, N) + DIMENSION A(L,M), B(M,N), C(L,N) + + DO 100 K = 1, N + DO 100 I = 1, L + C(I,K) = 0. +100 CONTINUE + DO 110 J = 1, M, 4 + DO 110 K = 1, N + DO 110 I = 1, L + C(I,K) = C(I,K) + A(I,J) * B(J,K) + $ + A(I,J+1) * B(J+1,K) + A(I,J+2) * B(J+2,K) + $ + A(I,J+3) * B(J+3,K) +110 CONTINUE + + RETURN + END + +! { dg-final { scan-tree-dump-times "Loop blocked" 3 "graphite" { xfail { "*-*-*" } } } } +! { dg-final { cleanup-tree-dump "graphite" } } diff --git a/gcc/testsuite/gfortran.dg/graphite/block-3.f90 b/gcc/testsuite/gfortran.dg/graphite/block-3.f90 new file mode 100644 index 0000000..c7809d3 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/graphite/block-3.f90 @@ -0,0 +1,19 @@ +! { dg-options "-O2 -floop-block -fdump-tree-graphite-all" } + +subroutine matrix_multiply(a,b,c,n) + +real(8), dimension(n,n) :: a,b,c + +do i = 1,n + do j = 1,n + do k = 1,n + c(j,i) = c(j,i) + a(k,i) * b(j,k) + enddo + enddo +enddo + +end subroutine matrix_multiply + +! { dg-final { scan-tree-dump-times "Loop blocked" 2 "graphite" { xfail *-*-* } } } +! { dg-final { cleanup-tree-dump "graphite" } } + diff --git a/gcc/testsuite/gfortran.dg/graphite/block-4.f90 b/gcc/testsuite/gfortran.dg/graphite/block-4.f90 new file mode 100644 index 0000000..586a777 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/graphite/block-4.f90 @@ -0,0 +1,22 @@ +! { dg-options "-O2 -floop-block -fdump-tree-graphite-all" } + +subroutine matrix_multiply(a,b,c,n) + +real(8), dimension(n,n) :: a,b,c + +! The following code is disabled for the moment. +! c=0.d0 + +do i = 1,n + do j = 1,n + do k = 1,n + c(j,i) = c(j,i) + a(k,i) * b(j,k) + enddo + enddo +enddo + +end subroutine matrix_multiply + +! { dg-final { scan-tree-dump-times "Loop blocked" 2 "graphite" { xfail *-*-* } } } +! { dg-final { cleanup-tree-dump "graphite" } } + diff --git a/gcc/testsuite/gfortran.dg/graphite/graphite.exp b/gcc/testsuite/gfortran.dg/graphite/graphite.exp new file mode 100644 index 0000000..a9fdb2c --- /dev/null +++ b/gcc/testsuite/gfortran.dg/graphite/graphite.exp @@ -0,0 +1,48 @@ +# Copyright (C) 2008 Free Software Foundation, Inc. + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# <http://www.gnu.org/licenses/>. + +# GCC testsuite that uses the `dg.exp' driver. + +# Load support procs. +load_lib gfortran-dg.exp + +if ![check_effective_target_fgraphite] { + return +} + +# The default action for a test is 'compile'. Save current default. +global dg-do-what-default +set save-dg-do-what-default ${dg-do-what-default} +set dg-do-what-default compile + +# If a testcase doesn't have special options, use these. +set DEFAULT_GRAPHITE_FLAGS "" + +# Initialize `dg'. +dg-init + +# Main loop. +gfortran-dg-runtest [lsort \ + [glob -nocomplain $srcdir/$subdir/*.\[fF\]{,90,95,03,08} ] ] $DEFAULT_GRAPHITE_FLAGS + +gfortran-dg-runtest [lsort \ + [glob -nocomplain $srcdir/$subdir/g77/*.\[fF\] ] ] $DEFAULT_GRAPHITE_FLAGS + +# Clean up. +set dg-do-what-default ${save-dg-do-what-default} + +# All done. +dg-finish diff --git a/gcc/testsuite/gfortran.dg/graphite/scop-1.f b/gcc/testsuite/gfortran.dg/graphite/scop-1.f new file mode 100644 index 0000000..a279aba --- /dev/null +++ b/gcc/testsuite/gfortran.dg/graphite/scop-1.f @@ -0,0 +1,13 @@ +C { dg-options "-O2 -fgraphite" } + + dimension p1(2),t(6,4),b1(2),b2(2),al1(2),al2(2),g1(2),g2(2) + save + if(nlin.eq.0) then + do 20 l=1,2 + ll=2*l + b2(l)=t(6-ll,ll-1)*t(6-ll,ll-1)+t(7-ll,ll-1)*t(7-ll,ll-1) + write(*,*) b2(l) + 20 continue + endif + end + diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index c9512b2..527d277 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -562,6 +562,15 @@ proc check_effective_target_tls_runtime {} { }] } +# Return 1 if compilation with -fgraphite is error-free for trivial +# code, 0 otherwise. + +proc check_effective_target_fgraphite {} { + return [check_no_compiler_messages fgraphite object { + void foo (void) { } + } "-fgraphite"] +} + # Return 1 if compilation with -fopenmp is error-free for trivial # code, 0 otherwise. diff --git a/gcc/timevar.def b/gcc/timevar.def index 94fa087..4351f17 100644 --- a/gcc/timevar.def +++ b/gcc/timevar.def @@ -125,6 +125,7 @@ DEFTIMEVAR (TV_TREE_LOOP_UNSWITCH , "tree loop unswitching") DEFTIMEVAR (TV_COMPLETE_UNROLL , "complete unrolling") DEFTIMEVAR (TV_TREE_PARALLELIZE_LOOPS, "tree parallelize loops") DEFTIMEVAR (TV_TREE_VECTORIZATION , "tree vectorization") +DEFTIMEVAR (TV_GRAPHITE_TRANSFORMS , "GRAPHITE loop transforms") DEFTIMEVAR (TV_TREE_LINEAR_TRANSFORM , "tree loop linear") DEFTIMEVAR (TV_TREE_LOOP_DISTRIBUTION, "tree loop distribution") DEFTIMEVAR (TV_CHECK_DATA_DEPS , "tree check data dependences") diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c index c485027..f63e676 100644 --- a/gcc/tree-cfg.c +++ b/gcc/tree-cfg.c @@ -6140,7 +6140,7 @@ print_loops (FILE *file, int verbosity) { basic_block bb; - bb = BASIC_BLOCK (NUM_FIXED_BLOCKS); + bb = ENTRY_BLOCK_PTR; if (bb && bb->loop_father) print_loop_and_siblings (file, bb->loop_father, 0, verbosity); } diff --git a/gcc/tree-chrec.c b/gcc/tree-chrec.c index da35952..26ae9b4 100644 --- a/gcc/tree-chrec.c +++ b/gcc/tree-chrec.c @@ -1407,3 +1407,26 @@ scev_direction (const_tree chrec) else return EV_DIR_GROWS; } + +/* Iterates over all the components of SCEV, and calls CBCK. */ + +void +for_each_scev_op (tree *scev, bool (*cbck) (tree *, void *), void *data) +{ + switch (TREE_CODE_LENGTH (TREE_CODE (*scev))) + { + case 3: + for_each_scev_op (&TREE_OPERAND (*scev, 2), cbck, data); + + case 2: + for_each_scev_op (&TREE_OPERAND (*scev, 1), cbck, data); + + case 1: + for_each_scev_op (&TREE_OPERAND (*scev, 0), cbck, data); + + default: + cbck (scev, data); + break; + } +} + diff --git a/gcc/tree-chrec.h b/gcc/tree-chrec.h index 9000fb7..d35dcd3 100644 --- a/gcc/tree-chrec.h +++ b/gcc/tree-chrec.h @@ -70,6 +70,7 @@ extern tree evolution_part_in_loop_num (tree, unsigned); extern tree hide_evolution_in_other_loops_than_loop (tree, unsigned); extern tree reset_evolution_in_loop (unsigned, tree, tree); extern tree chrec_merge (tree, tree); +extern void for_each_scev_op (tree *, bool (*) (tree *, void *), void *); /* Observers. */ extern bool eq_evolutions_p (const_tree, const_tree); diff --git a/gcc/tree-data-ref.c b/gcc/tree-data-ref.c index 85d0977..af1146c 100644 --- a/gcc/tree-data-ref.c +++ b/gcc/tree-data-ref.c @@ -1224,7 +1224,7 @@ disjoint_objects_p (tree a, tree b) /* Returns false if we can prove that data references A and B do not alias, true otherwise. */ -static bool +bool dr_may_alias_p (const struct data_reference *a, const struct data_reference *b) { const_tree addr_a = DR_BASE_ADDRESS (a); @@ -3303,6 +3303,21 @@ access_functions_are_affine_or_constant_p (const struct data_reference *a, return true; } +/* Return true if we can create an affine data-ref for OP in STMT. */ + +bool +stmt_simple_memref_p (struct loop *loop, gimple stmt, tree op) +{ + data_reference_p dr; + + dr = create_data_ref (loop, op, stmt, true); + if (!access_functions_are_affine_or_constant_p (dr, loop)) + return false; + + free_data_ref (dr); + return true; +} + /* Initializes an equation for an OMEGA problem using the information contained in the ACCESS_FUN. Returns true when the operation succeeded. @@ -4069,9 +4084,9 @@ get_references_in_stmt (gimple stmt, VEC (data_ref_loc, heap) **references) /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable reference, returns false, otherwise returns true. NEST is the outermost - loop of the loop nest in that the references should be analyzed. */ + loop of the loop nest in which the references should be analyzed. */ -static bool +bool find_data_references_in_stmt (struct loop *nest, gimple stmt, VEC (data_reference_p, heap) **datarefs) { @@ -4116,7 +4131,7 @@ find_data_references_in_stmt (struct loop *nest, gimple stmt, TODO: This function should be made smarter so that it can handle address arithmetic as if they were array accesses, etc. */ -static tree +tree find_data_references_in_loop (struct loop *loop, VEC (data_reference_p, heap) **datarefs) { @@ -4644,6 +4659,7 @@ create_rdg_edge_for_ddr (struct graph *rdg, ddr_p ddr) e->data = XNEW (struct rdg_edge); RDGE_LEVEL (e) = level; + RDGE_RELATION (e) = ddr; /* Determines the type of the data dependence. */ if (DR_IS_READ (dra) && DR_IS_READ (drb)) @@ -4676,6 +4692,7 @@ create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef) e = add_edge (rdg, idef, use); e->data = XNEW (struct rdg_edge); RDGE_TYPE (e) = flow_dd; + RDGE_RELATION (e) = NULL; } } @@ -4701,7 +4718,7 @@ create_rdg_edges (struct graph *rdg, VEC (ddr_p, heap) *ddrs) /* Build the vertices of the reduced dependence graph RDG. */ -static void +void create_rdg_vertices (struct graph *rdg, VEC (gimple, heap) *stmts) { int i, j; @@ -4826,6 +4843,21 @@ hash_stmt_vertex_del (void *e) scalar dependence. */ struct graph * +build_empty_rdg (int n_stmts) +{ + int nb_data_refs = 10; + struct graph *rdg = new_graph (n_stmts); + + rdg->indices = htab_create (nb_data_refs, hash_stmt_vertex_info, + eq_stmt_vertex_info, hash_stmt_vertex_del); + return rdg; +} + +/* Build the Reduced Dependence Graph (RDG) with one vertex per + statement of the loop nest, and one edge per data dependence or + scalar dependence. */ + +struct graph * build_rdg (struct loop *loop) { int nb_data_refs = 10; @@ -4842,21 +4874,23 @@ build_rdg (struct loop *loop) &dependence_relations); if (!known_dependences_p (dependence_relations)) - goto end_rdg; + { + free_dependence_relations (dependence_relations); + free_data_refs (datarefs); + VEC_free (gimple, heap, stmts); + + return rdg; + } stmts_from_loop (loop, &stmts); - rdg = new_graph (VEC_length (gimple, stmts)); + rdg = build_empty_rdg (VEC_length (gimple, stmts)); rdg->indices = htab_create (nb_data_refs, hash_stmt_vertex_info, eq_stmt_vertex_info, hash_stmt_vertex_del); create_rdg_vertices (rdg, stmts); create_rdg_edges (rdg, dependence_relations); - end_rdg: - free_dependence_relations (dependence_relations); - free_data_refs (datarefs); VEC_free (gimple, heap, stmts); - return rdg; } diff --git a/gcc/tree-data-ref.h b/gcc/tree-data-ref.h index 639a32b..ee514c5 100644 --- a/gcc/tree-data-ref.h +++ b/gcc/tree-data-ref.h @@ -96,6 +96,8 @@ struct dr_alias bitmap vops; }; +typedef struct scop *scop_p; + /* Each vector of the access matrix represents a linear access function for a subscript. First elements correspond to the leftmost indices, ie. for a[i][j] the first vector corresponds to @@ -170,16 +172,20 @@ struct data_reference /* Behavior of the memory reference in the innermost loop. */ struct innermost_loop_behavior innermost; - /* Decomposition to indices for alias analysis. */ + /* Subscripts of this data reference. */ struct indices indices; /* Alias information for the data reference. */ struct dr_alias alias; + /* The SCoP in which the data reference was analyzed. */ + scop_p scop; + /* Matrix representation for the data access functions. */ struct access_matrix *access_matrix; }; +#define DR_SCOP(DR) (DR)->scop #define DR_STMT(DR) (DR)->stmt #define DR_REF(DR) (DR)->ref #define DR_BASE_OBJECT(DR) (DR)->indices.base_object @@ -373,6 +379,8 @@ void dr_analyze_innermost (struct data_reference *); extern bool compute_data_dependences_for_loop (struct loop *, bool, VEC (data_reference_p, heap) **, VEC (ddr_p, heap) **); +extern tree find_data_references_in_loop (struct loop *, + VEC (data_reference_p, heap) **); extern void print_direction_vector (FILE *, lambda_vector, int); extern void print_dir_vectors (FILE *, VEC (lambda_vector, heap) *, int); extern void print_dist_vectors (FILE *, VEC (lambda_vector, heap) *, int); @@ -392,10 +400,18 @@ extern void free_dependence_relation (struct data_dependence_relation *); extern void free_dependence_relations (VEC (ddr_p, heap) *); extern void free_data_ref (data_reference_p); extern void free_data_refs (VEC (data_reference_p, heap) *); +extern bool find_data_references_in_stmt (struct loop *, gimple, + VEC (data_reference_p, heap) **); struct data_reference *create_data_ref (struct loop *, tree, gimple, bool); -bool find_loop_nest (struct loop *, VEC (loop_p, heap) **); -void compute_all_dependences (VEC (data_reference_p, heap) *, - VEC (ddr_p, heap) **, VEC (loop_p, heap) *, bool); +extern bool find_loop_nest (struct loop *, VEC (loop_p, heap) **); +extern void compute_all_dependences (VEC (data_reference_p, heap) *, + VEC (ddr_p, heap) **, VEC (loop_p, heap) *, + bool); + +extern void create_rdg_vertices (struct graph *, VEC (gimple, heap) *); +extern bool dr_may_alias_p (const struct data_reference *, + const struct data_reference *); +extern bool stmt_simple_memref_p (struct loop *, gimple, tree); /* Return true when the DDR contains two data references that have the same access functions. */ @@ -511,15 +527,21 @@ typedef struct rdg_edge /* Type of the dependence. */ enum rdg_dep_type type; - /* Levels of the dependence: the depth of the loops that - carry the dependence. */ + /* Levels of the dependence: the depth of the loops that carry the + dependence. */ unsigned level; + + /* Dependence relation between data dependences, NULL when one of + the vertices is a scalar. */ + ddr_p relation; } *rdg_edge_p; #define RDGE_TYPE(E) ((struct rdg_edge *) ((E)->data))->type #define RDGE_LEVEL(E) ((struct rdg_edge *) ((E)->data))->level +#define RDGE_RELATION(E) ((struct rdg_edge *) ((E)->data))->relation struct graph *build_rdg (struct loop *); +struct graph *build_empty_rdg (int); void free_rdg (struct graph *); /* Return the index of the variable VAR in the LOOP_NEST array. */ @@ -561,7 +583,21 @@ void lambda_collect_parameters (VEC (data_reference_p, heap) *, bool lambda_compute_access_matrices (VEC (data_reference_p, heap) *, VEC (tree, heap) *, int); -/* In tree-data-refs.c */ +/* In tree-data-ref.c */ void split_constant_offset (tree , tree *, tree *); +/* Strongly connected components of the reduced data dependence graph. */ + +typedef struct rdg_component +{ + int num; + VEC (int, heap) *vertices; +} *rdgc; + +DEF_VEC_P (rdgc); +DEF_VEC_ALLOC_P (rdgc, heap); + +DEF_VEC_P (bitmap); +DEF_VEC_ALLOC_P (bitmap, heap); + #endif /* GCC_TREE_DATA_REF_H */ diff --git a/gcc/tree-flow.h b/gcc/tree-flow.h index eec48d9..c75afb0 100644 --- a/gcc/tree-flow.h +++ b/gcc/tree-flow.h @@ -786,6 +786,8 @@ extern gimple get_single_def_stmt_with_phi (tree, gimple); /* In tree-phinodes.c */ extern void reserve_phi_args_for_new_edge (basic_block); +extern void add_phi_node_to_bb (gimple phi, basic_block bb); +extern gimple make_phi_node (tree var, int len); extern gimple create_phi_node (tree, basic_block); extern void add_phi_arg (gimple, tree, edge); extern void remove_phi_args (edge); @@ -1023,6 +1025,7 @@ bool gimple_duplicate_loop_to_header_edge (struct loop *, edge, int); struct loop *slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *, edge); void rename_variables_in_loop (struct loop *); +void rename_variables_in_bb (basic_block bb); struct loop *tree_ssa_loop_version (struct loop *, tree, basic_block *); tree expand_simple_operations (tree); @@ -1116,6 +1119,10 @@ bool sra_type_can_be_decomposed_p (tree); /* In tree-loop-linear.c */ extern void linear_transform_loops (void); +extern unsigned perfect_loop_nest_depth (struct loop *); + +/* In graphite.c */ +extern void graphite_transform_loops (void); /* In tree-data-ref.c */ extern void tree_check_data_deps (void); diff --git a/gcc/tree-into-ssa.c b/gcc/tree-into-ssa.c index f0c5590..928333b 100644 --- a/gcc/tree-into-ssa.c +++ b/gcc/tree-into-ssa.c @@ -130,12 +130,6 @@ static bitmap mem_syms_to_rename; released after we finish updating the SSA web. */ static bitmap names_to_release; -/* For each block, the PHI nodes that need to be rewritten are stored into - these vectors. */ -typedef VEC(gimple, heap) *gimple_vec; -DEF_VEC_P (gimple_vec); -DEF_VEC_ALLOC_P (gimple_vec, heap); - static VEC(gimple_vec, heap) *phis_to_rewrite; /* The bitmap of non-NULL elements of PHIS_TO_REWRITE. */ diff --git a/gcc/tree-loop-distribution.c b/gcc/tree-loop-distribution.c index d86391e..ed54cf3 100644 --- a/gcc/tree-loop-distribution.c +++ b/gcc/tree-loop-distribution.c @@ -710,17 +710,6 @@ rdg_flag_loop_exits (struct graph *rdg, bitmap loops, bitmap partition, } } -/* Strongly connected components of the reduced data dependence graph. */ - -typedef struct rdg_component -{ - int num; - VEC (int, heap) *vertices; -} *rdgc; - -DEF_VEC_P (rdgc); -DEF_VEC_ALLOC_P (rdgc, heap); - /* Flag all the nodes of RDG containing memory accesses that could potentially belong to arrays already accessed in the current PARTITION. */ @@ -864,9 +853,6 @@ rdg_build_components (struct graph *rdg, VEC (int, heap) *starting_vertices, BITMAP_FREE (saved_components); } -DEF_VEC_P (bitmap); -DEF_VEC_ALLOC_P (bitmap, heap); - /* Aggregate several components into a useful partition that is registered in the PARTITIONS vector. Partitions will be distributed in different loops. */ diff --git a/gcc/tree-loop-linear.c b/gcc/tree-loop-linear.c index 66d25ec..7e5f298 100644 --- a/gcc/tree-loop-linear.c +++ b/gcc/tree-loop-linear.c @@ -272,7 +272,7 @@ try_interchange_loops (lambda_trans_matrix trans, /* Return the number of nested loops in LOOP_NEST, or 0 if the loops are not perfectly nested. */ -static unsigned int +unsigned int perfect_loop_nest_depth (struct loop *loop_nest) { struct loop *temp; diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h index a3c9b26..7fe1510 100644 --- a/gcc/tree-pass.h +++ b/gcc/tree-pass.h @@ -322,6 +322,7 @@ extern struct gimple_opt_pass pass_iv_canon; extern struct gimple_opt_pass pass_scev_cprop; extern struct gimple_opt_pass pass_empty_loop; extern struct gimple_opt_pass pass_record_bounds; +extern struct gimple_opt_pass pass_graphite_transforms; extern struct gimple_opt_pass pass_if_conversion; extern struct gimple_opt_pass pass_loop_distribution; extern struct gimple_opt_pass pass_vectorize; diff --git a/gcc/tree-phinodes.c b/gcc/tree-phinodes.c index 511e84b..7fd5a8f 100644 --- a/gcc/tree-phinodes.c +++ b/gcc/tree-phinodes.c @@ -202,10 +202,9 @@ ideal_phi_node_len (int len) return new_len; } - /* Return a PHI node with LEN argument slots for variable VAR. */ -static gimple +gimple make_phi_node (tree var, int len) { gimple phi; @@ -348,15 +347,12 @@ reserve_phi_args_for_new_edge (basic_block bb) } } +/* Adds PHI to BB. */ -/* Create a new PHI node for variable VAR at basic block BB. */ - -gimple -create_phi_node (tree var, basic_block bb) +void +add_phi_node_to_bb (gimple phi, basic_block bb) { gimple_stmt_iterator gsi; - gimple phi = make_phi_node (var, EDGE_COUNT (bb->preds)); - /* Add the new PHI node to the list of PHI nodes for block BB. */ if (phi_nodes (bb) == NULL) set_phi_nodes (bb, gimple_seq_alloc ()); @@ -367,6 +363,16 @@ create_phi_node (tree var, basic_block bb) /* Associate BB to the PHI node. */ gimple_set_bb (phi, bb); +} + +/* Create a new PHI node for variable VAR at basic block BB. */ + +gimple +create_phi_node (tree var, basic_block bb) +{ + gimple phi = make_phi_node (var, EDGE_COUNT (bb->preds)); + + add_phi_node_to_bb (phi, bb); return phi; } diff --git a/gcc/tree-ssa-loop-ivopts.c b/gcc/tree-ssa-loop-ivopts.c index 8fbb27a..341f41c 100644 --- a/gcc/tree-ssa-loop-ivopts.c +++ b/gcc/tree-ssa-loop-ivopts.c @@ -5021,18 +5021,38 @@ create_new_ivs (struct ivopts_data *data, struct iv_ca *set) } } +/* Returns the phi-node in BB with result RESULT. */ + +static gimple +get_phi_with_result (basic_block bb, tree result) +{ + gimple_stmt_iterator i = gsi_start_phis (bb); + + for (; !gsi_end_p (i); gsi_next (&i)) + if (gimple_phi_result (gsi_stmt (i)) == result) + return gsi_stmt (i); + + gcc_unreachable (); + return NULL; +} + + /* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME is true, remove also the ssa name defined by the statement. */ static void remove_statement (gimple stmt, bool including_defined_name) { - gimple_stmt_iterator bsi = gsi_for_stmt (stmt); - if (gimple_code (stmt) == GIMPLE_PHI) - remove_phi_node (&bsi, including_defined_name); + { + gimple bb_phi = get_phi_with_result (gimple_bb (stmt), + gimple_phi_result (stmt)); + gimple_stmt_iterator bsi = gsi_for_stmt (bb_phi); + remove_phi_node (&bsi, including_defined_name); + } else { + gimple_stmt_iterator bsi = gsi_for_stmt (stmt); gsi_remove (&bsi, true); release_defs (stmt); } diff --git a/gcc/tree-ssa-loop.c b/gcc/tree-ssa-loop.c index ec3782a..51fc07c 100644 --- a/gcc/tree-ssa-loop.c +++ b/gcc/tree-ssa-loop.c @@ -287,6 +287,49 @@ struct gimple_opt_pass pass_linear_transform = } }; +/* GRAPHITE optimizations. */ + +static unsigned int +graphite_transforms (void) +{ + if (!current_loops) + return 0; + + graphite_transform_loops (); + + return 0; +} + +static bool +gate_graphite_transforms (void) +{ + /* Enable -fgraphite pass if any one of the graphite optimization flags + is turned on. */ + if (flag_loop_block || flag_loop_interchange || flag_loop_strip_mine) + flag_graphite = 1; + + return flag_graphite != 0; +} + +struct gimple_opt_pass pass_graphite_transforms = +{ + { + GIMPLE_PASS, + "graphite", /* name */ + gate_graphite_transforms, /* gate */ + graphite_transforms, /* execute */ + NULL, /* sub */ + NULL, /* next */ + 0, /* static_pass_number */ + TV_GRAPHITE_TRANSFORMS, /* tv_id */ + PROP_cfg | PROP_ssa, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_verify_loops /* todo_flags_finish */ + } +}; + /* Check the correctness of the data dependence analyzers. */ static unsigned int diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c index 474860a..40d1302 100644 --- a/gcc/tree-vectorizer.c +++ b/gcc/tree-vectorizer.c @@ -201,7 +201,7 @@ rename_use_op (use_operand_p op_p) /* Renames the variables in basic block BB. */ -static void +void rename_variables_in_bb (basic_block bb) { gimple_stmt_iterator gsi; |